% Publication list, cleaned up from a reference-manager export.
%
% Conventions used in this file:
%   * Citation keys are unchanged from the export so existing \cite commands
%     keep working. The key ending in "ozaltn_2021" keeps its historical
%     spelling; only the misspelled surname in its author field was corrected.
%   * Page ranges use the BibTeX double hyphen, months use the unquoted
%     three-letter macros, and ISSNs are stored bare.
%   * Ampersands in journal names are escaped because these fields are typeset.
%   * abstractNote is not a standard BibTeX/biblatex field; it is kept as plain
%     text for tools that read it, so percent signs inside it are intentionally
%     left unescaped. NOTE(review): escape them if a style ever prints it.
%   * The 2016 proceedings entry previously carried a placeholder volume of 0;
%     it was dropped because it is not a real volume number.
%   * NOTE(review): several journal entries have no pages/article number and
%     the proceedings entries use initials only; fill these in from the
%     publisher records when available.

@article{li_agor_ozaltin_2024,
  author       = {Li, Ruoting and Agor, Joseph K. and Ozaltin, Osman Y.},
  title        = {Temporal pattern mining for knowledge discovery in the early prediction of septic shock},
  journal      = {Pattern Recognition},
  volume       = {151},
  year         = {2024},
  month        = jul,
  issn         = {1873-5142},
  doi          = {10.1016/j.patcog.2024.110436},
  abstractNote = {Temporal pattern mining can be employed to detect patterns and trends in a patient's health status as it evolves over time. However, these methods often produce an overwhelming number of patterns, impeding knowledge discovery and practical implementation in acute care settings. To address this, we propose a framework that focuses on identifying a concise set of relevant temporal patterns and static variables from electronic health records for the early prediction of septic shock. Sepsis is caused by an adverse immune response to infection that triggers widespread inflammation throughout the body, which can progress to septic shock and ultimately result in death if not treated promptly. The analysis of health state patterns in sepsis patients over time offers the potential to predict septic shock prior to its onset, enabling proactive healthcare interventions. Our framework incorporates a temporal pattern mining method and four feature selection techniques. We discover that selecting features based on a model-based wrapper approach yields the highest prediction performance among these techniques. On the other hand, the use of information value identifies more multi-state patterns with abnormal health states, providing healthcare providers with valuable indicators of patient deterioration.},
}

@article{paramita_agor_mayorga_ivy_miller_ozaltin_2023,
  author       = {Paramita, Ni Luh Putu S. P. and Agor, Joseph K. and Mayorga, Maria E. and Ivy, Julie S. and Miller, Kristen E. and Ozaltin, Osman Y.},
  title        = {Quantifying association and disparities between diabetes complications and {COVID-19} outcomes: A retrospective study using electronic health records},
  journal      = {{PLOS ONE}},
  volume       = {18},
  number       = {9},
  year         = {2023},
  month        = sep,
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0286815},
  abstractNote = {Background: Despite established relationships between diabetic status and an increased risk for COVID-19 severe outcomes, there is a limited number of studies examining the relationships between diabetes complications and COVID-19-related risks. We use the Adapted Diabetes Complications Severity Index to define seven diabetes complications. We aim to understand the risk for COVID-19 infection, hospitalization, mortality, and longer length of stay of diabetes patients with complications. Methods: We perform a retrospective case-control study using Electronic Health Records (EHRs) to measure differences in the risks for COVID-19 severe outcomes amongst those with diabetes complications. Using multiple logistic regression, we calculate adjusted odds ratios (OR) for COVID-19 infection, hospitalization, and in-hospital mortality of the case group (patients with diabetes complications) compared to a control group (patients without diabetes). We also calculate adjusted mean difference in length of stay between the case and control groups using multiple linear regression. Results: Adjusting demographics and comorbidities, diabetes patients with renal complications have the highest odds for COVID-19 infection (OR = 1.85, 95% CI = [1.71, 1.99]) while those with metabolic complications have the highest odds for COVID-19 hospitalization (OR = 5.58, 95% CI = [3.54, 8.77]) and in-hospital mortality (OR = 2.41, 95% CI = [1.35, 4.31]). The adjusted mean difference (MD) of hospital length-of-stay for diabetes patients, especially those with cardiovascular (MD = 0.94, 95% CI = [0.17, 1.71]) or peripheral vascular (MD = 1.72, 95% CI = [0.84, 2.60]) complications, is significantly higher than non-diabetes patients. African American patients have higher odds for COVID-19 infection (OR = 1.79, 95% CI = [1.66, 1.92]) and hospitalization (OR = 1.62, 95% CI = [1.39, 1.90]) than White patients in the general diabetes population. However, White diabetes patients have higher odds for COVID-19 in-hospital mortality. Hispanic patients have higher odds for COVID-19 infection (OR = 2.86, 95% CI = [2.42, 3.38]) and shorter mean length of hospital stay than non-Hispanic patients in the general diabetes population. Although there is no significant difference in the odds for COVID-19 hospitalization and in-hospital mortality between Hispanic and non-Hispanic patients in the general diabetes population, Hispanic patients have higher odds for COVID-19 hospitalization (OR = 1.83, 95% CI = [1.16, 2.89]) and in-hospital mortality (OR = 3.69, 95% CI = [1.18, 11.50]) in the diabetes population with no complications. Conclusions: The presence of diabetes complications increases the risks of COVID-19 infection, hospitalization, and worse health outcomes with respect to in-hospital mortality and longer hospital length of stay. We show the presence of health disparities in COVID-19 outcomes across demographic groups in our diabetes population. One such disparity is that African American and Hispanic diabetes patients have higher odds of COVID-19 infection than White and Non-Hispanic diabetes patients, respectively. Furthermore, Hispanic patients might have less access to the hospital care compared to non-Hispanic patients when longer hospitalizations are needed due to their diabetes complications. Finally, diabetes complications, which are generally associated with worse COVID-19 outcomes, might be predominantly determining the COVID-19 severity in those infected patients resulting in less demographic differences in COVID-19 hospitalization and in-hospital mortality.},
}

@article{agor_paramita_ozaltn_2021,
  author       = {Agor, Joseph K. and Paramita, Ni Luh Putu S. P. and Ozaltin, Osman Y.},
  title        = {Prediction of Sepsis Related Mortality: An Optimization Approach},
  journal      = {{IEEE} Journal of Biomedical and Health Informatics},
  volume       = {25},
  number       = {11},
  pages        = {4207--4216},
  year         = {2021},
  month        = nov,
  publisher    = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn         = {2168-2208},
  doi          = {10.1109/JBHI.2021.3096470},
  url          = {https://doi.org/10.1109/JBHI.2021.3096470},
  abstractNote = {Sepsis is a condition that progresses quickly and is a major cause of mortality in hospitalized patients. Data-driven diagnostic and therapeutic interventions are essential to ensure early diagnosis and appropriate care. The Sequential Organ Failure Assessment (SOFA) score is widely utilized in clinical practice to assess septic patients for organ dysfunction. The SOFA score uses points between 0 and 4 to quantify the level of dysfunction in six organ systems. These points are determined based on expert opinion and not informed by data, thus their usefulness can vary among different medical institutions depending on the targeted use. In this study, we propose multiple strategies to adjust the SOFA score using mixed-integer programming to improve the in-hospital mortality prediction of septic patients based on Electronic Health Records (EHRs). We use the same variables and threshold values of the original SOFA score in each strategy. Thus, the proposed approach takes advantage of optimization and data analysis while taking into account the medical expertise. Our results demonstrate a statistically significant improvement ($p< 0.001$) in the prediction of in-hospital mortality among patients susceptible to sepsis when implementing our proposed strategies. Area under the receiver operator curve (AUC) and accuracy values of 0.8928 and 0.8904 are achieved by optimizing the point values of the SOFA score.},
}

@article{agor_ozaltin_ivy_capan_arnold_romero_2019,
  author       = {Agor, Joseph and Ozaltin, Osman Y. and Ivy, Julie S. and Capan, Muge and Arnold, Ryan and Romero, Santiago},
  title        = {The value of missing information in severity of illness score development},
  journal      = {Journal of Biomedical Informatics},
  volume       = {97},
  year         = {2019},
  month        = sep,
  issn         = {1532-0480},
  doi          = {10.1016/j.jbi.2019.103255},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85069932839&partnerID=MN8TOARS},
  abstractNote = {We aim to investigate the hypothesis that using information about which variables are missing along with appropriate imputation improves the performance of severity of illness scoring systems used to predict critical patient outcomes. We quantify the impact of missing and imputed variables on the performance of prediction models used in the development of a sepsis-related severity of illness scoring system. Electronic health records (EHR) data were compiled from Christiana Care Health System (CCHS) on 119,968 adult patients hospitalized between July 2013 and December 2015. Two outcomes of interest were considered for prediction: (1) first transfer to intensive care unit (ICU) and (2) in-hospital mortality. Five different prediction models were employed. Indicators were utilized in these prediction models to identify when variables were missing and imputed. We observed statistically significant gains in prediction performance when moving from models that did not indicate missing information to those that did. Moreover, this increase was higher in models that use summary variables as predictors compared to those that use all variables. When developing prediction models using longitudinal EHR data, researchers should explore the incorporation of indicators for missing variables along with appropriate imputation.},
}

@article{agor_ozaltin_2019,
  author       = {Agor, Joseph and Ozaltin, Osman Y.},
  title        = {Feature selection for classification models via bilevel optimization},
  journal      = {Computers \& Operations Research},
  volume       = {106},
  pages        = {156--168},
  year         = {2019},
  month        = jun,
  issn         = {1873-765X},
  doi          = {10.1016/j.cor.2018.05.005},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85047058360&partnerID=MN8TOARS},
  abstractNote = {Selecting model features that would ensure adequate out-of-sample classification is difficult in real life applications of classification often because there is a large number of candidate features. We propose a bilevel programming approach to feature selection problem for classification and develop a novel genetic algorithm as a solution approach. We implement the proposed framework in three different case studies where we classify influenza strains based on antigenic variety, distinguish between good and bad quality colposcopy images, and identify splice junction sites in genetic sequences. As a benchmark for the proposed genetic algorithm, we use a derivative-free optimization method to solve the bilevel feature selection problems in these case studies. The computational experiments show that the proposed bilevel framework improves the overall classification performance while selecting the most important features for the model.},
}

@article{agor_ozaltin_2018,
  author       = {Agor, Joseph K. and Ozaltin, Osman Y.},
  title        = {Models for predicting the evolution of influenza to inform vaccine strain selection},
  journal      = {Human Vaccines \& Immunotherapeutics},
  volume       = {14},
  number       = {3},
  pages        = {678--683},
  year         = {2018},
  issn         = {2164-554X},
  doi          = {10.1080/21645515.2017.1423152},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85044212421&partnerID=MN8TOARS},
  abstractNote = {Influenza vaccine composition is reviewed before every flu season because influenza viruses constantly evolve through antigenic changes. To inform vaccine updates, laboratories that contribute to the World Health Organization Global Influenza Surveillance and Response System monitor the antigenic phenotypes of circulating viruses all year round. Vaccine strains are selected in anticipation of the upcoming influenza season to allow adequate time for production. A mismatch between vaccine strains and predominant strains in the flu season can significantly reduce vaccine effectiveness. Models for predicting the evolution of influenza based on the relationship of genetic mutations and antigenic characteristics of circulating viruses may inform vaccine strain selection decisions. We review the literature on state-of-the-art tools and prediction methodologies utilized in modeling the evolution of influenza to inform vaccine strain selection. We then discuss areas that are open for improvement and need further research.},
}

@inproceedings{agor_mckenzie_mayorga_ozaltin_parikh_huddleston_2017,
  author       = {Agor, J. and McKenzie, K. and Mayorga, M. E. and Ozaltin, Osman and Parikh, R. S. and Huddleston, J.},
  title        = {Simulating triage of patients into an internal medicine department to validate the use of an optimization-based workload score},
  booktitle    = {2017 Winter Simulation Conference ({WSC})},
  pages        = {2881--2892},
  year         = {2017},
  doi          = {10.1109/wsc.2017.8248011},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85044522866&partnerID=MN8TOARS},
  abstractNote = {This study describes a simulation model that was used to evaluate a proposed workload score. The score was designed to assist in triaging patients into the hospital services of the Division of Hospital Internal Medicine at Mayo Clinic in an effort to more equitably balance workload among the division's provider teams (or services). The first part of this study was the development of a score, using Delphi surveys, conjoint analysis, and optimization methods, that accurately represents provider workload. A simulation model was then built to test the score using historical patient data. Preliminary simulation results reported the proportion of time that each provider team spent working at or above ``maximum utilization,'' as defined by Mayo Clinic experts. The model yielded a 12.1% decrease (on average) in the proportion of time provider teams spent at or above maximum utilization, while simultaneously displaying a more balanced workload across provider teams.},
}

@inproceedings{agor_mckenzie_ozaltin_mayorga_parikh_huddleston_2016,
  author       = {Agor, J. and McKenzie, K. and Ozaltin, Osman and Mayorga, M. and Parikh, R. S. and Huddleston, J.},
  title        = {Simulation of triaging patients into an internal medicine department to validate the use of an optimization based workload score},
  booktitle    = {2016 Winter Simulation Conference ({WSC})},
  pages        = {3708--3709},
  year         = {2016},
  doi          = {10.1109/wsc.2016.7822411},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85014275794&partnerID=MN8TOARS},
  abstractNote = {This extended abstract provides an overview of the development of a simulation model to be used in the assistance of triaging patients into the Hospital Internal Medicine (HIM) Department at The Mayo Clinic in Rochester, MN in an effort to balance workload among the department services. The main contribution of this work is the development of a score that measures provider workload more accurately. Delphi surveys, conjoint analysis, and optimization methods were used in the creation of this score and it is believed to better represent provider workload. Preliminary results were based on the proportion of time of a month that each service was at or above ``maximum utilization'', which is how workload is currently viewed at an instance. A simulation model built in SIMIO 8 yielded a 12.1% decrease in the proportion of time that a service was at or above their ``max utilization'' on average, while also seeing a decrease in the average difference among these proportions by 8.3% (better balance among all services).},
}