% Bibliography entries for four works by Shashaani, Vahdat, and co-authors.
% Conventions used below: one field per line, lowercase field names, bare DOIs
% (no resolver URL), unquoted month macros, double-hyphen page ranges, and
% ASCII-only field values so the file works under classic BibTeX and Biber.

% Journal article in Optimization and Engineering.
% NOTE(review): the volume value 5 coincides with the month number (May);
% confirm it against the publisher record before relying on it.
@article{shashaani_vahdat_2022,
  author    = {Shashaani, Sara and Vahdat, Kimia},
  title     = {Improved Feature Selection with Simulation Optimization},
  journal   = {Optimization and Engineering},
  publisher = {Springer Science and Business Media LLC},
  volume    = {5},
  year      = {2022},
  month     = may,
  issn      = {1573-2924},
  doi       = {10.1007/s11081-022-09726-3},
}

% Conference paper published in Springer Proceedings in Business and Economics.
@inproceedings{mao_vahdat_shashaani_swann_2022,
  author    = {Mao, Lingchao and Vahdat, Kimia and Shashaani, Sara and Swann, Julie L.},
  title     = {Personalized Predictions for Unplanned Urinary Tract Infection Hospitalizations with Hierarchical Clustering},
  booktitle = {Springer Proceedings in Business and Economics},
  publisher = {Springer International Publishing},
  year      = {2022},
  pages     = {453--465},
  doi       = {10.1007/978-3-030-75166-1_34},
  abstract  = {Urinary Tract Infection (UTI) is the one of the most frequent and preventable healthcare-associated infections in the US and an important cause of morbidity and excess healthcare costs. This study aims to predict the 30-day risk of a beneficiary for unplanned hospitalization for UTI. Using 2008--12 Medicare fee-for-service claims and several public sources, we extracted 784 features, including patient demographics, clinical conditions, healthcare utilization, provider quality metrics, and community safety indicators. To address the challenge of high heterogeneity and imbalance in data, we propose a hierarchical clustering approach that leverages existing knowledge and data-driven algorithms to partition the population into groups of similar risk, followed by building a LASSO-Logistic Regression (LLR) model for each group. Our prediction models are trained on 237,675 2011 Medicare beneficiaries and tested on 230,042 2012 Medicare beneficiaries. We compare the clustering-based approach to a baseline LLR model using five performance metrics, including the area under the curve (AUC), the True Positive Rate (TPR), and the False Positive Rate (FPR). Results show that the hierarchical clustering approach achieves more accurate and precise predictions (AUC 0.72) than the benchmark model and offers more granular feature importance insights for each patient group.},
}

% Winter Simulation Conference 2021 paper.
@inproceedings{vahdat_shashaani_2021,
  author    = {Vahdat, Kimia and Shashaani, Sara},
  title     = {Non-Parametric Uncertainty Bias and Variance Estimation via Nested Bootstrapping and Influence Functions},
  booktitle = {2021 Winter Simulation Conference ({WSC})},
  publisher = {IEEE},
  year      = {2021},
  month     = dec,
  doi       = {10.1109/wsc52266.2021.9715420},
  abstract  = {In using limited datasets, modeling the uncertainty via non-parametric methods arguably provides more robust estimators of the unknown value of interest. We propose a novel nested bootstrap method that accounts for the uncertainty from various sources (input data, model, and estimation) more robustly. The nested bootstrap is particularly apt to the more nuanced conditional settings in constructing prediction rules but is easily generalizable. We utilize influence functions to estimate the bias due to input uncertainty and devise a procedure to correct the estimators' bias in a simulation optimization routine. Implementations in the context of feature selection via simulation optimization on two simulated datasets prove a significant improvement in robustness and accuracy.},
}

% Winter Simulation Conference 2020 paper. The export typed this as an article
% with the conference name in the journal field; it is a proceedings paper, so
% the venue is recorded in booktitle, matching the 2021 WSC entry above.
@inproceedings{vahdat_shashaani_2020,
  author    = {Vahdat, Kimia and Shashaani, Sara},
  title     = {Simulation Optimization Based Feature Selection, a Study on Data-Driven Optimization with Input Uncertainty},
  booktitle = {2020 Winter Simulation Conference ({WSC})},
  year      = {2020},
  pages     = {2149--2160},
  issn      = {0891-7736},
  doi       = {10.1109/WSC48552.2020.9383862},
  abstract  = {In machine learning, removing uninformative or redundant features from a dataset can significantly improve the construction, analysis, and interpretation of the prediction models, especially when the set of collected features is extensive. We approach this challenge with simulation optimization over a high dimensional binary space in place of the classic greedy search in forward or backward selection or regularization methods. We use genetic algorithms to generate scenarios, bootstrapping to estimate the contribution of the intrinsic and extrinsic noise and sampling strategies to expedite the procedure. By including the uncertainty from the input data in the measurement of the estimators' variability, the new framework obtains robustness and efficiency. Our results on a simulated dataset exhibit improvement over state-of-the-art accuracy, interpretability, and reliability. Our proposed framework provides insight for leveraging Monte Carlo methodology in probabilistic data-driven modeling and analysis.},
}