@comment{
  Publication database, one entry per work, newest first.

  Conventions used in this file:
    - Citation keys are ASCII only: author-surname keys are "surnames_year";
      entries whose export carried no author list are keyed by a slug of the
      title ("words_joined_by_underscores_year").
    - month uses the unquoted three-letter BibTeX macros (jan ... dec).
    - Page ranges use "--".
    - abstractNote and internal-note are non-standard fields ignored by
      bibliography styles; they hold abstracts and reviewer annotations.

  NOTE(review): every title-slug entry is missing its author field (the
  original export had none). Their title fields were reconstructed from the
  lowercased title text that the export had placed in the citation key, so
  the capitalisation is a best guess -- confirm against the publisher record.
}

@article{shashaani_surer_plumlee_guikema_2024,
  author        = {Shashaani, Sara and Surer, Ozge and Plumlee, Matthew and Guikema, Seth},
  title         = {Building Trees for Probabilistic Prediction via Scoring Rules},
  journal       = {Technometrics},
  volume        = {5},
  year          = {2024},
  month         = may,
  issn          = {1537-2723},
  doi           = {10.1080/00401706.2024.2343062},
  internal-note = {volume equals the month number; likely an early-access export artifact -- verify against the publisher record},
  abstractNote  = {Decision trees built with data remain in widespread use for nonparametric prediction. Predicting probability distributions is preferred over point predictions when uncertainty plays a prominent role in analysis and decision-making. We study modifying a tree to produce nonparametric predictive distributions. We find the standard method for building trees may not result in good predictive distributions and propose changing the splitting criteria for trees to one based on proper scoring rules. Analysis of both simulated data and several real datasets demonstrates that using these new splitting criteria results in trees with improved predictive properties considering the entire predictive distribution.},
}

@article{ha_shashaani_2024,
  author        = {Ha, Yunsoo and Shashaani, Sara},
  title         = {Iteration complexity and finite-time efficiency of adaptive sampling trust-region methods for stochastic derivative-free optimization},
  journal       = {IISE Transactions},
  volume        = {4},
  year          = {2024},
  month         = apr,
  issn          = {2472-5862},
  doi           = {10.1080/24725854.2024.2335513},
  url           = {https://doi.org/10.1080/24725854.2024.2335513},
  internal-note = {volume equals the month number; likely an early-access export artifact -- verify against the publisher record},
  abstractNote  = {Adaptive sampling with interpolation-based trust regions or ASTRO-DF is a successful algorithm for stochastic derivative-free optimization with an easy-to-understand-and-implement concept that guarantees almost sure convergence to a first-order critical point. To reduce its dependence on the problem dimension, we present local models with diagonal Hessians constructed on interpolation points based on a coordinate basis. We also leverage the interpolation points in a direct search manner whenever possible to boost ASTRO-DF's performance in a finite time. We prove that the algorithm has a canonical iteration complexity of $\mathcal{O}(\epsilon^{-2})$ almost surely, which is the first guarantee of its kind without placing assumptions on the quality of function estimates or model quality or independence between them. Numerical experimentation reveals the computational advantage of ASTRO-DF with coordinate direct search due to saving and better steps in the early iterations of the search.},
}

@article{alizadeh_vahdat_shashaani_swann_ozaltin_2024,
  author       = {Alizadeh, Nasrin and Vahdat, Kimia and Shashaani, Sara and Swann, Julie L. and Ozaltin, Osman Y.},
  editor       = {Villavicencio, Guillermo Pineda},
  title        = {Risk score models for urinary tract infection hospitalization},
  journal      = {PLOS ONE},
  volume       = {19},
  number       = {6},
  year         = {2024},
  month        = jun,
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0290215},
  url          = {https://doi.org/10.1371/journal.pone.0290215},
  abstractNote = {Annually, urinary tract infections (UTIs) affect over a hundred million people worldwide. Early detection of high-risk individuals can help prevent hospitalization for UTIs, which imposes significant economic and social burden on patients and caregivers. We present two methods to generate risk score models for UTI hospitalization. We utilize a sample of patients from the insurance claims data provided by the Centers for Medicare and Medicaid Services to develop and validate the proposed methods. Our dataset encompasses a wide range of features, such as demographics, medical history, and healthcare utilization of the patients along with provider quality metrics and community-based metrics. The proposed methods scale and round the coefficients of an underlying logistic regression model to create scoring tables. We present computational experiments to evaluate the prediction performance of both models. We also discuss different features of these models with respect to their impact on interpretability. Our findings emphasize the effectiveness of risk score models as practical tools for identifying high-risk patients and provide a quantitative assessment of the significance of various risk factors in UTI hospitalizations such as admission to ICU in the last 3 months, cognitive disorders and low inpatient, outpatient and carrier costs in the last 6 months.},
}

@inproceedings{adaptive_robust_genetic_algorithms_with_ranking_and_selection_2023,
  title     = {Adaptive Robust Genetic Algorithms with Ranking and Selection},
  booktitle = {2023 Winter Simulation Conference},
  year      = {2023},
}

@article{eckman_henderson_shashaani_2023,
  author        = {Eckman, David J. and Henderson, Shane G. and Shashaani, Sara},
  title         = {Diagnostic Tools for Evaluating and Comparing Simulation-Optimization Algorithms},
  journal       = {INFORMS Journal on Computing},
  volume        = {1},
  year          = {2023},
  month         = jan,
  issn          = {1526-5528},
  doi           = {10.1287/ijoc.2022.1261},
  url           = {https://doi.org/10.1287/ijoc.2022.1261},
  internal-note = {volume equals the month number; likely an early-access export artifact -- verify against the publisher record},
  abstractNote  = {Simulation optimization involves optimizing some objective function that can only be estimated via stochastic simulation. Many important problems can be profitably viewed within this framework. Whereas many solvers—implementations of simulation-optimization algorithms—exist or are in development, comparisons among solvers are not standardized and are often limited in scope. Such comparisons help advance solver development, clarify the relative performance of solvers, and identify classes of problems that defy efficient solution, among many other uses. We develop performance measures and plots, and estimators thereof, to evaluate and compare solvers and diagnose their strengths and weaknesses on a testbed of simulation-optimization problems. We explain the need for two-level simulation in this context and provide supporting convergence theory. We also describe how to use bootstrapping to obtain error estimates for the estimators.},
}

@misc{iteration_complexity_and_finite-time_efficiency_of_adaptive_sampling_trust-region_methods_for_stochastic_derivative-free_optimization_2023,
  title         = {Iteration complexity and finite-time efficiency of adaptive sampling trust-region methods for stochastic derivative-free optimization},
  year          = {2023},
  eprint        = {2305.10650},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.10650},
}

@incollection{shashaani_vahdat_2023,
  author       = {Shashaani, Sara and Vahdat, Kimia},
  title        = {{Monte Carlo} Based Machine Learning},
  booktitle    = {Lecture Notes in Operations Research},
  year         = {2023},
  doi          = {10.1007/978-3-031-24907-5_75},
  url          = {http://dx.doi.org/10.1007/978-3-031-24907-5_75},
  abstractNote = {Even though simulation is mainly used for computer models with inexact outputs, there are direct benefits in viewing results from samples of an existing dataset as replications of a stochastic simulation. We propose building Machine Learning prediction models with the Monte Carlo approach. This allows more specific accountability for the underlying distribution of the data and the impact of uncertainty in the input data in terms of bias. We opt for nonparametric input uncertainty with multi-level bootstrapping to make the framework applicable to large datasets. The cost of Monte Carlo-based model construction is controllable with optimal designs of nested bootstrapping and integrating variance reduction strategies. The benefit is substantial in providing more robustness in the predictions. Implementation in a data-driven simulation optimization problem further indicates the superiority of the proposed method compared to the state-of-the-art methods.},
}

@misc{on_common-random-numbers_and_the_complexity_of_adaptive_sampling_trust-region_methods_2023,
  title        = {On Common-Random-Numbers and the Complexity of Adaptive Sampling Trust-Region Methods},
  howpublished = {Optimization Online},
  year         = {2023},
  month        = aug,
  url          = {https://optimization-online.org/wp-content/uploads/2023/08/astrodf-complexity-online-version.pdf},
}

@article{houser_shashaani_harrysson_jeon_2023,
  author        = {Houser, Ethan and Shashaani, Sara and Harrysson, Ola and Jeon, Yongseok},
  title         = {Predicting additive manufacturing defects with robust feature selection for imbalanced data},
  journal       = {IISE Transactions},
  volume        = {5},
  year          = {2023},
  month         = may,
  issn          = {2472-5862},
  doi           = {10.1080/24725854.2023.2207633},
  url           = {https://doi.org/10.1080/24725854.2023.2207633},
  internal-note = {volume equals the month number; likely an early-access export artifact -- verify against the publisher record},
  abstractNote  = {Promptly predicting defects during an additive manufacturing process using only copious log data provides many advantages, albeit with computational limitations. We focus on predicting defects during electron beam melting with the black box nature of the manufacturing machine. For an accurate prediction of defects, which are rare (<2%), we extract temporal information to track abnormalities and formulate a feature selection algorithm that maximizes the expected value of a cost-sensitive accuracy. Correct identification of features responsible for the defects increases predictive power and informs manufacturers of potential corrective/preventive actions for process improvement. We solve the feature selection through resampling strategies integrated with ensemble procedures to handle data uncertainty and imbalance. Exploiting data uncertainty in our search leads to finding robust features with consistent predictive power. Our proposed methodology shows a 43% improvement in predicting defects (recall) without losing precision. Beyond additive manufacturing, this methodology has general application for rare-event prediction and imbalanced datasets.},
}

@misc{alizadeh_vahdat_shashaani_swann_ozaltin_2023,
  author = {Alizadeh, Nasrin and Vahdat, Kimia and Shashaani, Sara and Swann, Julie L. and Ozaltin, Osman},
  title  = {Risk Score Models for Unplanned Urinary Tract Infection Hospitalization},
  year   = {2023},
  month  = aug,
  doi    = {10.1101/2023.08.06.23293723},
  url    = {https://doi.org/10.1101/2023.08.06.23293723},
}

@misc{robust_output_analysis_with_monte-carlo_methodology_2023,
  title = {Robust Output Analysis with {Monte-Carlo} Methodology},
  year  = {2023},
}

@article{eckman_henderson_shashaani_2023b,
  author        = {Eckman, David J. and Henderson, Shane G. and Shashaani, Sara},
  title         = {{SimOpt}: A Testbed for Simulation-Optimization Experiments},
  journal       = {INFORMS Journal on Computing},
  volume        = {3},
  year          = {2023},
  month         = mar,
  issn          = {1526-5528},
  doi           = {10.1287/ijoc.2023.1273},
  url           = {https://doi.org/10.1287/ijoc.2023.1273},
  internal-note = {key was a duplicate of eckman_henderson_shashaani_2023 and has been suffixed with b; volume equals the month number, likely an early-access export artifact -- verify against the publisher record},
  abstractNote  = {This paper introduces a major redesign of SimOpt, a testbed of simulation-optimization (SO) problems and solvers. The testbed promotes the empirical evaluation and comparison of solvers and aims to accelerate their development. Relative to previous versions of SimOpt, the redesign ports the code to an object-oriented architecture in Python; uses an implementation of the MRG32k3a random number generator that supports streams, substreams, and subsubstreams; supports the automated use of common random numbers for ease and efficiency; includes a powerful suite of plotting tools for visualizing experiment results; uses bootstrapping to obtain error estimates; accommodates the use of data farming to explore simulation models and optimization solvers as their input parameters vary; and provides a graphical user interface. The SimOpt source code is available on a GitHub repository under a permissive open-source license and as a Python package.},
}

@inproceedings{simulation_optimization_with_stochastic_constraints_2023,
  title     = {Simulation Optimization with Stochastic Constraints},
  booktitle = {2023 Winter Simulation Conference},
  year      = {2023},
}

@misc{jeon_pasupathy_shashaani_2023,
  author        = {Jeon, Yongseok and Pasupathy, Raghu and Shashaani, Sara},
  title         = {Statistical Inference on Simulation Output: Batching as an Inferential Device},
  year          = {2023},
  eprint        = {2311.04159},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2311.04159},
  url           = {https://arxiv.org/abs/2311.04159},
  abstractNote  = {We present {batching} as an omnibus device for statistical inference on simulation output. We consider the classical context of a simulationist performing statistical inference on an estimator $\theta_n$ (of an unknown fixed quantity $\theta$) using only the output data $(Y_1,Y_2,\ldots,Y_n)$ gathered from a simulation. By \emph{statistical inference}, we mean approximating the sampling distribution of the error $\theta_n-\theta$ toward: (A) estimating an ``assessment'' functional $\psi$, e.g., bias, variance, or quantile; or (B) constructing a $(1-\alpha)$-confidence region on $\theta$. We argue that batching is a remarkably simple and effective inference device that is especially suited for handling dependent output data such as what one frequently encounters in simulation contexts. We demonstrate that if the number of batches and the extent of their overlap are chosen correctly, batching retains bootstrap's attractive theoretical properties of {strong consistency} and {higher-order accuracy}. For constructing confidence regions, we characterize two limiting distributions associated with a Studentized statistic. Our extensive numerical experience confirms theoretical insight, especially about the effects of batch size and batch overlap.},
}

@inproceedings{stochastic_constraints_how_feasible_is_feasible_2023,
  title     = {Stochastic Constraints: How Feasible Is Feasible?},
  booktitle = {2023 Winter Simulation Conference},
  year      = {2023},
  month     = dec,
}

@inproceedings{stratification_with_concomitant_variables_in_stochastic_trust-region_optimization_2023,
  title     = {Stratification with Concomitant Variables in Stochastic Trust-Region Optimization},
  booktitle = {2023 Winter Simulation Conference},
  year      = {2023},
}

@inproceedings{towards_greener_stochastic_derivative-free_optimization_with_trust_regions_and_adaptive_sampling_2023,
  title     = {Towards Greener Stochastic Derivative-Free Optimization with Trust Regions and Adaptive Sampling},
  booktitle = {2023 Winter Simulation Conference},
  year      = {2023},
}

@article{jain_shashaani_byon_2023,
  author       = {Jain, Pranav and Shashaani, Sara and Byon, Eunshin},
  title        = {Wake effect parameter calibration with large-scale field operational data using stochastic optimization},
  journal      = {Applied Energy},
  volume       = {347},
  year         = {2023},
  month        = oct,
  issn         = {1872-9118},
  doi          = {10.1016/j.apenergy.2023.121426},
  url          = {https://doi.org/10.1016/j.apenergy.2023.121426},
  abstractNote = {This study aims to show the application of stochastic optimization for efficient and robust parameter calibration of engineering wake models. Standard values of the wake effect parameters are generally used to predict power using engineering wake models, but some recent studies have shown that these values do not result in accurate prediction. The proposed approach estimates the wake effect parameters using operational data available from actual wind farms to minimize the prediction error of the wake model by using trust-region optimization. To further improve computational efficiency, we implement stratified adaptive sampling. We employ decision trees to stratify the data and propose two ways of adapting the sampling budget to the constructed strata: budget allocation with dynamic weights and fixed weights. We extend our analysis to determine the functional relationship between the turbulence intensity and wake decay coefficient. Our experiments suggest that wake parameters or a functional relationship between turbulence intensity and wake decay coefficient may need adjustments (from assumed standard values) for a particular wind farm using its operational data to characterize the wake effect better.},
}

@article{shashaani_vahdat_2022,
  author        = {Shashaani, Sara and Vahdat, Kimia},
  title         = {Improved feature selection with simulation optimization},
  journal       = {Optimization and Engineering},
  volume        = {5},
  year          = {2022},
  month         = may,
  publisher     = {Springer Science and Business Media LLC},
  issn          = {1573-2924},
  doi           = {10.1007/s11081-022-09726-3},
  url           = {http://dx.doi.org/10.1007/s11081-022-09726-3},
  internal-note = {volume equals the month number; likely an early-access export artifact -- verify against the publisher record},
}

@incollection{mao_vahdat_shashaani_swann_2022,
  author       = {Mao, Lingchao and Vahdat, Kimia and Shashaani, Sara and Swann, Julie L.},
  title        = {Personalized Predictions for Unplanned Urinary Tract Infection Hospitalizations with Hierarchical Clustering},
  booktitle    = {Springer Proceedings in Business and Economics},
  pages        = {453--465},
  year         = {2022},
  publisher    = {Springer International Publishing},
  doi          = {10.1007/978-3-030-75166-1_34},
  url          = {http://dx.doi.org/10.1007/978-3-030-75166-1_34},
  abstractNote = {Urinary Tract Infection (UTI) is the one of the most frequent and preventable healthcare-associated infections in the US and an important cause of morbidity and excess healthcare costs. This study aims to predict the 30-day risk of a beneficiary for unplanned hospitalization for UTI. Using 2008–12 Medicare fee-for-service claims and several public sources, we extracted 784 features, including patient demographics, clinical conditions, healthcare utilization, provider quality metrics, and community safety indicators. To address the challenge of high heterogeneity and imbalance in data, we propose a hierarchical clustering approach that leverages existing knowledge and data-driven algorithms to partition the population into groups of similar risk, followed by building a LASSO-Logistic Regression (LLR) model for each group. Our prediction models are trained on 237,675 2011 Medicare beneficiaries and tested on 230,042 2012 Medicare beneficiaries. We compare the clustering-based approach to a baseline LLR model using five performance metrics, including the area under the curve (AUC), the True Positive Rate (TPR), and the False Positive Rate (FPR). Results show that the hierarchical clustering approach achieves more accurate and precise predictions (AUC 0.72) than the benchmark model and offers more granular feature importance insights for each patient group.},
}

@inproceedings{jain_shashaani_byon_2022,
  author       = {Jain, Pranav and Shashaani, Sara and Byon, Eunshin},
  title        = {Robust Simulation Optimization with Stratification},
  booktitle    = {2022 Winter Simulation Conference (WSC)},
  pages        = {2246--2257},
  year         = {2022},
  publisher    = {IEEE},
  issn         = {0891-7736},
  doi          = {10.1109/wsc57314.2022.10015515},
  url          = {http://dx.doi.org/10.1109/wsc57314.2022.10015515},
  abstractNote = {Stratification has been widely used as a variance reduction technique when estimating a simulation output, whereby the input variates are generated following a stratified sampling rule from previously determined strata. This study shows that an adaptive sampling class of simulation optimization solvers called ASTRO-DF could become more robust with stratification, S-ASTRO-DF. For a simulation optimization algorithm, we discuss how to monitor the robustness in terms of bias and variance of the outcome and introduce several metrics to compute and compare the robustness of solvers. We find that while stratified sampling improves the algorithm's performance, its robustness is sensitive to the stratification structure. In particular, as the number of strata increases, the stratified sampling-based algorithms may become less effective.},
}

@inproceedings{ha_shashaani_tran-dinh_2021,
  author       = {Ha, Yunsoo and Shashaani, Sara and Tran-Dinh, Quoc},
  title        = {Improved Complexity Of Trust-Region Optimization For Zeroth-Order Stochastic Oracles with Adaptive Sampling},
  booktitle    = {2021 Winter Simulation Conference (WSC)},
  year         = {2021},
  month        = dec,
  publisher    = {IEEE},
  doi          = {10.1109/wsc52266.2021.9715529},
  url          = {http://dx.doi.org/10.1109/wsc52266.2021.9715529},
  abstractNote = {We present an enhanced stochastic trust-region optimization with adaptive sampling (ASTRO-DF) in which optimizing an iteratively constructed local model on estimates of objective values with stochastic sample size guides the search. The noticeable feature is that the underdetermined quadratic model with a diagonal Hessian requires fewer function evaluations, which is particularly useful at high dimensions. This paper describes the enhanced algorithm in detail. It gives several theoretical results, including iteration complexity, and renders almost sure convergence guarantees. We report in our numerical experience the finite-time superiority of the enhanced ASTRO-DF over state-of-the-art using the SimOpt library.},
}

@inproceedings{vahdat_shashaani_2021,
  author       = {Vahdat, Kimia and Shashaani, Sara},
  title        = {Non-Parametric Uncertainty Bias and Variance Estimation via Nested Bootstrapping and Influence Functions},
  booktitle    = {2021 Winter Simulation Conference (WSC)},
  year         = {2021},
  month        = dec,
  publisher    = {IEEE},
  doi          = {10.1109/wsc52266.2021.9715420},
  url          = {http://dx.doi.org/10.1109/wsc52266.2021.9715420},
  abstractNote = {In using limited datasets, modeling the uncertainty via non-parametric methods arguably provides more robust estimators of the unknown value of interest. We propose a novel nested bootstrap method that accounts for the uncertainty from various sources (input data, model, and estimation) more robustly. The nested bootstrap is particularly apt to the more nuanced conditional settings in constructing prediction rules but is easily generalizable. We utilize influence functions to estimate the bias due to input uncertainty and devise a procedure to correct the estimators' bias in a simulation optimization routine. Implementations in the context of feature selection via simulation optimization on two simulated datasets prove a significant improvement in robustness and accuracy.},
}

@inproceedings{parameter_calibration_with_stratified_adaptive_stochastic_trust-region_optimization_2021,
  title     = {Parameter Calibration with Stratified Adaptive Stochastic Trust-Region Optimization},
  booktitle = {INFORMS Workshop on Quality, Statistics, and Reliability},
  year      = {2021},
  month     = oct,
}

@inproceedings{jain_shashaani_byon_2021,
  author    = {Jain, Pranav and Shashaani, Sara and Byon, Eunshin},
  title     = {Wake Effect Calibration in Wind Power Systems with Adaptive Sampling Based Optimization},
  booktitle = {IISE Annual Conference Proceedings},
  pages     = {43--48},
  year      = {2021},
  month     = may,
  url       = {https://www.proquest.com/scholarly-journals/wake-effect-calibration-wind-power-systems-with/docview/2560890092},
}

@inproceedings{manda_gopalswamy_shashaani_uzsoy_2020,
  author       = {Manda, Atchyuta Bharadwaj and Gopalswamy, Karthick and Shashaani, Sara and Uzsoy, Reha},
  title        = {A Simulation Optimization Approach for Managing Product Transitions in Multistage Production Lines},
  booktitle    = {2020 Winter Simulation Conference (WSC)},
  pages        = {1730--1741},
  year         = {2020},
  publisher    = {IEEE},
  issn         = {0891-7736},
  doi          = {10.1109/wsc48552.2020.9384036},
  url          = {http://dx.doi.org/10.1109/wsc48552.2020.9384036},
  abstractNote = {We explore the problem of managing releases into a multistage production system transitioning from producing a mature product in high volume to a new one whose production process is initially unreliable but improves as experience is accumulated. We use simulation optimization to develop solutions and examine the impact of learning at a single machine on the rest of the system. This work lays the foundation for studying product transitions using realistic fab scale simulation models.},
}

@inproceedings{vahdat_shashaani_2020,
  author       = {Vahdat, Kimia and Shashaani, Sara},
  title        = {Simulation Optimization Based Feature Selection, a Study on Data-Driven Optimization with Input Uncertainty},
  booktitle    = {2020 Winter Simulation Conference (WSC)},
  pages        = {2149--2160},
  year         = {2020},
  issn         = {0891-7736},
  doi          = {10.1109/WSC48552.2020.9383862},
  abstractNote = {In machine learning, removing uninformative or redundant features from a dataset can significantly improve the construction, analysis, and interpretation of the prediction models, especially when the set of collected features is extensive. We approach this challenge with simulation optimization over a high dimensional binary space in place of the classic greedy search in forward or backward selection or regularization methods. We use genetic algorithms to generate scenarios, bootstrapping to estimate the contribution of the intrinsic and extrinsic noise and sampling strategies to expedite the procedure. By including the uncertainty from the input data in the measurement of the estimators’ variability, the new framework obtains robustness and efficiency. Our results on a simulated dataset exhibit improvement over state-of-the-art accuracy, interpretability, and reliability. Our proposed framework provides insight for leveraging Monte Carlo methodology in probabilistic data-driven modeling and analysis.},
}

@misc{simulation_optimization_library_2020,
  title = {Simulation Optimization Library},
  year  = {2020},
  url   = {https://github.com/simopt-admin/simopt},
}

@inproceedings{traffic_signal_control_simulation_and_optimization_2020,
  title     = {Traffic Signal Control Simulation and Optimization},
  booktitle = {Winter Simulation Conference},
  year      = {2020},
}

@inproceedings{astro_for_derivative-based_stochastic_optimization_algorithm_description_and_numerical_experiments_2019,
  title        = {{ASTRO} for Derivative-Based Stochastic Optimization: Algorithm Description \& Numerical Experiments},
  booktitle    = {2019 Winter Simulation Conference (WSC)},
  year         = {2019},
  month        = dec,
  doi          = {10.1109/wsc40007.2019.9004904},
  url          = {http://dx.doi.org/10.1109/wsc40007.2019.9004904},
  abstractNote = {Adaptive Sampling Trust-Region Optimization (ASTRO) is a class of derivative-based stochastic trust-region algorithms developed to solve stochastic unconstrained optimization problems where the objective function and its gradient are observable only through a noisy oracle or using a large dataset. ASTRO incorporates adaptively sampled function and gradient estimates within a trust-region framework to generate iterates that are guaranteed to converge almost surely to a first-order or a second-order critical point of the objective function. Efficiency in ASTRO stems from two key aspects: (i) adaptive sampling to ensure that the objective function and its gradient are sampled only to the extent needed, so that small sample sizes result when iterates are far from a critical point and large sample sizes result when iterates are near a critical point; and (ii) quasi-Newton Hessian updates using BFGS. We describe ASTRO in detail, give a sense of its theoretical guarantees, and report extensive numerical results.},
}

@article{chen_beekman_guikema_shashaani_2019,
  author       = {Chen, Thomas Ying-Jeh and Beekman, Jared Anthony and Guikema, Seth David and Shashaani, Sara},
  title        = {Statistical Modeling in Absence of System Specific Data: Exploratory Empirical Analysis for Prediction of Water Main Breaks},
  journal      = {Journal of Infrastructure Systems},
  volume       = {25},
  number       = {2},
  year         = {2019},
  month        = jun,
  issn         = {1943-555X},
  doi          = {10.1061/(ASCE)IS.1943-555X.0000482},
  url          = {http://dx.doi.org/10.1061/(asce)is.1943-555x.0000482},
  abstractNote = {The replacement of deteriorating distribution pipes is an important process for water utilities. It helps reduce capital spending on water main breaks and improves customer satisfaction. To...},
}

@article{astro-df_a_class_of_adaptive_sampling_trust-region_algorithms_for_derivative-free_stochastic_optimization_2018,
  title        = {{ASTRO-DF}: A Class of Adaptive Sampling Trust-Region Algorithms for Derivative-Free Stochastic Optimization},
  journal      = {SIAM Journal on Optimization},
  year         = {2018},
  month        = jan,
  doi          = {10.1137/15m1042425},
  url          = {http://dx.doi.org/10.1137/15m1042425},
  abstractNote = {We consider unconstrained optimization problems where only "stochastic" estimates of the objective function are observable as replicates from a Monte Carlo oracle. The Monte Carlo oracle is assumed to provide no direct observations of the function gradient. We present ASTRO-DF --- a class of derivative-free trust-region algorithms, where a stochastic local interpolation model is constructed, optimized, and updated iteratively. Function estimation and model construction within ASTRO-DF is adaptive in the sense that the extent of Monte Carlo sampling is determined by continuously monitoring and balancing metrics of sampling error (or variance) and structural error (or model bias) within ASTRO-DF. Such balancing of errors is designed to ensure that Monte Carlo effort within ASTRO-DF is sensitive to algorithm trajectory, sampling more whenever an iterate is inferred to be close to a critical point and less when far away. We demonstrate the almost-sure convergence of ASTRO-DF's iterates to a first-order critical point when using linear or quadratic stochastic interpolation models. The question of using more complicated models, e.g., regression or stochastic kriging, in combination with adaptive sampling is worth further investigation and will benefit from the methods of proof presented here. We speculate that ASTRO-DF's iterates achieve the canonical Monte Carlo convergence rate, although a proof remains elusive.},
}

@article{shashaani_guikema_zhai_pino_quiring_2018,
  author       = {Shashaani, Sara and Guikema, Seth D. and Zhai, Chengwei and Pino, Jordan V. and Quiring, Steven M.},
  title        = {Multi-Stage Prediction for Zero-Inflated Hurricane Induced Power Outages},
  journal      = {IEEE Access},
  volume       = {6},
  pages        = {62432--62449},
  year         = {2018},
  publisher    = {Institute of Electrical and Electronics Engineers (IEEE)},
  doi          = {10.1109/ACCESS.2018.2877078},
  url          = {https://doi.org/10.1109/ACCESS.2018.2877078},
  abstractNote = {Predicting hurricane power outages facilitates disaster response decision-making by electric power utilities as well as other organizations of critical importance to society. Predictive models can be built on the basis of statistical learning methods that use data from past hurricanes to capture the effects of climatological, geographical, and environmental variables on the power systems. When the dataset is largely zero-inflated, as power outage datasets often are, classical data mining methods that are based on a relatively balanced number of zeros and non-zeros may fail. General accuracy evaluation metrics also become misleading because they focus on the prevalent zero-valued responses in the dataset. We develop a new framework that operates in three stages by separating the prediction of whether or not power outages will occur from the number of customers without power. In the first stage, the zero-inflation problem is handled via a series of binary classifications. In the second stage, the severity of outages is predicted leveraging clustering techniques. In the final stage, regression models estimate the number of customers without power. We introduce a weighted accuracy metric and investigate its benefits over mean absolute error. We validate the models with data from hurricanes Dennis (2005), Ivan (2004), and Katrina (2005), and then predict power outages associated with hurricanes Matthew (2016) and Irma (2017) in the central Gulf region. The results demonstrate improvement over the traditional approaches in the context of power outage prediction.},
}

@inproceedings{astro-df_adaptive_sampling_trust-region_optimization_algorithms_heuristics_and_numerical_experience_2016,
  title        = {{ASTRO-DF}: Adaptive Sampling Trust-Region Optimization Algorithms, Heuristics, and Numerical Experience},
  booktitle    = {2016 Winter Simulation Conference (WSC)},
  year         = {2016},
  month        = dec,
  doi          = {10.1109/wsc.2016.7822121},
  url          = {http://dx.doi.org/10.1109/wsc.2016.7822121},
  abstractNote = {ASTRO-DF is a class of adaptive sampling algorithms for solving simulation optimization problems in which only estimates of the objective function are available by executing a Monte Carlo simulation. ASTRO-DF algorithms are iterative trust-region algorithms, where a local model is repeatedly constructed and optimized as iterates evolve through the search space. The ASTRO-DF class of algorithms is derivative-free in the sense that it does not rely on direct observations of the function derivatives. A salient feature of ASTRO-DF is the incorporation of adaptive sampling and replication to keep the model error and the trust-region radius in lock-step, to ensure efficiency. ASTRO-DF has been demonstrated to generate iterates that globally converge to a first-order critical point with probability one. In this paper, we describe and list ASTRO-DF, and discuss key heuristics that ensure good finite-time performance. We report our numerical experience with ASTRO-DF on test problems in low to moderate dimensions.},
}

@article{a_simulation_optimization_approach_to_epidemic_forecasting_2013,
  title        = {A Simulation Optimization Approach to Epidemic Forecasting},
  journal      = {PLOS ONE},
  year         = {2013},
  month        = jun,
  doi          = {10.1371/journal.pone.0067164},
  url          = {http://dx.doi.org/10.1371/journal.pone.0067164},
  abstractNote = {Reliable forecasts of influenza can aid in the control of both seasonal and pandemic outbreaks. We introduce a simulation optimization (SIMOP) approach for forecasting the influenza epidemic curve. This study represents the final step of a project aimed at using a combination of simulation, classification, statistical and optimization techniques to forecast the epidemic curve and infer underlying model parameters during an influenza outbreak. The SIMOP procedure combines an individual-based model and the Nelder-Mead simplex optimization method. The method is used to forecast epidemics simulated over synthetic social networks representing Montgomery County in Virginia, Miami, Seattle and surrounding metropolitan regions. The results are presented for the first four weeks. Depending on the synthetic network, the peak time could be predicted within a 95% CI as early as seven weeks before the actual peak. The peak infected and total infected were also accurately forecasted for Montgomery County in Virginia within the forecasting period. Forecasting of the epidemic curve for both seasonal and pandemic influenza outbreaks is a complex problem, however this is a preliminary step and the results suggest that more can be achieved in this area.},
}

@article{single-machine_batch_scheduling_minimizing_weighted_flow_times_and_delivery_costs_2011,
  title        = {Single-Machine Batch Scheduling Minimizing Weighted Flow Times and Delivery Costs},
  journal      = {Applied Mathematical Modelling},
  year         = {2011},
  month        = jan,
  doi          = {10.1016/j.apm.2010.07.023},
  url          = {http://dx.doi.org/10.1016/j.apm.2010.07.023},
  abstractNote = {This paper addresses scheduling a set of jobs on a single machine for delivery in batches to one customer or to another machine for further processing. The problem is a natural extension of that of minimising the sum of weighted flow times, considering the possibility of delivering jobs in batches and introducing batch delivery costs. The scheduling objective adopted is that of minimising the sum of weighted flow times and delivery costs. The extended problem arises in the context of coordination between machine scheduling and a distribution system in a supply chain network. Structural properties of the problem are investigated and used to devise a branch-and-bound solution method. For the special case, when the maximum number of batches is fixed, the branch-and-bound scheme provided shows significant improvements over an existing dynamic-programming algorithm.},
}