% Journal-article records, one field per line.
% Conventions used below:
%   - month holds the standard three-letter macro, unbraced, so styles can localise it.
%   - Acronyms in titles are brace-protected so sentence-casing styles keep them upper-case.
%   - issn and doi hold bare identifiers (no brackets, quotes, or resolver prefix).
%   - abstractNote is a non-standard field; standard styles ignore it and it is kept for reference only.

@article{cai_lu_west_mehrotra_huang_2022,
  author       = {Cai, Hengrui and Lu, Wenbin and West, Rachel Marceau and Mehrotra, Devan V. and Huang, Lingkang},
  title        = {{CAPITAL}: Optimal subgroup identification via constrained policy tree search},
  journal      = {Statistics in Medicine},
  year         = {2022},
  month        = jul,
  issn         = {1097-0258},
  doi          = {10.1002/sim.9507},
  abstractNote = {Personalized medicine, a paradigm of medicine tailored to a patient's characteristics, is an increasingly attractive field in health care. An important goal of personalized medicine is to identify a subgroup of patients, based on baseline covariates, that benefits more from the targeted treatment than other comparative treatments. Most of the current subgroup identification methods only focus on obtaining a subgroup with an enhanced treatment effect without paying attention to subgroup size. Yet, a clinically meaningful subgroup learning approach should identify the maximum number of patients who can benefit from the better treatment. In this article, we present an optimal subgroup selection rule (SSR) that maximizes the number of selected patients, and in the meantime, achieves the pre-specified clinically meaningful mean outcome, such as the average treatment effect. We derive two equivalent theoretical forms of the optimal SSR based on the contrast function that describes the treatment-covariates interaction in the outcome. We further propose a constrained policy tree search algorithm (CAPITAL) to find the optimal SSR within the interpretable decision tree class. The proposed method is flexible to handle multiple constraints that penalize the inclusion of patients with negative treatment effects, and to address time to event data using the restricted mean survival time as the clinically interesting mean outcome. Extensive simulations, comparison studies, and real data applications are conducted to demonstrate the validity and utility of our method.},
}

@article{gates_cai_hu_han_griffith_burgener_hyland_zanno_2022,
  author       = {Gates, Terry A. and Cai, Hengrui and Hu, Yifei and Han, Xu and Griffith, Emily and Burgener, Landon and Hyland, Ethan and Zanno, Lindsay E.},
  title        = {Estimating ancient biogeographic patterns with statistical model discrimination},
  journal      = {Anatomical Record: Advances in Integrative Anatomy and Evolutionary Biology},
  volume       = {9},
  year         = {2022},
  month        = sep,
  issn         = {1932-8494},
  doi          = {10.1002/ar.25067},
  url          = {https://doi.org/10.1002/ar.25067},
  abstractNote = {The geographic ranges in which species live is a function of many factors underlying ecological and evolutionary contingencies. Observing the geographic range of an individual species provides valuable information about these historical contingencies for a lineage, determining the distribution of many distantly related species in tandem provides information about large-scale constraints on evolutionary and ecological processes generally. We present a linear regression method that allows for the discrimination of various hypothetical biogeographical models for determining which landscape distributional pattern best matches data from the fossil record. The linear regression models used in the discrimination rely on geodesic distances between sampling sites (typically geologic formations) as the independent variable and three possible dependent variables: Dice/Sorensen similarity; Euclidean distance; and phylogenetic community dissimilarity. Both the similarity and distance measures are useful for full-community analyses without evolutionary information, whereas the phylogenetic community dissimilarity requires phylogenetic data. Importantly, the discrimination method uses linear regression residual error to provide relative measures of support for each biogeographical model tested, not absolute answers or p-values. When applied to a recently published dataset of Campanian pollen, we find evidence that supports two plant communities separated by a transitional zone of unknown size. A similar case study of ceratopsid dinosaurs using phylogenetic community dissimilarity provided no evidence of a biogeographical pattern, but this case study suffers from a lack of data to accurately discriminate and/or too much temporal mixing. Future research aiming to reconstruct the distribution of organisms across a landscape has a statistical-based method for determining what biogeographic distributional model best matches the available data.},
}

@article{cai_song_lu_2021,
  author       = {Cai, Hengrui and Song, Rui and Lu, Wenbin},
  title        = {{GEAR}: On optimal decision making with auxiliary data},
  journal      = {Stat},
  volume       = {10},
  number       = {1},
  year         = {2021},
  month        = dec,
  issn         = {2049-1573},
  doi          = {10.1002/sta4.399},
  abstractNote = {Personalized optimal decision making, finding the optimal decision rule (ODR) based on individual characteristics, has attracted increasing attention recently in many fields, such as education, economics, and medicine. Current ODR methods usually require the primary outcome of interest in samples for assessing treatment effects, namely, the experimental sample. However, in many studies, treatments may have a long-term effect, and as such, the primary outcome of interest cannot be observed in the experimental sample due to the limited duration of experiments, which makes the estimation of ODR impossible. This paper is inspired to address this challenge by making use of an auxiliary sample to facilitate the estimation of ODR in the experimental sample. We propose an auGmented inverse propensity weighted Experimental and Auxiliary sample-based decision Rule (GEAR) by maximizing the augmented inverse propensity weighted value estimator over a class of decision rules using the experimental sample, with the primary outcome being imputed based on the auxiliary sample. The asymptotic properties of the proposed GEAR estimators and their associated value estimators are established. Simulation studies are conducted to demonstrate its empirical validity with a real AIDS application.},
}