% Bibliography database: journal articles on optimal treatment regimes,
% high-dimensional inference and reinforcement learning.
%
% Conventions used in this file:
%   * one field per line, lowercase field names, brace-delimited values;
%   * page ranges use "--"; months use the unquoted three-letter macros;
%   * journals that occur more than once are defined once as string macros;
%   * case-sensitive tokens in titles are protected with braces;
%   * abstracts are kept in the non-standard (ignored) field abstractNote and
%     use ASCII/LaTeX markup only, so the file is safe for 8-bit BibTeX.

@string{AOS   = {Annals of Statistics}}
@string{JASA  = {Journal of the American Statistical Association}}
@string{JRSSB = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}}

@article{gao_shi_song_2023,
  author       = {Gao, Yuhe and Shi, Chengchun and Song, Rui},
  title        = {Deep Spectral {Q-Learning} with Application to Mobile Health},
  journal      = {Stat},
  volume       = {12},
  number       = {1},
  year         = {2023},
  month        = jan,
  issn         = {2049-1573},
  doi          = {10.1002/sta4.564},
  abstractNote = {Dynamic treatment regimes assign personalized treatments to
    patients sequentially over time based on their baseline information and
    time-varying covariates. In mobile health applications, these covariates
    are typically collected at different frequencies over a long time horizon.
    In this paper, we propose a deep spectral Q-learning algorithm, which
    integrates principal component analysis (PCA) with deep Q-learning to
    handle the mixed frequency data. In theory, we prove that the mean return
    under the estimated optimal policy converges to that under the optimal one
    and establish its rate of convergence. The usefulness of our proposal is
    further illustrated via simulations and an application to a diabetes
    dataset.},
}

@article{shi_song_lu_2021,
  author       = {Shi, Chengchun and Song, Rui and Lu, Wenbin},
  title        = {Concordance and Value Information Criteria for Optimal Treatment Decision},
  journal      = AOS,
  volume       = {49},
  number       = {1},
  pages        = {49--75},
  year         = {2021},
  month        = feb,
  issn         = {0090-5364},
  doi          = {10.1214/19-AOS1908},
  abstractNote = {Personalized medicine is a medical procedure that receives
    considerable scientific and commercial attention. The goal of personalized
    medicine is to assign the optimal treatment regime for each individual
    patient, according to his/her personal prognostic information. When there
    are a large number of pretreatment variables, it is crucial to identify
    those important variables that are necessary for treatment decision making.
    In this paper, we study two information criteria: the concordance and value
    information criteria, for variable selection in optimal treatment decision
    making. We consider both fixed-$p$ and high dimensional settings, and show
    our information criteria are consistent in model/tuning parameter
    selection. We further apply our information criteria to four estimation
    approaches, including robust learning, concordance-assisted learning,
    penalized A-learning and sparse concordance-assisted learning, and
    demonstrate the empirical performance of our methods by simulations.},
}

@article{shi_song_lu_li_2021,
  author       = {Shi, Chengchun and Song, Rui and Lu, Wenbin and Li, Runze},
  title        = {Statistical Inference for High-Dimensional Models via Recursive Online-Score Estimation},
  journal      = JASA,
  volume       = {116},
  number       = {535},
  pages        = {1307--1318},
  year         = {2021},
  month        = jul,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2019.1710154},
  abstractNote = {In this article, we develop a new estimation and valid
    inference method for single or low-dimensional regression coefficients in
    high-dimensional generalized linear models. The number of the predictors is
    allowed to grow exponentially fast with respect to the sample size. The
    proposed estimator is computed by solving a score function. We recursively
    conduct model selection to reduce the dimensionality from high to a
    moderate scale and construct the score equation based on the selected
    variables. The proposed confidence interval (CI) achieves valid coverage
    without assuming consistency of the model selection procedure. When the
    selection consistency is achieved, we show the length of the proposed CI is
    asymptotically the same as the CI of the ``oracle'' method which works as
    well as if the support of the control variables were known. In addition, we
    prove the proposed CI is asymptotically narrower than the CIs constructed
    based on the desparsified Lasso estimator and the decorrelated score
    statistic. Simulation studies and real data applications are presented to
    back up our theoretical findings. Supplementary materials for this article
    are available online.},
}

% NOTE(review): the next entry has volume 12 with month dec and no number or
% pages, which looks like an online-first record (12 may be the month, not the
% volume). Confirm volume/number/pages/year against the publisher's record.
@article{shi_zhang_lu_song_2021,
  author  = {Shi, Chengchun and Zhang, Sheng and Lu, Wenbin and Song, Rui},
  title   = {Statistical Inference of the Value Function for Reinforcement Learning in Infinite-Horizon Settings},
  journal = JRSSB,
  volume  = {12},
  year    = {2021},
  month   = dec,
  issn    = {1467-9868},
  doi     = {10.1111/rssb.12465},
}

@article{shi_lu_song_2020,
  author       = {Shi, Chengchun and Lu, Wenbin and Song, Rui},
  title        = {A Sparse Random Projection-Based Test for Overall Qualitative Treatment Effects},
  journal      = JASA,
  volume       = {115},
  number       = {531},
  pages        = {1201--1213},
  year         = {2020},
  month        = jul,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2019.1604368},
  abstractNote = {In contrast to the classical ``one-size-fits-all'' approach,
    precision medicine proposes the customization of individualized treatment
    regimes to account for patients' heterogeneity in response to treatments.
    Most of existing works in the literature focused on estimating optimal
    individualized treatment regimes. However, there has been less attention
    devoted to hypothesis testing regarding the existence of overall
    qualitative treatment effects, especially when there are a large number of
    prognostic covariates. When covariates do not have qualitative treatment
    effects, the optimal treatment regime will assign the same treatment to all
    patients regardless of their covariate values. In this article, we consider
    testing the overall qualitative treatment effects of patients' prognostic
    covariates in a high-dimensional setting. We propose a sample splitting
    method to construct the test statistic, based on a nonparametric estimator
    of the contrast function. When the dimension of covariates is large, we
    construct the test based on sparse random projections of covariates into a
    low-dimensional space. We prove the consistency of our test statistic. In
    the regular cases, we show the asymptotic power function of our test
    statistic is asymptotically the same as the ``oracle'' test statistic which
    is constructed based on the ``optimal'' projection matrix. Simulation
    studies and real data applications validate our theoretical findings.
    Supplementary materials for this article are available online.},
}

@article{shi_song_chen_li_2019,
  author       = {Shi, Chengchun and Song, Rui and Chen, Zhao and Li, Runze},
  title        = {Linear Hypothesis Testing for High Dimensional Generalized Linear Models},
  journal      = AOS,
  volume       = {47},
  number       = {5},
  pages        = {2671--2703},
  year         = {2019},
  month        = oct,
  issn         = {0090-5364},
  doi          = {10.1214/18-AOS1761},
  abstractNote = {This paper is concerned with testing linear hypotheses in
    high-dimensional generalized linear models. To deal with linear hypotheses,
    we first propose constrained partial regularization method and study its
    statistical properties. We further introduce an algorithm for solving
    regularization problems with folded-concave penalty functions and linear
    constraints. To test linear hypotheses, we propose a partial penalized
    likelihood ratio test, a partial penalized score test and a partial
    penalized Wald test. We show that the limiting null distributions of these
    three test statistics are $\chi^2$ distribution with the same degrees of
    freedom, and under local alternatives, they asymptotically follow
    non-central $\chi^2$ distributions with the same degrees of freedom and
    noncentral parameter, provided the number of parameters involved in the
    test hypothesis grows to $\infty$ at a certain rate. Simulation studies are
    conducted to examine the finite sample performance of the proposed tests.
    Empirical analysis of a real data example is used to illustrate the
    proposed testing procedures.},
}

@article{shi_song_lu_2019,
  author       = {Shi, Chengchun and Song, Rui and Lu, Wenbin},
  title        = {On Testing Conditional Qualitative Treatment Effects},
  journal      = AOS,
  volume       = {47},
  number       = {4},
  pages        = {2348--2377},
  year         = {2019},
  month        = aug,
  issn         = {0090-5364},
  doi          = {10.1214/18-AOS1750},
  abstractNote = {Precision medicine is an emerging medical paradigm that
    focuses on finding the most effective treatment strategy tailored for
    individual patients. In the literature, most of the existing works focused
    on estimating the optimal treatment regime. However, there has been less
    attention devoted to hypothesis testing regarding the optimal treatment
    regime. In this paper, we first introduce the notion of conditional
    qualitative treatment effects (CQTE) of a set of variables given another
    set of variables and provide a class of equivalent representations for the
    null hypothesis of no CQTE. The proposed definition of CQTE does not assume
    any parametric form for the optimal treatment rule and plays an important
    role for assessing the incremental value of a set of new variables in
    optimal treatment decision making conditional on an existing set of
    prescriptive variables. We then propose novel testing procedures for no
    CQTE based on kernel estimation of the conditional contrast functions. We
    show that our test statistics have asymptotically correct size and
    non-negligible power against some nonstandard local alternatives. The
    empirical performance of the proposed tests are evaluated by simulations
    and an application to an AIDS data set.},
}

@article{shi_lu_song_2018,
  author       = {Shi, Chengchun and Lu, Wenbin and Song, Rui},
  title        = {A Massive Data Framework for {M-Estimators} with Cubic-Rate},
  journal      = JASA,
  volume       = {113},
  number       = {524},
  pages        = {1698--1709},
  year         = {2018},
  month        = jun,
  publisher    = {Informa UK Limited},
  issn         = {0162-1459},
  eissn        = {1537-274X},
  doi          = {10.1080/01621459.2017.1360779},
  abstractNote = {The divide and conquer method is a common strategy for
    handling massive data. In this article, we study the divide and conquer
    method for cubic-rate estimators under the massive data framework. We
    develop a general theory for establishing the asymptotic distribution of
    the aggregated M-estimators using a weighted average with weights depending
    on the subgroup sample sizes. Under certain condition on the growing rate
    of the number of subgroups, the resulting aggregated estimators are shown
    to have faster convergence rate and asymptotic normal distribution, which
    are more tractable in both computation and inference than the original
    M-estimators based on pooled data. Our theory applies to a wide class of
    M-estimators with cube root convergence rate, including the location
    estimator, maximum score estimator, and value search estimator. Empirical
    performance via simulations and a real data application also validate our
    theoretical findings. Supplementary materials for this article are
    available online.},
}

@article{shi_song_lu_fu_2018,
  author  = {Shi, Chengchun and Song, Rui and Lu, Wenbin and Fu, Bo},
  title   = {Maximin Projection Learning for Optimal Treatment Decision with Heterogeneous Individualized Treatment Effects},
  journal = JRSSB,
  volume  = {80},
  number  = {4},
  pages   = {681--702},
  year    = {2018},
  month   = sep,
  issn    = {1467-9868},
  doi     = {10.1111/rssb.12273},
}

@article{shi_song_lu_2016,
  author       = {Shi, Chengchun and Song, Rui and Lu, Wenbin},
  title        = {Robust Learning for Optimal Treatment Decision with {NP-Dimensionality}},
  journal      = {Electronic Journal of Statistics},
  volume       = {10},
  number       = {2},
  pages        = {2894--2921},
  year         = {2016},
  issn         = {1935-7524},
  doi          = {10.1214/16-EJS1178},
  abstractNote = {In order to identify important variables that are involved in
    making optimal treatment decision, Lu, Zhang and Zeng (2013) proposed a
    penalized least squared regression framework for a fixed number of
    predictors, which is robust against the misspecification of the conditional
    mean model. Two problems arise: (i) in a world of explosively big data,
    effective methods are needed to handle ultra-high dimensional data set, for
    example, with the dimension of predictors is of the non-polynomial (NP)
    order of the sample size; (ii) both the propensity score and conditional
    mean models need to be estimated from data under NP dimensionality. In this
    paper, we propose a robust procedure for estimating the optimal treatment
    regime under NP dimensionality. In both steps, penalized regressions are
    employed with the non-concave penalty function, where the conditional mean
    model of the response given predictors may be misspecified. The asymptotic
    properties, such as weak oracle properties, selection consistency and
    oracle distributions, of the proposed estimators are investigated. In
    addition, we study the limiting distribution of the estimated value
    function for the obtained optimal treatment regime. The empirical
    performance of the proposed estimation method is evaluated by simulations
    and an application to a depression dataset from the STAR*D study.},
}

@article{zhang_qiu_shi_2016,
  author  = {Zhang, P. and Qiu, Z. G. and Shi, C. C.},
  title   = {{simplexreg}: An {R} Package for Regression Analysis of Proportional Data Using the Simplex Distribution},
  journal = {Journal of Statistical Software},
  volume  = {71},
  number  = {11},
  pages   = {1--21},
  year    = {2016},
}