@article{liu_chen_jiang_wang_zhang_liang_xiao_song_2024, title={A crossword solving system based on {Monte Carlo} tree search}, volume={335}, ISSN={1872-7921}, DOI={10.1016/j.artint.2024.104192}, journal={ARTIFICIAL INTELLIGENCE}, author={Liu, Jingping and Chen, Lihan and Jiang, Sihang and Wang, Chao and Zhang, Sheng and Liang, Jiaqing and Xiao, Yanghua and Song, Rui}, year={2024}, month=oct } @article{shi_wan_song_luo_zhu_song_2023, title={A Multiagent Reinforcement Learning Framework for Off-Policy Evaluation in Two-Sided Markets}, volume={17}, ISSN={1941-7330}, DOI={10.1214/22-AOAS1700}, abstractNote={The two-sided markets such as ride-sharing companies often involve a group of subjects who are making sequential decisions across time and/or location. With the rapid development of smart phones and internet of things, they have substantially transformed the transportation landscape of human beings. In this paper we consider large-scale fleet management in ride-sharing companies that involve multiple units in different areas receiving sequences of products (or treatments) over time. Major technical challenges, such as policy evaluation, arise in those studies because (i) spatial and temporal proximities induce interference between locations and times; and (ii) the large number of locations results in the curse of dimensionality. To address both challenges simultaneously, we introduce a multi-agent reinforcement learning (MARL) framework for carrying policy evaluation in these studies. We propose novel estimators for mean outcomes under different products that are consistent despite the high-dimensionality of state-action space. The proposed estimator works favorably in simulation experiments. We further illustrate our method using a real dataset obtained from a two-sided marketplace company to evaluate the effects of applying different subsidizing policies.
A Python implementation of our proposed method is available at https://github.com/RunzheStat/CausalMARL.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Shi, Chengchun and Wan, Runzhe and Song, Ge and Luo, Shikai and Zhu, Hongtu and Song, Rui}, year={2023}, month=dec, pages={2701--2722} } @article{gao_shi_song_2023, title={Deep spectral {Q-learning} with application to mobile health}, volume={12}, ISSN={2049-1573}, DOI={10.1002/sta4.564}, abstractNote={Dynamic treatment regimes assign personalized treatments to patients sequentially over time based on their baseline information and time-varying covariates. In mobile health applications, these covariates are typically collected at different frequencies over a long time horizon. In this paper, we propose a deep spectral Q-learning algorithm, which integrates principal component analysis (PCA) with deep Q-learning to handle the mixed frequency data. In theory, we prove that the mean return under the estimated optimal policy converges to that under the optimal one and establish its rate of convergence. The usefulness of our proposal is further illustrated via simulations and an application to a diabetes dataset.}, number={1}, journal={STAT}, author={Gao, Yuhe and Shi, Chengchun and Song, Rui}, year={2023}, month=jan } @article{shen_cai_song_2024, title={Doubly Robust Interval Estimation for Optimal Policy Evaluation in Online Learning}, ISSN={1537-274X}, DOI={10.1080/01621459.2023.2279289}, abstractNote={Evaluating the performance of an ongoing policy plays a vital role in many areas such as medicine and economics, to provide crucial instructions on the early-stop of the online experiment and timely feedback from the environment. Policy evaluation in online learning thus attracts increasing attention by inferring the mean outcome of the optimal policy (i.e., the value) in real-time.
Yet, such a problem is particularly challenging due to the dependent data generated in the online environment, the unknown optimal policy, and the complex exploration and exploitation trade-off in the adaptive experiment. In this paper, we aim to overcome these difficulties in policy evaluation for online learning. We explicitly derive the probability of exploration that quantifies the probability of exploring non-optimal actions under commonly used bandit algorithms. We use this probability to conduct valid inference on the online conditional mean estimator under each action and develop the doubly robust interval estimation (DREAM) method to infer the value under the estimated optimal policy in online learning. The proposed value estimator provides double protection for consistency and is asymptotically normal with a Wald-type confidence interval provided. Extensive simulation studies and real data applications are conducted to demonstrate the empirical validity of the proposed DREAM method.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Shen, Ye and Cai, Hengrui and Song, Rui}, year={2024}, month={May} } @article{ghosh_ma_song_zhong_2023, title={Flexible inference of optimal individualized treatment strategy in covariate adjusted randomization with multiple covariates}, volume={17}, ISSN={["1935-7524"]}, DOI={10.1214/23-EJS2127}, abstractNote={To maximize clinical benefit, clinicians routinely tailor treatment to the individual characteristics of each patient, where individualized treatment rules are needed and are of significant research interest to statisticians. In the covariate-adjusted randomization clinical trial with many covariates, we model the treatment effect with an unspecified function of a single index of the covariates and leave the baseline response completely arbitrary. 
We devise a class of estimators to consistently estimate the treatment effect function and its associated index while bypassing the estimation of the baseline response, which is subject to the curse of dimensionality. We further develop inference tools to identify predictive covariates and isolate effective treatment region. The usefulness of the methods is demonstrated in both simulations and a clinical data example.}, number={1}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Ghosh, Trinetri and Ma, Yanyuan and Song, Rui and Zhong, Pingshou}, year={2023}, pages={1344–1370} } @article{wan_li_lu_song_2024, title={Mining the factor zoo: Estimation of latent factor models with sufficient proxies}, volume={239}, ISSN={["1872-6895"]}, DOI={10.1016/j.jeconom.2022.08.013}, abstractNote={Latent factor model estimation typically relies on either using domain knowledge to manually pick several observed covariates as factor proxies, or purely conducting multivariate analysis such as principal component analysis. However, the former approach may suffer from the bias while the latter cannot incorporate additional information. We propose to bridge these two approaches while allowing the number of factor proxies to diverge, and hence make the latent factor model estimation robust, flexible, and statistically more accurate. As a bonus, the number of factors is also allowed to grow. At the heart of our method is a penalized reduced rank regression to combine information. To further deal with heavy-tailed data, a computationally attractive penalized robust reduced rank regression method is proposed. We establish faster rates of convergence compared with the benchmark. 
Extensive simulations and real examples are used to illustrate the advantages.}, number={2}, journal={JOURNAL OF ECONOMETRICS}, author={Wan, Runzhe and Li, Yingying and Lu, Wenbin and Song, Rui}, year={2024}, month={Feb} } @article{chen_lu_song_ghosh_2023, title={On Learning and Testing of Counterfactual Fairness through Data Preprocessing}, volume={4}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2023.2186885}, abstractNote={Machine learning has become more important in real-life decision-making but people are concerned about the ethical problems it may bring when used improperly. Recent work brings the discussion of machine learning fairness into the causal framework and elaborates on the concept of Counterfactual Fairness. In this paper, we develop the Fair Learning through dAta Preprocessing (FLAP) algorithm to learn counterfactually fair decisions from biased training data and formalize the conditions where different data preprocessing procedures should be used to guarantee counterfactual fairness. We also show that Counterfactual Fairness is equivalent to the conditional independence of the decisions and the sensitive attributes given the processed non-sensitive attributes, which enables us to detect discrimination in the original decision using the processed data. The performance of our algorithm is illustrated using simulated data and real-world applications.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Chen, Haoyu and Lu, Wenbin and Song, Rui and Ghosh, Pulak}, year={2023}, month={Apr} } @article{shi_wang_luo_zhu_ye_song_2022, title={Dynamic Causal Effects Evaluation in A/B Testing with a Reinforcement Learning Framework}, volume={3}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2027776}, abstractNote={Abstract A/B testing, or online experiment is a standard business strategy to compare a new product with an old one in pharmaceutical, technological, and traditional industries. 
Major challenges arise in online experiments of two-sided marketplace platforms (e.g., Uber) where there is only one unit that receives a sequence of treatments over time. In those experiments, the treatment at a given time impacts current outcome as well as future outcomes. The aim of this article is to introduce a reinforcement learning framework for carrying A/B testing in these experiments, while characterizing the long-term treatment effects. Our proposed testing procedure allows for sequential monitoring and online updating. It is generally applicable to a variety of treatment designs in different industries. In addition, we systematically investigate the theoretical properties (e.g., size and power) of our testing procedure. Finally, we apply our framework to both simulated data and a real-world data example obtained from a technological company to illustrate its advantage over the current practice. A Python implementation of our test is available at https://github.com/callmespring/CausalRL. Supplementary materials for this article are available online.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Shi, Chengchun and Wang, Xiaoyu and Luo, Shikai and Zhu, Hongtu and Ye, Jieping and Song, Rui}, year={2022}, month={Mar} } @article{pu_hu_wang_li_hu_zhu_song_song_wu_lyu_2022, title={Learning a deep dual-level network for robust DeepFake detection}, volume={130}, ISSN={["1873-5142"]}, DOI={10.1016/j.patcog.2022.108832}, abstractNote={Face manipulation techniques, especially DeepFake techniques, are causing severe social concerns and security problems. When faced with skewed data distributions such as those found in the real world, existing DeepFake detection methods exhibit significantly degraded performance, especially the AUC score. In this paper, we focus on DeepFake detection in real-world situations. 
We propose a dual-level collaborative framework to detect frame-level and video-level forgeries simultaneously with a joint loss function to optimize both the AUC score and error rate at the same time. Our experiments indicate that the AUC loss boosts imbalanced learning performance and outperforms focal loss, a state-of-the-art loss function to address imbalanced data. In addition, our multitask structure enables mutual reinforcement of frame-level and video-level detection and achieves outstanding performance in imbalanced learning. Our proposed method is also more robust to video quality variations and shows better generalization ability in cross-dataset evaluations than existing DeepFake detection methods. Our implementation is available online at https://github.com/PWB97/Deepfake-detection.}, journal={PATTERN RECOGNITION}, author={Pu, Wenbo and Hu, Jing and Wang, Xin and Li, Yuezun and Hu, Shu and Zhu, Bin and Song, Rui and Song, Qi and Wu, Xi and Lyu, Siwei}, year={2022}, month={Oct} } @article{shi_zhu_ye_luo_zhu_song_2022, title={Off-Policy Confidence Interval Estimation with Confounded Markov Decision Process}, volume={10}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2110878}, abstractNote={This paper is concerned with constructing a confidence interval for a target policy's value offline based on a pre-collected observational data in infinite horizon settings. Most of the existing works assume no unmeasured variables exist that confound the observed actions. This assumption, however, is likely to be violated in real applications such as healthcare and technological industries. In this paper, we show that with some auxiliary variables that mediate the effect of actions on the system dynamics, the target policy's value is identifiable in a confounded Markov decision process. Based on this result, we develop an efficient off-policy value estimator that is robust to potential model misspecification and provide rigorous uncertainty quantification. 
Our method is justified by theoretical results, simulated and real datasets obtained from ridesharing companies. A Python implementation of the proposed procedure is available at https://github.com/Mamba413/cope.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Shi, Chengchun and Zhu, Jin and Ye, Shen and Luo, Shikai and Zhu, Hongtu and Song, Rui}, year={2022}, month={Oct} } @article{chen_jiang_liu_wang_zhang_xie_liang_xiao_song_2022, title={Rule mining over knowledge graphs via reinforcement learning}, volume={242}, ISSN={["1872-7409"]}, DOI={10.1016/j.knosys.2022.108371}, abstractNote={Knowledge graphs (KGs) are an important source repository for a wide range of applications and rule mining from KGs recently attracts wide research interest in the KG-related research community. Many solutions have been proposed for the rule mining from large-scale KGs, which however are limited in the inefficiency of rule generation and ineffectiveness of rule evaluation. To solve these problems, in this paper we propose a generation-then-evaluation rule mining approach guided by reinforcement learning. Specifically, a two-phased framework is designed. The first phase aims to train a reinforcement learning agent for rule generation from KGs, and the second is to utilize the value function of the agent to guide the step-by-step rule generation. 
We conduct extensive experiments on several datasets and the results prove that our rule mining solution achieves state-of-the-art performance in terms of efficiency and effectiveness.}, journal={KNOWLEDGE-BASED SYSTEMS}, author={Chen, Lihan and Jiang, Sihang and Liu, Jingping and Wang, Chao and Zhang, Sheng and Xie, Chenhao and Liang, Jiaqing and Xiao, Yanghua and Song, Rui}, year={2022}, month={Apr} } @article{ding_li_song_2022, title={Statistical Learning for Individualized Asset Allocation}, volume={11}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2139265}, abstractNote={We establish a high-dimensional statistical learning framework for individualized asset allocation. Our proposed methodology addresses continuous-action decision-making with a large number of characteristics. We develop a discretization approach to model the effect of continuous actions and allow the discretization frequency to be large and diverge with the number of observations. The value function of continuous-action is estimated using penalized regression with our proposed generalized penalties that are imposed on linear transformations of the model coefficients. We show that our proposed Discretization and Regression with generalized fOlded concaVe penalty on Effect discontinuity (DROVE) approach enjoys desirable theoretical properties and allows for statistical inference of the optimal value associated with optimal decision-making. Empirically, the proposed framework is exercised with the Health and Retirement Study data in finding individualized optimal asset allocation. The results show that our individualized optimal strategy improves the population financial well-being. 
}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Ding, Yi and Li, Yingying and Song, Rui}, year={2022}, month=nov } @article{shi_luo_le_zhu_song_2022, title={Statistically Efficient Advantage Learning for Offline Reinforcement Learning in Infinite Horizons}, volume={9}, ISSN={1537-274X}, DOI={10.1080/01621459.2022.2106868}, abstractNote={We consider reinforcement learning (RL) methods in offline domains without additional online data collection, such as mobile health applications. Most of existing policy optimization algorithms in the computer science literature are developed in online settings where data are easy to collect or simulate. Their generalizations to mobile health applications with a pre-collected offline dataset remain unknown. The aim of this paper is to develop a novel advantage learning framework in order to efficiently use pre-collected data for policy optimization. The proposed method takes an optimal Q-estimator computed by any existing state-of-the-art RL algorithms as input, and outputs a new policy whose value is guaranteed to converge at a faster rate than the policy derived based on the initial Q-estimator. Extensive numerical experiments are conducted to back up our theoretical findings.
A Python implementation of our proposed method is available at https://github.com/leyuanheart/SEAL.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Shi, Chengchun and Luo, Shikai and Le, Yuan and Zhu, Hongtu and Song, Rui}, year={2022}, month={Sep} } @article{zhou_wang_song_zhao_2022, title={Transformation-Invariant Learning of Optimal Individualized Decision Rules with Time-to-Event Outcomes}, volume={6}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2068420}, abstractNote={Abstract In many important applications of precision medicine, the outcome of interest is time to an event (e.g., death, relapse of disease) and the primary goal is to identify the optimal individualized decision rule (IDR) to prolong survival time. Existing work in this area have been mostly focused on estimating the optimal IDR to maximize the restricted mean survival time in the population. We propose a new robust framework for estimating an optimal static or dynamic IDR with time-to-event outcomes based on an easy-to-interpret quantile criterion. The new method does not need to specify an outcome regression model and is robust for heavy-tailed distribution. The estimation problem corresponds to a nonregular M-estimation problem with both finite and infinite-dimensional nuisance parameters. Employing advanced empirical process techniques, we establish the statistical theory of the estimated parameter indexing the optimal IDR. Furthermore, we prove a novel result that the proposed approach can consistently estimate the optimal value function under mild conditions even when the optimal IDR is nonunique, which happens in the challenging setting of exceptional laws. We also propose a smoothed resampling procedure for inference. The proposed methods are implemented in the R-package QTOCen. We demonstrate the performance of the proposed new methods via extensive Monte Carlo studies and a real data application. 
Supplementary materials for this article are available online.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhou, Yu and Wang, Lan and Song, Rui and Zhao, Tuoyi}, year={2022}, month={Jun} } @article{liu_song_lu_xiao_2022, title={A Probit Tensor Factorization Model For Relational Learning}, volume={3}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2021.2003204}, abstractNote={Abstract With the proliferation of knowledge graphs, modeling data with complex multi-relational structure has gained increasing attention in the area of statistical relational learning. One of the most important goals of statistical relational learning is link prediction, that is, predicting whether certain relations exist in the knowledge graph. A large number of models and algorithms have been proposed to perform link prediction, among which tensor factorization method has proven to achieve state-of-the-art performance in terms of computation efficiency and prediction accuracy. However, a common drawback of the existing tensor factorization models is that the missing relations and nonexisting relations are treated in the same way, which results in a loss of information. To address this issue, we propose a binary tensor factorization model with probit link, which not only inherits the computation efficiency from the classic tensor factorization model but also accounts for the binary nature of relational data. Our proposed probit tensor factorization (PTF) model shows advantages in both the prediction accuracy and interpretability. 
Supplementary files for this article are available online.}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Liu, Ye and Song, Rui and Lu, Wenbin and Xiao, Yanghua}, year={2022}, month=mar } @article{shi_song_lu_2021, title={Concordance and Value Information Criteria for Optimal Treatment Decision}, volume={49}, ISSN={0090-5364}, DOI={10.1214/19-AOS1908}, abstractNote={Personalized medicine is a medical procedure that receives considerable scientific and commercial attention. The goal of personalized medicine is to assign the optimal treatment regime for each individual patient, according to his/her personal prognostic information. When there are a large number of pretreatment variables, it is crucial to identify those important variables that are necessary for treatment decision making. In this paper, we study two information criteria: the concordance and value information criteria, for variable selection in optimal treatment decision making. We consider both fixed-$p$ and high dimensional settings, and show our information criteria are consistent in model/tuning parameter selection. We further apply our information criteria to four estimation approaches, including robust learning, concordance-assisted learning, penalized A-learning and sparse concordance-assisted learning, and demonstrate the empirical performance of our methods by simulations.}, number={1}, journal={ANNALS OF STATISTICS}, author={Shi, Chengchun and Song, Rui and Lu, Wenbin}, year={2021}, month=feb, pages={49--75} } @article{cai_song_lu_2021, title={GEAR: On optimal decision making with auxiliary data}, volume={10}, ISSN={2049-1573}, DOI={10.1002/sta4.399}, abstractNote={Personalized optimal decision making, finding the optimal decision rule (ODR) based on individual characteristics, has attracted increasing attention recently in many fields, such as education, economics, and medicine.
Current ODR methods usually require the primary outcome of interest in samples for assessing treatment effects, namely, the experimental sample. However, in many studies, treatments may have a long‐term effect, and as such, the primary outcome of interest cannot be observed in the experimental sample due to the limited duration of experiments, which makes the estimation of ODR impossible. This paper is inspired to address this challenge by making use of an auxiliary sample to facilitate the estimation of ODR in the experimental sample. We propose an auGmented inverse propensity weighted Experimental and Auxiliary sample‐based decision Rule (GEAR) by maximizing the augmented inverse propensity weighted value estimator over a class of decision rules using the experimental sample, with the primary outcome being imputed based on the auxiliary sample. The asymptotic properties of the proposed GEAR estimators and their associated value estimators are established. Simulation studies are conducted to demonstrate its empirical validity with a real AIDS application.}, number={1}, journal={STAT}, author={Cai, Hengrui and Song, Rui and Lu, Wenbin}, year={2021}, month={Dec} } @article{wan_zhang_song_2021, title={Multi-Objective Model-based Reinforcement Learning for Infectious Disease Control}, DOI={10.1145/3447548.3467303}, abstractNote={Severe infectious diseases such as the novel coronavirus (COVID-19) pose a huge threat to public health. Stringent control measures, such as school closures and stay-at-home orders, while having significant effects, also bring huge economic losses. In the face of an emerging infectious disease, a crucial question for policymakers is how to make the trade-off and implement the appropriate interventions timely given the huge uncertainty. In this work, we propose a Multi-Objective Model-based Reinforcement Learning framework to facilitate data-driven decision-making and minimize the overall long-term cost. 
Specifically, at each decision point, a Bayesian epidemiological model is first learned as the environment model, and then the proposed model-based multi-objective planning algorithm is applied to find a set of Pareto-optimal policies. This framework, combined with the prediction bands for each policy, provides a real-time decision support tool for policymakers. The application is demonstrated with the spread of COVID-19 in China.}, journal={KDD '21: PROCEEDINGS OF THE 27TH ACM SIGKDD CONFERENCE ON KNOWLEDGE DISCOVERY & DATA MINING}, author={Wan, Runzhe and Zhang, Xinyu and Song, Rui}, year={2021}, pages={1634–1644} } @article{chen_song_zhang_adams_sun_lu_2021, title={On estimating optimal regime for treatment initiation time based on restricted mean residual lifetime}, volume={8}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13530}, abstractNote={AbstractWhen to initiate treatment on patients is an important problem in many medical studies such as AIDS and cancer. In this article, we formulate the treatment initiation time problem for time‐to‐event data and propose an optimal individualized regime that determines the best treatment initiation time for individual patients based on their characteristics. Different from existing optimal treatment regimes where treatments are undertaken at a pre‐specified time, here new challenges arise from the complicated missing mechanisms in treatment initiation time data and the continuous treatment rule in terms of initiation time. To tackle these challenges, we propose to use restricted mean residual lifetime as a value function to evaluate the performance of different treatment initiation regimes, and develop a nonparametric estimator for the value function, which is consistent even when treatment initiation times are not completely observable and their distribution is unknown. We also establish the asymptotic properties of the resulting estimator in the decision rule and its associated value function estimator. 
In particular, the asymptotic distribution of the estimated value function is nonstandard, which follows a weighted chi-squared distribution. The finite-sample performance of the proposed method is evaluated by simulation studies and is further illustrated with an application to a breast cancer data.}, journal={BIOMETRICS}, author={Chen, Xin and Song, Rui and Zhang, Jiajia and Adams, Swann Arp and Sun, Liuquan and Lu, Wenbin}, year={2021}, month=aug } @article{yu_lu_song_2021, title={Online Testing of Subgroup Treatment Effects Based on Value Difference}, ISSN={1550-4786}, DOI={10.1109/ICDM51629.2021.00189}, abstractNote={Online A/B testing plays a critical role in the high-tech industry to guide product development and accelerate innovation. It performs a null hypothesis statistical test to determine which variant is better. However, a typical A/B test presents two problems: (i) a fixed-horizon framework inflates the false-positive errors under continuous monitoring; (ii) the homogeneous effects assumption fails to identify a subgroup with a beneficial treatment effect. In this paper, we propose a sequential test for subgroup treatment effects based on value difference, named SUBTLE, to address these two problems simultaneously. The SUBTLE allows the experimenters to ``peek'' at the results during the experiment without harming the statistical guarantees. It assumes heterogeneous treatment effects and aims to test if some subgroup of the population will benefit from the investigative treatment. If the testing result indicates the existence of such a subgroup, a subgroup will be identified using a readily available estimated optimal treatment rule. We examine the empirical performance of our proposed test on both simulations and a real dataset.
The results show that the SUBTLE has high detection power with controlled type I error at any time, is more robust to noise covariates, and can achieve early stopping compared with the corresponding fixed-horizon test.}, journal={2021 21ST IEEE INTERNATIONAL CONFERENCE ON DATA MINING (ICDM 2021)}, author={Yu, Miao and Lu, Wenbin and Song, Rui}, year={2021}, pages={1463–1468} } @article{shi_zhang_lu_song_2021, title={Statistical inference of the value function for reinforcement learning in infinite-horizon settings}, volume={12}, ISSN={["1467-9868"]}, DOI={10.1111/rssb.12465}, abstractNote={AbstractReinforcement learning is a general technique that allows an agent to learn an optimal policy and interact with an environment in sequential decision-making problems. The goodness of a policy is measured by its value function starting from some initial state. The focus of this paper was to construct confidence intervals (CIs) for a policy’s value in infinite horizon settings where the number of decision points diverges to infinity. We propose to model the action-value state function (Q-function) associated with a policy based on series/sieve method to derive its confidence interval. When the target policy depends on the observed data as well, we propose a SequentiAl Value Evaluation (SAVE) method to recursively update the estimated policy and its value estimator. As long as either the number of trajectories or the number of decision points diverges to infinity, we show that the proposed CI achieves nominal coverage even in cases where the optimal policy is not unique. Simulation studies are conducted to back up our theoretical findings. We apply the proposed method to a dataset from mobile health studies and find that reinforcement learning algorithms could help improve patient’s health status. 
A Python implementation of the proposed procedure is available at https://github.com/shengzhang37/SAVE.}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Shi, Chengchun and Zhang, Sheng and Lu, Wenbin and Song, Rui}, year={2021}, month=dec } @inproceedings{yu_lu_song_2020, place={Palo Alto, CA}, title={A New Framework for Online Testing of Heterogeneous Treatment Effect}, volume={34}, DOI={10.1609/aaai.v34i06.6594}, abstractNote={We propose a new framework for online testing of heterogeneous treatment effects. The proposed test, named sequential score test (SST), is able to control type I error under continuous monitoring and detect multi-dimensional heterogeneous treatment effects. We provide an online p-value calculation for SST, making it convenient for continuous monitoring, and extend our tests to online multiple testing settings by controlling the false discovery rate. We examine the empirical performance of the proposed tests and compare them with a state-of-art online test, named mSPRT using simulations and a real data. The results show that our proposed test controls type I error at any time, has higher detection power and allows quick inference on online A/B testing.}, number={6}, booktitle={Proceedings of the Thirty-Fourth AAAI Conference on Artificial Intelligence}, publisher={AAAI Press}, author={Yu, Miao and Lu, Wenbin and Song, Rui}, year={2020}, pages={10310--10317} } @article{dong_laber_goldberg_song_yang_2020, title={Ascertaining properties of weighting in the estimation of optimal treatment regimes under monotone missingness}, volume={39}, ISSN={1097-0258}, DOI={10.1002/sim.8678}, abstractNote={Dynamic treatment regimes operationalize precision medicine as a sequence of decision rules, one per stage of clinical intervention, that map up-to-date patient information to a recommended intervention. An optimal treatment regime maximizes the mean utility when applied to the population of interest.
Methods for estimating an optimal treatment regime assume the data to be fully observed, which rarely occurs in practice. A common approach is to first use multiple imputation and then pool the estimators across imputed datasets. However, this approach requires estimating the joint distribution of patient trajectories, which can be high‐dimensional, especially when there are multiple stages of intervention. We examine the application of inverse probability weighted estimating equations as an alternative to multiple imputation in the context of monotonic missingness. This approach applies to a broad class of estimators of an optimal treatment regime including both Q‐learning and a generalization of outcome weighted learning. We establish consistency under mild regularity conditions and demonstrate its advantages in finite samples using a series of simulation experiments and an application to a schizophrenia study.}, number={25}, journal={STATISTICS IN MEDICINE}, author={Dong, Lin and Laber, Eric and Goldberg, Yair and Song, Rui and Yang, Shu}, year={2020}, month={Nov}, pages={3503–3520} } @article{pan_li_zhou_liu_song_liu_luo_huang_tian_2020, title={DHPA: Dynamic Human Preference Analytics Framework— A Case Study on Taxi Drivers' Learning Curve Analysis}, volume={11}, ISSN={["2157-6912"]}, DOI={10.1145/3360312}, abstractNote={Many real-world human behaviors can be modeled and characterized as sequential decision-making processes, such as a taxi driver’s choices of working regions and times. Each driver possesses unique preferences on the sequential choices over time and improves the driver’s working efficiency. Understanding the dynamics of such preferences helps accelerate the learning process of taxi drivers. Prior works on taxi operation management mostly focus on finding optimal driving strategies or routes, lacking in-depth analysis on what the drivers learned during the process and how they affect the performance of the driver. 
In this work, we make the first attempt to establish Dynamic Human Preference Analytics. We inversely learn the taxi drivers’ preferences from data and characterize the dynamics of such preferences over time. We extract two types of features (i.e., profile features and habit features) to model the decision space of drivers. Then through inverse reinforcement learning, we learn the preferences of drivers with respect to these features. The results illustrate that self-improving drivers tend to keep adjusting their preferences to habit features to increase their earning efficiency while keeping the preferences to profile features invariant. However, experienced drivers have stable preferences over time. The exploring drivers tend to randomly adjust the preferences over time.}, number={1}, journal={ACM Transactions on Intelligent Systems and Technology}, author={Pan, M. and Li, Y. and Zhou, X. and Liu, Z. and Song, R. and Liu, H. and Luo, J. and Huang, Weixiao and Tian, Zhihong}, year={2020}, month={Jan} } @article{yang_kim_song_2020, title={Doubly robust inference when combining probability and non-probability samples with high dimensional data}, volume={1}, ISSN={1369-7412}, url={http://dx.doi.org/10.1111/rssb.12354}, DOI={10.1111/rssb.12354}, abstractNote={SummaryWe consider integrating a non-probability sample with a probability sample which provides high dimensional representative covariate information of the target population. We propose a two-step approach for variable selection and finite population inference. In the first step, we use penalized estimating equations with folded concave penalties to select important variables and show selection consistency for general samples. In the second step, we focus on a doubly robust estimator of the finite population mean and re-estimate the nuisance model parameters by minimizing the asymptotic squared bias of the doubly robust estimator. 
This estimating strategy mitigates the possible first-step selection error and renders the doubly robust estimator root n consistent if either the sampling probability or the outcome model is correctly specified.}, journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, publisher={Wiley}, author={Yang, Shu and Kim, Jae Kwang and Song, Rui}, year={2020}, month={Jan} } @article{shi_song_lu_li_2021, title={Statistical Inference for High-Dimensional Models via Recursive Online-Score Estimation}, volume={116}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1710154}, abstractNote={Abstract In this article, we develop a new estimation and valid inference method for single or low-dimensional regression coefficients in high-dimensional generalized linear models. The number of the predictors is allowed to grow exponentially fast with respect to the sample size. The proposed estimator is computed by solving a score function. We recursively conduct model selection to reduce the dimensionality from high to a moderate scale and construct the score equation based on the selected variables. The proposed confidence interval (CI) achieves valid coverage without assuming consistency of the model selection procedure. When the selection consistency is achieved, we show the length of the proposed CI is asymptotically the same as the CI of the “oracle” method which works as well as if the support of the control variables were known. In addition, we prove the proposed CI is asymptotically narrower than the CIs constructed based on the desparsified Lasso estimator and the decorrelated score statistic. Simulation studies and real data applications are presented to back up our theoretical findings. 
Supplementary materials for this article are available online.}, number={535}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Shi, Chengchun and Song, Rui and Lu, Wenbin and Li, Runze}, year={2021}, month={Jul}, pages={1307–1318} } @article{chen_lu_song_2021, title={Statistical Inference for Online Decision Making: In a Contextual Bandit Setting}, volume={116}, ISSN={["1537-274X"]}, url={http://dx.doi.org/10.1080/01621459.2020.1770098}, DOI={10.1080/01621459.2020.1770098}, abstractNote={Abstract Online decision making problem requires us to make a sequence of decisions based on incremental information. Common solutions often need to learn a reward model of different actions given the contextual information and then maximize the long-term reward. It is meaningful to know if the posited model is reasonable and how the model performs in the asymptotic sense. We study this problem under the setup of the contextual bandit framework with a linear reward model. The ε-greedy policy is adopted to address the classic exploration-and-exploitation dilemma. Using the martingale central limit theorem, we show that the online ordinary least squares estimator of model parameters is asymptotically normal. When the linear model is misspecified, we propose the online weighted least squares estimator using the inverse propensity score weighting and also establish its asymptotic normality. Based on the properties of the parameter estimators, we further show that the in-sample inverse propensity weighted value estimator is asymptotically normal. We illustrate our results using simulations and an application to a news article recommendation dataset from Yahoo!. 
Supplementary materials for this article are available online.}, number={533}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, publisher={Informa UK Limited}, author={Chen, Haoyu and Lu, Wenbin and Song, Rui}, year={2021}, month={Mar}, pages={240–255} } @article{shi_lu_song_2020, title={A Sparse Random Projection-Based Test for Overall Qualitative Treatment Effects}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1604368}, abstractNote={Abstract In contrast to the classical “one-size-fits-all” approach, precision medicine proposes the customization of individualized treatment regimes to account for patients’ heterogeneity in response to treatments. Most of existing works in the literature focused on estimating optimal individualized treatment regimes. However, there has been less attention devoted to hypothesis testing regarding the existence of overall qualitative treatment effects, especially when there are a large number of prognostic covariates. When covariates do not have qualitative treatment effects, the optimal treatment regime will assign the same treatment to all patients regardless of their covariate values. In this article, we consider testing the overall qualitative treatment effects of patients’ prognostic covariates in a high-dimensional setting. We propose a sample splitting method to construct the test statistic, based on a nonparametric estimator of the contrast function. When the dimension of covariates is large, we construct the test based on sparse random projections of covariates into a low-dimensional space. We prove the consistency of our test statistic. In the regular cases, we show the asymptotic power function of our test statistic is asymptotically the same as the “oracle” test statistic which is constructed based on the “optimal” projection matrix. Simulation studies and real data applications validate our theoretical findings. 
Supplementary materials for this article are available online.}, number={531}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Shi, Chengchun and Lu, Wenbin and Song, Rui}, year={2020}, month={Jul}, pages={1201–1213} } @article{shi_lu_song_2019, title={Determining the Number of Latent Factors in Statistical Multi-Relational Learning}, volume={20}, number={23}, journal={Journal of Machine Learning Research}, author={Shi, C. and Lu, W. and Song, R.}, year={2019}, pages={1–38} } @inbook{pan_li_zhou_liu_song_lu_luo_2019, title={Dissecting the Learning Curve of Taxi Drivers: A Data-Driven Approach}, ISBN={9781611975673}, url={http://dx.doi.org/10.1137/1.9781611975673.88}, DOI={10.1137/1.9781611975673.88}, abstractNote={Many real world human behaviors can be modeled and characterized as sequential decision making processes, such as taxi driver’s choices of working regions and times. Each driver possesses unique preferences on the sequential choices over time and improves their working efficiency. Understanding the dynamics of such preferences helps accelerate the learning process of taxi drivers. Prior works on taxi operation management mostly focus on finding optimal driving strategies or routes, lacking in-depth analysis on what the drivers learned during the process and how they affect the performance of the driver. In this work, we make the first attempt to inversely learn the taxi drivers’ preferences from data and characterize the dynamics of such preferences over time. We extract two types of features, i.e., profile features and habit features, to model the decision space of drivers. Then through inverse reinforcement learning we learn the preferences of drivers with respect to these features. The results illustrate that self-improving drivers tend to keep adjusting their preferences to habit features to increase their earning efficiency, while keeping the preferences to profile features invariant. 
On the other hand, experienced drivers have stable preferences over time.}, booktitle={Proceedings of the 2019 SIAM International Conference on Data Mining}, publisher={Society for Industrial and Applied Mathematics}, author={Pan, Menghai and Li, Yanhua and Zhou, Xun and Liu, Zhenming and Song, Rui and Lu, Hui and Luo, Jun}, year={2019}, month={May}, pages={783–791} } @article{shi_song_chen_li_2019, title={LINEAR HYPOTHESIS TESTING FOR HIGH DIMENSIONAL GENERALIZED LINEAR MODELS}, volume={47}, ISSN={["0090-5364"]}, DOI={10.1214/18-AOS1761}, abstractNote={This paper is concerned with testing linear hypotheses in high-dimensional generalized linear models. To deal with linear hypotheses, we first propose constrained partial regularization method and study its statistical properties. We further introduce an algorithm for solving regularization problems with folded-concave penalty functions and linear constraints. To test linear hypotheses, we propose a partial penalized likelihood ratio test, a partial penalized score test and a partial penalized Wald test. We show that the limiting null distributions of these three test statistics are χ2 distribution with the same degrees of freedom, and under local alternatives, they asymptotically follow non-central χ2 distributions with the same degrees of freedom and noncentral parameter, provided the number of parameters involved in the test hypothesis grows to ∞ at a certain rate. Simulation studies are conducted to examine the finite sample performance of the proposed tests. 
Empirical analysis of a real data example is used to illustrate the proposed testing procedures.}, number={5}, journal={ANNALS OF STATISTICS}, author={Shi, Chengchun and Song, Rui and Chen, Zhao and Li, Runze}, year={2019}, month={Oct}, pages={2671--2703} } @article{su_lu_song_2019, title={Modelling and estimation for optimal treatment decision with interference}, volume={8}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/STA4.219}, DOI={10.1002/STA4.219}, abstractNote={In many network‐based intervention studies, treatment applied on an individual or his or her own characteristics may also affect the outcome of other connected people. We call this interference along network. Approaches for deriving the optimal individualized treatment regimen remain unknown after introducing the effect of interference. In this paper, we propose a novel network‐based regression model that is able to account for interaction between outcomes and treatments in a network. Both Q‐learning and A‐learning methods are derived. We show that the optimal treatment regimen under our model is independent from interference, which makes its application in practice more feasible and appealing. The asymptotic properties of the proposed estimators are established. The performance of the proposed model and methods is illustrated by extensive simulation studies and an application to a mobile game network data.}, number={1}, journal={Stat}, publisher={Wiley}, author={Su, Lin and Lu, Wenbin and Song, Rui}, year={2019}, month={Jan} } @article{shi_song_lu_2019, title={ON TESTING CONDITIONAL QUALITATIVE TREATMENT EFFECTS}, volume={47}, ISSN={0090-5364}, DOI={10.1214/18-AOS1750}, abstractNote={Precision medicine is an emerging medical paradigm that focuses on finding the most effective treatment strategy tailored for individual patients. In the literature, most of the existing works focused on estimating the optimal treatment regime. 
However, there has been less attention devoted to hypothesis testing regarding the optimal treatment regime. In this paper, we first introduce the notion of conditional qualitative treatment effects (CQTE) of a set of variables given another set of variables and provide a class of equivalent representations for the null hypothesis of no CQTE. The proposed definition of CQTE does not assume any parametric form for the optimal treatment rule and plays an important role for assessing the incremental value of a set of new variables in optimal treatment decision making conditional on an existing set of prescriptive variables. We then propose novel testing procedures for no CQTE based on kernel estimation of the conditional contrast functions. We show that our test statistics have asymptotically correct size and non-negligible power against some nonstandard local alternatives. The empirical performance of the proposed tests are evaluated by simulations and an application to an AIDS data set.}, number={4}, journal={ANNALS OF STATISTICS}, author={Shi, Chengchun and Song, Rui and Lu, Wenbin}, year={2019}, month={Aug}, pages={2348–2377} } @article{su_lu_song_huang_2020, title={Testing and Estimation of Social Network Dependence With Time to Event Data}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1617153}, abstractNote={Abstract Nowadays, events are spread rapidly along social networks. We are interested in whether people’s responses to an event are affected by their friends’ characteristics. For example, how soon will a person start playing a game given that his/her friends like it? Studying social network dependence is an emerging research area. In this work, we propose a novel latent spatial autocorrelation Cox model to study social network dependence with time-to-event data. The proposed model introduces a latent indicator to characterize whether a person’s survival time might be affected by his or her friends’ features. 
We first propose a score-type test for detecting the existence of social network dependence. If it exists, we further develop an EM-type algorithm to estimate the model parameters. The performance of the proposed test and estimators are illustrated by simulation studies and an application to a time-to-event dataset about playing a popular mobile game from one of the largest online social network platforms. Supplementary materials for this article, including a standardized description of the materials available for reproducing the work, are available as an online supplement.}, number={530}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Su, Lin and Lu, Wenbin and Song, Rui and Huang, Danyang}, year={2020}, month={Apr}, pages={570–582} } @article{liang_lu_song_2018, title={Deep advantage learning for optimal dynamic treatment regime}, volume={2}, ISSN={2475-4269 2475-4277}, url={http://dx.doi.org/10.1080/24754269.2018.1466096}, DOI={10.1080/24754269.2018.1466096}, abstractNote={ABSTRACT Recently deep learning has successfully achieved state-of-the-art performance on many difficult tasks. Deep neural networks allow for model flexibility and process features without the need of domain knowledge. Advantage learning (A-learning) is a popular method in dynamic treatment regime (DTR). It models the advantage function, which is of direct relevance to optimal treatment decision. No assumptions on baseline function are made. However, there is a paucity of literature on deep A-learning. In this paper, we present a deep A-learning approach to estimate optimal DTR. We use an inverse probability weighting method to estimate the difference between potential outcomes. Parameter sharing of convolutional neural networks (CNN) greatly reduces the amount of parameters in neural networks, which allows for high scalability. Convexified convolutional neural networks (CCNN) relax the constraints of CNN for optimisation purpose. 
Different architectures of CNN and CCNN are implemented for contrast function estimation. Both simulation results and application to the STAR*D (Sequenced Treatment Alternatives to Relieve Depression) trial indicate that the proposed methods outperform penalised least square estimator.}, number={1}, journal={Statistical Theory and Related Fields}, publisher={Informa UK Limited}, author={Liang, Shuhan and Lu, Wenbin and Song, Rui}, year={2018}, month={Jan}, pages={80--88} } @article{shi_song_lu_2018, title={Discussion of `Optimal treatment allocations in space and time for on-line control of an emerging infectious disease'}, volume={67}, number={4}, journal={Journal of the Royal Statistical Society, Series C}, author={Shi, C. and Song, R. and Lu, W.}, year={2018}, pages={775--776} } @article{jiang_song_li_zeng_lu_he_xu_wang_qian_cheng_et_al_2019, title={ENTROPY LEARNING FOR DYNAMIC TREATMENT REGIMES}, volume={29}, ISSN={1996-8507}, DOI={10.5705/ss.202018.0076}, abstractNote={Estimating optimal individualized treatment rules (ITRs) in single or multi-stage clinical trials is one key solution to personalized medicine and has received more and more attention in statistical community. Recent development suggests that using machine learning approaches can significantly improve the estimation over model-based methods. However, proper inference for the estimated ITRs has not been well established in machine learning based approaches. In this paper, we propose a entropy learning approach to estimate the optimal individualized treatment rules (ITRs). We obtain the asymptotic distributions for the estimated rules so further provide valid inference. The proposed approach is demonstrated to perform well in finite sample through extensive simulation studies. Finally, we analyze data from a multi-stage clinical trial for depression patients. 
Our results offer novel findings that are otherwise not revealed with existing approaches.}, number={4}, journal={STATISTICA SINICA}, author={Jiang, Binyan and Song, Rui and Li, Jialiang and Zeng, Donglin and Lu, Wenbin and He, Xin and Xu, Shirong and Wang, Junhui and Qian, Min and Cheng, Bin and et al.}, year={2019}, month={Oct}, pages={1633–1710} } @article{luo_song_styner_gilmore_zhu_2018, title={FSEM: Functional Structural Equation Models for Twin Functional Data}, volume={114}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2017.1407773}, DOI={10.1080/01621459.2017.1407773}, abstractNote={ABSTRACT The aim of this article is to develop a novel class of functional structural equation models (FSEMs) for dissecting functional genetic and environmental effects on twin functional data, while characterizing the varying association between functional data and covariates of interest. We propose a three-stage estimation procedure to estimate varying coefficient functions for various covariates (e.g., gender) as well as three covariance operators for the genetic and environmental effects. We develop an inference procedure based on weighted likelihood ratio statistics to test the genetic/environmental effect at either a fixed location or a compact region. We also systematically carry out the theoretical analysis of the estimated varying functions, the weighted likelihood ratio statistics, and the estimated covariance operators. We conduct extensive Monte Carlo simulations to examine the finite-sample performance of the estimation and inference procedures. We apply the proposed FSEM to quantify the degree of genetic and environmental effects on twin white matter tracts obtained from the UNC early brain development study. Supplementary materials for this article are available online.}, number={525}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Luo, S. and Song, R. and Styner, M. and Gilmore, J. H. 
and Zhu, H.}, year={2018}, month={Jul}, pages={344–357} } @article{shi_fan_song_lu_2018, title={HIGH-DIMENSIONAL A-LEARNING FOR OPTIMAL DYNAMIC TREATMENT REGIMES}, volume={46}, ISSN={["0090-5364"]}, DOI={10.1214/17-aos1570}, abstractNote={Precision medicine is a medical paradigm that focuses on finding the most effective treatment decision based on individual patient information. For many complex diseases, such as cancer, treatment decisions need to be tailored over time according to patients' responses to previous treatments. Such an adaptive strategy is referred as a dynamic treatment regime. A major challenge in deriving an optimal dynamic treatment regime arises when an extraordinary large number of prognostic factors, such as patient's genetic information, demographic characteristics, medical history and clinical measurements over time are available, but not all of them are necessary for making treatment decision. This makes variable selection an emerging need in precision medicine. In this paper, we propose a penalized multi-stage A-learning for deriving the optimal dynamic treatment regime when the number of covariates is of the non-polynomial (NP) order of the sample size. To preserve the double robustness property of the A-learning method, we adopt the Dantzig selector which directly penalizes the A-leaning estimating equations. Oracle inequalities of the proposed estimators for the parameters in the optimal dynamic treatment regime and error bounds on the difference between the value functions of the estimated optimal dynamic treatment regime and the true optimal dynamic treatment regime are established. 
Empirical performance of the proposed approach is evaluated by simulations and illustrated with an application to data from the STAR*D study.}, number={3}, journal={ANNALS OF STATISTICS}, author={Shi, Chengchun and Fan, Ailin and Song, Rui and Lu, Wenbin}, year={2018}, month={Jun}, pages={925–957} } @article{shi_song_lu_fu_2018, title={Maximin projection learning for optimal treatment decision with heterogeneous individualized treatment effects}, volume={80}, ISSN={["1467-9868"]}, DOI={10.1111/rssb.12273}, abstractNote={SummaryA salient feature of data from clinical trials and medical studies is inhomogeneity. Patients not only differ in baseline characteristics, but also in the way that they respond to treatment. Optimal individualized treatment regimes are developed to select effective treatments based on patient's heterogeneity. However, the optimal treatment regime might also vary for patients across different subgroups. We mainly consider patients’ heterogeneity caused by groupwise individualized treatment effects assuming the same marginal treatment effects for all groups. We propose a new maximin projection learning method for estimating a single treatment decision rule that works reliably for a group of future patients from a possibly new subpopulation. Based on estimated optimal treatment regimes for all subgroups, the proposed maximin treatment regime is obtained by solving a quadratically constrained linear programming problem, which can be efficiently computed by interior point methods. Consistency and asymptotic normality of the estimator are established. 
Numerical examples show the reliability of the methodology proposed.}, number={4}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Shi, Chengchun and Song, Rui and Lu, Wenbin and Fu, Bo}, year={2018}, month={Sep}, pages={681–702} } @article{zhu_zeng_song_2018, title={Proper Inference for Value Function in High-Dimensional Q-Learning for Dynamic Treatment Regimes}, volume={114}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2018.1506341}, DOI={10.1080/01621459.2018.1506341}, abstractNote={ABSTRACT Dynamic treatment regimes are a set of decision rules and each treatment decision is tailored over time according to patients’ responses to previous treatments as well as covariate history. There is a growing interest in development of correct statistical inference for optimal dynamic treatment regimes to handle the challenges of nonregularity problems in the presence of nonrespondents who have zero-treatment effects, especially when the dimension of the tailoring variables is high. In this article, we propose a high-dimensional Q-learning (HQ-learning) to facilitate the inference of optimal values and parameters. The proposed method allows us to simultaneously estimate the optimal dynamic treatment regimes and select the important variables that truly contribute to the individual reward. At the same time, hard thresholding is introduced in the method to eliminate the effects of the nonrespondents. The asymptotic properties for the parameter estimators as well as the estimated optimal value function are then established by adjusting the bias due to thresholding. Both simulation studies and real data analysis demonstrate satisfactory performance for obtaining the proper inference for the value function for the optimal dynamic treatment regimes. 
Supplementary materials for this article are available online.}, number={527}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Zhu, Wensheng and Zeng, Donglin and Song, Rui}, year={2018}, month={Oct}, pages={1404–1417} } @article{liang_lu_song_wang_2018, title={Sparse concordance-assisted learning for optimal treatment decision}, volume={18}, journal={Journal of Machine Learning Research}, author={Liang, S. H. and Lu, W. B. and Song, R. and Wang, L.}, year={2018} } @article{shi_lu_song_2018, title={A Massive Data Framework for M-Estimators with Cubic-Rate}, volume={113}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2017.1360779}, DOI={10.1080/01621459.2017.1360779}, abstractNote={ABSTRACT The divide and conquer method is a common strategy for handling massive data. In this article, we study the divide and conquer method for cubic-rate estimators under the massive data framework. We develop a general theory for establishing the asymptotic distribution of the aggregated M-estimators using a weighted average with weights depending on the subgroup sample sizes. Under certain condition on the growing rate of the number of subgroups, the resulting aggregated estimators are shown to have faster convergence rate and asymptotic normal distribution, which are more tractable in both computation and inference than the original M-estimators based on pooled data. Our theory applies to a wide class of M-estimators with cube root convergence rate, including the location estimator, maximum score estimator, and value search estimator. Empirical performance via simulations and a real data application also validate our theoretical findings. 
Supplementary materials for this article are available online.}, number={524}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Shi, Chengchun and Lu, Wenbin and Song, Rui}, year={2018}, month={Jun}, pages={1698–1709} } @article{jiang_lu_song_hudgens_naprvavnik_2017, title={DOUBLY ROBUST ESTIMATION OF OPTIMAL TREATMENT REGIMES FOR SURVIVAL DATA-WITH APPLICATION TO AN HIV/AIDS STUDY}, volume={11}, ISSN={["1932-6157"]}, DOI={10.1214/17-aoas1057}, abstractNote={In many biomedical settings, assigning every patient the same treatment may not be optimal due to patient heterogeneity. Individualized treatment regimes have the potential to dramatically improve clinical outcomes. When the primary outcome is censored survival time, a main interest is to find optimal treatment regimes that maximize the survival probability of patients. Since the survival curve is a function of time, it is important to balance short-term and long-term benefit when assigning treatments. In this paper, we propose a doubly robust approach to estimate optimal treatment regimes that optimize a user specified function of the survival curve, including the restricted mean survival time and the median survival time. The empirical and asymptotic properties of the proposed method are investigated. The proposed method is applied to a data set from an ongoing HIV/AIDS clinical observational study conducted by the University of North Carolina (UNC) Center of AIDS Research (CFAR), and shows the proposed methods significantly improve the restricted mean time of the initial treatment duration. Finally, the proposed methods are extended to multi-stage studies.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Jiang, Runchao and Lu, Wenbin and Song, Rui and Hudgens, Michael G. 
and Naprvavnik, Sonia}, year={2017}, month={Sep}, pages={1763–1786} } @article{shi_song_lu_2017, title={Discussion of ’Random Projection Ensemble Classification’}, volume={79}, number={4}, journal={Journal of the Royal Statistical Society, Series B}, author={Shi, C. and Song, R. and Lu, W.}, year={2017}, pages={1021} } @article{liu_song_zeng_zhang_2017, title={Principal components adjusted variable screening}, volume={110}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2016.12.015}, abstractNote={Marginal screening has been established as a fast and effective method for high dimensional variable selection method. There are some drawbacks associated with marginal screening, since the marginal model can be viewed as a model misspecification from the joint true model. A principal components adjusted variable screening method is proposed, which uses top principal components as surrogate covariates to account for the variability of the omitted predictors in generalized linear models. The proposed method is demonstrated with superior numerical performance compared with the competing methods. The efficiency of the method is also illustrated with the analysis of the Affymetrix genechip rat genome 230 2.0 array data and the European American SNPs data.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Liu, Zhongkai and Song, Rui and Zeng, Donglin and Zhang, Jiajia}, year={2017}, month={Jun}, pages={134–144} } @article{wang_zhou_song_sherwood_2018, title={Quantile-Optimal Treatment Regimes}, volume={113}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2017.1330204}, DOI={10.1080/01621459.2017.1330204}, abstractNote={ABSTRACT Finding the optimal treatment regime (or a series of sequential treatment regimes) based on individual characteristics has important applications in areas such as precision medicine, government policies, and active labor market interventions. 
In the current literature, the optimal treatment regime is usually defined as the one that maximizes the average benefit in the potential population. This article studies a general framework for estimating the quantile-optimal treatment regime, which is of importance in many real-world applications. Given a collection of treatment regimes, we consider robust estimation of the quantile-optimal treatment regime, which does not require the analyst to specify an outcome regression model. We propose an alternative formulation of the estimator as a solution of an optimization problem with an estimated nuisance parameter. This novel representation allows us to investigate the asymptotic theory of the estimated optimal treatment regime using empirical process techniques. We derive theory involving a nonstandard convergence rate and a nonnormal limiting distribution. The same nonstandard convergence rate would also occur if the mean optimality criterion is applied, but this has not been studied. Thus, our results fill an important theoretical gap for a general class of policy search methods in the literature. The article investigates both static and dynamic treatment regimes. In addition, doubly robust estimation and alternative optimality criterion such as that based on Gini’s mean difference or weighted quantiles are investigated. Numerical simulations demonstrate the performance of the proposed estimator. A data example from a trial in HIV+ patients is used to illustrate the application. 
Supplementary materials for this article are available online.}, number={523}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Wang, Lan and Zhou, Yu and Song, Rui and Sherwood, Ben}, year={2018}, month={Jun}, pages={1243–1254} } @article{song_luo_zeng_zhang_lu_li_2017, title={Semiparametric single-index model for estimating optimal individualized treatment strategy}, volume={11}, ISSN={["1935-7524"]}, DOI={10.1214/17-ejs1226}, abstractNote={Different from the standard treatment discovery framework which is used for finding single treatments for a homogenous group of patients, personalized medicine involves finding therapies that are tailored to each individual in a heterogeneous group. In this paper, we propose a new semiparametric additive single-index model for estimating individualized treatment strategy. The model assumes a flexible and nonparametric link function for the interaction between treatment and predictive covariates. We estimate the rule via monotone B-splines and establish the asymptotic properties of the estimators. Both simulations and an real data application demonstrate that the proposed method has a competitive performance.}, number={1}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Song, Rui and Luo, Shikai and Zeng, Donglin and Zhang, Hao Helen and Lu, Wenbin and Li, Zhiguo}, year={2017}, pages={364–384} } @article{kang_lu_song_2017, title={Subgroup detection and sample size calculation with proportional hazards regression for survival data}, volume={36}, ISSN={0277-6715}, url={http://dx.doi.org/10.1002/sim.7441}, DOI={10.1002/sim.7441}, abstractNote={In this paper, we propose a testing procedure for detecting and estimating the subgroup with an enhanced treatment effect in survival data analysis. 
Here, we consider a new proportional hazard model that includes a nonparametric component for the covariate effect in the control group and a subgroup‐treatment–interaction effect defined by a change plane. We develop a score‐type test for detecting the existence of the subgroup, which is doubly robust against misspecification of the baseline effect model or the propensity score but not both under mild assumptions for censoring. When the null hypothesis of no subgroup is rejected, the change‐plane parameters that define the subgroup can be estimated on the basis of supremum of the normalized score statistic. The asymptotic distributions of the proposed test statistic under the null and local alternative hypotheses are established. On the basis of established asymptotic distributions, we further propose a sample size calculation formula for detecting a given subgroup effect and derive a numerical algorithm for implementing the sample size calculation in clinical trial designs. The performance of the proposed approach is evaluated by simulation studies. An application to an AIDS clinical trial data is also given for illustration.}, number={29}, journal={Statistics in Medicine}, publisher={Wiley}, author={Kang, Suhyun and Lu, Wenbin and Song, Rui}, year={2017}, month={Aug}, pages={4646–4659} } @article{chen_liu_zeng_song_zhao_kosorok_2016, title={Bayesian nonparametric estimation for dynamic treatment regimes with sequential transition times comment}, volume={111}, number={515}, journal={Journal of the American Statistical Association}, author={Chen, J. X. and Liu, Y. F. and Zeng, D. L. and Song, R. and Zhao, Y. Q. and Kosorok, M. 
R.}, year={2016}, pages={942–947} } @article{fan_song_lu_2017, title={Change-Plane Analysis for Subgroup Detection and Sample Size Calculation}, volume={112}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2016.1166115}, DOI={10.1080/01621459.2016.1166115}, abstractNote={ABSTRACT We propose a systematic method for testing and identifying a subgroup with an enhanced treatment effect. We adopt a change-plane technique to first test the existence of a subgroup, and then identify the subgroup if the null hypothesis on nonexistence of such a subgroup is rejected. A semiparametric model is considered for the response with an unspecified baseline function and an interaction between a subgroup indicator and treatment. A doubly robust test statistic is constructed based on this model, and asymptotic distributions of the test statistic under both null and local alternative hypotheses are derived. Moreover, a sample size calculation method for subgroup detection is developed based on the proposed statistic. The finite sample performance of the proposed test is evaluated via simulations. Finally, the proposed methods for subgroup identification and sample size calculation are applied to a data from an AIDS study.}, number={518}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Fan, Ailin and Song, Rui and Lu, Wenbin}, year={2017}, month={Apr}, pages={769–778} } @article{chen_liu_zeng_song_zhao_kosorok_2016a, title={Comment}, volume={111}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2016.1200914}, DOI={10.1080/01621459.2016.1200914}, abstractNote={ABSTRACT Xu, Müller, Wahed, and Thall proposed a Bayesian model to analyze an acute leukemia study involving multi-stage chemotherapy regimes. We discuss two alternative methods, Q-learning and O-learning, to solve the same problem from the machine learning point of view. 
The numerical studies show that these methods can be flexible and have advantages in some situations to handle treatment heterogeneity while being robust to model misspecification.}, number={515}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Chen, Jingxiang and Liu, Yufeng and Zeng, Donglin and Song, Rui and Zhao, Yingqi and Kosorok, Michael R.}, year={2016}, month={Jul}, pages={942–947} } @article{fan_lu_song_zhou_2016, title={Concordance-assisted learning for estimating optimal individualized treatment regimes}, volume={79}, ISSN={1369-7412}, url={http://dx.doi.org/10.1111/rssb.12216}, DOI={10.1111/rssb.12216}, abstractNote={Summary We propose new concordance-assisted learning for estimating optimal individualized treatment regimes. We first introduce a type of concordance function for prescribing treatment and propose a robust rank regression method for estimating the concordance function. We then find treatment regimes, up to a threshold, to maximize the concordance function, named the prescriptive index. Finally, within the class of treatment regimes that maximize the concordance function, we find the optimal threshold to maximize the value function. We establish the rate of convergence and asymptotic normality of the proposed estimator for parameters in the prescriptive index. An induced smoothing method is developed to estimate the asymptotic variance of the estimator. We also establish the n1/3-consistency of the estimated optimal threshold and its limiting distribution. In addition, a doubly robust estimator of parameters in the prescriptive index is developed under a class of monotonic index models. 
The practical use and effectiveness of the methodology proposed are demonstrated by simulation studies and an application to an acquired immune deficiency syndrome data set.}, number={5}, journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, publisher={Wiley}, author={Fan, Caiyun and Lu, Wenbin and Song, Rui and Zhou, Yong}, year={2016}, month={Oct}, pages={1565–1582} } @article{jiang_lu_song_davidian_2016, title={On estimation of optimal treatment regimes for maximizing t -year survival probability}, volume={79}, ISSN={1369-7412}, url={http://dx.doi.org/10.1111/rssb.12201}, DOI={10.1111/rssb.12201}, abstractNote={Summary A treatment regime is a deterministic function that dictates personalized treatment based on patients’ individual prognostic information. There is increasing interest in finding optimal treatment regimes, which determine treatment at one or more treatment decision points to maximize expected long-term clinical outcomes, where larger outcomes are preferred. For chronic diseases such as cancer or human immunodeficiency virus infection, survival time is often the outcome of interest, and the goal is to select treatment to maximize survival probability. We propose two non-parametric estimators for the survival function of patients following a given treatment regime involving one or more decisions, i.e. the so-called value. On the basis of data from a clinical or observational study, we estimate an optimal regime by maximizing these estimators for the value over a prespecified class of regimes. Because the value function is very jagged, we introduce kernel smoothing within the estimator to improve performance. Asymptotic properties of the proposed estimators of value functions are established under suitable regularity conditions, and simulation studies evaluate the finite sample performance of the regime estimators. 
The methods are illustrated by application to data from an acquired immune deficiency syndrome clinical trial.}, number={4}, journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, publisher={Wiley}, author={Jiang, Runchao and Lu, Wenbin and Song, Rui and Davidian, Marie}, year={2016}, month={Sep}, pages={1165–1185} } @article{bai_tsiatis_lu_song_2017, title={Optimal treatment regimes for survival endpoints using locally-efficient doubly-robust estimator from a classification perspective}, volume={23}, ISSN={["1572-9249"]}, DOI={10.1007/s10985-016-9376-x}, abstractNote={A treatment regime at a single decision point is a rule that assigns a treatment, among the available options, to a patient based on the patient's baseline characteristics. The value of a treatment regime is the average outcome of a population of patients if they were all treated in accordance to the treatment regime, where large values are desirable. The optimal treatment regime is a regime which results in the greatest value. Typically, the optimal treatment regime is estimated by positing a regression relationship for the outcome of interest as a function of treatment and baseline characteristics. However, this can lead to suboptimal treatment regimes when the regression model is misspecified. We instead consider value search estimators for the optimal treatment regime where we directly estimate the value for any treatment regime and then maximize this estimator over a class of regimes. For many studies the primary outcome of interest is survival time which is often censored. We derive a locally efficient, doubly robust, augmented inverse probability weighted complete case estimator for the value function with censored survival data and study the large sample properties of this estimator. The optimization is realized from a weighted classification perspective that allows us to use available off the shelf software. 
In some studies one treatment may have greater toxicity or side effects, thus we also consider estimating a quality adjusted optimal treatment regime that allows a patient to trade some additional risk of death in order to avoid the more invasive treatment.}, number={4}, journal={Lifetime Data Analysis}, author={Bai, X. and Tsiatis, A. and Lu, W. and Song, R.}, year={2017}, pages={585–604} } @article{shi_song_lu_2016, title={Robust learning for optimal treatment decision with NP-dimensionality}, volume={10}, ISSN={["1935-7524"]}, DOI={10.1214/16-ejs1178}, abstractNote={In order to identify important variables that are involved in making optimal treatment decision, Lu, Zhang and Zeng (2013) proposed a penalized least squared regression framework for a fixed number of predictors, which is robust against the misspecification of the conditional mean model. Two problems arise: (i) in a world of explosively big data, effective methods are needed to handle ultra-high dimensional data set, for example, with the dimension of predictors is of the non-polynomial (NP) order of the sample size; (ii) both the propensity score and conditional mean models need to be estimated from data under NP dimensionality. In this paper, we propose a robust procedure for estimating the optimal treatment regime under NP dimensionality. In both steps, penalized regressions are employed with the non-concave penalty function, where the conditional mean model of the response given predictors may be misspecified. The asymptotic properties, such as weak oracle properties, selection consistency and oracle distributions, of the proposed estimators are investigated. In addition, we study the limiting distribution of the estimated value function for the obtained optimal treatment regime. 
The empirical performance of the proposed estimation method is evaluated by simulations and an application to a depression dataset from the STAR*D study.}, number={2}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Shi, Chengchun and Song, Rui and Lu, Wenbin}, year={2016}, pages={2894–2921} } @article{fan_lu_song_2016, title={SEQUENTIAL ADVANTAGE SELECTION FOR OPTIMAL TREATMENT REGIME}, volume={10}, ISSN={["1932-6157"]}, DOI={10.1214/15-aoas849}, abstractNote={Variable selection for optimal treatment regime in a clinical trial or an observational study is getting more attention. Most existing variable selection techniques focused on selecting variables that are important for prediction, therefore some variables that are poor in prediction but are critical for decision-making may be ignored. A qualitative interaction of a variable with treatment arises when treatment effect changes direction as the value of this variable varies. The qualitative interaction indicates the importance of this variable for decision-making. Gunter, Zhu and Murphy (2011) proposed S-score which characterizes the magnitude of qualitative interaction of each variable with treatment individually. In this article, we developed a sequential advantage selection method based on the modified S-score. Our method selects qualitatively interacted variables sequentially, and hence excludes marginally important but jointly unimportant variables or vice versa. The optimal treatment regime based on variables selected via joint model is more comprehensive and reliable. With the proposed stopping criteria, our method can handle a large amount of covariates even if sample size is small. Simulation results show our method performs well in practical settings. 
We further applied our method to data from a clinical trial for depression.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Fan, Ailin and Lu, Wenbin and Song, Rui}, year={2016}, month={Mar}, pages={32–53} } @article{song_banerjee_kosorok_2016, title={ASYMPTOTICS FOR CHANGE-POINT MODELS UNDER VARYING DEGREES OF MIS-SPECIFICATION}, volume={44}, ISSN={["0090-5364"]}, DOI={10.1214/15-aos1362}, abstractNote={Change-point models are widely used by statisticians to model drastic changes in the pattern of observed data. Least squares/maximum likelihood based estimation of change-points leads to curious asymptotic phenomena. When the change-point model is correctly specified, such estimates generally converge at a fast rate (n) and are asymptotically described by minimizers of a jump process. Under complete mis-specification by a smooth curve, i.e. when a change-point model is fitted to data described by a smooth curve, the rate of convergence slows down to n1/3 and the limit distribution changes to that of the minimizer of a continuous Gaussian process. In this paper we provide a bridge between these two extreme scenarios by studying the limit behavior of change-point estimates under varying degrees of model mis-specification by smooth curves, which can be viewed as local alternatives. We find that the limiting regime depends on how quickly the alternatives approach a change-point model. We unravel a family of 'intermediate' limits that can transition, at least qualitatively, to the limits in the two extreme scenarios. The theoretical results are illustrated via a set of carefully designed simulations. 
We also demonstrate how inference for the change-point parameter can be performed in absence of knowledge of the underlying scenario by resorting to subsampling techniques that involve estimation of the convergence rate.}, number={1}, journal={ANNALS OF STATISTICS}, author={Song, Rui and Banerjee, Moulinath and Kosorok, Michael R.}, year={2016}, month={Feb}, pages={153–182} } @article{song_kosorok_zeng_zhao_laber_yuan_2015, title={On sparse representation for optimal individualized treatment selection with penalized outcome weighted learning}, volume={4}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/STA4.78}, DOI={10.1002/STA4.78}, abstractNote={As a new strategy for treatment, which takes individual heterogeneity into consideration, personalized medicine is of growing interest. Discovering individualized treatment rules for patients who have heterogeneous responses to treatment is one of the important areas in developing personalized medicine. As more and more information per individual is being collected in clinical studies and not all of the information is relevant for treatment discovery, variable selection becomes increasingly important in discovering individualized treatment rules. In this article, we develop a variable selection method based on penalized outcome weighted learning through which an optimal treatment rule is considered as a classification problem where each subject is weighted proportional to his or her clinical outcome. We show that the resulting estimator of the treatment rule is consistent and establish variable selection consistency and the asymptotic distribution of the estimators. The performance of the proposed approach is demonstrated via simulation studies and an analysis of chronic depression data. 
Copyright © 2015 John Wiley & Sons, Ltd.}, number={1}, journal={Stat}, publisher={Wiley}, author={Song, Rui and Kosorok, Michael and Zeng, Donglin and Zhao, Yingqi and Laber, Eric and Yuan, Ming}, year={2015}, month={Feb}, pages={59–68} } @article{song_wang_zeng_kosorok_2015, title={Penalized q-learning for dynamic treatment regimens}, volume={25}, number={3}, journal={Statistica Sinica}, author={Song, R. and Wang, W. W. and Zeng, D. L. and Kosorok, M. R.}, year={2015}, pages={901–920} } @article{bradic_song_2015, title={Structured estimation for the nonparametric Cox model}, volume={9}, ISSN={["1935-7524"]}, DOI={10.1214/15-ejs1004}, abstractNote={© 2015, Institute of Mathematical Statistics. All right received. In this paper, we study theoretical properties of the non-parametric Cox proportional hazards model in a high dimensional non-asymptotic setting. We establish the finite sample oracle l2 bounds for a general class of group penalties that allow possible hierarchical and overlapping structures. We approximate the log partial likelihood with a quadratic functional and use truncation arguments to reduce the error. Unlike the existing literature, we exemplify differences between bounded and possibly unbounded non-parametric covariate effects. In particular, we show that bounded effects can lead to prediction bounds similar to the simple linear models, whereas unbounded effects can lead to larger prediction bounds. In both situations we do not assume that the true parameter is necessarily sparse. Lastly, we present new theoretical results for hierarchical and smoothed estimation in the non-parametric Cox model. 
We provide two examples of the proposed general framework: a Cox model with interactions and an ANOVA type Cox model.}, number={1}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Bradic, Jelena and Song, Rui}, year={2015}, pages={492–534} } @article{laber_zhao_regh_davidian_tsiatis_stanford_zeng_song_kosorok_2015, title={Using pilot data to size a two-arm randomized trial to find a nearly optimal personalized treatment strategy}, volume={35}, ISSN={0277-6715}, url={http://dx.doi.org/10.1002/SIM.6783}, DOI={10.1002/SIM.6783}, abstractNote={A personalized treatment strategy formalizes evidence‐based treatment selection by mapping patient information to a recommended treatment. Personalized treatment strategies can produce better patient outcomes while reducing cost and treatment burden. Thus, among clinical and intervention scientists, there is a growing interest in conducting randomized clinical trials when one of the primary aims is estimation of a personalized treatment strategy. However, at present, there are no appropriate sample size formulae to assist in the design of such a trial. Furthermore, because the sampling distribution of the estimated outcome under an estimated optimal treatment strategy can be highly sensitive to small perturbations in the underlying generative model, sample size calculations based on standard (uncorrected) asymptotic approximations or computer simulations may not be reliable. We offer a simple and robust method for powering a single stage, two‐armed randomized clinical trial when the primary aim is estimating the optimal single stage personalized treatment strategy. The proposed method is based on inverting a plugin projection confidence interval and is thereby regular and robust to small perturbations of the underlying generative model. 
The proposed method requires elicitation of two clinically meaningful parameters from clinical scientists and uses data from a small pilot study to estimate nuisance parameters, which are not easily elicited. The method performs well in simulated experiments and is illustrated using data from a pilot study of time to conception and fertility awareness. Copyright © 2015 John Wiley & Sons, Ltd.}, number={8}, journal={Statistics in Medicine}, publisher={Wiley}, author={Laber, Eric B. and Zhao, Ying-Qi and Regh, Todd and Davidian, Marie and Tsiatis, Anastasios and Stanford, Joseph B. and Zeng, Donglin and Song, Rui and Kosorok, Michael R.}, year={2015}, month={Oct}, pages={1245–1256} } @article{song_lu_ma_jeng_2014, title={Censored rank independence screening for high-dimensional survival data}, volume={101}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/asu047}, DOI={10.1093/biomet/asu047}, abstractNote={In modern statistical applications, the dimension of covariates can be much larger than the sample size. In the context of linear models, correlation screening (Fan and Lv, 2008) has been shown to reduce the dimension of such data effectively while achieving the sure screening property, i.e., all of the active variables can be retained with high probability. However, screening based on the Pearson correlation does not perform well when applied to contaminated covariates and/or censored outcomes. In this paper, we study censored rank independence screening of high-dimensional survival data. The proposed method is robust to predictors that contain outliers, works for a general class of survival models, and enjoys the sure screening property. Simulations and an analysis of real data demonstrate that the proposed method performs competitively on survival data sets of moderate size and high-dimensional predictors, even when these are contaminated.}, number={4}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Song, R. and Lu, W. 
and Ma, S. and Jeng, X. (Jessie)}, year={2014}, month={Oct}, pages={799–814} } @article{goldberg_song_zeng_kosorok_2014, title={Comment on "Dynamic treatment regimes: Technical challenges and applications"}, volume={8}, journal={Electronic Journal of Statistics}, author={Goldberg, Y. and Song, R. and Zeng, D. L. and Kosorok, M. R.}, year={2014}, pages={1290–1300} } @article{song_kosorok_fine_2014, title={Comment on ”Multiscale change point inference” by Frick, Munk and Sieling}, volume={76}, number={3}, journal={Journal of the Royal Statistical Society, Series B}, author={Song, R. and Kosorok, M.R. and Fine, J.P.}, year={2014}, pages={564} } @article{zhao_zeng_laber_song_yuan_kosorok_2014, title={Doubly robust learning for estimating individualized treatment with censored data}, volume={102}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/asu050}, DOI={10.1093/biomet/asu050}, abstractNote={Individualized treatment rules recommend treatments based on individual patient characteristics in order to maximize clinical benefit. When the clinical outcome of interest is survival time, estimation is often complicated by censoring. We develop nonparametric methods for estimating an optimal individualized treatment rule in the presence of censored data. To adjust for censoring, we propose a doubly robust estimator which requires correct specification of either the censoring model or survival model, but not both; the method is shown to be Fisher consistent when either model is correct. Furthermore, we establish the convergence rate of the expected survival under the estimated optimal individualized treatment rule to the expected survival under the optimal individualized treatment rule. We illustrate the proposed methods using simulation study and data from a Phase III clinical trial on non-small cell lung cancer.}, number={1}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Zhao, Y. Q. and Zeng, D. and Laber, E. B. and Song, R. 
and Yuan, M. and Kosorok, M. R.}, year={2014}, month={Dec}, pages={151–168} } @article{song_yi_zou_2014, title={On varying-coefficient independence screening for high-dimensional varying-coefficient models}, volume={24}, number={4}, journal={Statistica Sinica}, author={Song, R. and Yi, F. and Zou, H.}, year={2014}, pages={1735–1752} } @inbook{goldberg_song_kosorok_2013, place={Beachwood, Ohio}, series={Institute of Mathematical Statistics Collections}, title={Adaptive Q-learning}, booktitle={From Probability to Statistics and Back: High-Dimensional Models and Processes}, publisher={Institute of Mathematical Statistics}, author={Goldberg, Y. and Song, R. and Kosorok, M. R.}, editor={Banerjee, M. and Bunea, F. and Huang, J. and Koltchinskii, V. and Maathuis, M. H.}, year={2013}, pages={150–162}, collection={Institute of Mathematical Statistics Collections} } @article{song_huang_ma_2012, title={Integrative Prescreening in Analysis of Multiple Cancer Genomic Studies}, volume={13}, DOI={10.1186/1471-2105-13-168}, abstractNote={In high throughput cancer genomic studies, results from the analysis of single datasets often suffer from a lack of reproducibility because of small sample sizes. Integrative analysis can effectively pool and analyze multiple datasets and provides a cost effective way to improve reproducibility. In integrative analysis, simultaneously analyzing all genes profiled may incur high computational cost. A computationally affordable remedy is prescreening, which fits marginal models, can be conducted in a parallel manner, and has low computational cost.}, number={168}, journal={BMC Bioinformatics}, author={Song, R. and Huang, J. 
and Ma, S.}, year={2012} } @article{fan_feng_song_2011, title={Nonparametric Independence Screening in Sparse Ultra-High-Dimensional Additive Models}, volume={106}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1198/jasa.2011.tm09779}, DOI={10.1198/jasa.2011.tm09779}, abstractNote={A variable screening procedure via correlation learning was proposed by Fan and Lv (2008) to reduce dimensionality in sparse ultra-high-dimensional models. Even when the true model is linear, the marginal regression can be highly nonlinear. To address this issue, we further extend the correlation learning to marginal nonparametric learning. Our nonparametric independence screening (NIS) is a specific type of sure independence screening. We propose several closely related variable screening procedures. We show that with general nonparametric models, under some mild technical conditions, the proposed independence screening methods have a sure screening property. The extent to which the dimensionality can be reduced by independence screening is also explicitly quantified. As a methodological extension, we also propose a data-driven thresholding and an iterative nonparametric independence screening (INIS) method to enhance the finite- sample performance for fitting sparse additive models. 
The simulation results and a real data analysis demonstrate that the proposed procedure works well with moderate sample size and large dimension and performs better than competing methods.}, number={494}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Fan, Jianqing and Feng, Yang and Song, Rui}, year={2011}, month={Jun}, pages={544–557} } @article{zhou_song_wu_qin_2010, title={Statistical Inference for a Two-Stage Outcome-Dependent Sampling Design with a Continuous Outcome}, volume={67}, ISSN={0006-341X}, url={http://dx.doi.org/10.1111/j.1541-0420.2010.01446.x}, DOI={10.1111/j.1541-0420.2010.01446.x}, abstractNote={Summary The two‐stage case–control design has been widely used in epidemiology studies for its cost‐effectiveness and improvement of the study efficiency ( White, 1982 , American Journal of Epidemiology 115, 119–128; Breslow and Cain, 1988 , Biometrika 75, 11–20). The evolution of modern biomedical studies has called for cost‐effective designs with a continuous outcome and exposure variables. In this article, we propose a new two‐stage outcome‐dependent sampling (ODS) scheme with a continuous outcome variable, where both the first‐stage data and the second‐stage data are from ODS schemes. We develop a semiparametric empirical likelihood estimation for inference about the regression parameters in the proposed design. Simulation studies were conducted to investigate the small‐sample behavior of the proposed estimator. We demonstrate that, for a given statistical power, the proposed design will require a substantially smaller sample size than the alternative designs. 
The proposed method is illustrated with an environmental health study conducted at National Institutes of Health.}, number={1}, journal={Biometrics}, publisher={Wiley}, author={Zhou, Haibo and Song, Rui and Wu, Yuanshan and Qin, Jing}, year={2010}, month={Jun}, pages={194–202} } @article{fan_song_2010, title={Sure independence screening in generalized linear models with NP-dimensionality}, volume={38}, ISSN={0090-5364}, url={http://dx.doi.org/10.1214/10-aos798}, DOI={10.1214/10-aos798}, abstractNote={Ultrahigh dimensional variable selection plays an increasingly important role in contemporary scientific discoveries and statistical research. Among others, Fan and Lv (2008) propose an independent screening framework by ranking the marginal correlations. They showed that the correlation ranking procedure possesses a sure independence screening property within the context of the linear model with Gaussian covariates and responses. In this paper, we propose a more general version of the independent learning with ranking the maximum marginal likelihood estimates or the maximum marginal likelihood itself in generalized linear models. We show that the proposed methods, with Fan and Lv (2008) as a very special case, also possess the sure screening property with vanishing false selection rate. The conditions under which that the independence learning possesses a sure screening is surprisingly simple. This justifies the applicability of such a simple method in a wide spectrum. We quantify explicitly the extent to which the dimensionality can be reduced by independence screening, which depends on the interactions of the covariance matrix of covariates and true parameters. Simulation studies are used to illustrate the utility of the proposed approaches. In addition, we � Supported in part by Grant NSF grants DMS-0714554 and DMS-0704337. The bulk of the work was conducted when Rui Song was a postdoctoral research fellow at Princeton University. 
The authors would like to thank the associate editor and two referees for their constructive comments that improve the presentation and the results of the paper. AMS 2000 subject classifications: Primary 68Q32, 62J12; secondary 62E99, 60F10}, number={6}, journal={The Annals of Statistics}, publisher={Institute of Mathematical Statistics}, author={Fan, Jianqing and Song, Rui}, year={2010}, month={Dec}, pages={3567–3604} } @article{song_zhou_kosorok_2009, title={A note on semiparametric efficient inference for two-stage outcome-dependent sampling with a continuous outcome}, volume={96}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/asn073}, DOI={10.1093/biomet/asn073}, abstractNote={Outcome-dependent sampling designs have been shown to be a cost effective way to enhance study efficiency. We show that the outcome-dependent sampling design with a continuous outcome can be viewed as an extension of the two-stage case-control designs to the continuous-outcome case. We further show that the two-stage outcome-dependent sampling has a natural link with the missing-data and biased-sampling framework. Through the use of semiparametric inference and missing-data techniques, we show that a certain semiparametric maximum likelihood estimator is computationally convenient and achieves the semiparametric efficient information bound. We demonstrate this both theoretically and through simulation.}, number={1}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Song, R. and Zhou, H. and Kosorok, M. 
R.}, year={2009}, month={Jan}, pages={221–228} } @article{anand_carson_galle_song_boehmer_ghali_jaski_lindenfeld_o'connor_steinberg_et al._2009, title={Cardiac Resynchronization Therapy Reduces the Risk of Hospitalizations in Patients With Advanced Heart Failure}, volume={119}, ISSN={0009-7322 1524-4539}, url={http://dx.doi.org/10.1161/circulationaha.108.793273}, DOI={10.1161/circulationaha.108.793273}, abstractNote={ Background— In the Comparison of Medical Therapy, Pacing and Defibrillation in Heart Failure (COMPANION) trial, 1520 patients with advanced heart failure were assigned in a 1:2:2 ratio to optimal pharmacological therapy or optimal pharmacological therapy plus cardiac resynchronization therapy (CRT-P) or CRT with defibrillator (CRT-D). Use of CRT-P and CRT-D was associated with a significant reduction in combined risk of death or all-cause hospitalizations. Because mortality also was significantly reduced (optimal pharmacological therapy versus CRT-D only), an assessment of the true reduction in hospitalization rates must consider the competing risk of death and varying follow-up times. Methods and Results— To overcome the challenges of comparing treatment groups, we used a nonparametric test of right-censored recurrent events that accounts for multiple hospital admissions, differential follow-up time between treatment groups, and death as a competing risk. An end-point committee adjudicated and classified all hospitalizations. Compared with optimal pharmacological therapy, CRT-P and CRT-D were associated with a 21% and 25% reduction in all-cause, 34% and 37% reduction in cardiac, and 44% and 41% reduction in heart failure hospital admissions per patient-year of follow-up, respectively. Similar reductions were seen in hospitalization days per patient-year. The reduction in hospitalization rate for heart failure in the CRT groups appeared within days of randomization and remained sustained. 
Noncardiac hospitalization rates were not different between groups. Conclusion— Use of CRT with or without a defibrillator in advanced heart failure patients was associated with marked reductions in all-cause, cardiac, and heart failure hospitalization rates in an analysis that accounted for the competing risk of mortality and unequal follow-up time. }, number={7}, journal={Circulation}, publisher={Ovid Technologies (Wolters Kluwer Health)}, author={Anand, Inder S. and Carson, Peter and Galle, Elizabeth and Song, Rui and Boehmer, John and Ghali, Jalal K. and Jaski, Brian and Lindenfeld, JoAnn and O'Connor, Christopher and Steinberg, Jonathan S. and et al.}, year={2009}, month={Feb}, pages={969–977} } @article{song_cai_2009, title={Joint covariate-adjusted score test statistics for recurrent events and a terminal event}, volume={16}, ISSN={1380-7870 1572-9249}, url={http://dx.doi.org/10.1007/s10985-009-9140-6}, DOI={10.1007/s10985-009-9140-6}, abstractNote={Recurrent events data are frequently encountered and could be stopped by a terminal event in clinical trials. It is of interest to assess the treatment efficacy simultaneously with respect to both the recurrent events and the terminal event in many applications. In this paper we propose joint covariate-adjusted score test statistics based on joint models of recurrent events and a terminal event. No assumptions on the functional form of the covariates are needed. Simulation results show that the proposed tests can improve the efficiency over tests based on covariate unadjusted model. 
The proposed tests are applied to the SOLVD data for illustration.}, number={4}, journal={Lifetime Data Analysis}, publisher={Springer Science and Business Media LLC}, author={Song, Rui and Cai, Jianwen}, year={2009}, month={Dec}, pages={491–508} } @article{song_kosorok_fine_2009, title={On asymptotically optimal tests under loss of identifiability in semiparametric models}, volume={37}, ISSN={0090-5364}, url={http://dx.doi.org/10.1214/08-aos643}, DOI={10.1214/08-aos643}, abstractNote={We consider tests of hypotheses when the parameters are not identifiable under the null in semiparametric models, where regularity conditions for profile likelihood theory fail. Exponential average tests based on integrated profile likelihood are constructed and shown to be asymptotically optimal under a weighted average power criterion with respect to a prior on the nonidentifiable aspect of the model. These results extend existing results for parametric models, which involve more restrictive assumptions on the form of the alternative than do our results. Moreover, the proposed tests accommodate models with infinite dimensional nuisance parameters which either may not be identifiable or may not be estimable at the usual parametric rate. Examples include tests of the presence of a change-point in the Cox model with current status data and tests of regression parameters in odds-rate models with right censored data. Optimal tests have not previously been studied for these scenarios. We study the asymptotic distribution of the proposed tests under the null, fixed contiguous alternatives and random contiguous alternatives. We also propose a weighted bootstrap procedure for computing the critical values of the test statistics. The optimal tests perform well in simulation studies, where they may exhibit improved power over alternative tests.}, number={5A}, journal={The Annals of Statistics}, publisher={Institute of Mathematical Statistics}, author={Song, Rui and Kosorok, Michael R. 
and Fine, Jason P.}, year={2009}, month={Oct}, pages={2409–2444} } @article{meunier_song_lutz_andersen_doherty_bruggink_oppelt_2008, title={Proximate Cues for a Short-Distance Migratory Species: an Application of Survival Analysis}, volume={72}, ISSN={0022-541X 1937-2817}, url={http://dx.doi.org/10.2193/2006-521}, DOI={10.2193/2006-521}, abstractNote={ABSTRACT  Investigation of bird migration has often highlighted the importance of external factors in determining timing of migration. However, little distinction has been made between short‐ and long‐distance migrants and between local and flight birds (passage migrants) in describing migration chronology. In addition, measures of food abundance as a proximate factor influencing timing of migration are lacking in studies of migration chronology. To address the relationship between environmental variables and timing of migration, we quantified the relative importance of proximate external factors on migration chronology of local American woodcock (Scolopax minor), a short distance migrant, using event‐time analysis methods (survival analysis). We captured 1,094 woodcock local to our study sites in Michigan, Minnesota, and Wisconsin (USA) during autumn 2002–2004 and documented 786 departure dates for these birds. Photoperiod appeared to provide an initial proximate cue for timing of departure. Moon phase was important in modifying timing of departure, which may serve as a navigational aid in piloting and possibly orientation. Local synoptic weather variables also contributed to timing of departure by changing the rate of departure from our study sites. We found no evidence that food availability influenced timing of woodcock departure. Our results suggest that woodcock use a conservative photoperiod‐controlled strategy with proximate modifiers for timing of migration rather than relying on abundance of their primary food, earthworms. 
Managing harvest pressure on local birds by adjusting season lengths may be an effective management tool with consistent migration patterns from year to year based on photoperiod.}, number={2}, journal={Journal of Wildlife Management}, publisher={Wiley}, author={Meunier, Jed and Song, Rui and Lutz, R. Scott and Andersen, David E. and Doherty, Kevin E. and Bruggink, John G. and Oppelt, Eileen}, year={2008}, month={Feb}, pages={440–448} } @article{song_cook_kosorok_2008, title={What We Want versus What We Can Get: A Closer Look at Failure Time Endpoints for Cardiovascular Studies}, volume={18}, ISSN={1054-3406 1520-5711}, url={http://dx.doi.org/10.1080/10543400701697224}, DOI={10.1080/10543400701697224}, abstractNote={In this article, we review the use of all-cause mortality and the composite endpoint as the primary endpoint and propose a recurrent composite endpoint as an alternative primary endpoint. Some general suggestions to trialists and practitioners are provided. In summary, the use of the composite endpoint is an issue that needs clarification.}, number={2}, journal={Journal of Biopharmaceutical Statistics}, publisher={Informa UK Limited}, author={Song, Rui and Cook, Thomas D. and Kosorok, Michael R.}, year={2008}, month={Mar}, pages={370–381} } @article{kosorok_song_2007, title={Inference under right censoring for transformation models with a change-point based on a covariate threshold}, volume={35}, ISSN={0090-5364}, url={http://dx.doi.org/10.1214/009053606000001244}, DOI={10.1214/009053606000001244}, abstractNote={We consider linear transformation models applied to right censored survival data with a change-point based on a covariate threshold. We establish consistency and weak convergence of the nonparametric maximum likelihood estimators. The change-point parameter is shown to be $n$-consistent, while the remaining parameters are shown to have the expected root-$n$ consistency. 
We show that the procedure is adaptive in the sense that the non-threshold parameters are estimable with the same precision as if the true threshold value were known. We also develop Monte-Carlo methods of inference for model parameters and score tests for the existence of a change-point. A key difficulty here is that some of the model parameters are not identifiable under the null hypothesis of no change-point. Simulation studies establish the validity of the proposed score tests for finite sample sizes.}, number={3}, journal={The Annals of Statistics}, publisher={Institute of Mathematical Statistics}, author={Kosorok, Michael R. and Song, Rui}, year={2007}, month={Jul}, pages={957–989} } @article{song_kosorok_cai_2007, title={Robust Covariate-Adjusted Log-Rank Statistics and Corresponding Sample Size Formula for Recurrent Events Data}, volume={64}, ISSN={0006-341X}, url={http://dx.doi.org/10.1111/j.1541-0420.2007.00948.x}, DOI={10.1111/j.1541-0420.2007.00948.x}, abstractNote={Summary Recurrent events data are frequently encountered in clinical trials. This article develops robust covariate‐adjusted log‐rank statistics applied to recurrent events data with arbitrary numbers of events under independent censoring and the corresponding sample size formula. The proposed log‐rank tests are robust with respect to different data‐generating processes and are adjusted for predictive covariates. It reduces to the Kong and Slud (1997, Biometrika 84, 847–862) setting in the case of a single event. The sample size formula is derived based on the asymptotic normality of the covariate‐adjusted log‐rank statistics under certain local alternatives and a working model for baseline covariates in the recurrent event data context. 
When the effect size is small and the baseline covariates do not contain significant information about event times, it reduces to the same form as that of Schoenfeld (1983, Biometrics39, 499–503) for cases of a single event or independent event times within a subject. We carry out simulations to study the control of type I error and the comparison of powers between several methods in finite samples. The proposed sample size formula is illustrated using data from an rhDNase study.}, number={3}, journal={Biometrics}, publisher={Wiley}, author={Song, Rui and Kosorok, Michael R. and Cai, Jianwen}, year={2007}, month={Dec}, pages={741–750} }