@article{zhang_li_zhou_zhou_shen_2019, title={TENSOR GENERALIZED ESTIMATING EQUATIONS FOR LONGITUDINAL IMAGING ANALYSIS}, volume={29}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202017.0153}, abstractNote={Longitudinal neuroimaging studies are becoming increasingly prevalent, where brain images are collected on multiple subjects at multiple time points. Analyses of such data are scientifically important, but also challenging. Brain images are in the form of multidimensional arrays, or tensors, which are characterized by both ultrahigh dimensionality and a complex structure. Longitudinally repeated images and induced temporal correlations add a further layer of complexity. Despite some recent efforts, there exist very few solutions for longitudinal imaging analyses. In response to the increasing need to analyze longitudinal imaging data, we propose several tensor generalized estimating equations (GEEs). The proposed GEE approach accounts for intra-subject correlation, and an imposed low-rank structure on the coefficient tensor effectively reduces the dimensionality. We also propose a scalable estimation algorithm, establish the asymptotic properties of the solution to the tensor GEEs, and investigate sparsity regularization for the purpose of region selection. We demonstrate the proposed method using simulations and by analyzing a real data set from the Alzheimer's Disease Neuroimaging Initiative.}, number={4}, journal={STATISTICA SINICA}, author={Zhang, Xiang and Li, Lexin and Zhou, Hua and Zhou, Yeqing and Shen, Dinggang}, year={2019}, month={Oct}, pages={1977–2005} } @article{guo_lu_li_2015, title={Forward Stagewise Shrinkage and Addition for High Dimensional Censored Regression}, volume={7}, ISSN={["1867-1772"]}, DOI={10.1007/s12561-014-9114-4}, abstractNote={Despite enormous development on variable selection approaches in recent years, modeling and selection of high dimensional censored regression remains a challenging question. 
When the number of predictors p far exceeds the number of observational units n and the outcome is censored, computations of existing solutions often become difficult, or even infeasible in some situations, while performances frequently deteriorate. In this article, we aim at simultaneous model estimation and variable selection for Cox proportional hazards models with high dimensional covariates. We propose a forward stage-wise shrinkage and addition approach for that purpose. Our proposal extends a popular statistical learning technique, the boosting method. It inherits the flexible nature of boosting and is straightforward to extend to nonlinear Cox models. Meanwhile it advances the classical boosting method by adding explicit variable selection and substantially reducing the number of iterations to the algorithm convergence. Our intensive simulations have showed that the new method enjoys a competitive performance in Cox models with both p < n and p ≥ n scenarios. The new method was also illustrated with analysis of two real microarray survival datasets.}, number={2}, journal={STATISTICS IN BIOSCIENCES}, author={Guo, Zifang and Lu, Wenbin and Li, Lexin}, year={2015}, month={Oct}, pages={225–244} } @article{ding_li_zhu_2014, title={Goodness-of-fit testing-based selection for large-p-small-n problems: A two-stage ranking approach}, volume={145}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2013.08.012}, abstractNote={Abstract In this paper, we investigate two-stage ranking–selection procedures for ultra-high dimensional data in the framework of goodness-of-fit testing. We develop a k-step marginal F-test (MFTk) screening in the first stage. The MFT1 is, as a statistic, equivalent to that used in the sure independence screening (SIS) proposed by Fan and Lv (2008) . The MFTk with k ≥ 2 makes improvement over the MFT1 mainly on better handling correlations among predictors. 
For selecting a more parsimonious working model in the first stage, we propose a soft threshold cutoff through a sequential goodness-of-fit testing. This avoids some drawbacks of the hard threshold cutoff in Fan and Lv (2008) and the extended BIC used in Wang (2009) . In the second stage, we develop one-step backward screening to further remove those insignificant predictors from the model. Further, likewise as the iterative SIS, we provide the iterative versions of the proposed procedures to have more accurate variable selection. Extensive numerical studies and real data analysis are carried out to examine the performance of our proposed procedures.}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Ding, Xiaobo and Li, Lexin and Zhu, Lixing}, year={2014}, month={Feb}, pages={148–164} } @article{zhou_li_2014, title={Regularized matrix regression}, volume={76}, ISSN={["1467-9868"]}, DOI={10.1111/rssb.12031}, abstractNote={Summary}, number={2}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Zhou, Hua and Li, Lexin}, year={2014}, month={Mar}, pages={463–483} } @article{zhao_leng_li_wang_2013, title={HIGH-DIMENSIONAL INFLUENCE MEASURE}, volume={41}, ISSN={["0090-5364"]}, DOI={10.1214/13-aos1165}, abstractNote={Influence diagnosis is important since presence of influential observations could lead to distorted analysis and misleading interpretations. For high-dimensional data, it is particularly so, as the increased dimensionality and complexity may amplify both the chance of an observation being influential, and its potential impact on the analysis. In this article, we propose a novel high-dimensional influence measure for regressions with the number of predictors far exceeding the sample size. Our proposal can be viewed as a high-dimensional counterpart to the classical Cook's distance. 
However, whereas the Cook's distance quantifies the individual observation's influence on the least squares regression coefficient estimate, our new diagnosis measure captures the influence on the marginal correlations, which in turn exerts serious influence on downstream analysis including coefficient estimation, variable selection and screening. Moreover, we establish the asymptotic distribution of the proposed influence measure by letting the predictor dimension go to infinity. Availability of this asymptotic distribution leads to a principled rule to determine the critical value for influential observation detection. Both simulations and real data analysis demonstrate usefulness of the new influence diagnosis measure.}, number={5}, journal={ANNALS OF STATISTICS}, author={Zhao, Junlong and Leng, Chenlei and Li, Lexin and Wang, Hansheng}, year={2013}, month={Oct}, pages={2639–2667} } @article{zhou_li_zhu_2013, title={Tensor Regression with Applications in Neuroimaging Data Analysis}, volume={108}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.776499}, abstractNote={Classical regression methods treat covariates as a vector and estimate a corresponding vector of regression coefficients. Modern applications in medical imaging generate covariates of more complex form such as multidimensional arrays (tensors). Traditional statistical and computational methods are proving insufficient for analysis of these high-throughput data due to their ultrahigh dimensionality as well as complex structure. In this article, we propose a new family of tensor regression models that efficiently exploit the special structure of tensor covariates. Under this framework, ultrahigh dimensionality is reduced to a manageable level, resulting in efficient estimation and prediction. A fast and highly scalable estimation algorithm is proposed for maximum likelihood estimation and its associated asymptotic properties are studied. 
Effectiveness of the new methods is demonstrated on both synthetic and real MRI imaging data. Supplementary materials for this article are available online.}, number={502}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhou, Hua and Li, Lexin and Zhu, Hongtu}, year={2013}, month={Jun}, pages={540–552} } @article{sun_li_2012, title={Multiple Loci Mapping via Model-free Variable Selection}, volume={68}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2011.01650.x}, abstractNote={Summary Despite recent flourish of proposals on variable selection, genome‐wide multiple loci mapping remains to be challenging. The majority of existing variable selection methods impose a model, and often the homoscedastic linear model, prior to selection. However, the true association between the phenotypical trait and the genetic markers is rarely known a priori, and the presence of epistatic interactions makes the association more complex than a linear relation. Model‐free variable selection offers a useful alternative in this context, but the fact that the number of markers p often far exceeds the number of experimental units n renders all the existing model‐free solutions that require n > p inapplicable. In this article, we examine a number of model‐free variable selection methods for small‐n‐large‐p regressions in the context of genome‐wide multiple loci mapping. We propose and advocate a multivariate group‐wise adaptive penalization solution, which requires no model prespecification and thus works for complex trait‐marker association, and handles one variable at a time so that works for n < p. 
Effectiveness of the new method is demonstrated through both intensive simulations and a comprehensive real data analysis across 6100 gene expression traits.}, number={1}, journal={BIOMETRICS}, author={Sun, Wei and Li, Lexin}, year={2012}, month={Mar}, pages={12–22} } @article{zhu_li_zhou_2012, title={Nonlinear dimension reduction with Wright-Fisher kernel for genotype aggregation and association mapping}, volume={28}, ISSN={["1460-2059"]}, DOI={10.1093/bioinformatics/bts406}, abstractNote={Abstract}, number={18}, journal={BIOINFORMATICS}, author={Zhu, Hongjie and Li, Lexin and Zhou, Hua}, year={2012}, month={Sep}, pages={I375–I381} } @article{wu_li_2011, title={Asymptotic properties of sufficient dimension reduction with a diverging number of predictors}, volume={21}, number={2}, journal={Statistica Sinica}, author={Wu, Y. C. and Li, L. X.}, year={2011}, pages={707–730} } @article{zhu_li_2011, title={Biological pathway selection through nonlinear dimension reduction}, volume={12}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxq081}, abstractNote={In the analysis of high-throughput biological data, it is often believed that the biological units such as genes behave interactively by groups, that is, pathways in our context. It is conceivable that utilization of priorly available pathway knowledge would greatly facilitate both interpretation and estimation in statistical analysis of such high-dimensional biological data. In this article, we propose a 2-step procedure for the purpose of identifying pathways that are related to and influence the clinical phenotype. In the first step, a nonlinear dimension reduction method is proposed, which permits flexible within-pathway gene interactions as well as nonlinear pathway effects on the response. In the second step, a regularized model-based pathway ranking and selection procedure is developed that is built upon the summary features extracted from the first step. 
Simulations suggest that the new method performs favorably compared to the existing solutions. An analysis of a glioblastoma microarray data finds 4 pathways that have evidence of support from the biological literature.}, number={3}, journal={BIOSTATISTICS}, author={Zhu, Hongjie and Li, Lexin}, year={2011}, month={Jul}, pages={429–444} } @article{li_zhu_zhu_2011, title={Inference on the primary parameter of interest with the aid of dimension reduction estimation}, volume={73}, journal={Journal of the Royal Statistical Society. Series B, Statistical Methodology}, author={Li, L. X. and Zhu, L. P. and Zhu, L. X.}, year={2011}, pages={59–80} } @article{zhu_li_li_zhu_2011, title={Model-Free Feature Screening for Ultrahigh-Dimensional Data}, volume={106}, ISSN={["1537-274X"]}, DOI={10.1198/jasa.2011.tm10563}, abstractNote={With the recent explosion of scientific data of unprecedented size and complexity, feature ranking and screening are playing an increasingly important role in many scientific studies. In this article, we propose a novel feature screening procedure under a unified model framework, which covers a wide variety of commonly used parametric and semiparametric models. The new method does not require imposing a specific model structure on regression functions, and thus is particularly appealing to ultrahigh-dimensional regressions, where there are a huge number of candidate predictors but little information about the actual model forms. We demonstrate that, with the number of predictors growing at an exponential rate of the sample size, the proposed procedure possesses consistency in ranking, which is both useful in its own right and can lead to consistency in selection. 
The new procedure is computationally efficient and simple, and exhibits a competent empirical performance in our intensive simulations and real data analysis.}, number={496}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhu, Li-Ping and Li, Lexin and Li, Runze and Zhu, Li-Xing}, year={2011}, month={Dec}, pages={1464–1475} } @article{lu_li_2011, title={Sufficient Dimension Reduction for Censored Regressions}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2010.01490.x}, abstractNote={Summary Methodology of sufficient dimension reduction (SDR) has offered an effective means to facilitate regression analysis of high‐dimensional data. When the response is censored, however, most existing SDR estimators cannot be applied, or require some restrictive conditions. In this article, we propose a new class of inverse censoring probability weighted SDR estimators for censored regressions. Moreover, regularization is introduced to achieve simultaneous variable selection and dimension reduction. Asymptotic properties and empirical performance of the proposed methods are examined.}, number={2}, journal={BIOMETRICS}, author={Lu, Wenbin and Li, Lexin}, year={2011}, month={Jun}, pages={513–523} } @article{reich_bondell_li_2011, title={Sufficient Dimension Reduction via Bayesian Mixture Modeling}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2010.01501.x}, abstractNote={Summary Dimension reduction is central to an analysis of data with many predictors. Sufficient dimension reduction aims to identify the smallest possible number of linear combinations of the predictors, called the sufficient predictors, that retain all of the information in the predictors about the response distribution. In this article, we propose a Bayesian solution for sufficient dimension reduction. We directly model the response density in terms of the sufficient predictors using a finite mixture model. 
This approach is computationally efficient and offers a unified framework to handle categorical predictors, missing predictors, and Bayesian variable selection. We illustrate the method using both a simulation study and an analysis of an HIV data set.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Bondell, Howard D. and Li, Lexin}, year={2011}, month={Sep}, pages={886–895} } @inproceedings{cai_chow_lu_li_2010, title={Evaluation of distribution fault diagnosis algorithms using ROC curves}, DOI={10.1109/pes.2010.5588154}, abstractNote={In power distribution fault data, the percentage of faults with different causes could be very different and varies from region to region. This data imbalance issue seriously affects the performance evaluation of fault diagnosis algorithms. Due to the limitations of conventional accuracy (ACC) and geometric mean (G-mean) measures, this paper discusses the application of Receiver Operating Characteristic (ROC) curves in evaluating distribution fault diagnosis performance. After introducing how to obtain ROC curves, Artificial Neural Networks (ANN), Logistic Regression (LR), Support Vector Machines (SVM), Artificial Immune Recognition Systems (AIRS), and K-Nearest Neighbor (KNN) algorithm are compared using ROC curves and Area Under the Curve (AUC) on real-world fault datasets from Progress Energy Carolinas. Experimental results show that AIRS performs best most of the time and ANN is potentially a good algorithm with a proper decision threshold.}, booktitle={{IEEE} Power and Energy Society General Meeting 2010}, author={Cai, Y. X. and Chow, M. Y. and Lu, W. B. and Li, L. 
X.}, year={2010} } @article{li_li_zhu_2010, title={Groupwise Dimension Reduction}, volume={105}, ISSN={["1537-274X"]}, DOI={10.1198/jasa.2010.tm09643}, abstractNote={In many regression applications, the predictors fall naturally into a number of groups or domains, and it is often desirable to establish a domain-specific relation between the predictors and the response. In this article, we consider dimension reduction that incorporates such domain knowledge. The proposed method is based on the derivative of the conditional mean, where the differential operator is constrained to the form of a direct sum. This formulation also accommodates the situations where dimension reduction is focused only on part of the predictors; as such it extends Partial Dimension Reduction to cases where the blocked predictors are continuous. Through simulation and real data analyses, we show that the proposed method achieves greater accuracy and interpretability than the dimension reduction methods that ignore group information. Furthermore, the new method does not require the stringent conditions on the predictor distribution that are required by existing methods.}, number={491}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Li, Lexin and Li, Bing and Zhu, Li-Xing}, year={2010}, month={Sep}, pages={1188–1201} } @article{cook_li_2009, title={Dimension Reduction in Regressions With Exponential Family Predictors}, volume={18}, ISSN={["1537-2715"]}, DOI={10.1198/jcgs.2009.08005}, abstractNote={We present first methodology for dimension reduction in regressions with predictors that, given the response, follow one-parameter exponential families. Our approach is based on modeling the conditional distribution of the predictors given the response, which allows us to derive and estimate a sufficient reduction of the predictors. We also propose a method of estimating the forward regression mean function without requiring an explicit forward regression model. 
Whereas nearly all existing estimators of the central subspace are limited to regressions with continuous predictors only, our proposed methodology extends estimation to regressions with all categorical or a mixture of categorical and continuous predictors. Supplementary materials including the proofs and the computer code are available from the JCGS website.}, number={3}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Cook, R. Dennis and Li, Lexin}, year={2009}, month={Sep}, pages={774–791} } @article{li_2009, title={Exploiting predictor domain information in sufficient dimension reduction}, volume={53}, ISSN={["0167-9473"]}, DOI={10.1016/j.csda.2009.01.007}, abstractNote={Analysis of high-dimensional data is becoming the norm in a variety of scientific studies and dimension reduction methods are widely employed. As the predictor domain knowledge is often available, it is useful to incorporate such domain information into dimension reduction and subsequent model formulation. Existing solutions such as simple average, principal components analysis and partial least squares cannot assure preservation of full regression information when reducing the dimension. In this article we investigate sufficient dimension reduction strategies that can retain full regression information meanwhile utilizing prior domain knowledge. 
Both simulations and a real data analysis demonstrate that the new methods are effective and often superior than the existing solutions.}, number={7}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Li, Lexin}, year={2009}, month={May}, pages={2665–2672} } @article{cornish_kogan_li_turk_jacquemont_hagerman_2009, title={Lifespan changes in working memory in fragile X premutation males}, volume={69}, ISSN={["1090-2147"]}, DOI={10.1016/j.bandc.2008.11.006}, abstractNote={Fragile X syndrome is the world's most common hereditary cause of developmental delay in males and is now well characterized at the biological, brain and cognitive levels. The disorder is caused by the silencing of a single gene on the X chromosome, the FMR1 gene. The premutation (carrier) status, however, is less well documented but has an emerging literature that highlights a more subtle profile of executive cognitive deficiencies that mirror those reported in fully affected males. Rarely, however, has the issue of age-related declines in cognitive performance in premutation males been addressed. In the present study, we focus specifically on the cognitive domain of working memory and its subcomponents (verbal, spatial and central executive memory) and explore performance across a broad sample of premutation males aged 18-69 years matched on age and IQ to unaffected comparison males. We further tease apart the premutation status into those males with symptoms of the newly identified neurodegenerative disorder, the fragile X-associated tremor/ataxia syndrome (FXTAS) and those males currently symptom-free. Our findings indicate a specific vulnerability in premutation males on tasks that require simultaneous manipulation and storage of new information, so-called executive control of memory. Furthermore, this vulnerability appears to exist regardless of the presence of FXTAS symptoms. 
Males with FXTAS symptoms demonstrated a more general impairment encompassing phonological working memory in addition to central executive working memory. Among asymptomatic premutation males, we observed the novel finding of a relationship between increased CGG repeat size and impairment to central executive working memory.}, number={3}, journal={BRAIN AND COGNITION}, author={Cornish, Kim M. and Kogan, Cary S. and Li, Lexin and Turk, Jeremy and Jacquemont, Sebastien and Hagerman, Randi J.}, year={2009}, month={Apr}, pages={551–558} } @article{li_yin_2009, title={Longitudinal data analysis using sufficient dimension reduction method}, volume={53}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2009.04.018}, abstractNote={There have been an increasing number of applications where the number of predictors is large, meanwhile data are repeatedly measured at a sequence of time points. In this article we investigate how dimension reduction method can be employed for analyzing such high-dimensional longitudinal data. Predictor dimension can be effectively reduced while full regression means information can be retained during dimension reduction. Simultaneous variable selection along with dimension reduction is studied, and graphical diagnosis and model fitting after dimension reduction are investigated. The method is flexible enough to encompass a variety of commonly used longitudinal models.}, number={12}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Li, Lexin and Yin, Xiangrong}, year={2009}, month={Oct}, pages={4106–4115} } @article{setodji_li_2009, title={Model free multivariate reduced-rank regression with categorical predictors}, volume={19}, number={3}, journal={Statistica Sinica}, author={Setodji, C. M. and Li, L. 
X.}, year={2009}, pages={1119–1136} } @article{bondell_li_2009, title={Shrinkage inverse regression estimation for model-free variable selection}, volume={71}, ISSN={["1467-9868"]}, DOI={10.1111/j.1467-9868.2008.00686.x}, abstractNote={Summary}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Bondell, Howard D. and Li, Lexin}, year={2009}, pages={287–299} } @misc{li_yin_2008, title={A note on sliced inverse regression with regularizations - Reply}, volume={64}, number={3}, journal={Biometrics}, author={Li, L. X. and Yin, X. R.}, year={2008}, pages={984–986} } @article{cornish_li_kogan_jacquemont_turk_dalton_hagerman_hagerman_2008, title={Age-dependent cognitive changes in carriers of the fragile X syndrome}, volume={44}, ISSN={["0010-9452"]}, DOI={10.1016/j.cortex.2006.11.002}, abstractNote={Fragile X syndrome is a neurodevelopmental disorder that is caused by the silencing of a single gene on the X chromosome, the fragile X mental retardation 1 (FMR1) gene. Affected individuals display a unique neurocognitive phenotype that includes significant impairment in inhibitory control, selective attention, working memory, and visual–spatial cognition. In contrast, little is known about the trajectory and specificity of any cognitive impairment associated with the fragile X premutation (i.e., “carrier status”) or its relationship with the recently identified neurodegenerative disorder, fragile X-associated tremor/ataxia syndrome (FXTAS). In the present study, we evaluated a broad sample of 40 premutation males (PM) aged 18–69 years matched on age and IQ to 67 unaffected comparison males (NC). Performance was compared across a range of cognitive domains known to be impaired in fragile X syndrome (i.e., “full mutation”). Tremor was also assessed using a self-report neurological questionnaire. 
PM displayed statistically significant deficits in their ability to inhibit prepotent responses, differentiating them from NC from age 30 onwards. With increasing age, the two groups follow different trajectories, with PM developing progressively more severe problems in inhibitory control. This deficit also has a strong co-occurrence in males displaying FXTAS-related symptomatology (p < .001). Selective attention was also impaired in PM but did not show any disproportionate aging effect. No other cognitive deficits were observed. We conclude that an inhibitory deficit and its impact across the lifespan are specifically associated with the fragile X premutation status, and may be a precursor for development of a more severe form of cognitive impairment or dementia, which has been reported in patients with the diagnosis of FXTAS.}, number={6}, journal={CORTEX}, author={Cornish, Kim M. and Li, Lexin and Kogan, Cary S. and Jacquemont, Sebastien and Turk, Jeremy and Dalton, Ann and Hagerman, Randi J. and Hagerman, Paul J.}, year={2008}, month={Jun}, pages={628–636} } @article{li_2008, title={Augmenting the bootstrap to analyze high dimensional genomic data - Discussion}, volume={17}, number={1}, journal={TEST}, author={Li, L. X.}, year={2008}, pages={22–24} } @article{lu_li_2008, title={Boosting method for nonlinear transformation models with censored survival data}, volume={9}, ISSN={["1465-4644"]}, DOI={10.1093/biostatistics/kxn005}, abstractNote={We propose a general class of nonlinear transformation models for analyzing censored survival data, of which the nonlinear proportional hazards and proportional odds models are special cases. A cubic smoothing spline-based component-wise boosting algorithm is derived to estimate covariate effects nonparametrically using the gradient of the marginal likelihood, that is computed using importance sampling. 
The proposed method can be applied to survival data with high-dimensional covariates, including the case when the sample size is smaller than the number of predictors. Empirical performance of the proposed method is evaluated via simulations and analysis of a microarray survival data.}, number={4}, journal={BIOSTATISTICS}, author={Lu, Wenbin and Li, Lexin}, year={2008}, month={Oct}, pages={658–667} } @article{li_tsai_2008, title={Constrained regression model selection}, volume={138}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2008.02.006}, abstractNote={We propose two improved versions of the Akaike information criterion, AICC and AICC*, for the constrained linear and single-index models, respectively. These enhanced versions have corresponding unconstrained selection criteria as their special cases. Our Monte Carlo simulations demonstrate that AICC and AICC* are superior to the Akaike information criterion. Additionally, we illustrate the use of AICC* in an empirical example and generalize AICC* to the constrained partially linear model.}, number={12}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Li, Lexin and Tsai, Chih-Ling}, year={2008}, month={Dec}, pages={3939–3949} } @article{li_yin_2008, title={Sliced inverse regression with regularizations}, volume={64}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2007.00836.x}, abstractNote={Summary In high‐dimensional data analysis, sliced inverse regression (SIR) has proven to be an effective dimension reduction tool and has enjoyed wide applications. The usual SIR, however, cannot work with problems where the number of predictors, p, exceeds the sample size, n, and can suffer when there is high collinearity among the predictors. In addition, the reduced dimensional space consists of linear combinations of all the original predictors and no variable selection is achieved. In this article, we propose a regularized SIR approach based on the least‐squares formulation of SIR. 
The L2 regularization is introduced, and an alternating least-squares algorithm is developed, to enable SIR to work with n < p and highly correlated predictors. The L1 regularization is further introduced to achieve simultaneous reduction estimation and predictor selection. Both simulations and the analysis of a microarray expression data set demonstrate the usefulness of the proposed method.},
  number  = {1},
  journal = {Biometrics},
  author  = {Li, Lexin and Yin, Xiangrong},
  year    = {2008},
  month   = mar,
  pages   = {124--131},
}

@article{li_lu_2008,
  author  = {Li, Lexin and Lu, Wenbin},
  title   = {Sufficient dimension reduction with missing predictors},
  journal = {Journal of the American Statistical Association},
  volume  = {103},
  number  = {482},
  pages   = {822--831},
  year    = {2008},
  month   = jun,
  doi     = {10.1198/016214508000000283},
  issn    = {0162-1459},
  abstractNote = {In high-dimensional data analysis, sufficient dimension reduction (SDR) methods are effective in reducing the predictor dimension, while retaining full regression information and imposing no parametric models. However, it is common in high-dimensional data that a subset of predictors may have missing observations. Existing SDR methods resort to the complete-case analysis by removing all the subjects with missingness in any of the predictors under inquiry. Such an approach does not make effective use of the data and is valid only when missingness is independent of both observed and unobserved quantities. In this article, we propose a new class of SDR estimators under a more general missingness mechanism that allows missingness to depend on the observed data. We focus on a widely used SDR method, sliced inverse regression, and propose an augmented inverse probability weighted sliced inverse regression estimator (AIPW--SIR). We show that AIPW--SIR is doubly robust and asymptotically consistent and demonstrate that AIPW--SIR is more effective than the complete-case analysis through both simulations and real data analysis. We also outline the extension of the AIPW strategy to other SDR methods, including sliced average variance estimation and principal Hessian directions.},
}

@article{tassone_adams_berry-kravis_cohen_brusco_leehey_li_hagerman_hagerman_2007,
  author  = {Tassone, Flora and Adams, John and Berry-Kravis, Elizabeth M. and Cohen, Susannah S. and Brusco, Alfredo and Leehey, Maureen A. and Li, Lexin and Hagerman, Randi J. and Hagerman, Paul J.},
  title   = {{CGG} repeat length correlates with age of onset of motor signs of the fragile {X}-associated tremor/ataxia syndrome ({FXTAS})},
  journal = {American Journal of Medical Genetics Part B: Neuropsychiatric Genetics},
  volume  = {144B},
  number  = {4},
  pages   = {566--569},
  year    = {2007},
  month   = jun,
  doi     = {10.1002/ajmg.b.30482},
  issn    = {1552-485X},
}

@article{li_nachtsheim_2007,
  author  = {Li, Lexin and Nachtsheim, Christopher J.},
  title   = {Comment: {Fisher} lecture: Dimension reduction in regression},
  journal = {Statistical Science},
  volume  = {22},
  number  = {1},
  pages   = {36--39},
  year    = {2007},
  month   = feb,
  doi     = {10.1214/088342307000000050},
  issn    = {2168-8745},
  abstractNote = {Professor Cook is to be congratulated for his ground breaking work in dimension reduction in regression. The paper develops a general theoretical foundation for studying principal components and other dimension reduction methods in a regression context. This framework yields a basis for elucidating the strengths, weaknesses and relationships among the various dimension reduction methods, including ordinary least squares (OLS), principal components regression (PCR), sliced inverse regression (SIR), parametric inverse regression and partial least squares. The promising new method, principal fitted components (PFC), appears to outperform some long-standing approaches such as PCR, OLS and SIR. Finally, as a result of this contribution, the standard approach to regression, with its emphasis on fixed predictors and the need to assume away the randomness of X, and the standard approach to principal components, with its focus on the correlation matrix rather than the covariance matrix, both seem to be under question. Specific contributions of Professor Cook's paper include the following: (1) It provides a theoretical foundation for the widely used principal components regression. (2) It resorts to a model and thus a likelihood function, through the inverse regression of predictors given response, to study sufficient reduction in a forward regression problem. Consequently, likelihood based inferences can be developed, and the inferential capabilities of dimension reduction are moved closer to mainstream regression methodology. (3) It permits extension to categorical or mixtures of continuous and categorical predictors, an area that most existing model-free dimension reduction approaches do},
}

% NOTE(review): the following citation key previously contained a literal space
% ("...cogswell_et al._2007"), which classic BibTeX cannot parse. The space has
% been replaced with an underscore; update any \cite commands accordingly.
@article{berry-kravis_goetz_leehey_hagerman_zhang_li_nguyen_hall_tartaglia_cogswell_et_al._2007,
  author  = {Berry-Kravis, Elizabeth and Goetz, Christopher G. and Leehey, Maureen A. and Hagerman, Randi J. and Zhang, Lin and Li, Lexin and Nguyen, Danh and Hall, Deborah A. and Tartaglia, Nicole and Cogswell, Jennifer and others},
  title   = {Neuropathic features in fragile {X} premutation carriers},
  journal = {American Journal of Medical Genetics Part A},
  volume  = {143A},
  number  = {1},
  pages   = {19--26},
  year    = {2007},
  month   = jan,
  doi     = {10.1002/ajmg.a.31559},
  issn    = {1552-4833},
}

@article{li_cook_tsai_2007,
  author  = {Li, Lexin and Cook, Dennis and Tsai, Chih-Ling},
  title   = {Partial inverse regression},
  journal = {Biometrika},
  volume  = {94},
  number  = {3},
  pages   = {615--625},
  year    = {2007},
  month   = aug,
  doi     = {10.1093/biomet/asm043},
  issn    = {0006-3444},
  abstractNote = {In regression with a vector of quantitative predictors, sufficient dimension reduction methods can effectively reduce the predictor dimension, while preserving full regression information and assuming no parametric model. However, all current reduction methods require the sample size n to be greater than the number of predictors p. It is well known that partial least squares can deal with problems with n < p. We first establish a link between partial least squares and sufficient dimension reduction. Motivated by this link, we then propose a new dimension reduction method, entitled partial inverse regression. We show that its sample estimator is consistent, and that its performance is similar to or superior to partial least squares when n < p, especially when the regression model is nonlinear or heteroscedastic. An example involving the spectroscopy analysis of biscuit dough is also given. Copyright 2007, Oxford University Press.},
}

@article{li_2007,
  author  = {Li, Lexin},
  title   = {Sparse sufficient dimension reduction},
  journal = {Biometrika},
  volume  = {94},
  number  = {3},
  pages   = {603--613},
  year    = {2007},
  month   = aug,
  doi     = {10.1093/biomet/asm044},
  issn    = {1464-3510},
  abstractNote = {Existing sufficient dimension reduction methods suffer from the fact that each dimension reduction component is a linear combination of all the original predictors, so that it is difficult to interpret the resulting estimates. We propose a unified estimation strategy, which combines a regression-type formulation of sufficient dimension reduction methods and shrinkage estimation, to produce sparse and accurate solutions. The method can be applied to most existing sufficient dimension reduction methods such as sliced inverse regression, sliced average variance estimation and principal Hessian directions. We demonstrate the effectiveness of the proposed method by both simulations and real data analysis. Copyright 2007, Oxford University Press.},
}

@article{li_simonoff_tsai_2007,
  author  = {Li, Lexin and Simonoff, Jeffrey S. and Tsai, Chih-Ling},
  title   = {{Tobit} model estimation and sliced inverse regression},
  journal = {Statistical Modelling},
  volume  = {7},
  number  = {2},
  pages   = {107--123},
  year    = {2007},
  month   = jul,
  doi     = {10.1177/1471082X0700700201},
  issn    = {1471-082X},
  abstractNote = {It is not unusual for the response variable in a regression model to be subject to censoring or truncation. Tobit regression models are specific examples of such a situation, where for some observations the observed response is not the actual response, but the censoring value (often zero), and an indicator that censoring (from below) has occurred. It is well-known that the maximum likelihood estimator for such a linear model assuming Gaussian errors is not consistent if the error term is not homoscedastic and normally distributed. In this paper, we consider estimation in the Tobit regression context when homoscedasticity and normality of errors do not hold, as well as when the true response is an unspecified nonlinear function of linear terms, using sliced inverse regression (SIR). The properties of SIR estimation for Tobit models are explored both theoretically and based on extensive Monte Carlo simulations. We show that the SIR estimator is a strong competitor to other Tobit regression estimators, in that it has good properties when the usual linear model assumptions hold, and can be much more effective than other Tobit model estimators when those assumptions break down. An example related to household charitable donations demonstrates the usefulness of the SIR estimator.},
}

@article{li_nachtsheim_2006,
  author  = {Li, Lexin and Nachtsheim, Christopher J.},
  title   = {Sparse sliced inverse regression},
  journal = {Technometrics},
  volume  = {48},
  number  = {4},
  pages   = {503--510},
  year    = {2006},
  month   = nov,
  doi     = {10.1198/004017006000000129},
  issn    = {1537-2723},
  abstractNote = {Sliced inverse regression (SIR) is an innovative and effective method for dimension reduction and data visualization of high-dimensional problems. It replaces the original variables with low-dimensional linear combinations of predictors without any loss of regression information and without the need to prespecify a model or an error distribution. However, it suffers from the fact that each SIR component is a linear combination of all the original predictors; thus, it is often difficult to interpret the extracted components. By representing SIR as a regression-type optimization problem, we propose in this article a new method, called sparse SIR, that combines the shrinkage idea of the lasso with SIR to produce both accurate and sparse solutions. The efficacy of the proposed method is verified by simulation, and a real data example is given.},
}

@article{li_2006,
  author  = {Li, Lexin},
  title   = {Survival prediction of diffuse {large-B-cell} lymphoma based on both clinical and gene expression information},
  journal = {Bioinformatics},
  volume  = {22},
  number  = {4},
  pages   = {466--471},
  year    = {2006},
  month   = feb,
  doi     = {10.1093/bioinformatics/bti824},
  issn    = {1460-2059},
}