@article{shen_wu_2022, title={Automatic structure recovery for generalized additive models}, ISSN={["1708-945X"]}, DOI={10.1002/cjs.11739}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, author={Shen, Kai and Wu, Yichao}, year={2022}, month={Oct} }
@article{zheng_wu_2020a, title={Nonparametric Estimation of Multivariate Mixtures}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1635481}, abstractNote={A multivariate mixture model is determined by three elements: the number of components, the mixing proportions, and the component distributions. Assuming that the number of components is given and ...}, number={531}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zheng, Chaowen and Wu, Yichao}, year={2020}, month={Jul}, pages={1456–1471} }
@article{zheng_wu_2020b, title={Tuning parameter selection for penalised empirical likelihood with a diverging number of parameters}, volume={32}, ISSN={["1029-0311"]}, DOI={10.1080/10485252.2020.1717491}, abstractNote={Penalised likelihood methods have been a success in analysing high dimensional data. Tang and Leng [(2010), ‘Penalized High-Dimensional Empirical Likelihood’, Biometrika, 97(4), 905–920] extended the penalisation approach to the empirical likelihood scenario and showed that the penalised empirical likelihood estimator could identify the true predictors consistently in the linear regression models. However, this desired selection consistency property of the penalised empirical likelihood method relies heavily on the choice of the tuning parameter. In this work, we propose a tuning parameter selection procedure for penalised empirical likelihood to guarantee that this selection consistency can be achieved. Specifically, we propose a generalised information criterion (GIC) for the penalised empirical likelihood in the linear regression case. We show that the tuning parameter selected by the GIC yields the true model consistently even when the number of predictors diverges to infinity with the sample size. We demonstrate the performance of our procedure by numerical simulations and a real data analysis.}, number={1}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Zheng, Chaowen and Wu, Yichao}, year={2020}, month={Jan}, pages={246–261} }
@article{kong_bondell_wu_2018, title={Fully efficient robust estimation, outlier detection and variable selection via penalized regression}, volume={28}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202016.0441}, abstractNote={This paper studies the outlier detection and variable selection problem in linear regression. A mean shift parameter is added to the linear model to reflect the effect of outliers, where an outlier has a nonzero shift parameter. We then apply an adaptive regularization to these shift parameters to shrink most of them to zero. Those observations with nonzero mean shift parameter estimates are regarded as outliers. An L1 penalty is added to the regression parameters to select important predictors. We propose an efficient algorithm to solve this jointly penalized optimization problem and use the extended Bayesian information criteria tuning method to select the regularization parameters, since the number of parameters exceeds the sample size. Theoretical results are provided in terms of high breakdown point, full efficiency, as well as outlier detection consistency. We illustrate our method with simulations and data.
Our method is extended to high-dimensional problems with dimension much larger than the sample size.}, number={2}, journal={STATISTICA SINICA}, author={Kong, Dehan and Bondell, Howard D. and Wu, Yichao}, year={2018}, month={Apr}, pages={1031–1052} }
@article{wang_shin_wu_2018, title={Principal quantile regression for sufficient dimension reduction with heteroscedasticity}, volume={12}, ISSN={["1935-7524"]}, DOI={10.1214/18-EJS1432}, abstractNote={Sufficient dimension reduction (SDR) is a successful tool for reducing data dimensionality without stringent model assumptions. In practice, data often display heteroscedasticity which is of scientific importance in general but frequently overlooked since a primal goal of most existing statistical methods is to identify conditional mean relationship among variables. In this article, we propose a new SDR method called principal quantile regression (PQR) that efficiently tackles heteroscedasticity. PQR can naturally be extended to a nonlinear version via kernel trick. Asymptotic properties are established and an efficient solution path-based algorithm is provided. Numerical examples based on both simulated and real data demonstrate the PQR’s advantageous performance over existing SDR methods. PQR still performs very competitively even for the case without heteroscedasticity.}, number={2}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Wang, Chong and Shin, Seung Jun and Wu, Yichao}, year={2018}, pages={2114–2140} }
@article{li_nandgaonkar_wang_zhang_krause_wei_lucia_2017, title={Laccase-immobilized bacterial cellulose/TiO2 functionalized composite membranes: Evaluation for photo- and bio-catalytic dye degradation}, volume={525}, ISSN={0376-7388}, url={http://dx.doi.org/10.1016/J.MEMSCI.2016.10.033}, DOI={10.1016/j.memsci.2016.10.033}, abstractNote={Bacterial cellulose (BC) was prepared by the fermentation of Komagataeibacter xylinus. Subsequently, through site-directed surface oxidation chemistry, the hydroxyl groups of BC were successfully oxidized into aldehyde groups that served as anchors for covalent immobilization of laccase (Lac) to the newly developed oxidized BC (OBC) membrane. TiO2 was additionally co-immobilized to OBC to produce a novel material in which dye degradation was carried out under specific conditions. Atomic Force Microscopy (AFM) and Scanning Electron Microscopy (SEM) confirmed the installation of both TiO2 and laccase on the surface of the OBC nanofiber membrane. The optimum pH, temperature, thermal stability, and operational stability of the OBC/Lac and OBC/TiO2-Lac membranes were also studied in detail. In addition, the effect of temperature and pH on dye degradation was also investigated. The results showed that the oxidation process successfully introduced aldehyde groups onto the BC (FT-IR), and also improved the stability of the immobilized laccase. Compared with free laccase, the optimum pH of immobilized laccase shifted to lower pH, while the optimum temperature decreased from 55 °C to 50 °C. The dye degradation experiments showed that the optimum pH for dye degradation was 5.0–6.0, while the optimum temperature was ~40 °C. Under UV illumination, the dye degradation efficiency was significantly improved. Therefore, functionalized composite bacterial cellulose nanofiber membranes with a combined bio- and photo-catalytic property are a potentially valid approach for industrial textile dye degradation.}, journal={Journal of Membrane Science}, publisher={Elsevier BV}, author={Li, Guohui and Nandgaonkar, Avinav G.
and Wang, Qingqing and Zhang, Jinning and Krause, Wendy E. and Wei, Qufu and Lucia, Lucian A.}, year={2017}, month={Mar}, pages={89–98} }
@article{shin_wu_zhang_liu_2017, title={Principal weighted support vector machines for sufficient dimension reduction in binary classification}, volume={104}, number={1}, journal={Biometrika}, author={Shin, S. J. and Wu, Y. C. and Zhang, H. H. and Liu, Y. F.}, year={2017}, pages={67–81} }
@article{hu_yao_wu_2017, title={The robust EM-type algorithms for log-concave mixtures of regression models}, volume={111}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2017.01.004}, abstractNote={Finite mixture of regression (FMR) models can be reformulated as incomplete data problems and they can be estimated via the expectation–maximization (EM) algorithm. The main drawback is the strong parametric assumption such as FMR models with normally distributed residuals. The estimation might be biased if the model is misspecified. To relax the parametric assumption about the component error densities, a new method is proposed to estimate the mixture regression parameters by only assuming that the components have log-concave error densities but the specific parametric family is unknown. Two EM-type algorithms for the mixtures of regression models with log-concave error densities are proposed. Numerical studies are made to compare the performance of our algorithms with the normal mixture EM algorithms. When the component error densities are not normal, the new methods have much smaller MSEs when compared with the standard normal mixture EM algorithms. When the underlying component error densities are normal, the new methods have comparable performance to the normal EM algorithm.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Hu, Hao and Yao, Weixin and Wu, Yichao}, year={2017}, month={Jul}, pages={14–26} }
@article{zhang_wu_wang_li_2016a, title={A consistent information criterion for support vector machines in diverging model spaces}, volume={17}, journal={Journal of Machine Learning Research}, author={Zhang, X. and Wu, Y. C. and Wang, L. and Li, R. Z.}, year={2016} }
@article{peng_wang_wu_2016, title={An error bound for l1-norm support vector machine coefficients in ultra-high dimension}, volume={17}, journal={Journal of Machine Learning Research}, author={Peng, B. and Wang, L. and Wu, Y. C.}, year={2016} }
@article{chang_tang_wu_2016, title={Local independence feature screening for nonparametric and semiparametric models by marginal empirical likelihood}, volume={44}, ISSN={["0090-5364"]}, DOI={10.1214/15-aos1374}, abstractNote={We consider an independence feature screening technique for identifying explanatory variables that locally contribute to the response variable in high-dimensional regression analysis. Without requiring a specific parametric form of the underlying data model, our approach accommodates a wide spectrum of nonparametric and semiparametric model families. To detect the local contributions of explanatory variables, our approach constructs empirical likelihood locally in conjunction with marginal nonparametric regressions. Since our approach actually requires no estimation, it is advantageous in scenarios such as the single-index models where even specification and identification of a marginal model is an issue.
By automatically incorporating the level of variation of the nonparametric regression and directly assessing the strength of data evidence supporting local contribution from each explanatory variable, our approach provides a unique perspective for solving feature screening problems. Theoretical analysis shows that our approach can handle data dimensionality growing exponentially with the sample size. With extensive theoretical illustrations and numerical examples, we show that the local independence screening approach performs promisingly.}, number={2}, journal={ANNALS OF STATISTICS}, author={Chang, Jinyuan and Tang, Cheng Yong and Wu, Yichao}, year={2016}, month={Apr}, pages={515–539} }
@article{hu_wu_yao_2016, title={Maximum likelihood estimation of the mixture of log-concave densities}, volume={101}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2016.03.002}, abstractNote={Finite mixture models are useful tools and can be estimated via the EM algorithm. A main drawback is the strong parametric assumption about the component densities. In this paper, a much more flexible mixture model is considered, which assumes each component density to be log-concave. Under fairly general conditions, the log-concave maximum likelihood estimator (LCMLE) exists and is consistent. Numerical examples are also made to demonstrate that the LCMLE improves the clustering results when compared with the traditional MLE for parametric mixture models.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Hu, Hao and Wu, Yichao and Yao, Weixin}, year={2016}, month={Sep}, pages={137–147} }
@article{zhang_liu_wu_2016, title={On quantile regression in reproducing kernel Hilbert spaces with the data sparsity constraint}, volume={17}, journal={Journal of Machine Learning Research}, author={Zhang, C. and Liu, Y. F. and Wu, Y. C.}, year={2016} }
@article{yao_wu_zou_2016a, title={Probability-enhanced effective dimension reduction for classifying sparse functional data}, volume={25}, number={1}, journal={TEST}, author={Yao, F. and Wu, Y. C. and Zou, J. L.}, year={2016}, pages={1–22} }
@article{yao_wu_zou_2016b, title={Rejoinder on: Probability enhanced effective dimension reduction for classifying sparse functional data}, volume={25}, ISSN={["1863-8260"]}, DOI={10.1007/s11749-015-0478-7}, number={1}, journal={TEST}, author={Yao, Fang and Wu, Yichao and Zou, Jialin}, year={2016}, month={Mar}, pages={52–58} }
@article{zhang_wu_wang_li_2016b, title={Variable selection for support vector machines in moderately high dimensions}, volume={78}, ISSN={["1467-9868"]}, DOI={10.1111/rssb.12100}, number={1}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Zhang, Xiang and Wu, Yichao and Wang, Lan and Li, Runze}, year={2016}, month={Jan}, pages={53–76} }
@article{wu_stefanski_2015, title={Automatic structure recovery for additive models}, volume={102}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asu070}, abstractNote={We propose an automatic structure recovery method for additive models, based on a backfitting algorithm coupled with local polynomial smoothing, in conjunction with a new kernel-based variable selection strategy. Our method produces estimates of the set of noise predictors, the sets of predictors that contribute polynomially at different degrees up to a specified degree M, and the set of predictors that contribute beyond polynomially of degree M. We prove consistency of the proposed method, and describe an extension to partially linear models.
Finite-sample performance of the method is illustrated via Monte Carlo studies and a real-data example.}, number={2}, journal={BIOMETRIKA}, author={Wu, Yichao and Stefanski, Leonard A.}, year={2015}, month={Jun}, pages={381–395} }
@article{xiao_wu_zhou_2015, title={ConvexLAR: An Extension of Least Angle Regression}, volume={24}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2014.962700}, abstractNote={The least angle regression (LAR) was proposed by Efron, Hastie, Johnstone and Tibshirani in the year 2004 for continuous model selection in linear regression. It is motivated by a geometric argument and tracks a path along which the predictors enter successively and the active predictors always maintain the same absolute correlation (angle) with the residual vector. Although it gains popularity quickly, its extensions seem rare compared to the penalty methods. In this expository article, we show that the powerful geometric idea of LAR can be generalized in a fruitful way. We propose a ConvexLAR algorithm that works for any convex loss function and naturally extends to group selection and data adaptive variable selection. After simple modification, it also yields new exact path algorithms for certain penalty methods such as a convex loss function with lasso or group lasso penalty. Variable selection in recurrent event and panel count data analysis, Ada-Boost, and Gaussian graphical model is reconsidered from the ConvexLAR angle. Supplementary materials for this article are available online.}, number={3}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Xiao, Wei and Wu, Yichao and Zhou, Hua}, year={2015}, month={Jul}, pages={603–626} }
@article{kong_bondell_wu_2015, title={Domain selection for the varying coefficient model via local polynomial regression}, volume={83}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2014.10.004}, abstractNote={In this article, we consider the varying coefficient model, which allows the relationship between the predictors and response to vary across the domain of interest, such as time. In applications, it is possible that certain predictors only affect the response in particular regions and not everywhere. This corresponds to identifying the domain where the varying coefficient is nonzero. Towards this goal, local polynomial smoothing and penalized regression are incorporated into one framework. Asymptotic properties of our penalized estimators are provided. Specifically, the estimators enjoy the oracle properties in the sense that they have the same bias and asymptotic variance as the local polynomial estimators, as if the sparsity were known a priori. The choice of appropriate bandwidth and computational algorithms are discussed. The proposed method is examined via simulations and a real data example.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Kong, Dehan and Bondell, Howard D. and Wu, Yichao}, year={2015}, month={Mar}, pages={236–250} }
@article{yao_lei_wu_2015, title={Effective dimension reduction for sparse functional data}, volume={102}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asv006}, abstractNote={We propose a method of effective dimension reduction for functional data, emphasizing the sparse design where one observes only a few noisy and irregular measurements for some or all of the subjects. The proposed method borrows strength across the entire sample and provides a way to characterize the effective dimension reduction space, via functional cumulative slicing.
Our theoretical study reveals a bias-variance trade-off associated with the regularizing truncation and decaying structures of the predictor process and the effective dimension reduction space. A simulation study and an application illustrate the superior finite-sample performance of the method.}, number={2}, journal={BIOMETRIKA}, author={Yao, F. and Lei, E. and Wu, Y.}, year={2015}, month={Jun}, pages={421–437} }
@article{ke_fan_wu_2015, title={Homogeneity Pursuit}, volume={110}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2014.892882}, abstractNote={This paper explores the homogeneity of coefficients in high-dimensional regression, which extends the sparsity concept and is more general and suitable for many applications. Homogeneity arises when regression coefficients corresponding to neighboring geographical regions or a similar cluster of covariates are expected to be approximately the same. Sparsity corresponds to a special case of homogeneity with a large cluster of known atom zero. In this article, we propose a new method called clustering algorithm in regression via data-driven segmentation (CARDS) to explore homogeneity. New mathematics are provided on the gain that can be achieved by exploring homogeneity. Statistical properties of two versions of CARDS are analyzed. In particular, the asymptotic normality of our proposed CARDS estimator is established, which reveals better estimation accuracy for homogeneous parameters than that without homogeneity exploration. When our methods are combined with sparsity exploration, further efficiency can be achieved beyond the exploration of sparsity alone. This provides additional insights into the power of exploring low-dimensional structures in high-dimensional regression: homogeneity and sparsity. Our results also shed light on the properties of the fused Lasso. The newly developed method is further illustrated by simulation studies and applications to real data. Supplementary materials for this article are available online.}, number={509}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Ke, Zheng Tracy and Fan, Jianqing and Wu, Yichao}, year={2015}, month={Mar}, pages={175–194} }
@article{sun_liu_crowley_chen_zhou_chu_huang_kuan_li_miller_et al._2015, title={IsoDOT Detects Differential RNA-Isoform Expression/Usage With Respect to a Categorical or Continuous Covariate With High Sensitivity and Specificity}, volume={110}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2015.1040880}, abstractNote={We have developed a statistical method named IsoDOT to assess differential isoform expression (DIE) and differential isoform usage (DIU) using RNA-seq data. Here isoform usage refers to relative isoform expression given the total expression of the corresponding gene. IsoDOT performs two tasks that cannot be accomplished by existing methods: to test DIE/DIU with respect to a continuous covariate, and to test DIE/DIU for one case versus one control. The latter task is not an uncommon situation in practice, for example, comparing the paternal and maternal alleles of one individual or comparing tumor and normal samples of one cancer patient. Simulation studies demonstrate the high sensitivity and specificity of IsoDOT. We apply IsoDOT to study the effects of haloperidol treatment on the mouse transcriptome and identify a group of genes whose isoform usages respond to haloperidol treatment.
Supplementary materials for this article are available online.}, number={511}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Sun, Wei and Liu, Yufeng and Crowley, James J. and Chen, Ting-Huei and Zhou, Hua and Chu, Haitao and Huang, Shunping and Kuan, Pei-Fen and Li, Yuan and Miller, Darla and et al.}, year={2015}, month={Sep}, pages={975–986} }
@article{davenport_maity_wu_2015, title={Parametrically guided estimation in nonparametric varying coefficient models with quasi-likelihood}, volume={27}, ISSN={["1029-0311"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84929278394&partnerID=MN8TOARS}, DOI={10.1080/10485252.2015.1026903}, abstractNote={Varying coefficient models (VCMs) allow us to generalise standard linear regression models to incorporate complex covariate effects by modelling the regression coefficients as functions of another covariate. For nonparametric varying coefficients, we can borrow the idea of parametrically guided estimation to improve asymptotic bias. In this paper, we develop a guided estimation procedure for the nonparametric VCMs. Asymptotic properties are established for the guided estimators and a method of bandwidth selection via bias-variance tradeoff is proposed. We compare the performance of the guided estimator with that of the unguided estimator via both simulation and real data examples.}, number={2}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Davenport, Clemontina A. and Maity, Arnab and Wu, Yichao}, year={2015}, month={Apr}, pages={195–213} }
@article{zhou_wu_2014, title={A Generic Path Algorithm for Regularized Statistical Estimation}, volume={109}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.864166}, abstractNote={Regularization is widely used in statistics and machine learning to prevent overfitting and gear solution toward prior information. In general, a regularized estimation problem minimizes the sum of a loss function and a penalty term. The penalty term is usually weighted by a tuning parameter and encourages certain constraints on the parameters to be estimated. Particular choices of constraints lead to the popular lasso, fused-lasso, and other generalized ℓ1 penalized regression methods. In this article we follow a recent idea by Wu and propose an exact path solver based on ordinary differential equations (EPSODE) that works for any convex loss function and can deal with generalized ℓ1 penalties as well as more complicated regularization such as inequality constraints encountered in shape-restricted regressions and nonparametric density estimation. Nonasymptotic error bounds for the equality regularized estimates are derived. In practice, the EPSODE can be coupled with AIC, BIC, Cp or cross-validation to select an optimal tuning parameter, or provide a convenient model space for performing model averaging or aggregation. Our applications to generalized ℓ1 regularized generalized linear models, shape-restricted regressions, Gaussian graphical models, and nonparametric density estimation showcase the potential of the EPSODE algorithm.
Supplementary materials for this article are available online.}, number={506}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhou, Hua and Wu, Yichao}, year={2014}, month={Jun}, pages={686–699} }
@article{avery_wu_zhang_zhang_2014, title={RKHS-based functional nonparametric regression for sparse and irregular longitudinal data}, volume={42}, ISSN={["1708-945X"]}, DOI={10.1002/cjs.11215}, number={2}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, author={Avery, Matthew and Wu, Yichao and Zhang, Hao Helen and Zhang, Jiajia}, year={2014}, month={Jun}, pages={204–216} }
@article{shin_wu_zhang_2014, title={Two-dimensional solution surface for weighted support vector machines}, volume={23}, DOI={10.1080/10618600.2012.761139}, abstractNote={The support vector machine (SVM) is a popular learning method for binary classification. Standard SVMs treat all the data points equally, but in some practical problems it is more natural to assign different weights to observations from different classes. This leads to a broader class of learning, the so-called weighted SVMs (WSVMs), and one of their important applications is to estimate class probabilities besides learning the classification boundary. There are two parameters associated with the WSVM optimization problem: one is the regularization parameter and the other is the weight parameter. In this article, we first establish that the WSVM solutions are jointly piecewise-linear with respect to both the regularization and weight parameter. We then develop a state-of-the-art algorithm that can compute the entire trajectory of the WSVM solutions for every pair of the regularization parameter and the weight parameter at a feasible computational cost. The derived two-dimensional solution surface provides theoretical insight on the behavior of the WSVM solutions. Numerically, the algorithm can greatly facilitate the implementation of the WSVM and automate the selection process of the optimal regularization parameter. We illustrate the new algorithm on various examples. This article has online supplementary materials.}, number={2}, journal={Journal of Computational and Graphical Statistics}, author={Shin, S. J. and Wu, Y. C. and Zhang, H. H.}, year={2014}, pages={383–402} }
@article{wu_xue_wu_wu_2014, title={Variable selection for sparse high-dimensional nonlinear regression models by combining nonnegative garrote and sure independence screening}, volume={24}, number={3}, journal={Statistica Sinica}, author={Wu, S. and Xue, H. Q. and Wu, Y. C. and Wu, H. L.}, year={2014}, pages={1365–1387} }
@article{shin_wu_2014, title={Variable selection in large margin classifier-based probability estimation with high-dimensional predictors}, volume={56}, ISSN={["1521-4036"]}, DOI={10.1002/bimj.201300251}, abstractNote={This is a discussion of the papers: “Probability estimation with machine learning methods for dichotomous and multicategory outcome: Theory” by Jochen Kruppa, Yufeng Liu, Gérard Biau, Michael Kohler, Inke R. König, James D. Malley, and Andreas Ziegler; and “Probability estimation with machine learning methods for dichotomous and multicategory outcome: Applications” by Jochen Kruppa, Yufeng Liu, Hans-Christian Diener, Theresa Holste, Christian Weimar, Inke R.
König, and Andreas Ziegler.}, number={4}, journal={BIOMETRICAL JOURNAL}, author={Shin, Seung Jun and Wu, Yichao}, year={2014}, month={Jul}, pages={594–596} }
@article{wu_liu_2013a, title={Adaptively Weighted Large Margin Classifiers}, volume={22}, ISSN={["1061-8600"]}, DOI={10.1080/10618600.2012.680866}, abstractNote={Large margin classifiers have been shown to be very useful in many applications. The support vector machine is a canonical example of large margin classifiers. Despite their flexibility and ability in handling high-dimensional data, many large margin classifiers have serious drawbacks when the data are noisy, especially when there are outliers in the data. In this article, we propose a new weighted large margin classification technique. The weights are chosen adaptively with data. The proposed classifiers are shown to be robust to outliers and thus are able to produce more accurate classification results.}, number={2}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Wu, Yichao and Liu, Yufeng}, year={2013}, month={Jun}, pages={416–432} }
@article{mueller_wu_yao_2013, title={Continuously additive models for nonlinear functional regression}, volume={100}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/ast004}, abstractNote={We introduce continuously additive models, which can be viewed as extensions of additive regression models with vector predictors to the case of infinite-dimensional predictors. This approach produces a class of flexible functional nonlinear regression models, where random predictor curves are coupled with scalar responses. In continuously additive modelling, integrals taken over a smooth surface along graphs of predictor functions relate the predictors to the responses in a nonlinear fashion. We use tensor product basis expansions to fit the smooth regression surface that characterizes the model. In a theoretical investigation, we show that the predictions obtained from fitting continuously additive models are consistent and asymptotically normal. We also consider extensions to generalized responses. The proposed class of models outperforms existing functional regression models in simulations and real-data examples.}, number={3}, journal={BIOMETRIKA}, author={Mueller, Hans-Georg and Wu, Yichao and Yao, Fang}, year={2013}, month={Sep}, pages={607–622} }
@article{zeng_wu_2013, title={Coordinate great circle descent algorithm with application to single-index models}, volume={6}, DOI={10.4310/sii.2013.v6.n4.a9}, abstractNote={Coordinate descent algorithm has been widely used to solve high dimensional optimization problems with a non-differentiable objective function recently. To provide theoretical justification, Tseng (2001) showed that it leads to a stationary point when the non-differentiable part of the objective function is separable. Motivated by the single index model, we consider optimization problems with a unit-norm constraint in this article. Because of this unit-norm constraint, the coordinate descent algorithm cannot be applied. In addition, non-separability of the non-differentiable part of the objective function makes the result of Tseng (2001) not directly applicable. In this paper, we propose a novel coordinate great circle descent algorithm to solve this family of optimization problems. The validity of the algorithm is justified both theoretically and via simulation studies.
We also use the Boston housing data to illustrate this algorithm by applying it to fit single-index models.}, number={4}, journal={Statistics and its Interface}, author={Zeng, P. and Wu, Y. C.}, year={2013}, pages={511–518} }
@article{wu_liu_2013b, title={Functional Robust Support Vector Machines for Sparse and Irregular Longitudinal Data}, volume={22}, ISSN={["1061-8600"]}, DOI={10.1080/10618600.2012.680823}, abstractNote={Functional and longitudinal data are becoming more and more common in practice. This article focuses on sparse and irregular longitudinal data with a multicategory response. The predictor consists of sparse and irregular observations, potentially contaminated with measurement errors, on the predictor trajectory. To deal with this type of complicated predictor, we borrow the strength of large-margin classifiers in statistical learning for classification of sparse and irregular longitudinal data. In particular, we propose functional robust truncated-hinge-loss support vector machines to perform multicategory classification with the aid of functional principal component analysis. This article has online supplementary material.}, number={2}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Wu, Yichao and Liu, Yufeng}, year={2013}, month={Jun}, pages={379–395} }
@article{chang_tang_wu_2013, title={Marginal empirical likelihood and sure independence feature screening}, volume={41}, ISSN={["0090-5364"]}, DOI={10.1214/13-aos1139}, abstractNote={We study a marginal empirical likelihood approach in scenarios when the number of variables grows exponentially with the sample size. The marginal empirical likelihood ratios as functions of the parameters of interest are systematically examined, and we find that the marginal empirical likelihood ratio evaluated at zero can be used to differentiate whether an explanatory variable is contributing to a response variable or not. Based on this finding, we propose a unified feature screening procedure for linear models and the generalized linear models. Different from most existing feature screening approaches that rely on the magnitudes of some marginal estimators to identify true signals, the proposed screening approach is capable of further incorporating the level of uncertainties of such estimators. Such a merit inherits the self-studentization property of the empirical likelihood approach, and extends the insights of existing feature screening methods. Moreover, we show that our screening approach is less restrictive to distributional assumptions, and can be conveniently adapted to be applied in a broad range of scenarios such as models specified using general moment conditions. Our theoretical results and extensive numerical examples by simulations and data analysis demonstrate the merits of the marginal empirical likelihood approach.}, number={4}, journal={ANNALS OF STATISTICS}, author={Chang, Jinyuan and Tang, Cheng Yong and Wu, Yichao}, year={2013}, month={Aug}, pages={2123–2148} }
@article{fan_maity_wang_wu_2013, title={Parametrically guided generalised additive models with application to mergers and acquisitions data}, volume={25}, ISSN={["1048-5252"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84875865626&partnerID=MN8TOARS}, DOI={10.1080/10485252.2012.735233}, abstractNote={Generalised nonparametric additive models present a flexible way to evaluate the effects of several covariates on a general outcome of interest via a link function.
In this modelling framework, one assumes that the effect of each of the covariates is nonparametric and additive. However, in practice, often there is prior information available about the shape of the regression functions, possibly from pilot studies or exploratory analysis. In this paper, we consider such situations and propose an estimation procedure where the prior information is used as a parametric guide to fit the additive model. Specifically, we first posit a parametric family for each of the regression functions using the prior information (parametric guides). After removing these parametric trends, we then estimate the remainder of the nonparametric functions using a nonparametric generalised additive model and form the final estimates by adding back the parametric trend. We investigate the asymptotic properties of the estimates and show that when a good guide is chosen, the asymptotic bias of the estimates can be reduced significantly while keeping the asymptotic variance the same as the unguided estimator. We observe the performance of our method via a simulation study and demonstrate our method by applying to a real data set on mergers and acquisitions.}, number={1}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, publisher={Informa UK Limited}, author={Fan, Jianqing and Maity, Arnab and Wang, Yihui and Wu, Yichao}, year={2013}, month={Mar}, pages={109–128} }
@article{wu_2012, title={Elastic net for Cox's proportional hazards model with a solution path algorithm}, volume={22}, number={1}, journal={Statistica Sinica}, author={Wu, Y. C.}, year={2012}, pages={271–294} }
@article{wang_wu_li_2012, title={Quantile Regression for Analyzing Heterogeneity in Ultra-High Dimension}, volume={107}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2012.656014}, abstractNote={Ultra-high dimensional data often display heterogeneity due to either heteroscedastic variance or other forms of non-location-scale covariate effects. To accommodate heterogeneity, we advocate a more general interpretation of sparsity, which assumes that only a small number of covariates influence the conditional distribution of the response variable, given all candidate covariates; however, the sets of relevant covariates may differ when we consider different segments of the conditional distribution. In this framework, we investigate the methodology and theory of nonconvex, penalized quantile regression in ultra-high dimension. The proposed approach has two distinctive features: (1) It enables us to explore the entire conditional distribution of the response variable, given the ultra-high-dimensional covariates, and provides a more realistic picture of the sparsity pattern; (2) it requires substantially weaker conditions compared with alternative methods in the literature; thus, it greatly alleviates the difficulty of model checking in the ultra-high dimension. In theoretic development, it is challenging to deal with both the nonsmooth loss function and the nonconvex penalty function in ultra-high-dimensional parameter space. We introduce a novel, sufficient optimality condition that relies on a convex differencing representation of the penalized loss function and the subdifferential calculus. Exploring this optimality condition enables us to establish the oracle property for sparse quantile regression in the ultra-high dimension under relaxed conditions. The proposed method greatly enhances existing tools for ultra-high-dimensional data analysis. Monte Carlo simulations demonstrate the usefulness of the proposed procedure.
The real data example we analyzed demonstrates that the new approach reveals substantially more information as compared with alternative methods. This article has online supplementary material.}, number={497}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Wang, Lan and Wu, Yichao and Li, Runze}, year={2012}, month={Mar}, pages={214–222} }
@article{wu_2011, title={An ordinary differential equation-based solution path algorithm}, volume={23}, ISSN={["1029-0311"]}, DOI={10.1080/10485252.2010.490584}, abstractNote={Efron, Hastie, Johnstone, and Tibshirani [(2004), ‘Least Angle Regression (with discussions)’, The Annals of Statistics, 32, 409–499] proposed least angle regression (LAR), a solution path algorithm for the least squares regression. They pointed out that a slight modification of the LAR gives the LASSO [Tibshirani, R. (1996), ‘Regression Shrinkage and Selection Via the Lasso’, Journal of the Royal Statistical Society, Series B, 58, 267–288] solution path. However, it is largely unknown how to extend this solution path algorithm to models beyond the least squares regression. In this work, we propose an extension of the LAR for generalised linear models and the quasi-likelihood model by showing that the corresponding solution path is piecewise given by solutions of ordinary differential equation (ODE) systems. Our contribution is twofold. First, we provide a theoretical understanding on how the corresponding solution path propagates. Second, we propose an ODE-based algorithm to obtain the whole solution path.}, number={1}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Wu, Yichao}, year={2011}, pages={185–199} }
@article{wu_li_2011, title={Asymptotic properties of sufficient dimension reduction with a diverging number of predictors}, volume={21}, number={2}, journal={Statistica Sinica}, author={Wu, Y. C. and Li, L. X.}, year={2011}, pages={707–730} }
@article{liu_zhang_wu_2011, title={Hard or Soft Classification? Large-Margin Unified Machines}, volume={106}, ISSN={["0162-1459"]}, DOI={10.1198/jasa.2011.tm10319}, abstractNote={Margin-based classifiers have been popular in both machine learning and statistics for classification problems. Among numerous classifiers, some are hard classifiers while some are soft ones. Soft classifiers explicitly estimate the class conditional probabilities and then perform classification based on estimated probabilities. In contrast, hard classifiers directly target the classification decision boundary without producing the probability estimation. These two types of classifiers are based on different philosophies and each has its own merits. In this article, we propose a novel family of large-margin classifiers, namely large-margin unified machines (LUMs), which covers a broad range of margin-based classifiers including both hard and soft ones. By offering a natural bridge from soft to hard classification, the LUM provides a unified algorithm to fit various classifiers and hence a convenient platform to compare hard and soft classification. Both theoretical consistency and numerical performance of LUMs are explored.
Our numerical study sheds some light on the choice between hard and soft classifiers in various classification problems.}, number={493}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Liu, Yufeng and Zhang, Hao Helen and Wu, Yichao}, year={2011}, month={Mar}, pages={166–177} }
@article{liu_wu_2011, title={Simultaneous multiple non-crossing quantile regression estimation using kernel constraints}, volume={23}, ISSN={["1029-0311"]}, DOI={10.1080/10485252.2010.537336}, abstractNote={Quantile regression (QR) is a very useful statistical tool for learning the relationship between the response variable and covariates. For many applications, one often needs to estimate multiple conditional quantile functions of the response variable given covariates. Although one can estimate multiple quantiles separately, it is of great interest to estimate them simultaneously. One advantage of simultaneous estimation is that multiple quantiles can share strength among them to gain better estimation accuracy than individually estimated quantile functions. Another important advantage of joint estimation is the feasibility of incorporating simultaneous non-crossing constraints of QR functions. In this paper, we propose a new kernel-based multiple QR estimation technique, namely simultaneous non-crossing quantile regression (SNQR). We use kernel representations for QR functions and apply constraints on the kernel coefficients to avoid crossing. Both unregularised and regularised SNQR techniques are considered. Asymptotic properties such as asymptotic normality of linear SNQR and oracle properties of the sparse linear SNQR are developed. Our numerical results demonstrate the competitive performance of our SNQR over the original individual QR estimation.}, number={2}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Liu, Yufeng and Wu, Yichao}, year={2011}, pages={415–437} }
@article{zhu_wu_2010, title={Estimation and Prediction of a Class of Convolution-Based Spatial Nonstationary Models for Large Spatial Data}, volume={19}, ISSN={["1537-2715"]}, DOI={10.1198/jcgs.2009.07123}, abstractNote={In this article we address two important issues common to the analysis of large spatial datasets. One is the modeling of nonstationarity, and the other is the computational challenges in doing likelihood-based estimation and kriging prediction. We model the spatial process as a convolution of independent Gaussian processes, with the spatially varying kernel function given by the modified Bessel functions. This is a generalization of the process-convolution approach of Higdon, Swall, and Kern (1999), who used the Gaussian kernel to obtain a closed-form nonstationary covariance function. Our model can produce processes with richer local behavior similar to the processes with the Matérn class of covariance functions. Because the covariance function of our model does not have a closed-form expression, direct estimation and spatial prediction using kriging is infeasible for large datasets. Efficient algorithms for parameter estimation and spatial prediction are proposed and implemented. We compare our method with methods based on stationary model and moving window kriging. Simulation results and application to a rainfall dataset show that our method has better prediction performance.
Supplemental materials for the article are available online.}, number={1}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Zhu, Zhengyuan and Wu, Yichao}, year={2010}, month={Mar}, pages={74–95} }
@article{wu_zhang_liu_2010, title={Robust Model-Free Multiclass Probability Estimation}, volume={105}, ISSN={["0162-1459"]}, DOI={10.1198/jasa.2010.tm09107}, abstractNote={Classical statistical approaches for multiclass probability estimation are typically based on regression techniques such as multiple logistic regression, or density estimation approaches such as linear discriminant analysis (LDA) and quadratic discriminant analysis (QDA). These methods often make certain assumptions on the form of probability functions or on the underlying distributions of subclasses. In this article, we develop a model-free procedure to estimate multiclass probabilities based on large-margin classifiers. In particular, the new estimation scheme is employed by solving a series of weighted large-margin classifiers and then systematically extracting the probability information from these multiple classification rules. A main advantage of the proposed probability estimation technique is that it does not impose any strong parametric assumption on the underlying distribution and can be applied for a wide range of large-margin classification methods. A general computational algorithm is developed for class probability estimation. Furthermore, we establish asymptotic consistency of the probability estimates. Both simulated and real data examples are presented to illustrate competitive performance of the new approach and compare it with several other existing methods.}, number={489}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Wu, Yichao and Zhang, Hao Helen and Liu, Yufeng}, year={2010}, month={Mar}, pages={424–436} }
@article{shi_campbell_jones_campagne_wen_walker_su_chu_goodsaid_pusztai_et al._2010, title={The MicroArray Quality Control (MAQC)-II study of common practices for the development and validation of microarray-based predictive models}, volume={28}, number={8}, journal={Nature Biotechnology}, author={Shi, L. M. and Campbell, G. and Jones, W. D. and Campagne, F. and Wen, Z. N. and Walker, S. J. and Su, Z. Q. and Chu, T. M. and Goodsaid, F. M. and Pusztai, L. and et al.}, year={2010}, pages={827–838} }
@inproceedings{liu_wu_he_2010, title={Utility-based weighted multicategory robust support vector machines}, volume={3}, DOI={10.4310/sii.2010.v3.n4.a5}, abstractNote={The support vector machine (SVM) has been an important classification technique in both the machine learning and statistics communities. The robust SVM (RSVM) is an improved version of the SVM so that the resulting classifier can be less sensitive to outliers. In many practical problems, it may be advantageous to use different weights for different types of misclassification. However, the existing RSVM treats different kinds of misclassification equally. In this paper, we propose the weighted RSVM, as an extension of the standard SVM. We show that, surprisingly, the cost-based weights do not work well for weighted extensions of the RSVM. To solve this problem, we propose novel utility-based weights for the weighted RSVM. Both theoretical and numerical studies are presented to investigate the performance of the proposed weighted multicategory RSVM.}, number={4}, booktitle={Statistics and its Interface}, author={Liu, Y. F. and Wu, Y. C. and He, Q.
Y.}, year={2010}, pages={465–475} }
@article{wu_fan_mueller_2010, title={Varying-coefficient functional linear regression}, volume={16}, ISSN={["1573-9759"]}, DOI={10.3150/09-bej231}, abstractNote={Functional linear regression analysis aims to model regression relations which include a functional predictor. The analog of the regression parameter vector or matrix in conventional multivariate or multiple-response linear regression models is a regression parameter function in one or two arguments. If, in addition, one has scalar predictors, as is often the case in applications to longitudinal studies, the question arises how to incorporate these into a functional regression model. We study a varying-coefficient approach where the scalar covariates are modeled as additional arguments of the regression parameter function. This extension of the functional linear regression model is analogous to the extension of conventional linear regression models to varying-coefficient models and shares its advantages, such as increased flexibility; however, the details of this extension are more challenging in the functional case. Our methodology combines smoothing methods with regularization by truncation at a finite number of functional principal components. A practical version is developed and is shown to perform better than functional linear regression for longitudinal data. We investigate the asymptotic properties of varying-coefficient functional linear regression and establish consistency properties.}, number={3}, journal={BERNOULLI}, author={Wu, Yichao and Fan, Jianqing and Mueller, Hans-Georg}, year={2010}, month={Aug}, pages={730–758} }
@article{ribeiro_hurd_wu_martino_jones_brighton_boucher_o'neal_2009, title={Azithromycin treatment alters gene expression in inflammatory, lipid metabolism, and cell cycle pathways in well-differentiated human airway epithelia}, volume={4}, number={6}, journal={PLoS One}, author={Ribeiro, C. M. P. and Hurd, H. and Wu, Y. C. and Martino, M. E. B. and Jones, L. and Brighton, B. and Boucher, R. C. and O'Neal, W. K.}, year={2009} }
@article{fan_wu_feng_2009, title={Local quasi-likelihood with a parametric guide}, volume={37}, ISSN={["0090-5364"]}, DOI={10.1214/09-AOS713}, abstractNote={Generalized linear models and the quasi-likelihood method extend the ordinary regression models to accommodate more general conditional distributions of the response. Nonparametric methods need no explicit parametric specification and the resulting model is completely determined by the data themselves. However, nonparametric estimation schemes generally have a slower convergence rate such as the local polynomial smoothing estimation of nonparametric generalized linear models studied in Fan, Heckman and Wand (1995). In this work, we propose two parametrically guided nonparametric estimation schemes by incorporating prior shape information on the link transformation of the response variable's conditional mean in terms of the predictor variable.
Asymptotic results and numerical simulations demonstrate the improvement of our new estimation schemes over the original nonparametric counterpart.}, number={6B}, journal={ANNALS OF STATISTICS}, author={Fan, Jianqing and Wu, Yichao and Feng, Yang}, year={2009}, month={Dec}, pages={4153–4183} }
@article{fan_feng_wu_2009, title={Network exploration via the adaptive LASSO and SCAD penalties}, volume={3}, ISSN={["1932-6157"]}, DOI={10.1214/08-AOAS215}, abstractNote={Graphical models are frequently used to explore networks, such as genetic networks, among a set of variables. This is usually carried out via exploring the sparsity of the precision matrix of the variables under consideration. Penalized likelihood methods are often used in such explorations. Yet, positive-definiteness constraints of precision matrices make the optimization problem challenging. We introduce non-concave penalties and the adaptive LASSO penalty to attenuate the bias problem in the network estimation. Through the local linear approximation to the non-concave penalty functions, the problem of precision matrix estimation is recast as a sequence of penalized likelihood problems with a weighted L1 penalty and solved using the efficient algorithm of Friedman et al. (2008). Our estimation schemes are applied to two real datasets. Simulation experiments and asymptotic theory are used to justify our proposed methods.}, number={2}, journal={ANNALS OF APPLIED STATISTICS}, author={Fan, Jianqing and Feng, Yang and Wu, Yichao}, year={2009}, month={Jun}, pages={521–541} }
@article{wu_liu_2009a, title={Stepwise multiple quantile regression estimation using non-crossing constraints}, volume={2}, DOI={10.4310/sii.2009.v2.n3.a4}, abstractNote={Quantile regression is an important statistical tool for statistical modeling. It has been widely used in various fields including econometrics, medicine, and bioinformatics. Despite its popularity in practice, individually estimated quantile regression functions often cross each other and consequently violate the basic properties of quantiles. In this paper we propose a new method for estimating multiple quantile regression functions without crossing. Both linear and kernel quantile regression models are considered. Several numerical examples are presented to illustrate competitive performance of the proposed method.}, number={3}, journal={Statistics and its Interface}, author={Wu, Y. C. and Liu, Y. F.}, year={2009}, pages={299–310} }
@article{fan_samworth_wu_2009, title={Ultrahigh dimensional feature selection: Beyond the linear model}, volume={10}, journal={Journal of Machine Learning Research}, author={Fan, J. Q. and Samworth, R. and Wu, Y. C.}, year={2009}, pages={2013–2038} }
@article{wu_liu_2009b, title={Variable selection in quantile regression}, volume={19}, number={2}, journal={Statistica Sinica}, author={Wu, Y. C. and Liu, Y. F.}, year={2009}, pages={801–817} }
@article{fan_wu_2008, title={Semiparametric Estimation of Covariance Matrixes for Longitudinal Data}, volume={103}, ISSN={["1537-274X"]}, DOI={10.1198/016214508000000742}, abstractNote={Estimation of longitudinal data covariance structure poses significant challenges because the data usually are collected at irregular time points. A viable semiparametric model for covariance matrixes has been proposed that allows one to estimate the variance function nonparametrically and to estimate the correlation function parametrically by aggregating information from irregular and sparse data points within each subject.
But the asymptotic properties of the quasi-maximum likelihood estimator (QMLE) of parameters in the covariance model are largely unknown. We address this problem in the context of more general models for the conditional mean function, including parametric, nonparametric, or semiparametric. We also consider the possibility of rough mean regression function and introduce the difference-based method to reduce biases in the context of varying-coefficient partially linear mean regression models. This provides a more robust estimator of the covariance function under a wider range of situations. Under some technical conditions, consistency and asymptotic normality are obtained for the QMLE of the parameters in the correlation function. Simulation studies and a real data example are used to illustrate the proposed approach.}, number={484}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Fan, Jianqing and Wu, Yichao}, year={2008}, month={Dec}, pages={1520–1533} }