% Publication list (journal articles and one conference abstract), newest first.
%
% Conventions used in this file:
%   * one field per line; field order is author, title, venue, volume, number,
%     pages, year, month, issn, doi, url, abstractNote
%   * month uses the predefined three-letter macros (jan ... dec), unbraced
%   * page ranges use a double hyphen
%   * acronyms and proper nouns in titles are brace-protected so that
%     sentence-casing styles do not lowercase them
%   * abstractNote is a non-standard field kept from the export tool; standard
%     styles ignore it
%
% Key notes:
%   * lange_chi_zhou_2014 is the survey paper; the rejoinder by the same
%     authors uses lange_chi_zhou_2014b because citation keys must be unique
%   * keys never contain spaces (a space was replaced by an underscore in the
%     del_vecchyo and et_al. keys)

@article{hu_lu_zhou_zhou_2019,
  author       = {Hu, Liuyi and Lu, Wenbin and Zhou, Jin and Zhou, Hua},
  title        = {{MM} Algorithms for Variance Component Estimation and Selection in Logistic Linear Mixed Model},
  journal      = {Statistica Sinica},
  volume       = {29},
  number       = {3},
  pages        = {1585--1605},
  year         = {2019},
  month        = jul,
  issn         = {1996-8507},
  doi          = {10.5705/ss.202017.0220},
  abstractNote = {Logistic linear mixed models are widely used in experimental designs and genetic analyses of binary traits. Motivated by modern applications, we consider the case of many groups of random effects, where each group corresponds to a variance component. When the number of variance components is large, fitting a logistic linear mixed model is challenging. Thus, we develop two efficient and stable minorization-maximization (MM) algorithms for estimating variance components based on a Laplace approximation of the logistic model. One of these leads to a simple iterative soft-thresholding algorithm for variance component selection using the maximum penalized approximated likelihood. We demonstrate the variance component estimation and selection performance of our algorithms by means of simulation studies and an analysis of real data.},
}

@article{zhou_hu_zho_lange_2019,
  author       = {Zhou, Hua and Hu, Liuyi and Zhou, Jin and Lange, Kenneth},
  title        = {{MM} Algorithms for Variance Components Models},
  journal      = {Journal of Computational and Graphical Statistics},
  volume       = {28},
  number       = {2},
  pages        = {350--361},
  year         = {2019},
  month        = apr,
  issn         = {1537-2715},
  doi          = {10.1080/10618600.2018.1529601},
  abstractNote = {Variance components estimation and mixed model analysis are central themes in statistics with applications in numerous scientific disciplines. Despite the best efforts of generations of statisticians and numerical analysts, maximum likelihood estimation (MLE) and restricted MLE of variance component models remain numerically challenging. Building on the minorization–maximization (MM) principle, this article presents a novel iterative algorithm for variance components estimation. Our MM algorithm is trivial to implement and competitive on large data problems. The algorithm readily extends to more complicated problems such as linear mixed models, multivariate response models possibly with missing data, maximum a posteriori estimation, and penalized estimation. We establish the global convergence of the MM algorithm to a Karush–Kuhn–Tucker point and demonstrate, both numerically and theoretically, that it converges faster than the classical EM algorithm when the number of variance components is greater than two and all covariance matrices are positive definite. Supplementary materials for this article are available online.},
}

@article{zhang_li_zhou_zhou_shen_2019,
  author       = {Zhang, Xiang and Li, Lexin and Zhou, Hua and Zhou, Yeqing and Shen, Dinggang},
  title        = {Tensor Generalized Estimating Equations for Longitudinal Imaging Analysis},
  journal      = {Statistica Sinica},
  volume       = {29},
  number       = {4},
  pages        = {1977--2005},
  year         = {2019},
  month        = oct,
  issn         = {1996-8507},
  doi          = {10.5705/ss.202017.0153},
  abstractNote = {Longitudinal neuroimaging studies are becoming increasingly prevalent, where brain images are collected on multiple subjects at multiple time points. Analyses of such data are scientifically important, but also challenging. Brain images are in the form of multidimensional arrays, or tensors, which are characterized by both ultrahigh dimensionality and a complex structure. Longitudinally repeated images and induced temporal correlations add a further layer of complexity. Despite some recent efforts, there exist very few solutions for longitudinal imaging analyses. In response to the increasing need to analyze longitudinal imaging data, we propose several tensor generalized estimating equations (GEEs). The proposed GEE approach accounts for intra-subject correlation, and an imposed low-rank structure on the coefficient tensor effectively reduces the dimensionality. We also propose a scalable estimation algorithm, establish the asymptotic properties of the solution to the tensor GEEs, and investigate sparsity regularization for the purpose of region selection. We demonstrate the proposed method using simulations and by analyzing a real data set from the Alzheimer's Disease Neuroimaging Initiative.},
}

@article{gaines_kim_zhou_2018,
  author       = {Gaines, Brian R. and Kim, Juhyun and Zhou, Hua},
  title        = {Algorithms for Fitting the Constrained Lasso},
  journal      = {Journal of Computational and Graphical Statistics},
  volume       = {27},
  number       = {4},
  pages        = {861--871},
  year         = {2018},
  doi          = {10.1080/10618600.2018.1473777},
  abstractNote = {We compare alternative computing strategies for solving the constrained lasso problem. As its name suggests, the constrained lasso extends the widely-used lasso to handle linear constraints, which allow the user to incorporate prior information into the model. In addition to quadratic programming, we employ the alternating direction method of multipliers (ADMM) and also derive an efficient solution path algorithm. Through both simulations and benchmark data examples, we compare the different algorithms and provide practical recommendations in terms of efficiency and accuracy for various sizes of data. We also show that, for an arbitrary penalty matrix, the generalized lasso can be transformed to a constrained lasso, while the converse is not true. Thus, our methods can also be used for estimating a generalized lasso, which has wide-ranging applications. Code for implementing the algorithms is freely available in both the Matlab toolbox SparseReg and the Julia package ConstrainedLasso. Supplementary materials for this article are available online.},
}

@article{xiao_wu_zhou_2015,
  author       = {Xiao, Wei and Wu, Yichao and Zhou, Hua},
  title        = {{ConvexLAR}: An Extension of Least Angle Regression},
  journal      = {Journal of Computational and Graphical Statistics},
  volume       = {24},
  number       = {3},
  pages        = {603--626},
  year         = {2015},
  month        = jul,
  issn         = {1537-2715},
  doi          = {10.1080/10618600.2014.962700},
  abstractNote = {The least angle regression (LAR) was proposed by Efron, Hastie, Johnstone and Tibshirani (2004) for continuous model selection in linear regression. It is motivated by a geometric argument and tracks a path along which the predictors enter successively and the active predictors always maintain the same absolute correlation (angle) with the residual vector. Although it gains popularity quickly, its extensions seem rare compared to the penalty methods. In this expository article, we show that the powerful geometric idea of LAR can be generalized in a fruitful way. We propose a ConvexLAR algorithm that works for any convex loss function and naturally extends to group selection and data adaptive variable selection. After simple modification it also yields new exact path algorithms for certain penalty methods such as a convex loss function with lasso or group lasso penalty. Variable selection in recurrent event and panel count data analysis, Ada-Boost, and Gaussian graphical model is reconsidered from the ConvexLAR angle.},
}

@article{sun_liu_crowley_chen_zhou_chu_huang_kuan_li_miller_et_al._2015,
  author       = {Sun, Wei and Liu, Yufeng and Crowley, James J. and Chen, Ting-Huei and Zhou, Hua and Chu, Haitao and Huang, Shunping and Kuan, Pei-Fen and Li, Yuan and Miller, Darla and others},
  title        = {{IsoDOT} Detects Differential {RNA}-Isoform Expression/Usage With Respect to a Categorical or Continuous Covariate With High Sensitivity and Specificity},
  journal      = {Journal of the American Statistical Association},
  volume       = {110},
  number       = {511},
  pages        = {975--986},
  year         = {2015},
  month        = sep,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2015.1040880},
  abstractNote = {We have developed a statistical method named IsoDOT to assess differential isoform expression (DIE) and differential isoform usage (DIU) using RNA-seq data. Here isoform usage refers to relative isoform expression given the total expression of the corresponding gene. IsoDOT performs two tasks that cannot be accomplished by existing methods: to test DIE/DIU with respect to a continuous covariate, and to test DIE/DIU for one case versus one control. The latter task is not an uncommon situation in practice, for example, comparing the paternal and maternal alleles of one individual or comparing tumor and normal samples of one cancer patient. Simulation studies demonstrate the high sensitivity and specificity of IsoDOT. We apply IsoDOT to study the effects of haloperidol treatment on the mouse transcriptome and identify a group of genes whose isoform usages respond to haloperidol treatment. Supplementary materials for this article are available online.},
}

@article{zhou_lange_2015,
  author       = {Zhou, Hua and Lange, Kenneth},
  title        = {Path following in the exact penalty method of convex programming},
  journal      = {Computational Optimization and Applications},
  volume       = {61},
  number       = {3},
  pages        = {609--634},
  year         = {2015},
  month        = jul,
  issn         = {1573-2894},
  doi          = {10.1007/s10589-015-9732-x},
  abstractNote = {Classical penalty methods solve a sequence of unconstrained problems that put greater and greater stress on meeting the constraints. In the limit as the penalty constant tends to $\infty$, one recovers the constrained solution. In the exact penalty method, squared penalties are replaced by absolute value penalties, and the solution is recovered for a finite value of the penalty constant. In practice, the kinks in the penalty and the unknown magnitude of the penalty constant prevent wide application of the exact penalty method in nonlinear programming. In this article, we examine a strategy of path following consistent with the exact penalty method. Instead of performing optimization at a single penalty constant, we trace the solution as a continuous function of the penalty constant. Thus, path following starts at the unconstrained solution and follows the solution path as the penalty constant increases. In the process, the solution path hits, slides along, and exits from the various constraints. For quadratic programming, the solution path is piecewise linear and takes large jumps from constraint to constraint. For a general convex program, the solution path is piecewise smooth, and path following operates by numerically solving an ordinary differential equation segment by segment. Our diverse applications to (a) projection onto a convex set, (b) nonnegative least squares, (c) quadratically constrained quadratic programming, (d) geometric programming, and (e) semidefinite programming illustrate the mechanics and potential of path following. The final detour to image denoising demonstrates the relevance of path following to regularized estimation in inverse problems. In regularized estimation, one follows the solution path as the penalty constant decreases from a large value.},
}

@article{zhao_chen_carroll_ringel-kulka_epstein_zhou_zhou_ringel_li_wu_2015,
  author       = {Zhao, Ni and Chen, Jun and Carroll, Ian M. and Ringel-Kulka, Tamar and Epstein, Michael P. and Zhou, Hua and Zhou, Jin J. and Ringel, Yehuda and Li, Hongzhe and Wu, Michael C.},
  title        = {Testing in Microbiome-Profiling Studies with {MiRKAT}, the Microbiome Regression-Based Kernel Association Test},
  journal      = {American Journal of Human Genetics},
  volume       = {96},
  number       = {5},
  pages        = {797--807},
  year         = {2015},
  month        = may,
  issn         = {1537-6605},
  doi          = {10.1016/j.ajhg.2015.04.003},
  abstractNote = {High-throughput sequencing technology has enabled population-based studies of the role of the human microbiome in disease etiology and exposure response. Distance-based analysis is a popular strategy for evaluating the overall association between microbiome diversity and outcome, wherein the phylogenetic distance between individuals' microbiome profiles is computed and tested for association via permutation. Despite their practical popularity, distance-based approaches suffer from important challenges, especially in selecting the best distance and extending the methods to alternative outcomes, such as survival outcomes. We propose the microbiome regression-based kernel association test (MiRKAT), which directly regresses the outcome on the microbiome profiles via the semi-parametric kernel machine regression framework. MiRKAT allows for easy covariate adjustment and extension to alternative outcomes while non-parametrically modeling the microbiome through a kernel that incorporates phylogenetic distance. It uses a variance-component score statistic to test for the association with analytical p value calculation. The model also allows simultaneous examination of multiple distances, alleviating the problem of choosing the best distance. Our simulations demonstrated that MiRKAT provides correctly controlled type I error and adequate power in detecting overall association. "Optimal" MiRKAT, which considers multiple candidate distances, is robust in that it suffers from little power loss in comparison to when the best distance is used and can achieve tremendous power gain in comparison to when a poor distance is chosen. Finally, we applied MiRKAT to real microbiome datasets to show that microbial communities are associated with smoking and with fecal protease levels after confounders are controlled for.},
}

@article{lange_chi_zhou_2014,
  author       = {Lange, Kenneth and Chi, Eric C. and Zhou, Hua},
  title        = {A Brief Survey of Modern Optimization for Statisticians},
  journal      = {International Statistical Review},
  volume       = {82},
  number       = {1},
  pages        = {46--70},
  year         = {2014},
  month        = apr,
  issn         = {1751-5823},
  doi          = {10.1111/insr.12022},
  url          = {http://dx.doi.org/10.1111/insr.12022},
  abstractNote = {Modern computational statistics is turning more and more to high-dimensional optimization to handle the deluge of big data. Once a model is formulated, its parameters can be estimated by optimization. Because model parsimony is important, models routinely include nondifferentiable penalty terms such as the lasso. This sober reality complicates minimization and maximization. Our broad survey stresses a few important principles in algorithm design. Rather than view these principles in isolation, it is more productive to mix and match them. A few well chosen examples illustrate this point. Algorithm derivation is also emphasized, and theory is downplayed, particularly the abstractions of the convex calculus. Thus, our survey should be useful and accessible to a broad audience.},
}

@article{zhou_wu_2014,
  author       = {Zhou, Hua and Wu, Yichao},
  title        = {A Generic Path Algorithm for Regularized Statistical Estimation},
  journal      = {Journal of the American Statistical Association},
  volume       = {109},
  number       = {506},
  pages        = {686--699},
  year         = {2014},
  month        = jun,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2013.864166},
  abstractNote = {Regularization is widely used in statistics and machine learning to prevent overfitting and gear solution toward prior information. In general, a regularized estimation problem minimizes the sum of a loss function and a penalty term. The penalty term is usually weighted by a tuning parameter and encourages certain constraints on the parameters to be estimated. Particular choices of constraints lead to the popular lasso, fused-lasso, and other generalized ℓ1 penalized regression methods. In this article we follow a recent idea by Wu and propose an exact path solver based on ordinary differential equations (EPSODE) that works for any convex loss function and can deal with generalized ℓ1 penalties as well as more complicated regularization such as inequality constraints encountered in shape-restricted regressions and nonparametric density estimation. Nonasymptotic error bounds for the equality regularized estimates are derived. In practice, the EPSODE can be coupled with AIC, BIC, Cp or cross-validation to select an optimal tuning parameter, or provide a convenient model space for performing model averaging or aggregation. Our applications to generalized ℓ1 regularized generalized linear models, shape-restricted regressions, Gaussian graphical models, and nonparametric density estimation showcase the potential of the EPSODE algorithm. Supplementary materials for this article are available online.},
}

@article{chi_zhou_lange_2014,
  author       = {Chi, Eric C. and Zhou, Hua and Lange, Kenneth},
  title        = {Distance majorization and its applications},
  journal      = {Mathematical Programming},
  volume       = {146},
  number       = {1-2},
  pages        = {409--436},
  year         = {2014},
  month        = aug,
  publisher    = {Springer-Verlag},
  issn         = {1436-4646},
  doi          = {10.1007/s10107-013-0697-1},
  url          = {http://link.springer.com/article/10.1007%2Fs10107-013-0697-1#},
  abstractNote = {The problem of minimizing a continuously differentiable convex function over an intersection of closed convex sets is ubiquitous in applied mathematics. It is particularly interesting when it is easy to project onto each separate set, but nontrivial to project onto their intersection. Algorithms based on Newton’s method such as the interior point method are viable for small to medium-scale problems. However, modern applications in statistics, engineering, and machine learning are posing problems with potentially tens of thousands of parameters or more. We revisit this convex programming problem and propose an algorithm that scales well with dimensionality. Our proposal is an instance of a sequential unconstrained minimization technique and revolves around three ideas: the majorization-minimization principle, the classical penalty method for constrained optimization, and quasi-Newton acceleration of fixed-point algorithms. The performance of our distance majorization algorithms is illustrated in several applications.},
}

@article{lange_zhou_2014,
  author       = {Lange, Kenneth and Zhou, Hua},
  title        = {{MM} algorithms for geometric and signomial programming},
  journal      = {Mathematical Programming},
  volume       = {143},
  number       = {1-2},
  pages        = {339--356},
  year         = {2014},
  month        = feb,
  issn         = {1436-4646},
  doi          = {10.1007/s10107-012-0612-1},
  abstractNote = {This paper derives new algorithms for signomial programming, a generalization of geometric programming. The algorithms are based on a generic principle for optimization called the MM algorithm. In this setting, one can apply the geometric-arithmetic mean inequality and a supporting hyperplane inequality to create a surrogate function with parameters separated. Thus, unconstrained signomial programming reduces to a sequence of one-dimensional minimization problems. Simple examples demonstrate that the MM algorithm derived can converge to a boundary point or to one point of a continuum of minimum points. Conditions under which the minimum point is unique or occurs in the interior of parameter space are proved for geometric programming. Convergence to an interior point occurs at a linear rate. Finally, the MM framework easily accommodates equality and inequality constraints of signomial type. For the most important special case, constrained quadratic programming, the MM algorithm involves very simple updates.},
}

@article{zhou_li_2014,
  author       = {Zhou, Hua and Li, Lexin},
  title        = {Regularized matrix regression},
  journal      = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume       = {76},
  number       = {2},
  pages        = {463--483},
  year         = {2014},
  month        = mar,
  issn         = {1467-9868},
  doi          = {10.1111/rssb.12031},
  abstractNote = {Summary Modern technologies are producing a wealth of data with complex structures. For instance, in two-dimensional digital imaging, flow cytometry and electroencephalography, matrix-type covariates frequently arise when measurements are obtained for each combination of two underlying variables. To address scientific questions arising from those data, new regression methods that take matrices as covariates are needed, and sparsity or other forms of regularization are crucial owing to the ultrahigh dimensionality and complex structure of the matrix data. The popular lasso and related regularization methods hinge on the sparsity of the true signal in terms of the number of its non-zero coefficients. However, for the matrix data, the true signal is often of, or can be well approximated by, a low rank structure. As such, the sparsity is frequently in the form of low rank of the matrix parameters, which may seriously violate the assumption of the classical lasso. We propose a class of regularized matrix regression methods based on spectral regularization. A highly efficient and scalable estimation algorithm is developed, and a degrees-of-freedom formula is derived to facilitate model selection along the regularization path. Superior performance of the method proposed is demonstrated on both synthetic and real examples.},
}

@article{lange_chi_zhou_2014b,
  author       = {Lange, Kenneth and Chi, Eric C. and Zhou, Hua},
  title        = {Rejoinder},
  journal      = {International Statistical Review},
  volume       = {82},
  number       = {1},
  pages        = {81--89},
  year         = {2014},
  month        = apr,
  issn         = {1751-5823},
  doi          = {10.1111/insr.12030},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84899082443&partnerID=MN8TOARS},
}

@article{chi_zhou_chen_del_vecchyo_lange_2013,
  author       = {Chi, Eric C. and Zhou, Hua and Chen, Gary K. and Del Vecchyo, Diego Ortega and Lange, Kenneth},
  title        = {Genotype imputation via matrix completion},
  journal      = {Genome Research},
  volume       = {23},
  number       = {3},
  pages        = {509--518},
  year         = {2013},
  month        = mar,
  issn         = {1549-5469},
  doi          = {10.1101/gr.145821.112},
  url          = {http://genome.cshlp.org/content/23/3/509.full},
  abstractNote = {Most current genotype imputation methods are model-based and computationally intensive, taking days to impute one chromosome pair on 1000 people. We describe an efficient genotype imputation method based on matrix completion. Our matrix completion method is implemented in MATLAB and tested on real data from HapMap 3 , simulated pedigree data, and simulated low-coverage sequencing data derived from the 1000 Genomes Project. Compared with leading imputation programs, the matrix completion algorithm embodied in our program MENDEL-IMPUTE achieves comparable imputation accuracy while reducing run times significantly. Implementation in a lower-level language such as Fortran or C is apt to further improve computational efficiency.},
}

@article{lange_papp_sinsheimer_sripracha_zhou_sobel_2013,
  author       = {Lange, Kenneth and Papp, Jeanette C. and Sinsheimer, Janet S. and Sripracha, Ram and Zhou, Hua and Sobel, Eric M.},
  title        = {{Mendel}: the {Swiss} army knife of genetic analysis programs},
  journal      = {Bioinformatics},
  volume       = {29},
  number       = {12},
  pages        = {1568--1570},
  year         = {2013},
  month        = jun,
  issn         = {1460-2059},
  doi          = {10.1093/bioinformatics/btt187},
  abstractNote = {Abstract Summary: Mendel is one of the few statistical genetics packages that provide a full spectrum of gene mapping methods, ranging from parametric linkage in large pedigrees to genome-wide association with rare variants. Our latest additions to Mendel anticipate and respond to the needs of the genetics community. Compared with earlier versions, Mendel is faster and easier to use and has a wider range of applications. Supported platforms include Linux, MacOS and Windows. Availability: Free from www.genetics.ucla.edu/software/mendel Contact: klange@ucla.edu Supplementary information: Supplementary data are available at Bioinformatics online.},
}

@article{zhou_lange_2013,
  author       = {Zhou, Hua and Lange, Kenneth},
  title        = {Path Algorithm for Constrained Estimation},
  journal      = {Journal of Computational and Graphical Statistics},
  volume       = {22},
  number       = {2},
  pages        = {261--283},
  year         = {2013},
  month        = jun,
  issn         = {1537-2715},
  doi          = {10.1080/10618600.2012.681248},
  abstractNote = {Many least squares problems involve affine equality and inequality constraints. Although there are variety of methods for solving such problems, most statisticians find constrained estimation challenging. The current paper proposes a new path following algorithm for quadratic programming based on exact penalization. Similar penalties arise in $l_1$ regularization in model selection. Classical penalty methods solve a sequence of unconstrained problems that put greater and greater stress on meeting the constraints. In the limit as the penalty constant tends to $\infty$, one recovers the constrained solution. In the exact penalty method, squared penalties are replaced by absolute value penalties, and the solution is recovered for a finite value of the penalty constant. The exact path following method starts at the unconstrained solution and follows the solution path as the penalty constant increases. In the process, the solution path hits, slides along, and exits from the various constraints. Path following in lasso penalized regression, in contrast, starts with a large value of the penalty constant and works its way downward. In both settings, inspection of the entire solution path is revealing. Just as with the lasso and generalized lasso, it is possible to plot the effective degrees of freedom along the solution path. For a strictly convex quadratic program, the exact penalty algorithm can be framed entirely in terms of the sweep operator of regression analysis. A few well chosen examples illustrate the mechanics and potential of path following.},
}

@article{zhou_li_zhu_2013,
  author       = {Zhou, Hua and Li, Lexin and Zhu, Hongtu},
  title        = {Tensor Regression with Applications in Neuroimaging Data Analysis},
  journal      = {Journal of the American Statistical Association},
  volume       = {108},
  number       = {502},
  pages        = {540--552},
  year         = {2013},
  month        = jun,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2013.776499},
  abstractNote = {Classical regression methods treat covariates as a vector and estimate a corresponding vector of regression coefficients. Modern applications in medical imaging generate covariates of more complex form such as multidimensional arrays (tensors). Traditional statistical and computational methods are proving insufficient for analysis of these high-throughput data due to their ultrahigh dimensionality as well as complex structure. In this article, we propose a new family of tensor regression models that efficiently exploit the special structure of tensor covariates. Under this framework, ultrahigh dimensionality is reduced to a manageable level, resulting in efficient estimation and prediction. A fast and highly scalable estimation algorithm is proposed for maximum likelihood estimation and its associated asymptotic properties are studied. Effectiveness of the new methods is demonstrated on both synthetic and real MRI imaging data.},
}

@article{riley_zhou_lange_sinsheimer_sehl_2012,
  author       = {Riley, L. and Zhou, H. and Lange, K. and Sinsheimer, J. S. and Sehl, M. E.},
  title        = {Determining duration of {HER2}-targeted therapy using stem cell extinction models},
  journal      = {PLoS One},
  volume       = {7},
  number       = {12},
  year         = {2012},
}

@article{zhou_zhang_2012,
  author       = {Zhou, Hua and Zhang, Yiwen},
  title        = {{EM} vs {MM}: A case study},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {56},
  number       = {12},
  pages        = {3909--3920},
  year         = {2012},
  month        = dec,
  issn         = {1872-7352},
  doi          = {10.1016/j.csda.2012.05.018},
  abstractNote = {The celebrated expectation-maximization (EM) algorithm is one of the most widely used optimization methods in statistics. In recent years it has been realized that EM algorithm is a special case of the more general minorization-maximization (MM) principle. Both algorithms creates a surrogate function in the first (E or M) step that is maximized in the second M step. This two step process always drives the objective function uphill and is iterated until the parameters converge. The two algorithms differ in the way the surrogate function is constructed. The expectation step of the EM algorithm relies on calculating conditional expectations, while the minorization step of the MM algorithm builds on crafty use of inequalities. For many problems, EM and MM derivations yield the same algorithm. This expository note walks through the construction of both algorithms for estimating the parameters of the Dirichlet-Multinomial distribution. This particular case is of interest because EM and MM derivations lead to two different algorithms with completely distinct operating characteristics. The EM algorithm converges fast but involves solving a nontrivial maximization problem in the M step. In contrast the MM updates are extremely simple but converge slowly. An EM-MM hybrid algorithm is derived which shows faster convergence than the MM algorithm in certain parameter regimes. The local convergence rates of the three algorithms are studied theoretically from the unifying MM point of view and also compared on numerical examples.},
}

@inproceedings{zhou_sobel_lange_2012,
  author       = {Zhou, H. and Sobel, E. and Lange, K.},
  title        = {Fast genome-wide {QTL} association mapping with pedigrees},
  booktitle    = {Genetic Epidemiology},
  volume       = {36},
  number       = {7},
  pages        = {771--772},
  year         = {2012},
}

@article{zhu_li_zhou_2012,
  author       = {Zhu, Hongjie and Li, Lexin and Zhou, Hua},
  title        = {Nonlinear dimension reduction with {Wright}-{Fisher} kernel for genotype aggregation and association mapping},
  journal      = {Bioinformatics},
  volume       = {28},
  number       = {18},
  pages        = {I375--I381},
  year         = {2012},
  month        = sep,
  issn         = {1460-2059},
  doi          = {10.1093/bioinformatics/bts406},
  abstractNote = {Abstract Motivation: Association tests based on next-generation sequencing data are often under-powered due to the presence of rare variants and large amount of neutral or protective variants. A successful strategy is to aggregate genetic information within meaningful single-nucleotide polymorphism (SNP) sets, e.g. genes or pathways, and test association on SNP sets. Many existing methods for group-wise tests require specific assumptions about the direction of individual SNP effects and/or perform poorly in the presence of interactions. Results: We propose a joint association test strategy based on two key components: a nonlinear supervised dimension reduction approach for effective SNP information aggregation and a novel kernel specially designed for qualitative genotype data. The new test demonstrates superior performance in identifying causal genes over existing methods across a large variety of disease models simulated from sequence data of real genes. In general, the proposed method provides an association test strategy that can (i) detect both rare and common causal variants, (ii) deal with both additive and interaction effect, (iii) handle both quantitative traits and disease dichotomies and (iv) incorporate non-genetic covariates. In addition, the new kernel can potentially boost the power of the entire family of kernel-based methods for genetic data analysis. Availability: The method is implemented in MATLAB. Source code is available upon request. Contact: hongjie.zhu@duke.edu},
}

@article{sehl_zhou_sinsheimer_lange_2011,
  author       = {Sehl, Mary and Zhou, Hua and Sinsheimer, Janet S. and Lange, Kenneth L.},
  title        = {Extinction models for cancer stem cell therapy},
  journal      = {Mathematical Biosciences},
  volume       = {234},
  number       = {2},
  pages        = {132--146},
  year         = {2011},
  month        = dec,
  issn         = {1879-3134},
  doi          = {10.1016/j.mbs.2011.09.005},
  abstractNote = {Cells with stem cell-like properties are now viewed as initiating and sustaining many cancers. This suggests that cancer can be cured by driving these cancer stem cells to extinction. The problem with this strategy is that ordinary stem cells are apt to be killed in the process. This paper sets bounds on the killing differential (difference between death rates of cancer stem cells and normal stem cells) that must exist for the survival of an adequate number of normal stem cells. Our main tools are birth-death Markov chains in continuous time. In this framework, we investigate the extinction times of cancer stem cells and normal stem cells. Application of extreme value theory from mathematical statistics yields an accurate asymptotic distribution and corresponding moments for both extinction times. We compare these distributions for the two cell populations as a function of the killing rates. Perhaps a more telling comparison involves the number of normal stem cells NH at the extinction time of the cancer stem cells. Conditioning on the asymptotic time to extinction of the cancer stem cells allows us to calculate the asymptotic mean and variance of NH. The full distribution of NH can be retrieved by the finite Fourier transform and, in some parameter regimes, by an eigenfunction expansion. Finally, we discuss the impact of quiescence (the resting state) on stem cell dynamics. Quiescence can act as a sanctuary for cancer stem cells and imperils the proposed therapy. We approach the complication of quiescence via multitype branching process models and stochastic simulation. Improvements to the τ-leaping method of stochastic simulation make it a versatile tool in this context. We conclude that the proposed therapy must target quiescent cancer stem cells as well as actively dividing cancer stem cells. The current cancer models demonstrate the virtue of attacking the same quantitative questions from a variety of modeling, mathematical, and computational perspectives.},
}

@article{zhou_sehl_sinsheimer_lange_2010,
  author       = {Zhou, Hua and Sehl, Mary E. and Sinsheimer, Janet S. and Lange, Kenneth},
  title        = {Association screening of common and rare genetic variants by penalized regression},
  journal      = {Bioinformatics},
  volume       = {26},
  number       = {19},
  pages        = {2375--2382},
  year         = {2010},
  month        = oct,
  issn         = {1460-2059},
  doi          = {10.1093/bioinformatics/btq448},
  abstractNote = {Abstract Motivation: This article extends our recent research on penalized estimation methods in genome-wide association studies to the realm of rare variants. Results: The new strategy is tested on both simulated and real data. Our findings on breast cancer data replicate previous results and shed light on variant effects within genes. Availability: Rare variant discovery by group penalized regression is now implemented in the free program Mendel at http://www.genetics.ucla.edu/software/ Contact: huazhou@ucla.edu Supplementary information: Supplementary data are available at Bioinformatics online.},
}

@article{zhou_lange_suchard_2010,
  author       = {Zhou, Hua and Lange, Kenneth and Suchard, Marc A.},
  title        = {Graphics Processing Units and High-Dimensional Optimization},
  journal      = {Statistical Science},
  volume       = {25},
  number       = {3},
  pages        = {311--324},
  year         = {2010},
  month        = aug,
  issn         = {2168-8745},
  doi          = {10.1214/10-sts336},
  abstractNote = {This paper discusses the potential of graphics processing units (GPUs) in high-dimensional optimization problems. A single GPU card with hundreds of arithmetic cores can be inserted in a personal computer and dramatically accelerates many statistical algorithms. To exploit these devices fully, optimization algorithms should reduce to multiple parallel tasks, each accessing a limited amount of data. These criteria favor EM and MM algorithms that separate parameters and data. To a lesser extent block relaxation and coordinate descent and ascent also qualify. We demonstrate the utility of GPUs in nonnegative matrix factorization, PET image reconstruction, and multidimensional scaling. Speedups of 100 fold can easily be attained. Over the next decade, GPUs will fundamentally alter the landscape of computational statistics. It is time for more statisticians to get on-board.},
}