@article{heng_zhou_chi_2023, title={Bayesian Trend Filtering via Proximal Markov Chain Monte Carlo}, volume={2}, ISSN={["1537-2715"]}, url={https://doi.org/10.1080/10618600.2023.2170089}, DOI={10.1080/10618600.2023.2170089}, abstractNote={Abstract Proximal Markov chain Monte Carlo is a novel construct that lies at the intersection of Bayesian computation and convex optimization, which helped popularize the use of nondifferentiable priors in Bayesian statistics. Existing formulations of proximal MCMC, however, require hyperparameters and regularization parameters to be prespecified. In this article, we extend the paradigm of proximal MCMC through introducing a novel new class of nondifferentiable priors called epigraph priors. As a proof of concept, we place trend filtering, which was originally a nonparametric regression problem, in a parametric setting to provide a posterior median fit along with credible intervals as measures of uncertainty. The key idea is to replace the nonsmooth term in the posterior density with its Moreau-Yosida envelope, which enables the application of the gradient-based MCMC sampler Hamiltonian Monte Carlo. The proposed method identifies the appropriate amount of smoothing in a data-driven way, thereby automating regularization parameter selection. Compared with conventional proximal MCMC methods, our method is mostly tuning free, achieving simultaneous calibration of the mean, scale and regularization parameters in a fully Bayesian framework. 
Supplementary materials for this article are available online.}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Heng, Qiang and Zhou, Hua and Chi, Eric C.}, year={2023}, month={Feb} } @article{heng_chi_liu_2023, title={Robust Low-Rank Tensor Decomposition with the L-2 Criterion}, ISSN={["1537-2723"]}, url={https://doi.org/10.1080/00401706.2023.2200541}, DOI={10.1080/00401706.2023.2200541}, abstractNote={Abstract The growing prevalence of tensor data, or multiway arrays, in science and engineering applications motivates the need for tensor decompositions that are robust against outliers. In this article, we present a robust Tucker decomposition estimator based on the L2 criterion, called the Tucker- . Our numerical experiments demonstrate that Tucker- has empirically stronger recovery performance in more challenging high-rank scenarios compared with existing alternatives. The appropriate Tucker-rank can be selected in a data-driven manner with cross-validation or hold-out validation. The practical effectiveness of Tucker- is validated on real data applications in fMRI tensor denoising, PARAFAC analysis of fluorescence data, and feature extraction for classification of corrupted images.}, journal={TECHNOMETRICS}, author={Heng, Qiang and Chi, Eric C. and Liu, Yufeng}, year={2023}, month={May} } @article{liu_chi_lange_2022, title={A Sharper Computational Tool for L2E Regression}, volume={10}, ISSN={["1537-2723"]}, url={https://doi.org/10.1080/00401706.2022.2118172}, DOI={10.1080/00401706.2022.2118172}, abstractNote={Abstract Building on previous research of Chi and Chi, this article revisits estimation in robust structured regression under the criterion. We adopt the majorization-minimization (MM) principle to design a new algorithm for updating the vector of regression coefficients. Our sharp majorization achieves faster convergence than the previous alternating proximal gradient descent algorithm by Chi and Chi. 
In addition, we reparameterize the model by substituting precision for scale and estimate precision via a modified Newton’s method. This simplifies and accelerates overall estimation. We also introduce distance-to-set penalties to enable constrained estimation under nonconvex constraint sets. This tactic also improves performance in coefficient estimation and structure recovery. Finally, we demonstrate the merits of our improved tactics through a rich set of simulation examples and a real data application.}, journal={TECHNOMETRICS}, author={Liu, Xiaoqian and Chi, Eric C. and Lange, Kenneth}, year={2022}, month={Oct} } @article{chi_chi_2022, title={A User-Friendly Computational Framework for Robust Structured Regression with the L2 Criterion}, url={https://doi.org/10.1080/10618600.2022.2035232}, DOI={10.1080/10618600.2022.2035232}, abstractNote={Abstract We introduce a user-friendly computational framework for implementing robust versions of a wide variety of structured regression methods with the L2 criterion. In addition to introducing an algorithm for performing L2E regression, our framework enables robust regression with the L2 criterion for additional structural constraints, works without requiring complex tuning procedures on the precision parameter, can be used to identify heterogeneous subpopulations, and can incorporate readily available nonrobust structured regression solvers. We provide convergence guarantees for the framework and demonstrate its flexibility with some examples. Supplementary materials for this article are available online.}, journal={Journal of Computational and Graphical Statistics}, author={Chi, Jocelyn T. 
and Chi, Eric C.}, year={2022}, month={Oct} } @article{liu_chi_2022, title={Revisiting convexity-preserving signal recovery with the linearly involved GMC penalty}, volume={156}, ISSN={["1872-7344"]}, url={https://doi.org/10.1016/j.patrec.2022.02.004}, DOI={10.1016/j.patrec.2022.02.004}, abstractNote={• A new method for setting the matrix parameter in the linearly involved GMC is proposed. • An alternative algorithm is presented to solve the linear involved convexity-preserving model. • Two properties of the solution path are proved to help with tuning parameter selection. The generalized minimax concave (GMC) penalty is a newly proposed regularizer that can maintain the convexity of the objective function. This paper deals with signal recovery with the linearly involved GMC penalty. First, we propose a new method to set the matrix parameter in the penalty via solving a feasibility problem. The new method possesses appealing advantages over the existing method. Second, we recast the linearly involved GMC model as a saddle-point problem and use the primal-dual hybrid gradient (PDHG) algorithm to compute the solution. Another important work in this paper is that we provide guidance on the tuning parameter selection by proving desirable properties of the solution path. Finally, we apply the linearly involved GMC penalty to 1-D signal recovery and matrix regression. 
Numerical results show that the linearly involved GMC penalty can obtain better recovery performance and preserve the signal structure more successfully in comparison with the total variation (TV) regularizer.}, journal={PATTERN RECOGNITION LETTERS}, publisher={Elsevier BV}, author={Liu, Xiaoqian and Chi, Eric C.}, year={2022}, month={Apr}, pages={60–66} } @article{liu_vardhan_wen_das_randles_chi_2021, title={An Interpretable Machine Learning Model to Classify Coronary Bifurcation Lesions}, ISSN={["1558-4615"]}, url={http://dx.doi.org/10.1109/embc46164.2021.9631082}, DOI={10.1109/embc46164.2021.9631082}, abstractNote={Coronary bifurcation lesions are a leading cause of Coronary Artery Disease (CAD). Despite its prevalence, coronary bifurcation lesions remain difficult to treat due to our incomplete understanding of how various features of lesion anatomy synergistically disrupt normal hemodynamic flow. In this work, we employ an interpretable machine learning algorithm, the Classification and Regression Tree (CART), to model the impact of these geometric features on local hemodynamic quantities. We generate a synthetic arterial database via computational fluid dynamic simulations and apply the CART approach to predict the time averaged wall shear stress (TAWSS) at two different locations within the cardiac vasculature. 
Our experimental results show that CART can estimate a simple, interpretable, yet accurately predictive nonlinear model of TAWSS as a function of such features.Clinical relevance— The fitted tree models have the potential to refine predictions of disturbed hemodynamic flow based on an individual’s cardiac and lesion anatomy and consequently makes progress towards personalized treatment planning for CAD patients.}, journal={2021 43RD ANNUAL INTERNATIONAL CONFERENCE OF THE IEEE ENGINEERING IN MEDICINE & BIOLOGY SOCIETY (EMBC)}, publisher={IEEE}, author={Liu, Xiaoqian and Vardhan, Madhurima and Wen, Qinrou and Das, Arpita and Randles, Amanda and Chi, Eric C.}, year={2021}, pages={4432–4435} } @article{yi_huang_mishne_chi_2021, title={COBRAC: a fast implementation of convex biclustering with compression}, volume={37}, ISSN={["1460-2059"]}, DOI={10.1093/bioinformatics/btab248}, abstractNote={Abstract}, number={20}, journal={BIOINFORMATICS}, author={Yi, Haidong and Huang, Le and Mishne, Gal and Chi, Eric C.}, year={2021}, month={Oct}, pages={3667–3669} } @article{chi_2021, title={Discovering Geometry in Data Arrays}, volume={23}, url={https://doi.org/10.1109/MCSE.2021.3120039}, DOI={10.1109/MCSE.2021.3120039}, abstractNote={Modern technologies produce a deluge of complicated data. In neuroscience, for example, minimally invasive experimental methods can take recordings of large populations of neurons at high resolution under a multitude of conditions. Such data arrays possess nontrivial interdependencies along each of their axes. Insights into these data arrays may lay the foundations of advanced treatments for nervous system disorders. The potential impacts of such data, however, will not be fully realized unless the techniques for analyzing them keep pace. Specifically, there is an urgent, growing need for methods for estimating the low-dimensional structure and geometry in big and noisy data arrays. 
This article reviews a framework for identifying complicated underlying patterns in such data and also recounts the key role that the Department of Energy Computational Sciences Graduate Fellowship played in setting the stage for this work to be done by the author.}, number={6}, journal={Computing in Science & Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Chi, Eric C.}, year={2021}, month={Nov}, pages={42–51} } @article{zhang_mishne_chi_2021, title={Multi-scale affinities with missing data: Estimation and applications}, ISSN={["1932-1872"]}, url={https://doi.org/10.1002/sam.11561}, DOI={10.1002/sam.11561}, abstractNote={Abstract}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Zhang, Min and Mishne, Gal and Chi, Eric C.}, year={2021}, month={Nov} } @article{vardhan_gounley_chen_chi_kahn_leopold_randles_2021, title={Non-invasive characterization of complex coronary lesions}, volume={11}, ISSN={["2045-2322"]}, DOI={10.1038/s41598-021-86360-6}, abstractNote={Abstract}, number={1}, journal={SCIENTIFIC REPORTS}, author={Vardhan, Madhurima and Gounley, John and Chen, S. James and Chi, Eric C. and Kahn, Andrew M. and Leopold, Jane A. and Randles, Amanda}, year={2021}, month={Apr} } @article{zhou_yi_mishne_chi_2021, title={SCALABLE ALGORITHMS FOR CONVEX CLUSTERING}, DOI={10.1109/DSLW51110.2021.9523411}, abstractNote={Convex clustering is an appealing approach to many classical clustering problems. It stands out among standard methods as it enjoys the existence of a unique global optimal solution. Despite this advantage, convex clustering has not been widely adopted, due to its computationally intensive nature. To address this obstacle, especially in the “big data” setting, we introduce a Scalable cOnvex cLustering AlgoRithm via Parallel Coordinate Descent Method (SOLAR-PCDM) that improves the algorithm’s scalability by combining a parallelizable algorithm with a compression strategy. 
This idea is in line with the rise and ever increasing availability of high performance computing systems built around multi-core processors, GPU-accelerators, and computer clusters. SOLARPCDM consists of two parts. In the first part, we develop a method called weighted convex clustering to recover the solution path by formulating a sequence of smaller equivalent optimization problems. In the second part, we utilize the Parallel Coordinate Descent Method (PCDM) to solve a specific convex clustering problem. We demonstrate the correctness and scalability of our algorithm on both simulated and real data examples.}, journal={2021 IEEE DATA SCIENCE AND LEARNING WORKSHOP (DSLW)}, author={Zhou, Weilian and Yi, Haidong and Mishne, Gal and Chi, Eric}, year={2021} } @article{feng_xiao_chi_2021, title={Sparse Single Index Models for Multivariate Responses}, volume={30}, url={https://doi.org/10.1080/10618600.2020.1779080}, DOI={10.1080/10618600.2020.1779080}, abstractNote={ABSTRACT Joint models are popular for analyzing data with multivariate responses. We propose a sparse multivariate single index model, where responses and predictors are linked by unspecified smooth functions and multiple matrix level penalties are employed to select predictors and induce low-rank structures across responses. An alternating direction method of multipliers based algorithm is proposed for model estimation. We demonstrate the effectiveness of proposed model in simulation studies and an application to a genetic association study. 
Supplementary materials for this article are available online.}, number={1}, journal={Journal of Computational and Graphical Statistics}, publisher={Informa UK Limited}, author={Feng, Yuan and Xiao, Luo and Chi, Eric C.}, year={2021}, month={Jan}, pages={115–124} } @article{brantley_guinness_chi_2020, title={BASELINE DRIFT ESTIMATION FOR AIR QUALITY DATA USING QUANTILE TREND FILTERING}, volume={14}, ISSN={["1932-6157"]}, DOI={10.1214/19-AOAS1318}, abstractNote={We address the problem of estimating smoothly varying baseline trends in time series data. This problem arises in a wide range of fields, including chemistry, macroeconomics, and medicine; however, our study is motivated by the analysis of data from low cost air quality sensors. Our methods extend the quantile trend filtering framework to enable the estimation of multiple quantile trends simultaneously while ensuring that the quantiles do not cross. To handle the computational challenge posed by very long time series, we propose a parallelizable alternating direction method of moments (ADMM) algorithm. The ADMM algorthim enables the estimation of trends in a piecewise manner, both reducing the computation time and extending the limits of the method to larger data sizes. We also address smoothing parameter selection and propose a modified criterion based on the extended Bayesian Information Criterion. Through simulation studies and our motivating application to low cost air quality sensor data, we demonstrate that our model provides better quantile trend estimates than existing methods and improves signal classification of low-cost air quality sensor output.}, number={2}, journal={ANNALS OF APPLIED STATISTICS}, author={Brantley, Halley L. 
and Guinness, Joseph and Chi, Eric C.}, year={2020}, month={Jun}, pages={585–604} } @article{rhyne_jeng_chi_tzeng_2020, title={FastLORS: Joint modelling for expression quantitative trait loci mapping in R}, volume={9}, ISSN={["2049-1573"]}, url={https://doi.org/10.1002/sta4.265}, DOI={10.1002/sta4.265}, abstractNote={FastLORS is a software package that implements a new algorithm to solve sparse multivariate regression for expression quantitative trait loci (eQTLs) mapping. FastLORS solves the same optimization problem as LORS, an existing popular algorithm. The optimization problem is solved through inexact block coordinate descent with updates by proximal gradient steps, which reduces the computational cost compared with LORS. We apply LORS and FastLORS to a real dataset for eQTL mapping and demonstrate that FastLORS delivers comparable results with LORS in much less computing time.}, number={1}, journal={STAT}, publisher={Wiley}, author={Rhyne, Jacob and Jeng, X. Jessie and Chi, Eric C. and Tzeng, Jung-Ying}, year={2020} } @article{stanley_chi_mishne_2020, title={Multiway Graph Signal Processing on Tensors: Integrative Analysis of Irregular Geometries}, volume={37}, ISSN={["1558-0792"]}, DOI={10.1109/MSP.2020.3013555}, abstractNote={Graph signal processing (GSP) is an important methodology for studying data residing on irregular structures. Because acquired data are increasingly taking the form of multiway tensors, new signal processing tools are needed to maximally utilize the multiway structure within the data. In this article, we review modern signal processing frameworks that generalize GSP to multiway data, starting from graph signals coupled to familiar regular axes, such as time in sensor networks, and then extending to general graphs across all tensor modes. This widely applicable paradigm motivates reformulating and improving classical problems and approaches to creatively address the challenges in tensor-based data. 
We synthesize common themes arising from current efforts to combine GSP with tensor analysis and highlight future directions in extending GSP to the multiway paradigm.}, number={6}, journal={IEEE SIGNAL PROCESSING MAGAZINE}, author={Stanley, Jay S., III and Chi, Eric C. and Mishne, Gal}, year={2020}, month={Nov}, pages={160–173} } @article{chi_gaines_sun_zhou_yang_2020, title={Provable Convex Co-clustering of Tensors}, volume={21}, url={http://jmlr.org/papers/v21/18-155.html}, number={214}, journal={Journal of Machine Learning Research}, author={Chi, Eric C. and Gaines, Brian J. and Sun, Will Wei and Zhou, Hua and Yang, Jian}, year={2020}, pages={1–58} } @inproceedings{mishne_chi_coifman_2019, place={Long Beach, California, USA}, title={Co-manifold learning with missing data}, volume={97}, url={http://proceedings.mlr.press/v97/mishne19a.html}, booktitle={International Conference on Machine Learning}, publisher={PMLR}, author={Mishne, Gal and Chi, Eric and Coifman, Ronald}, editor={Chaudhuri, Kamalika and Salakhutdinov, RuslanEditors}, year={2019}, pages={4605–4614} } @article{chi_hu_saibaba_rao_2019, title={Going Off the Grid: Iterative Model Selection for Biclustered Matrix Completion}, volume={28}, ISSN={["1537-2715"]}, url={https://doi.org/10.1080/10618600.2018.1482763}, DOI={10.1080/10618600.2018.1482763}, abstractNote={ABSTRACT We consider the problem of performing matrix completion with side information on row-by-row and column-by-column similarities. We build upon recent proposals for matrix estimation with smoothness constraints with respect to row and column graphs. We present a novel iterative procedure for directly minimizing an information criterion to select an appropriate amount of row and column smoothing, namely, to perform model selection. We also discuss how to exploit the special structure of the problem to scale up the estimation and model selection procedure via the Hutchinson estimator, combined with a stochastic Quasi-Newton approach. 
Supplementary material for this article is available online.}, number={1}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, publisher={Informa UK Limited}, author={Chi, Eric C. and Hu, Liuyi and Saibaba, Arvind K. and Rao, Arvind U. K.}, year={2019}, month={Jan}, pages={36–47} } @misc{chi_li_2019, title={Matrix completion from a computational statistics perspective}, volume={11}, ISSN={["1939-0068"]}, url={https://doi.org/10.1002/wics.1469}, DOI={10.1002/wics.1469}, abstractNote={Abstract}, number={5}, journal={WILEY INTERDISCIPLINARY REVIEWS-COMPUTATIONAL STATISTICS}, publisher={Wiley}, author={Chi, Eric C. and Li, Tianxi}, year={2019}, month={Sep} } @article{chi_steinerberger_2019, title={Recovering Trees with Convex Clustering}, volume={1}, url={http://dx.doi.org/10.1137/18m121099x}, DOI={10.1137/18m121099x}, abstractNote={Convex clustering refers, for given $\left\{x_1, \dots, x_n\right\} \subset \mathbb{R}^p$, to the minimization of \begin{eqnarray*} u(\gamma) & = & \underset{u_1, \dots, u_n }{\arg\min}\;\sum_{i=1}^{n}{\lVert x_i - u_i \rVert^2} + \gamma \sum_{i,j=1}^{n}{w_{ij} \lVert u_i - u_j\rVert},\\ \end{eqnarray*} where $w_{ij} \geq 0$ is an affinity that quantifies the similarity between $x_i$ and $x_j$. We prove that if the affinities $w_{ij}$ reflect a tree structure in the $\left\{x_1, \dots, x_n\right\}$, then the convex clustering solution path reconstructs the tree exactly. The main technical ingredient implies the following combinatorial byproduct: for every set $\left\{x_1, \dots, x_n \right\} \subset \mathbb{R}^p$ of $n \geq 2$ distinct points, there exist at least $n/6$ points with the property that for any of these points $x$ there is a unit vector $v \in \mathbb{R}^p$ such that, when viewed from $x$, `most' points lie in the direction $v$ \begin{eqnarray*} \frac{1}{n-1}\sum_{i=1 \atop x_i \neq x}^{n}{ \left\langle \frac{x_i - x}{\lVert x_i - x \rVert}, v \right\rangle} & \geq & \frac{1}{4}. 
\end{eqnarray*}}, number={3}, journal={SIAM Journal on Mathematics of Data Science}, author={Chi, E. and Steinerberger, S.}, year={2019}, month={Jan}, pages={383–407} } @article{lusch_chi_kutz_2019, title={Shape Constrained Tensor Decompositions}, ISSN={["2472-1573"]}, DOI={10.1109/DSAA.2019.00044}, abstractNote={We propose a new low-rank tensor factorization where one mode is coded as a sparse linear combination of elements from an over-complete library. Our method, Shape Constrained Tensor Decomposition (SCTD) is based upon the CANDECOMP/PARAFAC (CP) decomposition which produces r-rank approximations of data tensors via outer products of vectors in each dimension of the data. The SCTD model can leverage prior knowledge about the shape of factors along a given mode, for example in tensor data where one mode represents time. By constraining the vector in the temporal dimension to known analytic forms which are selected from a large set of candidate functions, more readily interpretable decompositions are achieved and analytic time dependencies discovered. The SCTD method circumvents traditional flattening techniques where an N-way array is reshaped into a matrix in order to perform a singular value decomposition. A clear advantage of the SCTD algorithm is its ability to extract transient and intermittent phenomena which is often difficult for SVD-based methods. We motivate the SCTD method using several intuitively appealing results before applying it on a real-world data set to illustrate the efficiency of the algorithm in extracting interpretable spatio-temporal modes. With the rise of data-driven discovery methods, the decomposition proposed provides a viable technique for analyzing multitudes of data in a more comprehensible fashion.}, journal={2019 IEEE INTERNATIONAL CONFERENCE ON DATA SCIENCE AND ADVANCED ANALYTICS (DSAA 2019)}, author={Lusch, Bethany and Chi, Eric C. and Kutz, J. 
Nathan}, year={2019}, pages={287–297} } @article{min_chi_zhou_2019, title={Tensor canonical correlation analysis}, volume={8}, url={https://doi.org/10.1002/sta4.253}, DOI={10.1002/sta4.253}, abstractNote={Canonical correlation analysis (CCA) is a multivariate analysis technique for estimating a linear relationship between two sets of measurements. Modern acquisition technologies, for example, those arising in neuroimaging and remote sensing, produce data in the form of multidimensional arrays or tensors. Classic CCA is not appropriate for dealing with tensor data due to the multidimensional structure and ultrahigh dimensionality of such modern data. In this paper, we present tensor CCA (TCCA) to discover relationships between two tensors while simultaneously preserving multidimensional structure of the tensors and utilizing substantially fewer parameters. Furthermore, we show how to employ a parsimonious covariance structure to gain additional stability and efficiency. We delineate population and sample problems for each model and propose efficient estimation algorithms with global convergence guarantees. Also we describe a probabilistic model for TCCA that enables the generation of synthetic data with desired canonical variates and correlations. Simulation studies illustrate the performance of our methods.}, number={1}, journal={Stat}, publisher={Wiley}, author={Min, Eun Jeong and Chi, Eric C. and Zhou, Hua}, year={2019}, month={Jan} } @article{xu_chi_yang_lange_2018, title={A majorization-minimization algorithm for split feasibility problems}, volume={71}, ISSN={["1573-2894"]}, url={https://doi.org/10.1007/s10589-018-0025-z}, DOI={10.1007/s10589-018-0025-z}, number={3}, journal={COMPUTATIONAL OPTIMIZATION AND APPLICATIONS}, publisher={Springer Science and Business Media LLC}, author={Xu, Jason and Chi, Eric C. 
and Yang, Meng and Lange, Kenneth}, year={2018}, month={Dec}, pages={795–828} } @article{chi_allen_baraniuk_2017, title={Convex Biclustering}, volume={73}, ISSN={["1541-0420"]}, url={http://dx.doi.org/10.1111/biom.12540}, DOI={10.1111/biom.12540}, abstractNote={Summary}, number={1}, journal={BIOMETRICS}, author={Chi, Eric C. and Allen, Genevera I. and Baraniuk, Richard G.}, year={2017}, month={Mar}, pages={10–19} } @inproceedings{xu_chi_lange_2017, title={Generalized Linear Model Regression under Distance-to-set Penalties}, url={https://papers.nips.cc/paper/6737-generalized-linear-model-regression-under-distance-to-set-penalties}, booktitle={Neural Information Processing Systems}, publisher={Curran Associates, Inc.}, author={Xu, Jason and Chi, Eric and Lange, Kenneth}, editor={Guyon, I. and Luxburg, U. V. and Bengio, S. and Wallach, H. and Fergus, R. and Vishwanathan, S. and Garnett, R.Editors}, year={2017}, pages={1385–1395} } @article{long_chi_baraniuk_2016, title={ESTIMATING A COMMON PERIOD FOR A SET OF IRREGULARLY SAMPLED FUNCTIONS WITH APPLICATIONS TO PERIODIC VARIABLE STAR DATA}, volume={10}, ISSN={["1932-6157"]}, url={http://dx.doi.org/10.1214/15-AOAS885}, DOI={10.1214/15-aoas885}, abstractNote={We consider the estimation of a common period for a set of functions sampled at irregular intervals. The problem arises in astronomy, where the functions represent a star's brightness observed over time through different photometric filters. While current methods can estimate periods accurately provided that the brightness is well--sampled in at least one filter, there are no existing methods that can provide accurate estimates when no brightness function is well--sampled. In this paper we introduce two new methods for period estimation when brightnesses are poorly--sampled in all filters. The first, multiband generalized Lomb-Scargle (MGLS), extends the frequently used Lomb-Scargle method in a way that na\"{i}vely combines information across filters. 
The second, penalized generalized Lomb-Scargle (PGLS), builds on the first by more intelligently borrowing strength across filters. Specifically, we incorporate constraints on the phases and amplitudes across the different functions using a non--convex penalized likelihood function. We develop a fast algorithm to optimize the penalized likelihood by combining block coordinate descent with the majorization-minimization (MM) principle. We illustrate our methods on synthetic and real astronomy data. Both advance the state-of-the-art in period estimation; however, PGLS significantly outperforms MGLS when all functions are extremely poorly--sampled.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, publisher={The Institute of Mathematical Statistics}, author={Long, James P. and Chi, Eric C. and Baraniuk, Richard G.}, year={2016}, month={Mar}, pages={165–197} } @article{chi_chi_baraniuk_2016, title={k-POD: A Method for k-Means Clustering of Missing Data}, volume={70}, ISSN={["1537-2731"]}, url={http://dx.doi.org/10.1080/00031305.2015.1086685}, DOI={10.1080/00031305.2015.1086685}, abstractNote={The k-means algorithm is often used in clustering applications but its usage requires a complete data matrix. Missing data, however, are common in many applications. Mainstream approaches to clustering missing data reduce the missing data problem to a complete data formulation through either deletion or imputation but these solutions may incur significant costs. Our k-POD method presents a simple extension of k-means clustering for missing data that works even when the missingness mechanism is unknown, when external information is unavailable, and when there is significant missingness in the data. [Received November 2014. Revised August 2015.]}, number={1}, journal={AMERICAN STATISTICIAN}, author={Chi, Jocelyn T. and Chi, Eric C. 
and Baraniuk, Richard G.}, year={2016}, month={Jan}, pages={91–99} } @article{chen_chi_ranola_lange_2015, title={Convex Clustering: An Attractive Alternative to Hierarchical Clustering}, volume={11}, url={http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004228}, DOI={10.1371/journal.pcbi.1004228}, abstractNote={The primary goal in cluster analysis is to discover natural groupings of objects. The field of cluster analysis is crowded with diverse methods that make special assumptions about data and address different scientific aims. Despite its shortcomings in accuracy, hierarchical clustering is the dominant clustering method in bioinformatics. Biologists find the trees constructed by hierarchical clustering visually appealing and in tune with their evolutionary perspective. Hierarchical clustering operates on multiple scales simultaneously. This is essential, for instance, in transcriptome data, where one may be interested in making qualitative inferences about how lower-order relationships like gene modules lead to higher-order relationships like pathways or biological processes. The recently developed method of convex clustering preserves the visual appeal of hierarchical clustering while ameliorating its propensity to make false inferences in the presence of outliers and noise. The solution paths generated by convex clustering reveal relationships between clusters that are hidden by static methods such as k-means clustering. The current paper derives and tests a novel proximal distance algorithm for minimizing the objective function of convex clustering. The algorithm separates parameters, accommodates missing data, and supports prior information on relationships. Our program CONVEXCLUSTER incorporating the algorithm is implemented on ATI and nVidia graphics processing units (GPUs) for maximal speed. 
Several biological examples illustrate the strengths of convex clustering and the ability of the proximal distance algorithm to handle high-dimensional problems. CONVEXCLUSTER can be freely downloaded from the UCLA Human Genetics web site at http://www.genetics.ucla.edu/software/}, number={5}, journal={PLoS Computational Biology}, author={Chen, Gary K. and Chi, Eric C. and Ranola, John M.O. and Lange, Kenneth}, year={2015}, month={May}, pages={e1004228} } @article{chi_lange_2015, title={Splitting Methods for Convex Clustering}, volume={24}, url={http://www.tandfonline.com/doi/abs/10.1080/10618600.2014.948181}, DOI={10.1080/10618600.2014.948181}, abstractNote={Clustering is a fundamental problem in many scientific applications. Standard methods such as k-means, Gaussian mixture models, and hierarchical clustering, however, are beset by local minima, which are sometimes drastically suboptimal. Recently introduced convex relaxations of k-means and hierarchical clustering shrink cluster centroids toward one another and ensure a unique global minimizer. In this work, we present two splitting methods for solving the convex clustering problem. The first is an instance of the alternating direction method of multipliers (ADMM); the second is an instance of the alternating minimization algorithm (AMA). In contrast to previously considered algorithms, our ADMM and AMA formulations provide simple and unified frameworks for solving the convex clustering problem under the previously studied norms and open the door to potentially novel norms. We demonstrate the performance of our algorithm on both simulated and real data examples. While the differences between the two algorithms appear to be minor on the surface, complexity analysis and numerical experiments show AMA to be significantly more efficient. This article has supplementary materials available online.}, number={4}, journal={Journal of Computational and Graphical Statistics}, author={Chi, Eric C. 
and Lange, Kenneth}, year={2015}, month={Dec}, pages={994–1013} } @article{lange_chi_zhou_2014, title={A Brief Survey of Modern Optimization for Statisticians}, volume={82}, ISSN={["1751-5823"]}, url={http://dx.doi.org/10.1111/insr.12022}, DOI={10.1111/insr.12022}, abstractNote={Summary}, number={1}, journal={INTERNATIONAL STATISTICAL REVIEW}, author={Lange, Kenneth and Chi, Eric C. and Zhou, Hua}, year={2014}, month={Apr}, pages={46–70} } @article{chi_lange_2014a, title={A Look at the Generalized Heron Problem through the Lens of Majorization-Minimization}, volume={121}, url={http://www.jstor.org/stable/info/10.4169/amer.math.monthly.121.02.095}, DOI={10.4169/amer.math.monthly.121.02.095}, abstractNote={Abstract In a recent issue of this Monthly, Mordukhovich, Nam, and Salinas pose and solve an interesting non-differentiable generalization of the Heron problem in the framework of modern convex analysis. In the generalized Heron problem, we are given k + 1 closed convex sets in ℝd equipped with its Euclidean norm and asked to find the point in the last set such that the sum of the distances to the first k sets is minimal. In later work, the authors generalize the Heron problem even further, relax its convexity assumptions, study its theoretical properties, and pursue subgradient algorithms for solving the convex case. Here, we revisit the original problem solely from the numerical perspective. By exploiting the majorizationminimization (MM) principle of computational statistics and rudimentary techniques from differential calculus, we are able to construct a very fast algorithm for solving the Euclidean version of the generalized Heron problem.}, number={2}, journal={The American Mathematical Monthly}, publisher={Mathematical Association of America}, author={Chi, Eric C. and Lange, Kenneth}, year={2014}, month={Feb}, pages={
95–108} } @article{chi_zhou_lange_2014, title={Distance majorization and its applications}, volume={146}, ISSN={["1436-4646"]}, url={http://link.springer.com/article/10.1007%2Fs10107-013-0697-1#}, DOI={10.1007/s10107-013-0697-1}, abstractNote={The problem of minimizing a continuously differentiable convex function over an intersection of closed convex sets is ubiquitous in applied mathematics. It is particularly interesting when it is easy to project onto each separate set, but nontrivial to project onto their intersection. Algorithms based on Newton's method such as the interior point method are viable for small to medium-scale problems. However, modern applications in statistics, engineering, and machine learning are posing problems with potentially tens of thousands of parameters or more. We revisit this convex programming problem and propose an algorithm that scales well with dimensionality. Our proposal is an instance of a sequential unconstrained minimization technique and revolves around three ideas: the majorization-minimization principle, the classical penalty method for constrained optimization, and quasi-Newton acceleration of fixed-point algorithms. The performance of our distance majorization algorithms is illustrated in several applications.}, number={1-2}, journal={MATHEMATICAL PROGRAMMING}, publisher={Springer-Verlag}, author={Chi, Eric C. and Zhou, Hua and Lange, Kenneth}, year={2014}, month={Aug}, pages={409–436} } @article{lange_chi_zhou_2014a, title={Rejoinder}, volume={82}, ISSN={["1751-5823"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84899082443&partnerID=MN8TOARS}, DOI={10.1111/insr.12030}, abstractNote={International Statistical ReviewVolume 82, Issue 1 p. 
81-89 Original Article Rejoinder Kenneth Lange, Kenneth Lange klange@ucla.edu Departments of Biomathematics and Statistics, University of California, Los Angeles, CA 90095-1766, USA Department of Human Genetics, University of California, Los Angeles, CA 90095-1766, USASearch for more papers by this authorEric C. Chi, Eric C. Chi eric.c.chi@gmail.com Department of Human Genetics, University of California, Los Angeles, CA 90095-1766, USASearch for more papers by this authorHua Zhou, Hua Zhou hua_zhou@ncsu.edu Department of Statistics, North Carolina State University, Raleigh, NC 27695-8203, USASearch for more papers by this author Kenneth Lange, Kenneth Lange klange@ucla.edu Departments of Biomathematics and Statistics, University of California, Los Angeles, CA 90095-1766, USA Department of Human Genetics, University of California, Los Angeles, CA 90095-1766, USASearch for more papers by this authorEric C. Chi, Eric C. Chi eric.c.chi@gmail.com Department of Human Genetics, University of California, Los Angeles, CA 90095-1766, USASearch for more papers by this authorHua Zhou, Hua Zhou hua_zhou@ncsu.edu Department of Statistics, North Carolina State University, Raleigh, NC 27695-8203, USASearch for more papers by this author First published: 22 April 2014 https://doi.org/10.1111/insr.12030Read the full textAboutPDF ToolsRequest permissionExport citationAdd to favoritesTrack citation ShareShare Give accessShare full text accessShare full-text accessPlease review our Terms and Conditions of Use and check box below to share full-text version of article.I have read and accept the Wiley Online Library Terms and Conditions of UseShareable LinkUse the link below to share a full-text version of this article with your friends and colleagues. Learn more.Copy URL Share a linkShare onFacebookTwitterLinked InRedditWechat Volume82, Issue1April 2014Pages 81-89 RelatedInformation}, number={1}, journal={INTERNATIONAL STATISTICAL REVIEW}, author={Lange, Kenneth and Chi, Eric C. 
and Zhou, Hua}, year={2014}, month={Apr}, pages={81–89} } @article{chi_scott_2014, title={Robust Parametric Classification and Variable Selection by a Minimum Distance Criterion}, volume={23}, url={http://www.tandfonline.com/doi/full/10.1080/10618600.2012.737296#.UyBtOFwZ6D0}, DOI={10.1080/10618600.2012.737296}, abstractNote={We investigate a robust penalized logistic regression algorithm based on a minimum distance criterion. Influential outliers are often associated with the explosion of parameter vector estimates, but in the context of standard logistic regression, the bias due to outliers always causes the parameter vector to implode, that is, shrink toward the zero vector. Thus, using LASSO-like penalties to perform variable selection in the presence of outliers can result in missed detections of relevant covariates. We show that by choosing a minimum distance criterion together with an elastic net penalty, we can simultaneously find a parsimonious model and avoid estimation implosion even in the presence of many outliers in the important small n large p situation. Minimizing the penalized minimum distance criterion is a challenging problem due to its nonconvexity. To meet the challenge, we develop a simple and efficient MM (majorization–minimization) algorithm that can be adapted gracefully to the small n large p context. Performance of our algorithm is evaluated on simulated and real datasets. This article has supplementary materials available online.}, number={1}, journal={Journal of Computational and Graphical Statistics}, author={Chi, Eric C. and Scott, David W.}, year={2014}, month={Feb}, pages={111–128} } @article{chi_lange_2014b, title={Stable estimation of a covariance matrix guided by nuclear norm penalties}, volume={80}, url={http://www.sciencedirect.com/science/article/pii/S0167947314001893}, DOI={10.1016/j.csda.2014.06.018}, abstractNote={Estimation of a covariance matrix or its inverse plays a central role in many statistical methods. 
For these methods to work reliably, estimated matrices must not only be invertible but also well-conditioned. The current paper introduces a novel prior to ensure a well-conditioned maximum a posteriori (MAP) covariance estimate. The prior shrinks the sample covariance estimator towards a stable target and leads to a MAP estimator that is consistent and asymptotically efficient. Thus, the MAP estimator gracefully transitions towards the sample covariance matrix as the number of samples grows relative to the number of covariates. The utility of the MAP estimator is demonstrated in two standard applications - discriminant analysis and EM clustering - in this sampling regime.}, number={0}, journal={Computational Statistics \& Data Analysis}, author={Chi, Eric C. and Lange, Kenneth}, year={2014}, month={Dec}, pages={117–128} } @article{chi_zhou_chen_del_vecchyo_lange_2013, title={Genotype imputation via matrix completion}, volume={23}, ISSN={["1549-5469"]}, url={http://genome.cshlp.org/content/23/3/509.full}, DOI={10.1101/gr.145821.112}, abstractNote={Most current genotype imputation methods are model-based and computationally intensive, taking days to impute one chromosome pair on 1000 people. We describe an efficient genotype imputation method based on matrix completion. Our matrix completion method is implemented in MATLAB and tested on real data from HapMap 3, simulated pedigree data, and simulated low-coverage sequencing data derived from the 1000 Genomes Project. Compared with leading imputation programs, the matrix completion algorithm embodied in our program MENDEL-IMPUTE achieves comparable imputation accuracy while reducing run times significantly. Implementation in a lower-level language such as Fortran or C is apt to further improve computational efficiency.}, number={3}, journal={GENOME RESEARCH}, author={Chi, Eric C. and Zhou, Hua and Chen, Gary K. 
and Del Vecchyo, Diego Ortega and Lange, Kenneth}, year={2013}, month={Mar}, pages={509–518} } @inproceedings{chi_allen_zhou_kohannim_lange_thompson_2013, title={Imaging genetics via sparse canonical correlation analysis}, url={http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=6556581&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D6556581}, DOI={10.1109/ISBI.2013.6556581}, abstractNote={The collection of brain images from populations of subjects who have been genotyped with genome-wide scans makes it feasible to search for genetic effects on the brain. Even so, multivariate methods are sorely needed that can search both images and the genome for relationships, making use of the correlation structure of both datasets. Here we investigate the use of sparse canonical correlation analysis (CCA) to home in on sets of genetic variants that explain variance in a set of images. We extend recent work on penalized matrix decomposition to account for the correlations in both datasets. Such methods show promise in imaging genetics as they exploit the natural covariance in the datasets. They also avoid an astronomically heavy statistical correction for searching the whole genome and the entire image for promising associations.}, booktitle={Biomedical Imaging (ISBI), 2013 IEEE 10th International Symposium on}, author={Chi, E.C. and Allen, G.I. and Zhou, Hua and Kohannim, O. and Lange, K. and Thompson, P.M.}, year={2013}, month={Apr}, pages={740–743} } @article{chi_kolda_2012, title={On Tensors, Sparsity, and Nonnegative Factorizations}, volume={33}, url={http://epubs.siam.org/doi/abs/10.1137/110859063}, DOI={10.1137/110859063}, abstractNote={Tensors have found application in a variety of fields, ranging from chemometrics to signal processing and beyond. In this paper, we consider the problem of multilinear modeling of sparse count data. 
Our goal is to develop a descriptive tensor factorization model of such data, along with appropriate algorithms and theory. To do so, we propose that the random variation is best described via a Poisson distribution, which better describes the zeros observed in the data as compared to the typical assumption of a Gaussian distribution. Under a Poisson assumption, we fit a model to observed data using the negative log-likelihood score. We present a new algorithm for Poisson tensor factorization called CANDECOMP--PARAFAC alternating Poisson regression (CP-APR) that is based on a majorization-minimization approach. It can be shown that CP-APR is a generalization of the Lee--Seung multiplicative updates. We show how to prevent the algorithm from converging to non-KKT points and prove convergence of CP-APR under mil...}, number={4}, journal={SIAM Journal of Matrix Analysis and Applications}, author={Chi, E. and Kolda, T.}, year={2012}, month={Dec}, pages={1272–1299} } @article{chi_mende_fok_reeves_2006, title={Proton auroral intensifications and injections at synchronous altitude}, volume={33}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-33646463820&partnerID=MN8TOARS}, DOI={10.1029/2005GL024656}, abstractNote={In sudden flux increases at synchronous altitude the lower energy channels often show progressively more delay or dispersion. It is usually assumed that the dispersion is caused by a simultaneous injection of particles of all energies at some location, and by the subsequent drift of these particles to the synchronous altitude measurement site “downstream” of the injection event. In this paper we present a method for timing and locating the injections from proton auroral precipitation inferred by the Lyman α emission data from the IMAGE FUV instrument. 
We compare the timing of the proton flux increases observed by the Los Alamos National Laboratory synchronous altitude satellites to the time delay predicted by a model describing the longitudinal drift of particles in the magnetosphere. We present comparisons for eleven substorm particle injections and find that the observed azimuthal drift times are reasonably consistent with those calculated by a simple model using the Tsyganenko 89 magnetic and Volland electric field models as input. This consistency supports the concept that the proton auroral intensification at substorm onset and the proton injection in the magnetosphere occur at the same magnetic local time (longitude).}, number={6}, journal={Geophysical Research Letters}, author={Chi, E.C. and Mende, S.B. and Fok, M.-C. and Reeves, G.D.}, year={2006} } @article{gupta_chi_walrand_2005, title={Different Algorithms for Normal and Protection Paths}, volume={13}, ISSN={1064-7570 1573-7705}, url={http://dx.doi.org/10.1007/S10922-005-1845-6}, DOI={10.1007/S10922-005-1845-6}, number={1}, journal={Journal of Network and Systems Management}, publisher={Springer Science and Business Media LLC}, author={Gupta, Rajarshi and Chi, Eric and Walrand, Jean}, year={2005}, month={Mar}, pages={13–33} } @article{chi_fu_walrand_2004, title={Proactive resource provisioning}, volume={27}, ISSN={0140-3664}, url={http://dx.doi.org/10.1016/j.comcom.2004.02.019}, DOI={10.1016/j.comcom.2004.02.019}, abstractNote={Allocating resources in networks to QoS flows may require undesirable delays or costs. We consider a dynamic Service Level Agreement (SLA) negotiation scheme between peer autonomous systems (ASes) that implement DiffServ per domain behaviors. For concreteness, we tailor our scheme to account for the needs of Voice over IP transport across multiple ASes. We present a heuristic but computationally simple and distributed scheme that uses traffic statistics to forecast the near-future demand. 
Our scheme provides a statistical guarantee on the renegotiation frequency, to manage adjustment costs, and blocking probability, to manage penalties for undesirable adjustment delays. Simulations demonstrate that this scheme achieves more efficient usage of the reserved bandwidth than would be accomplished by using the de facto static SLA negotiation schemes.}, number={12}, journal={Computer Communications}, publisher={Elsevier BV}, author={Chi, Eric and Fu, Michael and Walrand, Jean}, year={2004}, month={Jul}, pages={1174–1182} } @article{chi_fu_walrand_2004a, title={Proactive resource provisioning}, volume={27}, ISSN={0140-3664}, url={http://dx.doi.org/10.1016/j.comcom.2004.02.019}, DOI={10.1016/S0140-3664(04)00111-2}, abstractNote={Allocating resources in networks to QoS flows may require undesirable delays or costs. We consider a dynamic Service Level Agreement (SLA) negotiation scheme between peer autonomous systems (ASes) that implement DiffServ per domain behaviors. For concreteness, we tailor our scheme to account for the needs of Voice over IP transport across multiple ASes. We present a heuristic but computationally simple and distributed scheme that uses traffic statistics to forecast the near-future demand. Our scheme provides a statistical guarantee on the renegotiation frequency, to manage adjustment costs, and blocking probability, to manage penalties for undesirable adjustment delays. Simulations demonstrate that this scheme achieves more efficient usage of the reserved bandwidth than would be accomplished by using the de facto static SLA negotiation schemes.}, number={12}, journal={Computer Communications}, publisher={Elsevier BV}, author={Chi, Eric and Fu, Michael and Walrand, Jean}, year={2004}, month={Jul}, pages={1174–1182} }