@article{zou_zeng_xiao_luo_2023,
  title = {{Bayesian} Inference and Dynamic Prediction for Multivariate Longitudinal and Survival Data},
  volume = {17},
  issn = {1941-7330},
  doi = {10.1214/23-AOAS1733},
  abstractNote = {Alzheimer's disease (AD) is a complex neurological disorder impairing multiple domains such as cognition and daily functions. To better understand the disease and its progression, many AD research studies collect multiple longitudinal outcomes that are strongly predictive of the onset of AD dementia. We propose a joint model based on a multivariate functional mixed model framework (referred to as MFMM-JM) that simultaneously models the multiple longitudinal outcomes and the time to dementia onset. We develop six functional forms to fully investigate the complex association between longitudinal outcomes and dementia onset. Moreover, we use the Bayesian methods for statistical inference and develop a dynamic prediction framework that provides accurate personalized predictions of disease progressions based on new subject-specific data. We apply the proposed MFMM-JM to two large ongoing AD studies: the Alzheimer's Disease Neuroimaging Initiative (ADNI) and National Alzheimer's Coordinating Center (NACC), and identify the functional forms with the best predictive performance. Our method is also validated by extensive simulation studies with five settings.},
  number = {3},
  journal = {Annals of Applied Statistics},
  author = {Zou, Haotian and Zeng, Donglin and Xiao, Luo and Luo, Sheng},
  year = {2023},
  month = sep,
  pages = {2574--2595},
}

@article{li_xiao_2023,
  title = {Latent factor model for multivariate functional data},
  volume = {9},
  issn = {1541-0420},
  doi = {10.1111/biom.13924},
  abstractNote = {For multivariate functional data, a functional latent factor model is proposed, extending the traditional latent factor model for multivariate data.
The proposed model uses unobserved stochastic processes to induce the dependence among the different functions, and thus, for a large number of functions, may provide a more parsimonious and interpretable characterization of the otherwise complex dependencies between the functions. Sufficient conditions are provided to establish the identifiability of the proposed model. The performance of the proposed model is assessed through simulation studies and an application to electroencephalography data.},
  journal = {Biometrics},
  author = {Li, Ruonan and Xiao, Luo},
  year = {2023},
  month = sep,
}

@article{zou_xiao_zeng_luo_2023,
  title = {Multivariate functional mixed model with {MRI} data: An application to {Alzheimer's} disease},
  volume = {2},
  issn = {1097-0258},
  doi = {10.1002/sim.9683},
  abstractNote = {Alzheimer's Disease (AD) is the leading cause of dementia and impairment in various domains. Recent AD studies, (ie, Alzheimer's Disease Neuroimaging Initiative (ADNI) study), collect multimodal data, including longitudinal neurological assessments and magnetic resonance imaging (MRI) data, to better study the disease progression. Adopting early interventions is essential to slow AD progression for subjects with mild cognitive impairment (MCI). It is of particular interest to develop an AD predictive model that leverages multimodal data and provides accurate personalized predictions. In this article, we propose a multivariate functional mixed model with MRI data (MFMM-MRI) that simultaneously models longitudinal neurological assessments, baseline MRI data, and the survival outcome (ie, dementia onset) for subjects with MCI at baseline. Two functional forms (the random-effects model and instantaneous model) linking the longitudinal and survival process are investigated. We use Markov Chain Monte Carlo (MCMC) method based on No-U-Turn Sampling (NUTS) algorithm to obtain posterior samples.
We develop a dynamic prediction framework that provides accurate personalized predictions of longitudinal trajectories and survival probability. We apply MFMM-MRI to the ADNI study and identify significant associations among longitudinal outcomes, MRI data, and the risk of dementia onset. The instantaneous model with voxels from the whole brain has the best prediction performance among all candidate models. The simulation study supports the validity of the estimation and dynamic prediction method.},
  journal = {Statistics in Medicine},
  author = {Zou, Haotian and Xiao, Luo and Zeng, Donglin and Luo, Sheng},
  year = {2023},
  month = feb,
}

@article{cui_li_crainiceanu_xiao_2022,
  title = {Fast Multilevel Functional Principal Component Analysis},
  volume = {10},
  issn = {1537-2715},
  doi = {10.1080/10618600.2022.2115500},
  abstractNote = {We introduce fast multilevel functional principal component analysis (fast MFPCA), which scales up to high dimensional functional data measured at multiple visits. The new approach is orders of magnitude faster than and achieves comparable estimation accuracy with the original MFPCA. Methods are motivated by the National Health and Nutritional Examination Survey (NHANES), which contains minute-level physical activity information of more than 10,000 participants over multiple days and 1440 observations per day. While MFPCA takes more than five days to analyze these data, fast MFPCA takes less than five minutes. A theoretical study of the proposed method is also provided. The associated function mfpca.face() is available in the R package refund. Supplementary materials for this article are available online.},
  journal = {Journal of Computational and Graphical Statistics},
  author = {Cui, Erjia and Li, Ruonan and Crainiceanu, Ciprian M.
and Xiao, Luo},
  year = {2022},
  month = oct,
}

@article{li_xiao_smirnova_cui_leroux_crainiceanu_2022,
  title = {Fixed-effects inference and tests of correlation for longitudinal functional data},
  volume = {5},
  issn = {1097-0258},
  doi = {10.1002/sim.9421},
  abstractNote = {We propose an inferential framework for fixed effects in longitudinal functional models and introduce tests for the correlation structures induced by the longitudinal sampling procedure. The framework provides a natural extension of standard longitudinal correlation models for scalar observations to functional observations. Using simulation studies, we compare fixed effects estimation under correctly and incorrectly specified correlation structures and also test the longitudinal correlation structure. Finally, we apply the proposed methods to a longitudinal functional dataset on physical activity. The computer code for the proposed method is available at https://github.com/rli20ST758/FILF.},
  journal = {Statistics in Medicine},
  author = {Li, Ruonan and Xiao, Luo and Smirnova, Ekaterina and Cui, Erjia and Leroux, Andrew and Crainiceanu, Ciprian M.},
  year = {2022},
  month = may,
}

@article{weaver_xiao_lu_2022,
  title = {Functional data analysis for longitudinal data with informative observation times},
  volume = {3},
  issn = {1541-0420},
  url = {https://doi.org/10.1111/biom.13646},
  doi = {10.1111/biom.13646},
  abstractNote = {In functional data analysis for longitudinal data, the observation process is typically assumed to be noninformative, which is often violated in real applications. Thus, methods that fail to account for the dependence between observation times and longitudinal outcomes may result in biased estimation. For longitudinal data with informative observation times, we find that under a general class of shared random effect models, a commonly used functional data method may lead to inconsistent model estimation while another functional data method results in consistent and even rate-optimal estimation.
Indeed, we show that the mean function can be estimated appropriately via penalized splines and that the covariance function can be estimated appropriately via penalized tensor-product splines, both with specific choices of parameters. For the proposed method, theoretical results are provided, and simulation studies and a real data analysis are conducted to demonstrate its performance.},
  journal = {Biometrics},
  publisher = {Wiley},
  author = {Weaver, Caleb and Xiao, Luo and Lu, Wenbin},
  year = {2022},
  month = mar,
}

@article{li_xiao_luo_2022,
  title = {Joint model for survival and multivariate sparse functional data with application to a study of {Alzheimer's Disease}},
  volume = {78},
  issn = {1541-0420},
  doi = {10.1111/biom.13427},
  abstractNote = {Studies of Alzheimer's disease (AD) often collect multiple longitudinal clinical outcomes, which are correlated and predictive of AD progression. It is of great scientific interest to investigate the association between the outcomes and time to AD onset. We model the multiple longitudinal outcomes as multivariate sparse functional data and propose a functional joint model linking multivariate functional data to event time data. In particular, we propose a multivariate functional mixed model to identify the shared progression pattern and outcome-specific progression patterns of the outcomes, which enables more interpretable modeling of associations between outcomes and AD onset. The proposed method is applied to the Alzheimer's Disease Neuroimaging Initiative study (ADNI) and the functional joint model sheds new light on inference of five longitudinal outcomes and their associations with AD onset. Simulation studies also confirm the validity of the proposed model.
Data used in preparation of this article were obtained from the ADNI database.},
  number = {2},
  journal = {Biometrics},
  author = {Li, Cai and Xiao, Luo and Luo, Sheng},
  year = {2022},
  month = jun,
  pages = {435--447},
}

@article{chen_caffo_stein-o'brien_liu_langmead_colantuoni_xiao_2022,
  title = {Two-stage linked component analysis for joint decomposition of multiple biologically related data sets},
  volume = {3},
  issn = {1468-4357},
  doi = {10.1093/biostatistics/kxac005},
  abstractNote = {Integrative analysis of multiple data sets has the potential of fully leveraging the vast amount of high throughput biological data being generated. In particular such analysis will be powerful in making inference from publicly available collections of genetic, transcriptomic and epigenetic data sets which are designed to study shared biological processes, but which vary in their target measurements, biological variation, unwanted noise, and batch variation. Thus, methods that enable the joint analysis of multiple data sets are needed to gain insights into shared biological processes that would otherwise be hidden by unwanted intra-data set variation. Here, we propose a method called two-stage linked component analysis (2s-LCA) to jointly decompose multiple biologically related experimental data sets with biological and technological relationships that can be structured into the decomposition. The consistency of the proposed method is established and its empirical performance is evaluated via simulation studies.
We apply 2s-LCA to jointly analyze four data sets focused on human brain development and identify meaningful patterns of gene expression in human neurogenesis that have shared structure across these data sets.},
  journal = {Biostatistics},
  author = {Chen, Huan and Caffo, Brian and Stein-O'Brien, Genevieve and Liu, Jinrui and Langmead, Ben and Colantuoni, Carlo and Xiao, Luo},
  year = {2022},
  month = mar,
}

@article{ma_xiao_liu_lindquist_2021,
  title = {A functional mixed model for scalar on function regression with application to a functional {MRI} study},
  volume = {22},
  issn = {1468-4357},
  doi = {10.1093/biostatistics/kxz046},
  abstractNote = {Motivated by a functional magnetic resonance imaging (fMRI) study, we propose a new functional mixed model for scalar on function regression. The model extends the standard scalar on function regression for repeated outcomes by incorporating subject-specific random functional effects. Using functional principal component analysis, the new model can be reformulated as a mixed effects model and thus easily fit. A test is also proposed to assess the existence of the subject-specific random functional effects. We evaluate the performance of the model and test via a simulation study, as well as on data from the motivating fMRI study of thermal pain.
The data application indicates significant subject-specific effects of the human brain hemodynamics related to pain and provides insights on how the effects might differ across subjects.},
  number = {3},
  journal = {Biostatistics},
  publisher = {Oxford University Press (OUP)},
  author = {Ma, Wanying and Xiao, Luo and Liu, Bowen and Lindquist, Martin A.},
  year = {2021},
  month = jul,
  pages = {439--454},
}

@article{ohuma_villar_feng_xiao_salomon_barros_ismail_stones_jaffer_oberto_et_al_2021,
  title = {Fetal growth velocity standards from the {Fetal Growth Longitudinal Study} of the {INTERGROWTH-21st Project}},
  volume = {224},
  issn = {1097-6868},
  doi = {10.1016/j.ajog.2020.07.054},
  abstractNote = {Background: Human growth is susceptible to damage from insults, particularly during periods of rapid growth. Identifying those periods and the normative limits that are compatible with adequate growth and development are the first key steps toward preventing impaired growth. Objective: This study aimed to construct international fetal growth velocity increment and conditional velocity standards from 14 to 40 weeks' gestation based on the same cohort that contributed to the INTERGROWTH-21st Fetal Growth Standards. Study Design: This study was a prospective, longitudinal study of 4321 low-risk pregnancies from 8 geographically diverse populations in the INTERGROWTH-21st Project with rigorous standardization of all study procedures, equipment, and measurements that were performed by trained ultrasonographers. Gestational age was accurately determined clinically and confirmed by ultrasound measurement of crown-rump length at <14 weeks' gestation. Thereafter, the ultrasonographers, who were masked to the values, measured the fetal head circumference, biparietal diameter, occipitofrontal diameter, abdominal circumference, and femur length in triplicate every 5 weeks (within 1 week either side) using identical ultrasound equipment at each site (4--7 scans per pregnancy). Velocity increments across a range of intervals between measures were modeled using fractional polynomial regression. Results: Peak velocity was observed at a similar gestational age: 16 and 17 weeks' gestation for head circumference (12.2 mm/wk), and 16 weeks' gestation for abdominal circumference (11.8 mm/wk) and femur length (3.2 mm/wk). However, velocity growth slowed down rapidly for head circumference, biparietal diameter, occipitofrontal diameter, and femur length, with an almost linear reduction toward term that was more marked for femur length. Conversely, abdominal circumference velocity remained relatively steady throughout pregnancy. The change in velocity with gestational age was more evident for head circumference, biparietal diameter, occipitofrontal diameter, and femur length than for abdominal circumference when the change was expressed as a percentage of fetal size at 40 weeks' gestation. We have also shown how to obtain accurate conditional fetal velocity based on our previous methodological work. Conclusion: The fetal skeleton and abdomen have different velocity growth patterns during intrauterine life. Accordingly, we have produced international Fetal Growth Velocity Increment Standards to complement the INTERGROWTH-21st Fetal Growth Standards so as to monitor fetal well-being comprehensively worldwide. Fetal growth velocity curves may be valuable if one wants to study the pathophysiology of fetal growth. We provide an application that can be used easily in clinical practice to evaluate changes in fetal size as conditional velocity for a more refined assessment of fetal growth than is possible at present (https://lxiao5.shinyapps.io/fetal_growth/). The application is freely available with the other INTERGROWTH-21st tools at https://intergrowth21.tghn.org/standards-tools/.},
  number = {2},
  journal = {American Journal of Obstetrics and Gynecology},
  author = {Ohuma, Eric O. and Villar, Jose and Feng, Yuan and Xiao, Luo and Salomon, Laurent and Barros, Fernando C. and Ismail, Leila Cheikh and Stones, William and Jaffer, Yasmin and Oberto, Manuela and others},
  year = {2021},
  month = feb,
}

@article{weaver_xiao_lindquist_2021,
  title = {Single-index models with functional connectivity network predictors},
  volume = {5},
  issn = {1468-4357},
  url = {https://doi.org/10.1093/biostatistics/kxab015},
  doi = {10.1093/biostatistics/kxab015},
  abstractNote = {Functional connectivity is defined as the undirected association between two or more functional magnetic resonance imaging (fMRI) time series. Increasingly, subject-level functional connectivity data have been used to predict and classify clinical outcomes and subject attributes.
We propose a single-index model wherein response variables and sparse functional connectivity network valued predictors are linked by an unspecified smooth function in order to accommodate potentially nonlinear relationships. We exploit the network structure of functional connectivity by imposing meaningful sparsity constraints, which lead not only to the identification of association of interactions between regions with the response but also the assessment of whether or not the functional connectivity associated with a brain region is related to the response variable. We demonstrate the effectiveness of the proposed model in simulation studies and in an application to a resting-state fMRI data set from the Human Connectome Project to model fluid intelligence and sex and to identify predictive links between brain regions.},
  journal = {Biostatistics},
  publisher = {Oxford University Press (OUP)},
  author = {Weaver, Caleb and Xiao, Luo and Lindquist, Martin A.},
  year = {2021},
  month = may,
}

@article{xiao_2020,
  title = {Asymptotic properties of penalized splines for functional data},
  volume = {26},
  issn = {1573-9759},
  doi = {10.3150/20-BEJ1209},
  abstractNote = {Penalized spline methods are popular for functional data analysis but their asymptotic properties have not been developed. We present a theoretic study of the L2 and uniform convergence of penalized spline estimators for estimating the mean and covariance functions for functional data under general settings.
The established convergence rates for the mean function estimation are mini-max rate optimal and the rates for the covariance function estimation are comparable to those using other smoothing methods.},
  number = {4},
  journal = {Bernoulli},
  author = {Xiao, Luo},
  year = {2020},
  month = nov,
  pages = {2847--2875},
}

@article{feng_xiao_li_chen_ohuma_2020,
  title = {Correlation models for monitoring fetal growth},
  volume = {29},
  issn = {1477-0334},
  doi = {10.1177/0962280220905623},
  abstractNote = {Ultrasound growth measurements are monitored to evaluate if a fetus is growing normally compared with a defined standard chart at a specified gestational age. Using data from the Fetal Growth Longitudinal Study of the INTERGROWTH-21st project, we have modelled the longitudinal dependence of fetal head circumference, biparietal diameter, occipito-frontal diameter, abdominal circumference, and femur length using a two-stage approach. The first stage involved finding a suitable transformation of the raw fetal measurements (as the marginal distributions of ultrasound measurements were non-normal) to standardized deviations (Z-scores). In the second stage, a correlation model for a Gaussian process is fitted, yielding a correlation for any pair of observations made between 14 and 40 weeks. The correlation structure of the fetal Z-score can be used to assess whether the growth, for example, between successive measurements is satisfactory. The paper is accompanied by a Shiny application, see https://lxiao5.shinyapps.io/shinycalculator/.},
  number = {10},
  journal = {Statistical Methods in Medical Research},
  author = {Feng, Yuan and Xiao, Luo and Li, Cai and Chen, Stephanie T. and Ohuma, Eric O.},
  year = {2020},
  month = oct,
  pages = {2795--2813},
}

@article{li_xiao_luo_2020,
  title = {Fast covariance estimation for multivariate sparse functional data},
  volume = {9},
  issn = {2049-1573},
  doi = {10.1002/sta4.245},
  abstractNote = {Covariance estimation is essential yet underdeveloped for analysing multivariate functional data.
We propose a fast covariance estimation method for multivariate sparse functional data using bivariate penalized splines. The tensor-product B-spline formulation of the proposed method enables a simple spectral decomposition of the associated covariance operator and explicit expressions of the resulting eigenfunctions as linear combinations of B-spline bases, thereby dramatically facilitating subsequent principal component analysis. We derive a fast algorithm for selecting the smoothing parameters in covariance smoothing using leave-one-subject-out cross-validation. The method is evaluated with extensive numerical studies and applied to an Alzheimer's disease study with multiple longitudinal outcomes.},
  number = {1},
  journal = {Stat},
  author = {Li, Cai and Xiao, Luo and Luo, Sheng},
  year = {2020},
}

@article{xiao_nan_2020,
  title = {Uniform convergence of penalized splines},
  volume = {9},
  issn = {2049-1573},
  doi = {10.1002/sta4.297},
  abstractNote = {Penalized splines are popular for nonparametric regression. We establish the minimax rate optimality of penalized splines for uniform convergence, thus improving the existing rate in the literature. The result is applicable to several types of penalized splines that are commonly used and holds under mild conditions on the design points.},
  number = {1},
  journal = {Stat},
  author = {Xiao, Luo and Nan, Zhe},
  year = {2020},
}

@article{chen_xiao_staicu_2019,
  title = {A smoothing-based goodness-of-fit test of covariance for functional data},
  volume = {75},
  issn = {1541-0420},
  doi = {10.1111/biom.13005},
  abstractNote = {Functional data methods are often applied to longitudinal data as they provide a more flexible way to capture dependence across repeated observations. However, there is no formal testing procedure to determine if functional methods are actually necessary.
We propose a goodness-of-fit test for comparing parametric covariance functions against general nonparametric alternatives for both irregularly observed longitudinal data and densely observed functional data. We consider a smoothing-based test statistic and approximate its null distribution using a bootstrap procedure. We focus on testing a quadratic polynomial covariance induced by a linear mixed effects model and the method can be used to test any smooth parametric covariance function. Performance and versatility of the proposed test is illustrated through a simulation study and three data applications.},
  number = {2},
  journal = {Biometrics},
  publisher = {Wiley},
  author = {Chen, Stephanie T. and Xiao, Luo and Staicu, Ana-Maria},
  year = {2019},
  month = jun,
  pages = {562--571},
}

@article{xiao_2019,
  title = {Asymptotic theory of penalized splines},
  volume = {13},
  issn = {1935-7524},
  doi = {10.1214/19-EJS1541},
  abstractNote = {The paper gives a unified study of the large sample asymptotic theory of penalized splines including the O-splines using B-splines and an integrated squared derivative penalty [22], the P-splines which use B-splines and a discrete difference penalty [13], and the T-splines which use truncated polynomials and a ridge penalty [24]. Extending existing results for O-splines [7], it is shown that, depending on the number of knots and appropriate smoothing parameters, the $L_2$ risk bounds of penalized spline estimators are rate-wise similar to either those of regression splines or to those of smoothing splines and could each attain the optimal minimax rate of convergence [32].
In addition, convergence rate of the $L_\infty$ risk bound, and local asymptotic bias and variance are derived for all three types of penalized splines.},
  number = {1},
  journal = {Electronic Journal of Statistics},
  author = {Xiao, Luo},
  year = {2019},
  pages = {747--794},
}

@article{li_xiao_2019,
  title = {Optimal design for classification of functional data},
  volume = {48},
  doi = {10.1002/cjs.11531},
  abstractNote = {We study the design problem for the optimal classification of functional data. The goal is to select sampling time points so that functional data observed at these time points can be classified accurately. We propose optimal designs that are applicable to either dense or sparse functional data. Using linear discriminant analysis, we formulate our design objectives as explicit functions of the sampling points. We study the theoretical properties of the proposed design objectives and provide a practical implementation. The performance of the proposed design is evaluated through simulations and real data applications. The Canadian Journal of Statistics 48: 285--307; 2020 {\copyright} 2019 Statistical Society of Canada},
  journal = {Canadian Journal of Statistics},
  publisher = {Wiley},
  author = {Li, Cai and Xiao, Luo},
  year = {2020},
  pages = {285--307},
}

@article{anderson_xiao_checkley_2019,
  title = {Using data from multiple studies to develop a child growth correlation matrix},
  volume = {38},
  issn = {1097-0258},
  doi = {10.1002/sim.7696},
  abstractNote = {In many countries, the monitoring of child growth does not occur in a regular manner, and instead, we may have to rely on sporadic observations that are subject to substantial measurement error. In these countries, it can be difficult to identify patterns of poor growth, and faltering children may miss out on essential health interventions. The contribution of this paper is to provide a framework for pooling together multiple datasets, thus allowing us to overcome the issue of sparse data and provide improved estimates of growth.
We use data from multiple longitudinal growth studies to construct a common correlation matrix that can be used in estimation and prediction of child growth. We propose a novel 2-stage approach: In stage 1, we construct a raw matrix via a set of univariate meta-analyses, and in stage 2, we smooth this raw matrix to obtain a more realistic correlation matrix. The methodology is illustrated using data from 16 child growth studies from the Bill and Melinda Gates Foundation's Healthy Birth Growth and Development knowledge integration project and identifies strong correlation for both height and weight between the ages of 4 and 12 years. We use a case study to provide an example of how this matrix can be used to help compute growth measures.},
  number = {19},
  journal = {Statistics in Medicine},
  publisher = {Wiley},
  author = {Anderson, Craig and Xiao, Luo and Checkley, William},
  year = {2019},
  month = aug,
  pages = {3540--3554},
}

@article{park_xiao_willbur_staicu_jumbe_2018,
  title = {A joint design for functional data with application to scheduling ultrasound scans},
  volume = {122},
  issn = {1872-7352},
  doi = {10.1016/j.csda.2018.01.009},
  abstractNote = {A joint design for sampling functional data is proposed to achieve optimal prediction of both functional data and a scalar outcome. The motivating application is fetal growth, where the objective is to determine the optimal times to collect ultrasound measurements in order to recover fetal growth trajectories and to predict child birth outcomes. The joint design is formulated using an optimization criterion and implemented in a pilot study. Performance of the proposed design is evaluated via simulation study and application to fetal ultrasound data.},
  journal = {Computational Statistics \& Data Analysis},
  publisher = {Elsevier BV},
  author = {Park, So Young and Xiao, Luo and Willbur, Jayson D. and Staicu, Ana-Maria and Jumbe, N.
L'ntshotshole},
  year = {2018},
  month = jun,
  pages = {101--114},
}

@article{xiao_2018,
  title = {Asymptotics of bivariate penalised splines},
  volume = {31},
  issn = {1048-5252 1029-0311},
  url = {http://dx.doi.org/10.1080/10485252.2018.1563295},
  doi = {10.1080/10485252.2018.1563295},
  abstractNote = {We study the class of bivariate penalised splines that use tensor product splines and a smoothness penalty. Similar to Claeskens, G., Krivobokova, T., and Opsomer, J.D. [(2009), `Asymptotic Properties of Penalised Spline Estimators', Biometrika, 96(3), 529--544] for the univariate penalised splines, we show that, depending on the number of knots and penalty, the global asymptotic convergence rate of bivariate penalised splines is either similar to that of tensor product regression splines or to that of thin plate splines. In each scenario, the bivariate penalised splines are found rate optimal in the sense of Stone, C.J. [(12, 1982), `Optimal Global Rates of Convergence for Nonparametric Regression', The Annals of Statistics, 10(4), 1040--1053] for a corresponding class of functions with appropriate smoothness. For the scenario where a small number of knots is used, we obtain expressions for the local asymptotic bias and variance and derive the point-wise and uniform asymptotic normality. The theoretical results are applicable to tensor product regression splines.},
  number = {2},
  journal = {Journal of Nonparametric Statistics},
  publisher = {Informa UK Limited},
  author = {Xiao, Luo},
  year = {2018},
  month = dec,
  pages = {289--314},
}

@article{leroux_xiao_crainiceanu_checkley_2018,
  title = {Dynamic prediction in functional concurrent regression with an application to child growth},
  volume = {37},
  issn = {1097-0258},
  doi = {10.1002/sim.7582},
  abstractNote = {In many studies, it is of interest to predict the future trajectory of subjects based on their historical data, referred to as dynamic prediction. Mixed effects models have traditionally been used for dynamic prediction.
However, the commonly used random intercept and slope model is often not sufficiently flexible for modeling subject‐specific trajectories. In addition, there may be useful exposures/predictors of interest that are measured concurrently with the outcome, complicating dynamic prediction. To address these problems, we propose a dynamic functional concurrent regression model to handle the case where both the functional response and the functional predictors are irregularly measured. Currently, such a model cannot be fit by existing software. We apply the model to dynamically predict children's length conditional on prior length, weight, and baseline covariates. Inference on model parameters and subject‐specific trajectories is conducted using the mixed effects representation of the proposed model. An extensive simulation study shows that the dynamic functional regression model provides more accurate estimation and inference than existing methods. Methods are supported by fast, flexible, open source software that uses heavily tested smoothing techniques.}, number={8}, journal={STATISTICS IN MEDICINE}, publisher={Wiley}, author={Leroux, Andrew and Xiao, Luo and Crainiceanu, Ciprian and Checkley, William}, year={2018}, month={Apr}, pages={1376–1388} } @article{xiao_li_checkley_crainiceanu_2018, title={Fast covariance estimation for sparse functional data}, volume={28}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-017-9744-8}, abstractNote={Smoothing of noisy sample covariances is an important component in functional data analysis. We propose a novel covariance smoothing method based on penalized splines and associated software. The proposed method is a bivariate spline smoother that is designed for covariance smoothing and can be used for sparse functional or longitudinal data. We propose a fast algorithm for covariance smoothing using leave-one-subject-out cross-validation. 
Our simulations show that the proposed method compares favorably against several commonly used methods. The method is applied to a study of child growth led by one of coauthors and to a public dataset of longitudinal CD4 counts.}, number={3}, journal={STATISTICS AND COMPUTING}, publisher={Springer Nature}, author={Xiao, Luo and Li, Cai and Checkley, William and Crainiceanu, Ciprian}, year={2018}, month={May}, pages={511–522} } @article{xiao_li_checkley_crainiceanu_2018_erratum, title={Fast covariance estimation for sparse functional data (vol 28, pg 511, 2017)}, volume={28}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-017-9768-0}, number={3}, journal={STATISTICS AND COMPUTING}, author={Xiao, Luo and Li, Cai and Checkley, William and Crainiceanu, Ciprian}, year={2018}, month={May}, pages={523–523} } @article{grigsby_di_leroux_zipunnikov_xiao_crainiceanu_checkley_2018, title={Novel metrics for growth model selection}, volume={15}, journal={Emerging Themes in Epidemiology}, author={Grigsby, M. R. and Di, J. R. and Leroux, A. and Zipunnikov, V. and Xiao, L. and Crainiceanu, C. and Checkley, W.}, year={2018} } @article{varma_dey_leroux_di_urbanek_xiao_zipunnikov_2018, title={Total volume of physical activity: TAC, TLAC or TAC(lambda)}, volume={106}, ISSN={["1096-0260"]}, DOI={10.1016/j.ypmed.2017.10.028}, abstractNote={Higher physical activity levels are associated with reduced cognitive decline among older adults; however, current understanding of underlying brain mechanisms is limited. This cross-sectional study investigated the relationship between actigraphy-estimated total volume of physical activity (TVPA) and magnetic resonance imaging (MRI) measures of white matter hyperintensities (WMH), and functional and structural brain connectivity, measured by resting-state functional MRI and diffusion tensor imaging. Study participants (N = 156, mean age = 71 years) included 136 with normal cognition and 20 with Mild Cognitive Impairment. 
Higher TVPA was associated with greater functional connectivity within the default-mode network and greater network modularity (a measure of network specialization), as well as with greater anisotropy and lower radial diffusion in white matter, suggesting better structural connectivity. These associations with functional and structural connectivity were independent of one another and independent of the level of vascular risk, APOE-ε4 status, cognitive reserve, and WMH volume, which were not associated with TVPA. Findings suggest that physical activity is beneficial for brain connectivity among older individuals with varying levels of risk for cognitive decline.}, journal={PREVENTIVE MEDICINE}, publisher={Elsevier BV}, author={Varma, Vijay R. and Dey, Debangan and Leroux, Andrew and Di, Junrui and Urbanek, Jacek and Xiao, Luo and Zipunnikov, Vadim}, year={2018}, month={Jan}, pages={233–235} } @article{bai_di_xiao_evenson_lacroix_crainiceanu_buchner_2017, title={AN ACTIVITY INDEX FOR RAW ACCELEROMETRY DATA AND ITS APPLICATION IN OLDER ADULTS}, volume={1}, DOI={10.1093/geroni/igx004.4497}, abstractNote={Accelerometers have been widely deployed in public health studies in recent years and research has mainly focused on summarized metrics provided by accelerometers manufactures, such as the activity counts (AC). Such measures do not have a publicly available formula and can vary by device manufacturer. To address these problems, we developed the activity index (AI), a new metric for summarizing raw tri-axial accelerometry data, and compared the AI to AC’s performance for distinguishing various types of activities and estimating energy expenditure. The analysis was conducted using data from the Women’s Health Initiative, in which tri-axial raw acceleration data and energy expenditure were collected at the same time. ROC analyses indicated that AI better distinguished between different types of activities than AC. AI better associated with METs as well. 
The proposed AI provides a transparent and reliable way to summarize densely sampled raw acceleration data.}, number={suppl_1}, journal={Innovation in Aging}, publisher={Oxford University Press (OUP)}, author={Bai, J. and Di, C. and Xiao, L. and Evenson, K.R. and LaCroix, A. and Crainiceanu, C. and Buchner, D.M.}, year={2017}, month={Jun}, pages={1239–1239} } @article{varma_dey_leroux_di_urbanek_xiao_zipunnikov_2017, title={Re-evaluating the effect of age on physical activity over the lifespan}, volume={101}, ISSN={["1096-0260"]}, DOI={10.1016/j.ypmed.2017.05.030}, abstractNote={Advancements in accelerometer analytic and visualization techniques allow researchers to more precisely identify and compare critical periods of physical activity (PA) decline by age across the lifespan, and describe how daily PA patterns may vary across age groups. We used accelerometer data from the 2003–2006 cohorts of the National Health and Nutrition Examination Survey (NHANES) (n = 12,529) to quantify total PA as well as PA by intensity across the lifespan using sex-stratified, age specific percentile curves constructed using generalized additive models. We additionally estimated minute-to-minute diurnal PA using smoothed bivariate surfaces. We found that from childhood to adolescence (ages 6–19) across sex, PA is sharply lower by age partially due to a later initiation of morning PA. Total PA levels, at age 19 are comparable to levels at age 60. Contrary to prior evidence, during young adulthood (ages 20–30) total and light intensity PA increases by age and then stabilizes during midlife (ages 31–59) partially due to an earlier initiation of morning PA. We additionally found that males compared to females have an earlier lowering in PA by age at midlife and lower total PA, higher sedentary behavior, and lower light intensity PA in older adulthood; these trends seem to be driven by lower PA in the afternoon compared to females. 
Our results suggest a re-evaluation of how emerging adulthood may affect PA levels and the importance of considering time of day and sex differences when developing PA interventions.}, journal={PREVENTIVE MEDICINE}, publisher={Elsevier BV}, author={Varma, Vijay R. and Dey, Debangan and Leroux, Andrew and Di, Junrui and Urbanek, Jacek and Xiao, Luo and Zipunnikov, Vadim}, year={2017}, month={Aug}, pages={102–108} } @article{park_staicu_xiao_crainiceanu_2017, title={Simple fixed-effects inference for complex functional models}, volume={19}, ISSN={1465-4644 1468-4357}, url={http://dx.doi.org/10.1093/biostatistics/kxx026}, DOI={10.1093/biostatistics/kxx026}, abstractNote={We propose simple inferential approaches for the fixed effects in complex functional mixed effects models. We estimate the fixed effects under the independence of functional residuals assumption and then bootstrap independent units (e.g. subjects) to conduct inference on the fixed effects parameters. Simulations show excellent coverage probability of the confidence intervals and size of tests for the fixed effects model parameters. Methods are motivated by and applied to the Baltimore Longitudinal Study of Aging, though they are applicable to other studies that collect correlated functional data.}, number={2}, journal={Biostatistics}, publisher={Oxford University Press (OUP)}, author={Park, So Young and Staicu, Ana-Maria and Xiao, Luo and Crainiceanu, Ciprian M}, year={2017}, month={Jun}, pages={137–152} } @article{huang_reiss_xiao_zipunnikov_lindquist_crainiceanu_2017, title={Two-way principal component analysis for matrix-variate data, with an application to functional magnetic resonance imaging data}, volume={18}, DOI={10.1093/biostatistics/kxw040}, number={2}, journal={Biostatistics (Oxford, England)}, author={Huang, L. and Reiss, P. T. and Xiao, L. and Zipunnikov, V. and Lindquist, M. A. and Crainiceanu, C. 
M.}, year={2017}, pages={214–229} } @article{bai_di_xiao_evenson_lacroix_crainiceanu_buchner_2016, title={An Activity Index for Raw Accelerometry Data and Its Comparison with Other Activity Metrics}, volume={11}, url={https://doi.org/10.1371/journal.pone.0160644}, DOI={10.1371/journal.pone.0160644}, abstractNote={Accelerometers have been widely deployed in public health studies in recent years. While they collect high-resolution acceleration signals (e.g., 10–100 Hz), research has mainly focused on summarized metrics provided by accelerometers manufactures, such as the activity count (AC) by ActiGraph or Actical. Such measures do not have a publicly available formula, lack a straightforward interpretation, and can vary by software implementation or hardware type. To address these problems, we propose the physical activity index (AI), a new metric for summarizing raw tri-axial accelerometry data. We compared this metric with the AC and another recently proposed metric for raw data, Euclidean Norm Minus One (ENMO), against energy expenditure. The comparison was conducted using data from the Objective Physical Activity and Cardiovascular Health Study, in which 194 women 60–91 years performed 9 lifestyle activities in the laboratory, wearing a tri-axial accelerometer (ActiGraph GT3X+) on the hip set to 30 Hz and an Oxycon portable calorimeter, to record both tri-axial acceleration time series (converted into AI, AC, and ENMO) and oxygen uptake during each activity (converted into metabolic equivalents (METs)) at the same time. Receiver operating characteristic analyses indicated that both AI and ENMO were more sensitive to moderate and vigorous physical activities than AC, while AI was more sensitive to sedentary and light activities than ENMO. AI had the highest coefficients of determination for METs (0.72) and was a better classifier of physical activity intensity than both AC (for all intensity levels) and ENMO (for sedentary and light intensity). 
The proposed AI provides a novel and transparent way to summarize densely sampled raw accelerometry data, and may serve as an alternative to AC. The AI’s largely improved sensitivity on sedentary and light activities over AC and ENMO further demonstrate its advantage in studies with older adults.}, number={8}, journal={PLOS ONE}, publisher={Public Library of Science}, author={Bai, Jiawei and Di, Chongzhi and Xiao, Luo and Evenson, Kelly R. and LaCroix, Andrea Z. and Crainiceanu, Ciprian M. and Buchner, David M.}, year={2016}, month={Aug}, pages={1–14} } @article{hooker_ramsay_xiao_2016, title={CollocInfer: Collocation Inference in Differential Equation Models}, volume={75}, DOI={10.18637/jss.v075.i02}, abstractNote={This monograph details the implementation and use of the CollocInfer package in R for smoothing-based estimation of continuous-time nonlinear dynamic systems. These routines represent an extension of the generalized profiling methods in Ramsay, Hooker, Campbell, and Cao (2007) for estimating parameters in nonlinear ordinary differential equations. An interface to the fda package is included. The package also supports discrete-time systems. We describe the methodological and computational framework and the necessary steps to use the software. Equivalent functionality is available in MATLAB.}, number={2}, journal={Journal of Statistical Software}, publisher={Foundation for Open Access Statistics}, author={Hooker, Giles and Ramsay, James O. and Xiao, Luo}, year={2016} } @article{bien_bunea_xiao_2016, title={Convex Banding of the Covariance Matrix}, volume={111}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2015.1058265}, DOI={10.1080/01621459.2015.1058265}, abstractNote={We introduce a new sparse estimator of the covariance matrix for high-dimensional models in which the variables have a known ordering. 
Our estimator, which is the solution to a convex optimization problem, is equivalently expressed as an estimator that tapers the sample covariance matrix by a Toeplitz, sparsely banded, data-adaptive matrix. As a result of this adaptivity, the convex banding estimator enjoys theoretical optimality properties not attained by previous banding or tapered estimators. In particular, our convex banding estimator is minimax rate adaptive in Frobenius and operator norms, up to log factors, over commonly studied classes of covariance matrices, and over more general classes. Furthermore, it correctly recovers the bandwidth when the true covariance is exactly banded. Our convex formulation admits a simple and efficient algorithm. Empirical studies demonstrate its practical effectiveness and illustrate that our exactly banded estimator works well even when the true covariance matrix is only close to a banded matrix, confirming our theoretical results. Our method compares favorably with all existing methods, in terms of accuracy and speed. We illustrate the practical merits of the convex banding estimator by showing that it can be used to improve the performance of discriminant analysis for classifying sound recordings. Supplementary materials for this article are available online.}, number={514}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Bien, Jacob and Bunea, Florentina and Xiao, Luo}, year={2016}, month={Apr}, pages={834–845} } @article{xiao_zipunnikov_ruppert_crainiceanu_2016, title={Fast covariance estimation for high-dimensional functional data}, volume={26}, ISSN={0960-3174 1573-1375}, url={http://dx.doi.org/10.1007/S11222-014-9485-X}, DOI={10.1007/S11222-014-9485-X}, abstractNote={We propose two fast covariance smoothing methods and associated software that scale up linearly with the number of observations per function. 
Most available methods and software cannot smooth covariance matrices of dimension J > 500; a recently introduced sandwich smoother is an exception but is not adapted to smooth covariance matrices of large dimensions, such as J = 10, 000. We introduce two new methods that circumvent those problems: 1) a fast implementation of the sandwich smoother for covariance smoothing; and 2) a two-step procedure that first obtains the singular value decomposition of the data matrix and then smoothes the eigenvectors. These new approaches are at least an order of magnitude faster in high dimensions and drastically reduce computer memory requirements. The new approaches provide instantaneous (a few seconds) smoothing for matrices of dimension J = 10,000 and very fast (< 10 minutes) smoothing for J = 100, 000. R functions, simulations, and data analysis provide ready to use, reproducible, and scalable tools for practical data analysis of noisy high-dimensional functional data.}, number={1-2}, journal={Statistics and Computing}, publisher={Springer Science and Business Media LLC}, author={Xiao, Luo and Zipunnikov, Vadim and Ruppert, David and Crainiceanu, Ciprian}, year={2016}, month={Jan}, pages={409–421} } @article{xiao_he_koster_caserotti_lange-maia_glynn_harris_crainiceanu_2016, title={Movement prediction using accelerometers in a human population}, volume={72}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12382}, abstractNote={We introduce statistical methods for predicting the types of human activity at sub‐second resolution using triaxial accelerometry data. The major innovation is that we use labeled activity data from some subjects to predict the activity labels of other subjects. To achieve this, we normalize the data across subjects by matching the standing up and lying down portions of triaxial accelerometry data. 
This is necessary to account for differences between the variability in the position of the device relative to gravity, which are induced by body shape and size as well as by the ambiguous definition of device placement. We also normalize the data at the device level to ensure that the magnitude of the signal at rest is similar across devices. After normalization we use overlapping movelets (segments of triaxial accelerometry time series) extracted from some of the subjects to predict the movement type of the other subjects. The problem was motivated by and is applied to a laboratory study of 20 older participants who performed different activities while wearing accelerometers at the hip. Prediction results based on other people's labeled dictionaries of activity performed almost as well as those obtained using their own labeled dictionaries. These findings indicate that prediction of activity types for data collected during natural activities of daily living may actually be possible.}, number={2}, journal={BIOMETRICS}, publisher={Wiley-Blackwell}, author={Xiao, Luo and He, Bing and Koster, Annemarie and Caserotti, Paolo and Lange-Maia, Brittney and Glynn, Nancy W. and Harris, Tamara B. and Crainiceanu, Ciprian M.}, year={2016}, month={Jun}, pages={513–524} } @article{huang_reiss_xiao_zipunnikov_lindquist_crainiceanu_2016, title={Two-way principal component analysis for matrix-variate data, with an application to functional magnetic resonance imaging data}, volume={8}, DOI={10.1093/biostatistics/kxw040}, abstractNote={Many modern neuroimaging studies acquire large spatial images of the brain observed sequentially over time. Such data are often stored in the forms of matrices. To model these matrix-variate data we introduce a class of separable processes using explicit latent process modeling. To account for the size and two-way structure of the data, we extend principal component analysis to achieve dimensionality reduction at the individual level. 
We introduce necessary identifiability conditions for each model and develop scalable estimation procedures. The method is motivated by and applied to a functional magnetic resonance imaging study designed to analyze the relationship between pain and brain activity.}, journal={Biostatistics}, publisher={Oxford University Press (OUP)}, author={Huang, Lei and Reiss, Philip T. and Xiao, Luo and Zipunnikov, Vadim and Lindquist, Martin A. and Crainiceanu, Ciprian M.}, year={2016}, month={Aug}, pages={kxw040} } @article{yang_shmuelof_xiao_krakauer_caffo_2015, title={On tests of activation map dimensionality for fMRI-based studies of learning}, volume={9}, DOI={10.3389/fnins.2015.00085}, abstractNote={A methodology for investigating learning is developed using activation distributions, as opposed to standard voxel-level interaction tests. The approach uses tests of dimensionality to consider the ensemble of paired changes in voxel activation. The developed method allows for the investigation of non-focal and non-localized changes due to learning. In exchange for increased power to detect learning-based changes, this procedure sacrifices the localization information gained via voxel-level interaction testing. The test is demonstrated on an arc-pointing motor task for the study of motor learning, which served as the motivation for this methodological development. The proposed framework considers activation distribution, while the specific proposed test investigates linear tests of dimensionality. This paper includes: the development of the framework, a large scale simulation study, and the subsequent application to a study of motor learning in healthy adults. While the performance of the method was excellent when model assumptions held, complications arose in instances of massive numbers of null voxels or varying angles of principal dimension across subjects. 
Further analysis found that careful masking addressed the former concern, while an angle correction successfully resolved the latter. The simulation results demonstrated that the study of linear dimensionality is able to capture learning effects. The motivating data set used to illustrate the method evaluates two similar arc-pointing tasks, each over two sessions, with training on only one of the tasks in between sessions. The results suggests different activation distribution dimensionality when considering the trained and untrained tasks separately. Specifically, the untrained task evidences greater activation distribution dimensionality than the trained task. However, the direct comparison between the two tasks did not yield a significant result. The nature of the indication for greater dimensionality in the untrained task is explored and found to be non-linear variation in the data.}, journal={Frontiers in Neuroscience}, publisher={Frontiers Media SA}, author={Yang, Juemin and Shmuelof, Lior and Xiao, Luo and Krakauer, John W. and Caffo, Brian}, year={2015}, month={Apr} } @article{bunea_xiao_2015, title={On the sample covariance matrix estimator of reduced effective rank population matrices, with applications to fPCA}, volume={21}, DOI={10.3150/14-bej602}, abstractNote={This work provides a unified analysis of the properties of the sample covariance matrix $\Sigma_n$ over the class of $p\times p$ population covariance matrices $\Sigma$ of reduced effective rank $r_e(\Sigma)$. This class includes scaled factor models and covariance matrices with decaying spectrum. We consider $r_e(\Sigma)$ as a measure of matrix complexity, and obtain sharp minimax rates on the operator and Frobenius norm of $\Sigma_n-\Sigma$, as a function of $r_e(\Sigma)$ and $\|\Sigma\|_2$, the operator norm of $\Sigma$. With guidelines offered by the optimal rates, we define classes of matrices of reduced effective rank over which $\Sigma_n$ is an accurate estimator. 
Within the framework of these classes, we perform a detailed finite sample theoretical analysis of the merits and limitations of the empirical scree plot procedure routinely used in PCA. We show that identifying jumps in the empirical spectrum that consistently estimate jumps in the spectrum of $\Sigma$ is not necessarily informative for other goals, for instance for the selection of those sample eigenvalues and eigenvectors that are consistent estimates of their population counterparts. The scree plot method can still be used for selecting consistent eigenvalues, for appropriate threshold levels. We provide a threshold construction and also give a rule for checking the consistency of the corresponding sample eigenvectors. We specialize these results and analysis to population covariance matrices with polynomially decaying spectra, and extend it to covariance operators with polynomially decaying spectra. An application to fPCA illustrates how our results can be used in functional data analysis.}, number={2}, journal={Bernoulli}, publisher={Bernoulli Society for Mathematical Statistics and Probability}, author={Bunea, Florentina and Xiao, Luo}, year={2015}, month={May}, pages={1200–1230} } @article{xiao_thurston_ruppert_love_davidson_2014, title={Bayesian Models for Multiple Outcomes in Domains With Application to the Seychelles Child Development Study}, volume={109}, DOI={10.1080/01621459.2013.830070}, abstractNote={The Seychelles Child Development Study (SCDS) examines the effects of prenatal exposure to methylmercury on the functioning of the central nervous system. The SCDS data include 20 outcomes measured on 9-year-old children that can be classified broadly in four outcome classes or “domains”: cognition, memory, motor, and social behavior. Previous analyses and scientific theory suggest that these outcomes may belong to more than one of these domains, rather than only a single domain as is frequently assumed for modeling. 
We present a framework for examining the effects of exposure and other covariates when the outcomes may each belong to more than one domain and where we also want to learn about the assignment of outcomes to domains. Each domain is defined by a sentinel outcome, which is preassigned to that domain only. All other outcomes can belong to multiple domains and are not preassigned. Our model allows exposure and covariate effects to differ across domains and across outcomes within domains, and includes random subject-specific effects that model correlations between outcomes within and across domains. We take a Bayesian MCMC approach. Results from the Seychelles study and from extensive simulations show that our model can effectively determine sparse domain assignment, and at the same time give increased power to detect overall, domain-specific, and outcome-specific exposure and covariate effects relative to separate models for each endpoint. When fit to the Seychelles data, several outcomes were classified as partly belonging to domains other than their originally assigned domains. In retrospect, the new partial domain assignments are reasonable and, as we discuss, suggest important scientific insights about the nature of the outcomes. Checks of model misspecification were improved relative to a model that assumes each outcome is in a single domain. Supplementary materials for this article are available online.}, number={505}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Xiao, Luo and Thurston, Sally W. and Ruppert, David and Love, Tanzy M. T. 
and Davidson, Philip W.}, year={2014}, month={Jan}, pages={1–10} } @article{ma_xiao_wong_2014, title={Learning regulatory programs by threshold SVD regression}, volume={111}, DOI={10.1073/pnas.1417808111}, abstractNote={Significance With the increase in high-throughput data in genomic studies, the study of regulatory relationships between multidimensional predictors and responses is becoming a common task. Although high-dimensional data hold promise for revealing rich and complex regulations, it remains challenging to infer the relations between tens of thousands of responses and thousands of predictors, as the desired signal must be searched among an overwhelming number of irrelevant responses. Here we show that by formulating the regulatory programs as hidden-intermediate nodes in a linear network, a sparsity-inducing modeling and inference approach is effective in extracting the regulatory relations among very high-dimensional responses and predictors, even when the sample size is much lower. We formulate a statistical model for the regulation of global gene expression by multiple regulatory programs and propose a thresholding singular value decomposition (T-SVD) regression method for learning such a model from data. Extensive simulations demonstrate that this method offers improved computational speed and higher sensitivity and specificity over competing approaches. The method is used to analyze microRNA (miRNA) and long noncoding RNA (lncRNA) data from The Cancer Genome Atlas (TCGA) consortium. The analysis yields previously unidentified insights into the combinatorial regulation of gene expression by noncoding RNAs, as well as findings that are supported by evidence from the literature.}, number={44}, journal={Proceedings of the National Academy of Sciences}, publisher={Proceedings of the National Academy of Sciences}, author={Ma, X. and Xiao, L. and Wong, W. 
H.}, year={2014}, month={Oct}, pages={15675–15680} } @article{xiao_huang_schrack_ferrucci_zipunnikov_crainiceanu_2014, title={Quantifying the lifetime circadian rhythm of physical activity: a covariate-dependent functional approach}, volume={16}, DOI={10.1093/biostatistics/kxu045}, abstractNote={Objective measurement of physical activity using wearable devices such as accelerometers may provide tantalizing new insights into the association between activity and health outcomes. Accelerometers can record quasi-continuous activity information for many days and for hundreds of individuals. For example, in the Baltimore Longitudinal Study on Aging physical activity was recorded every minute for [Formula: see text] adults for an average of [Formula: see text] days per adult. An important scientific problem is to separate and quantify the systematic and random circadian patterns of physical activity as functions of time of day, age, and gender. To capture the systematic circadian pattern, we introduce a practical bivariate smoother and two crucial innovations: (i) estimating the smoothing parameter using leave-one-subject-out cross validation to account for within-subject correlation and (ii) introducing fast computational techniques that overcome problems both with the size of the data and with the cross-validation approach to smoothing. The age-dependent random patterns are analyzed by a new functional principal component analysis that incorporates both covariate dependence and multilevel structure. For the analysis, we propose a practical and very fast trivariate spline smoother to estimate covariate-dependent covariances and their spectra. Results reveal several interesting, previously unknown, circadian patterns associated with human aging and gender.}, number={2}, journal={Biostatistics}, publisher={Oxford University Press (OUP)}, author={Xiao, L. and Huang, L. and Schrack, J. A. and Ferrucci, L. and Zipunnikov, V. and Crainiceanu, C. 
M.}, year={2014}, month={Oct}, pages={352–367} } @article{xiao_li_ruppert_2013, title={Fast bivariate P-splines: the sandwich smoother}, volume={75}, DOI={10.1111/rssb.12007}, abstractNote={We propose a fast penalized spline method for bivariate smoothing. Univariate P‐spline smoothers are applied simultaneously along both co‐ordinates. The new smoother has a sandwich form which suggested the name ‘sandwich smoother’ to a referee. The sandwich smoother has a tensor product structure that simplifies an asymptotic analysis and it can be fast computed. We derive a local central limit theorem for the sandwich smoother, with simple expressions for the asymptotic bias and variance, by showing that the sandwich smoother is asymptotically equivalent to a bivariate kernel regression estimator with a product kernel. As far as we are aware, this is the first central limit theorem for a bivariate spline estimator of any type. Our simulation study shows that the sandwich smoother is orders of magnitude faster to compute than other bivariate spline smoothers, even when the latter are computed by using a fast generalized linear array model algorithm, and comparable with them in terms of mean integrated squared errors. We extend the sandwich smoother to array data of higher dimensions, where a generalized linear array model algorithm improves the computational speed of the sandwich smoother. One important application of the sandwich smoother is to estimate covariance functions in functional data analysis. In this application, our numerical results show that the sandwich smoother is orders of magnitude faster than local linear regression. The speed of the sandwich formula is important because functional data sets are becoming quite large.}, number={3}, journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, publisher={Wiley-Blackwell}, author={Xiao, Luo and Li, Yingxing and Ruppert, David}, year={2013}, month={Feb}, pages={577–599} }