@article{zou_zeng_xiao_luo_2023, title={BAYESIAN INFERENCE AND DYNAMIC PREDICTION FOR MULTIVARIATE LONGITUDINAL AND SURVIVAL DATA}, volume={17}, ISSN={["1941-7330"]}, DOI={10.1214/23-AOAS1733}, abstractNote={Alzheimer's disease (AD) is a complex neurological disorder impairing multiple domains such as cognition and daily functions. To better understand the disease and its progression, many AD research studies collect multiple longitudinal outcomes that are strongly predictive of the onset of AD dementia. We propose a joint model based on a multivariate functional mixed model framework (referred to as MFMM-JM) that simultaneously models the multiple longitudinal outcomes and the time to dementia onset. We develop six functional forms to fully investigate the complex association between longitudinal outcomes and dementia onset. Moreover, we use the Bayesian methods for statistical inference and develop a dynamic prediction framework that provides accurate personalized predictions of disease progressions based on new subject-specific data. We apply the proposed MFMM-JM to two large ongoing AD studies: the Alzheimer's Disease Neuroimaging Initiative (ADNI) and National Alzheimer's Coordinating Center (NACC), and identify the functional forms with the best predictive performance. 
Our method is also validated by extensive simulation studies with five settings.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Zou, Haotian and Zeng, Donglin and Xiao, Luo and Luo, Sheng}, year={2023}, month={Sep}, pages={2574–2595} } @article{li_xiao_2023, title={Latent factor model for multivariate functional data}, volume={9}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13924}, abstractNote={Abstract}, journal={BIOMETRICS}, author={Li, Ruonan and Xiao, Luo}, year={2023}, month={Sep} } @article{zou_xiao_zeng_luo_2023, title={Multivariate functional mixed model with MRI data: An application to Alzheimer's disease}, volume={2}, ISSN={["1097-0258"]}, DOI={10.1002/sim.9683}, abstractNote={Summary}, journal={STATISTICS IN MEDICINE}, author={Zou, Haotian and Xiao, Luo and Zeng, Donglin and Luo, Sheng}, year={2023}, month={Feb} } @article{cui_li_crainiceanu_xiao_2022, title={Fast Multilevel Functional Principal Component Analysis}, volume={10}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2022.2115500}, abstractNote={We introduce fast multilevel functional principal component analysis (fast MFPCA), which scales up to high dimensional functional data measured at multiple visits. The new approach is orders of magnitude faster than and achieves comparable estimation accuracy with the original MFPCA. Methods are motivated by the National Health and Nutritional Examination Survey (NHANES), which contains minute-level physical activity information of more than 10,000 participants over multiple days and 1440 observations per day. While MFPCA takes more than five days to analyze these data, fast MFPCA takes less than five minutes. A theoretical study of the proposed method is also provided. The associated function mfpca.face() is available in the R package refund. Supplementary materials for this article are available online.}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Cui, Erjia and Li, Ruonan and Crainiceanu, Ciprian M. 
and Xiao, Luo}, year={2022}, month={Oct} } @article{li_xiao_smirnova_cui_leroux_crainiceanu_2022, title={Fixed-effects inference and tests of correlation for longitudinal functional data}, volume={5}, ISSN={["1097-0258"]}, DOI={10.1002/sim.9421}, abstractNote={Abstract}, journal={STATISTICS IN MEDICINE}, author={Li, Ruonan and Xiao, Luo and Smirnova, Ekaterina and Cui, Erjia and Leroux, Andrew and Crainiceanu, Ciprian M.}, year={2022}, month={May} } @article{weaver_xiao_lu_2022, title={Functional data analysis for longitudinal data with informative observation times}, volume={3}, ISSN={["1541-0420"]}, url={https://doi.org/10.1111/biom.13646}, DOI={10.1111/biom.13646}, abstractNote={Abstract}, journal={BIOMETRICS}, publisher={Wiley}, author={Weaver, Caleb and Xiao, Luo and Lu, Wenbin}, year={2022}, month={Mar} } @article{li_xiao_luo_2022, title={Joint model for survival and multivariate sparse functional data with application to a study of Alzheimer's Disease}, volume={78}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13427}, abstractNote={Abstract}, number={2}, journal={BIOMETRICS}, author={Li, Cai and Xiao, Luo and Luo, Sheng}, year={2022}, month={Jun}, pages={435–447} } @article{chen_caffo_stein-o'brien_liu_langmead_colantuoni_xiao_2022, title={Two-stage linked component analysis for joint decomposition of multiple biologically related data sets}, volume={3}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxac005}, abstractNote={SUMMARY}, journal={BIOSTATISTICS}, author={Chen, Huan and Caffo, Brian and Stein-O'Brien, Genevieve and Liu, Jinrui and Langmead, Ben and Colantuoni, Carlo and Xiao, Luo}, year={2022}, month={Mar} } @article{ma_xiao_liu_lindquist_2021, title={A functional mixed model for scalar on function regression with application to a functional MRI study}, volume={22}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxz046}, abstractNote={Summary}, number={3}, journal={BIOSTATISTICS}, publisher={Oxford University Press (OUP)}, author={Ma, Wanying and 
Xiao, Luo and Liu, Bowen and Lindquist, Martin A.}, year={2021}, month={Jul}, pages={439–454} } @article{ohuma_villar_feng_xiao_salomon_barros_ismail_stones_jaffer_oberto_et_al_2021, title={Fetal growth velocity standards from the Fetal Growth Longitudinal Study of the INTERGROWTH-21st Project}, volume={224}, ISSN={1097-6868}, DOI={10.1016/j.ajog.2020.07.054}, abstractNote={Human growth is susceptible to damage from insults, particularly during periods of rapid growth. Identifying those periods and the normative limits that are compatible with adequate growth and development are the first key steps toward preventing impaired growth. This study aimed to construct international fetal growth velocity increment and conditional velocity standards from 14 to 40 weeks' gestation based on the same cohort that contributed to the INTERGROWTH-21st Fetal Growth Standards. This study was a prospective, longitudinal study of 4321 low-risk pregnancies from 8 geographically diverse populations in the INTERGROWTH-21st Project with rigorous standardization of all study procedures, equipment, and measurements that were performed by trained ultrasonographers. Gestational age was accurately determined clinically and confirmed by ultrasound measurement of crown-rump length at <14 weeks' gestation. Thereafter, the ultrasonographers, who were masked to the values, measured the fetal head circumference, biparietal diameter, occipitofrontal diameter, abdominal circumference, and femur length in triplicate every 5 weeks (within 1 week either side) using identical ultrasound equipment at each site (4-7 scans per pregnancy). Velocity increments across a range of intervals between measures were modeled using fractional polynomial regression. Peak velocity was observed at a similar gestational age: 16 and 17 weeks' gestation for head circumference (12.2 mm/wk), and 16 weeks' gestation for abdominal circumference (11.8 mm/wk) and femur length (3.2 mm/wk). 
However, velocity growth slowed down rapidly for head circumference, biparietal diameter, occipitofrontal diameter, and femur length, with an almost linear reduction toward term that was more marked for femur length. Conversely, abdominal circumference velocity remained relatively steady throughout pregnancy. The change in velocity with gestational age was more evident for head circumference, biparietal diameter, occipitofrontal diameter, and femur length than for abdominal circumference when the change was expressed as a percentage of fetal size at 40 weeks' gestation. We have also shown how to obtain accurate conditional fetal velocity based on our previous methodological work.The fetal skeleton and abdomen have different velocity growth patterns during intrauterine life. Accordingly, we have produced international Fetal Growth Velocity Increment Standards to complement the INTERGROWTH-21st Fetal Growth Standards so as to monitor fetal well-being comprehensively worldwide. Fetal growth velocity curves may be valuable if one wants to study the pathophysiology of fetal growth. We provide an application that can be used easily in clinical practice to evaluate changes in fetal size as conditional velocity for a more refined assessment of fetal growth than is possible at present (https://lxiao5.shinyapps.io/fetal_growth/). The application is freely available with the other INTERGROWTH-21st tools at https://intergrowth21.tghn.org/standards-tools/.}, number={2}, journal={AMERICAN JOURNAL OF OBSTETRICS AND GYNECOLOGY}, author={Ohuma, Eric O. and Villar, Jose and Feng, Yuan and Xiao, Luo and Salomon, Laurent and Barros, Fernando C. 
and Ismail, Leila Cheikh and Stones, William and Jaffer, Yasmin and Oberto, Manuela and others}, year={2021}, month={Feb} } @article{weaver_xiao_lindquist_2021, title={Single-index models with functional connectivity network predictors}, volume={5}, ISSN={["1468-4357"]}, url={https://doi.org/10.1093/biostatistics/kxab015}, DOI={10.1093/biostatistics/kxab015}, abstractNote={Summary}, journal={BIOSTATISTICS}, publisher={Oxford University Press (OUP)}, author={Weaver, Caleb and Xiao, Luo and Lindquist, Martin A.}, year={2021}, month={May} } @article{xiao_2020, title={Asymptotic properties of penalized splines for functional data}, volume={26}, ISSN={["1573-9759"]}, DOI={10.3150/20-BEJ1209}, abstractNote={Penalized spline methods are popular for functional data analysis but their asymptotic properties have not been developed. We present a theoretic study of the L2 and uniform convergence of penalized spline estimators for estimating the mean and covariance functions for functional data under general settings. The established convergence rates for the mean function estimation are mini-max rate optimal and the rates for the covariance function estimation are comparable to those using other smoothing methods.}, number={4}, journal={BERNOULLI}, author={Xiao, Luo}, year={2020}, month={Nov}, pages={2847–2875} } @article{feng_xiao_li_chen_ohuma_2020, title={Correlation models for monitoring fetal growth}, volume={29}, ISSN={["1477-0334"]}, DOI={10.1177/0962280220905623}, abstractNote={ Ultrasound growth measurements are monitored to evaluate if a fetus is growing normally compared with a defined standard chart at a specified gestational age. Using data from the Fetal Growth Longitudinal Study of the INTERGROWTH-21st project, we have modelled the longitudinal dependence of fetal head circumference, biparietal diameter, occipito-frontal diameter, abdominal circumference, and femur length using a two-stage approach. 
The first stage involved finding a suitable transformation of the raw fetal measurements (as the marginal distributions of ultrasound measurements were non-normal) to standardized deviations (Z-scores). In the second stage, a correlation model for a Gaussian process is fitted, yielding a correlation for any pair of observations made between 14 and 40 weeks. The correlation structure of the fetal Z-score can be used to assess whether the growth, for example, between successive measurements is satisfactory. The paper is accompanied by a Shiny application, see https://lxiao5.shinyapps.io/shinycalculator/ . }, number={10}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Feng, Yuan and Xiao, Luo and Li, Cai and Chen, Stephanie T. and Ohuma, Eric O.}, year={2020}, month={Oct}, pages={2795–2813} } @article{li_xiao_luo_2020, title={Fast covariance estimation for multivariate sparse functional data}, volume={9}, ISSN={["2049-1573"]}, DOI={10.1002/sta4.245}, abstractNote={Covariance estimation is essential yet underdeveloped for analysing multivariate functional data. We propose a fast covariance estimation method for multivariate sparse functional data using bivariate penalized splines. The tensor‐product B‐spline formulation of the proposed method enables a simple spectral decomposition of the associated covariance operator and explicit expressions of the resulting eigenfunctions as linear combinations of B‐spline bases, thereby dramatically facilitating subsequent principal component analysis. We derive a fast algorithm for selecting the smoothing parameters in covariance smoothing using leave‐one‐subject‐out cross‐validation. 
The method is evaluated with extensive numerical studies and applied to an Alzheimer's disease study with multiple longitudinal outcomes.}, number={1}, journal={STAT}, author={Li, Cai and Xiao, Luo and Luo, Sheng}, year={2020} } @article{xiao_nan_2020, title={Uniform convergence of penalized splines}, volume={9}, ISSN={["2049-1573"]}, DOI={10.1002/sta4.297}, abstractNote={Penalized splines are popular for nonparametric regression. We establish the minimax rate optimality of penalized splines for uniform convergence, thus improving the existing rate in the literature. The result is applicable to several types of penalized splines that are commonly used and holds under mild conditions on the design points.}, number={1}, journal={STAT}, author={Xiao, Luo and Nan, Zhe}, year={2020} } @article{chen_xiao_staicu_2019, title={A smoothing-based goodness-of-fit test of covariance for functional data}, volume={75}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13005}, abstractNote={Abstract}, number={2}, journal={BIOMETRICS}, publisher={Wiley}, author={Chen, Stephanie T. and Xiao, Luo and Staicu, Ana-Maria}, year={2019}, month={Jun}, pages={562–571} } @article{xiao_2019, title={Asymptotic theory of penalized splines}, volume={13}, ISSN={["1935-7524"]}, DOI={10.1214/19-EJS1541}, abstractNote={The paper gives a unified study of the large sample asymptotic theory of penalized splines including the O-splines using B-splines and an integrated squared derivative penalty [22], the P-splines which use B-splines and a discrete difference penalty [13], and the T-splines which use truncated polynomials and a ridge penalty [24]. Extending existing results for O-splines [7], it is shown that, depending on the number of knots and appropriate smoothing parameters, the L2 risk bounds of penalized spline estimators are rate-wise similar to either those of regression splines or to those of smoothing splines and could each attain the optimal minimax rate of convergence [32]. 
In addition, convergence rate of the L ∞ risk bound, and local asymptotic bias and variance are derived for all three types of penalized splines.}, number={1}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Xiao, Luo}, year={2019}, pages={747–794} } @article{li_xiao_2019, title={Optimal design for classification of functional data}, volume={12}, DOI={10.1002/cjs.11531}, abstractNote={Abstract}, journal={Canadian Journal of Statistics}, publisher={Wiley}, author={Li, Cai and Xiao, Luo}, year={2019} } @article{anderson_xiao_checkley_2019, title={Using data from multiple studies to develop a child growth correlation matrix}, volume={38}, ISSN={["1097-0258"]}, DOI={10.1002/sim.7696}, abstractNote={In many countries, the monitoring of child growth does not occur in a regular manner, and instead, we may have to rely on sporadic observations that are subject to substantial measurement error. In these countries, it can be difficult to identify patterns of poor growth, and faltering children may miss out on essential health interventions. The contribution of this paper is to provide a framework for pooling together multiple datasets, thus allowing us to overcome the issue of sparse data and provide improved estimates of growth. We use data from multiple longitudinal growth studies to construct a common correlation matrix that can be used in estimation and prediction of child growth. We propose a novel 2‐stage approach: In stage 1, we construct a raw matrix via a set of univariate meta‐analyses, and in stage 2, we smooth this raw matrix to obtain a more realistic correlation matrix. The methodology is illustrated using data from 16 child growth studies from the Bill and Melinda Gates Foundation's Healthy Birth Growth and Development knowledge integration project and identifies strong correlation for both height and weight between the ages of 4 and 12 years. 
We use a case study to provide an example of how this matrix can be used to help compute growth measures.}, number={19}, journal={STATISTICS IN MEDICINE}, publisher={Wiley}, author={Anderson, Craig and Xiao, Luo and Checkley, William}, year={2019}, month={Aug}, pages={3540–3554} } @article{park_xiao_willbur_staicu_jumbe_2018, title={A joint design for functional data with application to scheduling ultrasound scans}, volume={122}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2018.01.009}, abstractNote={A joint design for sampling functional data is proposed to achieve optimal prediction of both functional data and a scalar outcome. The motivating application is fetal growth, where the objective is to determine the optimal times to collect ultrasound measurements in order to recover fetal growth trajectories and to predict child birth outcomes. The joint design is formulated using an optimization criterion and implemented in a pilot study. Performance of the proposed design is evaluated via simulation study and application to fetal ultrasound data.}, journal={COMPUTATIONAL STATISTICS \& DATA ANALYSIS}, publisher={Elsevier BV}, author={Park, So Young and Xiao, Luo and Willbur, Jayson D. and Staicu, Ana-Maria and Jumbe, N. L'ntshotshole}, year={2018}, month={Jun}, pages={101–114} } @article{xiao_2018, title={Asymptotics of bivariate penalised splines}, volume={31}, ISSN={1048-5252 1029-0311}, url={http://dx.doi.org/10.1080/10485252.2018.1563295}, DOI={10.1080/10485252.2018.1563295}, abstractNote={We study the class of bivariate penalised splines that use tensor product splines and a smoothness penalty. Similar to Claeskens, G., Krivobokova, T., and Opsomer, J.D. 
[(2009), ‘Asymptotic Properties of Penalised Spline Estimators’, Biometrika, 96(3), 529–544] for the univariate penalised splines, we show that, depending on the number of knots and penalty, the global asymptotic convergence rate of bivariate penalised splines is either similar to that of tensor product regression splines or to that of thin plate splines. In each scenario, the bivariate penalised splines are found rate optimal in the sense of Stone, C.J. [(12, 1982), ‘Optimal Global Rates of Convergence for Nonparametric Regression’, The Annals of Statistics, 10(4), 1040–1053] for a corresponding class of functions with appropriate smoothness. For the scenario where a small number of knots is used, we obtain expressions for the local asymptotic bias and variance and derive the point-wise and uniform asymptotic normality. The theoretical results are applicable to tensor product regression splines.}, number={2}, journal={Journal of Nonparametric Statistics}, publisher={Informa UK Limited}, author={Xiao, Luo}, year={2018}, month={Dec}, pages={289–314} } @article{leroux_xiao_crainiceanu_checkley_2018, title={Dynamic prediction in functional concurrent regression with an application to child growth}, volume={37}, ISSN={["1097-0258"]}, DOI={10.1002/sim.7582}, abstractNote={In many studies, it is of interest to predict the future trajectory of subjects based on their historical data, referred to as dynamic prediction. Mixed effects models have traditionally been used for dynamic prediction. However, the commonly used random intercept and slope model is often not sufficiently flexible for modeling subject‐specific trajectories. In addition, there may be useful exposures/predictors of interest that are measured concurrently with the outcome, complicating dynamic prediction. To address these problems, we propose a dynamic functional concurrent regression model to handle the case where both the functional response and the functional predictors are irregularly measured. 
Currently, such a model cannot be fit by existing software. We apply the model to dynamically predict children's length conditional on prior length, weight, and baseline covariates. Inference on model parameters and subject‐specific trajectories is conducted using the mixed effects representation of the proposed model. An extensive simulation study shows that the dynamic functional regression model provides more accurate estimation and inference than existing methods. Methods are supported by fast, flexible, open source software that uses heavily tested smoothing techniques.}, number={8}, journal={STATISTICS IN MEDICINE}, publisher={Wiley}, author={Leroux, Andrew and Xiao, Luo and Crainiceanu, Ciprian and Checkley, William}, year={2018}, month={Apr}, pages={1376–1388} } @article{xiao_li_checkley_crainiceanu_2018, title={Fast covariance estimation for sparse functional data}, volume={28}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-017-9744-8}, abstractNote={Smoothing of noisy sample covariances is an important component in functional data analysis. We propose a novel covariance smoothing method based on penalized splines and associated software. The proposed method is a bivariate spline smoother that is designed for covariance smoothing and can be used for sparse functional or longitudinal data. We propose a fast algorithm for covariance smoothing using leave-one-subject-out cross-validation. Our simulations show that the proposed method compares favorably against several commonly used methods. 
The method is applied to a study of child growth led by one of coauthors and to a public dataset of longitudinal CD4 counts.}, number={3}, journal={STATISTICS AND COMPUTING}, publisher={Springer Nature}, author={Xiao, Luo and Li, Cai and Checkley, William and Crainiceanu, Ciprian}, year={2018}, month={May}, pages={511–522} } @article{xiao_li_checkley_crainiceanu_2018a, title={Fast covariance estimation for sparse functional data (vol 28, pg 511, 2017)}, volume={28}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-017-9768-0}, number={3}, journal={STATISTICS AND COMPUTING}, author={Xiao, Luo and Li, Cai and Checkley, William and Crainiceanu, Ciprian}, year={2018}, month={May}, pages={523–523} } @article{grigsby_di_leroux_zipunnikov_xiao_crainiceanu_checkley_2018, title={Novel metrics for growth model selection}, volume={15}, journal={Emerging Themes in Epidemiology}, author={Grigsby, M. R. and Di, J. R. and Leroux, A. and Zipunnikov, V. and Xiao, L. and Crainiceanu, C. and Checkley, W.}, year={2018} } @misc{varma_dey_leroux_di_urbanek_xiao_zipunnikov_2018, title={Total volume of physical activity: TAC, TLAC or TAC(lambda)}, volume={106}, ISSN={["1096-0260"]}, DOI={10.1016/j.ypmed.2017.10.028}, abstractNote={Higher physical activity levels are associated with reduced cognitive decline among older adults; however, current understanding of underlying brain mechanisms is limited. This cross-sectional study investigated the relationship between actigraphy-estimated total volume of physical activity (TVPA) and magnetic resonance imaging (MRI) measures of white matter hyperintensities (WMH), and functional and structural brain connectivity, measured by resting-state functional MRI and diffusion tensor imaging. Study participants (N = 156, mean age = 71 years) included 136 with normal cognition and 20 with Mild Cognitive Impairment. 
Higher TVPA was associated with greater functional connectivity within the default-mode network and greater network modularity (a measure of network specialization), as well as with greater anisotropy and lower radial diffusion in white matter, suggesting better structural connectivity. These associations with functional and structural connectivity were independent of one another and independent of the level of vascular risk, APOE-ε4 status, cognitive reserve, and WMH volume, which were not associated with TVPA. Findings suggest that physical activity is beneficial for brain connectivity among older individuals with varying levels of risk for cognitive decline.}, journal={PREVENTIVE MEDICINE}, publisher={Elsevier BV}, author={Varma, Vijay R. and Dey, Debangan and Leroux, Andrew and Di, Junrui and Urbanek, Jacek and Xiao, Luo and Zipunnikov, Vadim}, year={2018}, month={Jan}, pages={233–235} } @article{bai_di_xiao_evenson_lacroix_crainiceanu_buchner_2017, title={AN ACTIVITY INDEX FOR RAW ACCELEROMETRY DATA AND ITS APPLICATION IN OLDER ADULTS}, volume={1}, DOI={10.1093/geroni/igx004.4497}, abstractNote={Accelerometers have been widely deployed in public health studies in recent years and research has mainly focused on summarized metrics provided by accelerometers manufactures, such as the activity counts (AC). Such measures do not have a publicly available formula and can vary by device manufacturer. To address these problems, we developed the activity index (AI), a new metric for summarizing raw tri-axial accelerometry data, and compared the AI to AC’s performance for distinguishing various types of activities and estimating energy expenditure. The analysis was conducted using data from the Women’s Health Initiative, in which tri-axial raw acceleration data and energy expenditure were collected at the same time. ROC analyses indicated that AI better distinguished between different types of activities than AC. AI better associated with METs as well. 
The proposed AI provides a transparent and reliable way to summarize densely sampled raw acceleration data.}, number={suppl_1}, journal={Innovation in Aging}, publisher={Oxford University Press (OUP)}, author={Bai, J. and Di, C. and Xiao, L. and Evenson, K.R. and LaCroix, A. and Crainiceanu, C. and Buchner, D.M.}, year={2017}, month={Jun}, pages={1239–1239} } @article{varma_dey_leroux_di_urbanek_xiao_zipunnikov_2017, title={Re-evaluating the effect of age on physical activity over the lifespan}, volume={101}, ISSN={["1096-0260"]}, DOI={10.1016/j.ypmed.2017.05.030}, abstractNote={Advancements in accelerometer analytic and visualization techniques allow researchers to more precisely identify and compare critical periods of physical activity (PA) decline by age across the lifespan, and describe how daily PA patterns may vary across age groups. We used accelerometer data from the 2003–2006 cohorts of the National Health and Nutrition Examination Survey (NHANES) (n = 12,529) to quantify total PA as well as PA by intensity across the lifespan using sex-stratified, age specific percentile curves constructed using generalized additive models. We additionally estimated minute-to-minute diurnal PA using smoothed bivariate surfaces. We found that from childhood to adolescence (ages 6–19) across sex, PA is sharply lower by age partially due to a later initiation of morning PA. Total PA levels, at age 19 are comparable to levels at age 60. Contrary to prior evidence, during young adulthood (ages 20–30) total and light intensity PA increases by age and then stabilizes during midlife (ages 31–59) partially due to an earlier initiation of morning PA. We additionally found that males compared to females have an earlier lowering in PA by age at midlife and lower total PA, higher sedentary behavior, and lower light intensity PA in older adulthood; these trends seem to be driven by lower PA in the afternoon compared to females. 
Our results suggest a re-evaluation of how emerging adulthood may affect PA levels and the importance of considering time of day and sex differences when developing PA interventions.}, journal={PREVENTIVE MEDICINE}, publisher={Elsevier BV}, author={Varma, Vijay R. and Dey, Debangan and Leroux, Andrew and Di, Junrui and Urbanek, Jacek and Xiao, Luo and Zipunnikov, Vadim}, year={2017}, month={Aug}, pages={102–108} } @article{park_staicu_xiao_crainiceanu_2017, title={Simple fixed-effects inference for complex functional models}, volume={19}, ISSN={1465-4644 1468-4357}, url={http://dx.doi.org/10.1093/biostatistics/kxx026}, DOI={10.1093/biostatistics/kxx026}, abstractNote={We propose simple inferential approaches for the fixed effects in complex functional mixed effects models. We estimate the fixed effects under the independence of functional residuals assumption and then bootstrap independent units (e.g. subjects) to conduct inference on the fixed effects parameters. Simulations show excellent coverage probability of the confidence intervals and size of tests for the fixed effects model parameters. Methods are motivated by and applied to the Baltimore Longitudinal Study of Aging, though they are applicable to other studies that collect correlated functional data.}, number={2}, journal={Biostatistics}, publisher={Oxford University Press (OUP)}, author={Park, So Young and Staicu, Ana-Maria and Xiao, Luo and Crainiceanu, Ciprian M.}, year={2017}, month={Jun}, pages={137–152} } @article{huang_reiss_xiao_zipunnikov_lindquist_crainiceanu_2017, title={Two-way principal component analysis for matrix-variate data, with an application to functional magnetic resonance imaging data}, volume={18}, number={2}, journal={Biostatistics (Oxford, England)}, author={Huang, L. and Reiss, P. T. and Xiao, L. and Zipunnikov, V. and Lindquist, M. A. and Crainiceanu, C. 
M.}, year={2017}, pages={214–229} } @article{bai_di_xiao_evenson_lacroix_crainiceanu_buchner_2016, title={An Activity Index for Raw Accelerometry Data and Its Comparison with Other Activity Metrics}, volume={11}, url={https://doi.org/10.1371/journal.pone.0160644}, DOI={10.1371/journal.pone.0160644}, abstractNote={Accelerometers have been widely deployed in public health studies in recent years. While they collect high-resolution acceleration signals (e.g., 10–100 Hz), research has mainly focused on summarized metrics provided by accelerometers manufactures, such as the activity count (AC) by ActiGraph or Actical. Such measures do not have a publicly available formula, lack a straightforward interpretation, and can vary by software implementation or hardware type. To address these problems, we propose the physical activity index (AI), a new metric for summarizing raw tri-axial accelerometry data. We compared this metric with the AC and another recently proposed metric for raw data, Euclidean Norm Minus One (ENMO), against energy expenditure. The comparison was conducted using data from the Objective Physical Activity and Cardiovascular Health Study, in which 194 women 60–91 years performed 9 lifestyle activities in the laboratory, wearing a tri-axial accelerometer (ActiGraph GT3X+) on the hip set to 30 Hz and an Oxycon portable calorimeter, to record both tri-axial acceleration time series (converted into AI, AC, and ENMO) and oxygen uptake during each activity (converted into metabolic equivalents (METs)) at the same time. Receiver operating characteristic analyses indicated that both AI and ENMO were more sensitive to moderate and vigorous physical activities than AC, while AI was more sensitive to sedentary and light activities than ENMO. AI had the highest coefficients of determination for METs (0.72) and was a better classifier of physical activity intensity than both AC (for all intensity levels) and ENMO (for sedentary and light intensity). 
The proposed AI provides a novel and transparent way to summarize densely sampled raw accelerometry data, and may serve as an alternative to AC. The AI’s largely improved sensitivity on sedentary and light activities over AC and ENMO further demonstrate its advantage in studies with older adults.}, number={8}, journal={PLOS ONE}, publisher={Public Library of Science}, author={Bai, Jiawei and Di, Chongzhi and Xiao, Luo and Evenson, Kelly R. and LaCroix, Andrea Z. and Crainiceanu, Ciprian M. and Buchner, David M.}, year={2016}, month={Aug}, pages={1–14} } @article{hooker_ramsay_xiao_2016, title={CollocInfer: Collocation Inference in Differential Equation Models}, volume={75}, DOI={10.18637/jss.v075.i02}, abstractNote={This monograph details the implementation and use of the CollocInfer package in R for smoothing-based estimation of continuous-time nonlinear dynamic systems. These routines represent an extension of the generalized profiling methods in Ramsay, Hooker, Campbell, and Cao (2007) for estimating parameters in nonlinear ordinary differential equations. An interface to the fda package is included. The package also supports discrete-time systems. We describe the methodological and computational framework and the necessary steps to use the software. Equivalent functionality is available in MATLAB.}, number={2}, journal={Journal of Statistical Software}, publisher={Foundation for Open Access Statistics}, author={Hooker, Giles and Ramsay, James O. and Xiao, Luo}, year={2016} } @article{bien_bunea_xiao_2016, title={Convex Banding of the Covariance Matrix}, volume={111}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2015.1058265}, DOI={10.1080/01621459.2015.1058265}, abstractNote={We introduce a new sparse estimator of the covariance matrix for high-dimensional models in which the variables have a known ordering. 
Our estimator, which is the solution to a convex optimization problem, is equivalently expressed as an estimator that tapers the sample covariance matrix by a Toeplitz, sparsely banded, data-adaptive matrix. As a result of this adaptivity, the convex banding estimator enjoys theoretical optimality properties not attained by previous banding or tapered estimators. In particular, our convex banding estimator is minimax rate adaptive in Frobenius and operator norms, up to log factors, over commonly studied classes of covariance matrices, and over more general classes. Furthermore, it correctly recovers the bandwidth when the true covariance is exactly banded. Our convex formulation admits a simple and efficient algorithm. Empirical studies demonstrate its practical effectiveness and illustrate that our exactly banded estimator works well even when the true covariance matrix is only close to a banded matrix, confirming our theoretical results. Our method compares favorably with all existing methods, in terms of accuracy and speed. We illustrate the practical merits of the convex banding estimator by showing that it can be used to improve the performance of discriminant analysis for classifying sound recordings. Supplementary materials for this article are available online.}, number={514}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Bien, Jacob and Bunea, Florentina and Xiao, Luo}, year={2016}, month={Apr}, pages={834–845} } @article{xiao_zipunnikov_ruppert_crainiceanu_2016, title={Fast covariance estimation for high-dimensional functional data}, volume={26}, ISSN={0960-3174 1573-1375}, url={http://dx.doi.org/10.1007/S11222-014-9485-X}, DOI={10.1007/S11222-014-9485-X}, abstractNote={We propose two fast covariance smoothing methods and associated software that scale up linearly with the number of observations per function. 
Most available methods and software cannot smooth covariance matrices of dimension J > 500; a recently introduced sandwich smoother is an exception but is not adapted to smooth covariance matrices of large dimensions, such as J = 10, 000. We introduce two new methods that circumvent those problems: 1) a fast implementation of the sandwich smoother for covariance smoothing; and 2) a two-step procedure that first obtains the singular value decomposition of the data matrix and then smoothes the eigenvectors. These new approaches are at least an order of magnitude faster in high dimensions and drastically reduce computer memory requirements. The new approaches provide instantaneous (a few seconds) smoothing for matrices of dimension J = 10,000 and very fast (< 10 minutes) smoothing for J = 100, 000. R functions, simulations, and data analysis provide ready to use, reproducible, and scalable tools for practical data analysis of noisy high-dimensional functional data.}, number={1-2}, journal={Statistics and Computing}, publisher={Springer Science and Business Media LLC}, author={Xiao, Luo and Zipunnikov, Vadim and Ruppert, David and Crainiceanu, Ciprian}, year={2016}, month={Jan}, pages={409–421} } @article{xiao_he_koster_caserotti_lange-maia_glynn_harris_crainiceanu_2016, title={Movement prediction using accelerometers in a human population}, volume={72}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12382}, abstractNote={Summary}, number={2}, journal={BIOMETRICS}, publisher={Wiley-Blackwell}, author={Xiao, Luo and He, Bing and Koster, Annemarie and Caserotti, Paolo and Lange-Maia, Brittney and Glynn, Nancy W. and Harris, Tamara B. 
and Crainiceanu, Ciprian M.}, year={2016}, month={Jun}, pages={513–524} } @article{huang_reiss_xiao_zipunnikov_lindquist_crainiceanu_2016, title={Two-way principal component analysis for matrix-variate data, with an application to functional magnetic resonance imaging data}, volume={8}, DOI={10.1093/biostatistics/kxw040}, abstractNote={Many modern neuroimaging studies acquire large spatial images of the brain observed sequentially over time. Such data are often stored in the forms of matrices. To model these matrix-variate data we introduce a class of separable processes using explicit latent process modeling. To account for the size and two-way structure of the data, we extend principal component analysis to achieve dimensionality reduction at the individual level. We introduce necessary identifiability conditions for each model and develop scalable estimation procedures. The method is motivated by and applied to a functional magnetic resonance imaging study designed to analyze the relationship between pain and brain activity.}, journal={Biostatistics}, publisher={Oxford University Press (OUP)}, author={Huang, Lei and Reiss, Philip T. and Xiao, Luo and Zipunnikov, Vadim and Lindquist, Martin A. and Crainiceanu, Ciprian M.}, year={2016}, month={Aug}, pages={kxw040} } @article{yang_shmuelof_xiao_krakauer_caffo_2015, title={On tests of activation map dimensionality for fMRI-based studies of learning}, volume={9}, DOI={10.3389/fnins.2015.00085}, abstractNote={A methodology for investigating learning is developed using activation distributions, as opposed to standard voxel-level interaction tests. The approach uses tests of dimensionality to consider the ensemble of paired changes in voxel activation. The developed method allows for the investigation of non-focal and non-localized changes due to learning. In exchange for increased power to detect learning-based changes, this procedure sacrifices the localization information gained via voxel-level interaction testing. 
The test is demonstrated on an arc-pointing motor task for the study of motor learning, which served as the motivation for this methodological development. The proposed framework considers activation distribution, while the specific proposed test investigates linear tests of dimensionality. This paper includes: the development of the framework, a large scale simulation study, and the subsequent application to a study of motor learning in healthy adults. While the performance of the method was excellent when model assumptions held, complications arose in instances of massive numbers of null voxels or varying angles of principal dimension across subjects. Further analysis found that careful masking addressed the former concern, while an angle correction successfully resolved the latter. The simulation results demonstrated that the study of linear dimensionality is able to capture learning effects. The motivating data set used to illustrate the method evaluates two similar arc-pointing tasks, each over two sessions, with training on only one of the tasks in between sessions. The results suggests different activation distribution dimensionality when considering the trained and untrained tasks separately. Specifically, the untrained task evidences greater activation distribution dimensionality than the trained task. However, the direct comparison between the two tasks did not yield a significant result. The nature of the indication for greater dimensionality in the untrained task is explored and found to be non-linear variation in the data.}, journal={Frontiers in Neuroscience}, publisher={Frontiers Media SA}, author={Yang, Juemin and Shmuelof, Lior and Xiao, Luo and Krakauer, John W. 
and Caffo, Brian}, year={2015}, month={Apr} } @article{bunea_xiao_2015, title={On the sample covariance matrix estimator of reduced effective rank population matrices, with applications to fPCA}, volume={21}, DOI={10.3150/14-bej602}, abstractNote={This work provides a unified analysis of the properties of the sample covariance matrix $\Sigma_n$ over the class of $p\times p$ population covariance matrices $\Sigma$ of reduced effective rank $r_e(\Sigma)$. This class includes scaled factor models and covariance matrices with decaying spectrum. We consider $r_e(\Sigma)$ as a measure of matrix complexity, and obtain sharp minimax rates on the operator and Frobenius norm of $\Sigma_n-\Sigma$, as a function of $r_e(\Sigma)$ and $\|\Sigma\|_2$, the operator norm of $\Sigma$. With guidelines offered by the optimal rates, we define classes of matrices of reduced effective rank over which $\Sigma_n$ is an accurate estimator. Within the framework of these classes, we perform a detailed finite sample theoretical analysis of the merits and limitations of the empirical scree plot procedure routinely used in PCA. We show that identifying jumps in the empirical spectrum that consistently estimate jumps in the spectrum of $\Sigma$ is not necessarily informative for other goals, for instance for the selection of those sample eigenvalues and eigenvectors that are consistent estimates of their population counterparts. The scree plot method can still be used for selecting consistent eigenvalues, for appropriate threshold levels. We provide a threshold construction and also give a rule for checking the consistency of the corresponding sample eigenvectors. We specialize these results and analysis to population covariance matrices with polynomially decaying spectra, and extend it to covariance operators with polynomially decaying spectra. 
An application to fPCA illustrates how our results can be used in functional data analysis.}, number={2}, journal={Bernoulli}, publisher={Bernoulli Society for Mathematical Statistics and Probability}, author={Bunea, Florentina and Xiao, Luo}, year={2015}, month={May}, pages={1200–1230} } @article{xiao_thurston_ruppert_love_davidson_2014, title={Bayesian Models for Multiple Outcomes in Domains With Application to the Seychelles Child Development Study}, volume={109}, DOI={10.1080/01621459.2013.830070}, abstractNote={The Seychelles Child Development Study (SCDS) examines the effects of prenatal exposure to methylmercury on the functioning of the central nervous system. The SCDS data include 20 outcomes measured on 9-year-old children that can be classified broadly in four outcome classes or “domains”: cognition, memory, motor, and social behavior. Previous analyses and scientific theory suggest that these outcomes may belong to more than one of these domains, rather than only a single domain as is frequently assumed for modeling. We present a framework for examining the effects of exposure and other covariates when the outcomes may each belong to more than one domain and where we also want to learn about the assignment of outcomes to domains. Each domain is defined by a sentinel outcome, which is preassigned to that domain only. All other outcomes can belong to multiple domains and are not preassigned. Our model allows exposure and covariate effects to differ across domains and across outcomes within domains, and includes random subject-specific effects that model correlations between outcomes within and across domains. We take a Bayesian MCMC approach. Results from the Seychelles study and from extensive simulations show that our model can effectively determine sparse domain assignment, and at the same time give increased power to detect overall, domain-specific, and outcome-specific exposure and covariate effects relative to separate models for each endpoint. 
When fit to the Seychelles data, several outcomes were classified as partly belonging to domains other than their originally assigned domains. In retrospect, the new partial domain assignments are reasonable and, as we discuss, suggest important scientific insights about the nature of the outcomes. Checks of model misspecification were improved relative to a model that assumes each outcome is in a single domain. Supplementary materials for this article are available online.}, number={505}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Xiao, Luo and Thurston, Sally W. and Ruppert, David and Love, Tanzy M. T. and Davidson, Philip W.}, year={2014}, month={Jan}, pages={1–10} } @article{ma_xiao_wong_2014, title={Learning regulatory programs by threshold SVD regression}, volume={111}, DOI={10.1073/pnas.1417808111}, abstractNote={Significance}, number={44}, journal={Proceedings of the National Academy of Sciences}, publisher={Proceedings of the National Academy of Sciences}, author={Ma, X. and Xiao, L. and Wong, W. H.}, year={2014}, month={Oct}, pages={15675–15680} } @article{xiao_huang_schrack_ferrucci_zipunnikov_crainiceanu_2014, title={Quantifying the lifetime circadian rhythm of physical activity: a covariate-dependent functional approach}, volume={16}, DOI={10.1093/biostatistics/kxu045}, abstractNote={Objective measurement of physical activity using wearable devices such as accelerometers may provide tantalizing new insights into the association between activity and health outcomes. Accelerometers can record quasi-continuous activity information for many days and for hundreds of individuals. For example, in the Baltimore Longitudinal Study on Aging physical activity was recorded every minute for [Formula: see text] adults for an average of [Formula: see text] days per adult. 
An important scientific problem is to separate and quantify the systematic and random circadian patterns of physical activity as functions of time of day, age, and gender. To capture the systematic circadian pattern, we introduce a practical bivariate smoother and two crucial innovations: (i) estimating the smoothing parameter using leave-one-subject-out cross validation to account for within-subject correlation and (ii) introducing fast computational techniques that overcome problems both with the size of the data and with the cross-validation approach to smoothing. The age-dependent random patterns are analyzed by a new functional principal component analysis that incorporates both covariate dependence and multilevel structure. For the analysis, we propose a practical and very fast trivariate spline smoother to estimate covariate-dependent covariances and their spectra. Results reveal several interesting, previously unknown, circadian patterns associated with human aging and gender.}, number={2}, journal={Biostatistics}, publisher={Oxford University Press (OUP)}, author={Xiao, L. and Huang, L. and Schrack, J. A. and Ferrucci, L. and Zipunnikov, V. and Crainiceanu, C. M.}, year={2014}, month={Oct}, pages={352–367} } @article{xiao_li_ruppert_2013, title={Fast bivariateP-splines: the sandwich smoother}, volume={75}, DOI={10.1111/rssb.12007}, abstractNote={Summary}, number={3}, journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, publisher={Wiley-Blackwell}, author={Xiao, Luo and Li, Yingxing and Ruppert, David}, year={2013}, month={Feb}, pages={577–599} }