@article{koner_park_staicu_2024, title={PROFIT: projection-based test in longitudinal functional data}, volume={1}, ISSN={["1029-0311"]}, DOI={10.1080/10485252.2023.2294885}, abstractNote={In many modern applications, a dependent functional response is observed for each subject over repeated time, leading to longitudinal functional data. In this paper, we propose a novel statistical procedure to test whether the mean function varies over time. Our approach relies on reducing the dimension of the response using data-driven orthogonal projections and it employs a likelihood-based hypothesis testing. We investigate the methodology theoretically and discuss a computationally efficient implementation. The proposed test maintains the type I error rate, and shows excellent power to detect departures from the null hypothesis in finite sample simulation studies. We apply our method to the longitudinal diffusion tensor imaging study of multiple sclerosis (MS) patients to formally assess whether the brain's health tissue, as summarized by fractional anisotropy (FA) profile, degrades over time during the study period.}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Koner, Salil and Park, So Young and Staicu, Ana-Maria}, year={2024}, month={Jan} } @article{battagliola_sorensen_tolver_staicu_2024, title={Quantile Regression for Longitudinal Functional Data with Application to Feed Intake of Lactating Sows}, volume={2}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-024-00601-5}, abstractNote={AbstractThis article focuses on the study of lactating sows, where the main interest is the influence of temperature, measured throughout the day, on the lower quantiles of the daily feed intake. We outline a model framework and estimation methodology for quantile regression in scenarios with longitudinal data and functional covariates. 
The quantile regression model uses a time-varying regression coefficient function to quantify the association between covariates and the quantile level of interest, and it includes subject-specific intercepts to incorporate within-subject dependence. Estimation relies on spline representations of the unknown coefficient functions and can be carried out with existing software. We introduce bootstrap procedures for bias adjustment and computation of standard errors. Analysis of the lactation data indicates, among others, that the influence of temperature increases during the lactation period.Supplementary materials accompanying this paper appear on-line.}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Battagliola, Maria Laura and Sorensen, Helle and Tolver, Anders and Staicu, Ana-Maria}, year={2024}, month={Feb} } @article{long_reich_staicu_meitzen_2023, title={A Nonparametric Test of Group Distributional Differences for Hierarchically Clustered Functional Data}, volume={79}, ISSN={0006-341X 1541-0420}, url={http://dx.doi.org/10.1111/biom.13846}, DOI={10.1111/biom.13846}, abstractNote={Abstract Biological sex and gender are critical variables in biomedical research, but are complicated by the presence of sex-specific natural hormone cycles, such as the estrous cycle in female rodents, typically divided into phases. A common feature of these cycles are fluctuating hormone levels that induce sex differences in many behaviors controlled by the electrophysiology of neurons, such as neuronal membrane potential in response to electrical stimulus, typically summarized using a priori defined metrics. In this paper, we propose a method to test for differences in the electrophysiological properties across estrous cycle phase without first defining a metric of interest. 
We do this by modeling membrane potential data in the frequency domain as realizations of a bivariate process, also depending on the electrical stimulus, by adopting existing methods for longitudinal functional data. We are then able to extract the main features of the bivariate signals through a set of basis function coefficients. We use these coefficients for testing, adapting methods for multivariate data to account for an induced hierarchical structure that is a product of the experimental design. We illustrate the performance of the proposed approach in simulations and then apply the method to experimental data.}, number={4}, journal={Biometrics}, publisher={Oxford University Press (OUP)}, author={Long, Alexander S. and Reich, Brian J. and Staicu, Ana-Maria and Meitzen, John}, year={2023}, month={Feb}, pages={3778–3791} } @article{sninsky_staicu_barnes_2023, title={In Acute Severe Ulcerative Colitis Patients Who Receive Rescue Therapy, Prior Maintenance Therapy and Day 3 C-Reactive Protein After Rescue Therapy Are Associated With 12-Month Colectomy Risk}, volume={9}, ISSN={["1536-4844"]}, DOI={10.1093/ibd/izad215}, abstractNote={Lay Summary In steroid-refractory patients with acute severe ulcerative colitis, the number of advanced therapies prior to admission and day 3 C-reactive protein post–rescue therapy is associated with a higher risk of colectomy within 12 months.}, journal={INFLAMMATORY BOWEL DISEASES}, author={Sninsky, Jared A. and Staicu, Ana-Maria and Barnes, Edward L.}, year={2023}, month={Sep} } @article{koner_staicu_2023, title={Second-Generation Functional Data}, volume={10}, ISSN={["2326-831X"]}, DOI={10.1146/annurev-statistics-032921-033726}, abstractNote={ Modern studies from a variety of fields record multiple functional observations according to either multivariate, longitudinal, spatial, or time series designs. 
We refer to such data as second-generation functional data because their analysis—unlike typical functional data analysis, which assumes independence of the functions—accounts for the complex dependence between the functional observations and requires more advanced methods. In this article, we provide an overview of the techniques for analyzing second-generation functional data with a focus on highlighting the key methodological intricacies that stem from the need for modeling complex dependence, compared with independent functional data. For each of the four types of second-generation functional data presented—multivariate functional data, longitudinal functional data, functional time series and spatially functional data—we discuss how the widely popular functional principal component analysis can be extended to these settings to define, identify main directions of variation, and describe dependence among the functions. In addition to modeling, we also discuss prediction, statistical inference, and application to clustering. We close by discussing future directions in this area. }, journal={ANNUAL REVIEW OF STATISTICS AND ITS APPLICATION}, author={Koner, Salil and Staicu, Ana-Maria}, year={2023}, pages={547–572} } @article{weishampel_staicu_rand_2023, title={Classification of social media users with generalized functional data analysis}, volume={179}, ISSN={["1872-7352"]}, url={https://doi.org/10.1016/j.csda.2022.107647}, DOI={10.1016/j.csda.2022.107647}, abstractNote={Technological advancement has made possible the collection of data from social media platforms at unprecedented speed and volume. Current methods for analyzing such data lack interpretability, are computationally intensive, or require a rigid data specification. Functional data analysis enables the development of a flexible, yet interpretable, modeling framework to extract lower-dimensional relevant features of a user's posting behavior on social media, based on their posting activity over time. 
The extracted features can then be used to discriminate a malicious user from a genuine one. The proposed methodology can classify a binary time series in a computationally efficient manner and provides more insights into the posting behavior of social media agents. Performance of the method is illustrated numerically in simulation studies and on a motivating Twitter data set. The developed methods are applicable to other social media data, such as Facebook, Instagram, Reddit, or TikTok, or any form of digital interaction where the user's posting behavior is indicative of their user class.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Weishampel, Anthony and Staicu, Ana -Maria and Rand, William}, year={2023}, month={Mar} } @article{li_wang_maity_staicu_2022, title={Inference in functional linear quantile regression}, volume={190}, ISSN={["0047-259X"]}, DOI={10.1016/j.jmva.2022.104985}, abstractNote={In this paper, we study statistical inference in functional quantile regression for scalar response and a functional covariate. Specifically, we consider a functional linear quantile regression model where the effect of the covariate on the quantile of the response is modeled through the inner product between the functional covariate and an unknown smooth regression parameter function that varies with the level of quantile. The objective is to test that the regression parameter is constant across several quantile levels of interest. The parameter function is estimated by combining ideas from functional principal component analysis and quantile regression. An adjusted Wald testing procedure is proposed for this hypothesis of interest, and its chi-square asymptotic null distribution is derived. The testing procedure is investigated numerically in simulations involving sparse and noisy functional covariates and in a capital bike share data application. 
The proposed approach is easy to implement and the R code is published online at https://github.com/xylimeng/fQR-testing.}, journal={JOURNAL OF MULTIVARIATE ANALYSIS}, author={Li, Meng and Wang, Kehui and Maity, Arnab and Staicu, Ana-Maria}, year={2022}, month={Jul} } @article{battagliola_sorensen_tolver_staicu_2022, title={A bias-adjusted estimator in quantile regression for clustered data}, volume={23}, ISSN={["2452-3062"]}, DOI={10.1016/j.ecosta.2021.07.003}, abstractNote={Quantile regression models with random effects are useful for studying associations between covariates and quantiles of the response distribution for clustered data. Parameter estimation is examined for a class of mixed-effects quantile regression models, with focus on settings with many but small clusters. The main contributions are the following: (i) documenting that existing methods may lead to severely biased estimators for fixed effects parameters; (ii) proposing a new two-step estimation methodology where predictions of the random effects are first computed by a pseudo likelihood approach (the LQMM method) and then used as offsets in standard quantile regression; (iii) proposing a novel bootstrap sampling procedure in order to reduce bias of the two-step estimator and compute confidence intervals. 
The proposed estimation and associated inference is assessed numerically through rigorous simulation studies and applied to an AIDS Clinical Trial Group (ACTG) study.}, journal={ECONOMETRICS AND STATISTICS}, author={Battagliola, Maria Laura and Sorensen, Helle and Tolver, Anders and Staicu, Ana-Maria}, year={2022}, month={Jul}, pages={165–186} } @article{cui_singh_staicu_reich_2021, title={Bayesian variable selection for high-dimensional rank data}, volume={5}, ISSN={["1099-095X"]}, DOI={10.1002/env.2682}, abstractNote={AbstractThe study of microbiomes has become a topic of intense interest in last several decades as the development of new sequencing technologies has made DNA data accessible across disciplines. In this paper, we analyze a global dataset to investigate environmental factors that affect topsoil microbiome. As yet, much associated work has focused on linking indicators of microbial health to specific outcomes in various fields, rather than understanding how external factors may influence the microbiome composition itself. This is partially due to limited statistical methods to model abundance counts. The counts are high‐dimensional, overdispersed, often zero‐inflated, and exhibit complex dependence structures. Additionally, the raw counts are often noisy and compositional, and thus are not directly comparable across samples. Often, practitioners transform the counts to presence–absence indicators, but this transformation discards much of the data. As an alternative, we propose transforming to taxa ranks and develop a Bayesian variable selection model that uses ranks to identify covariates that influence microbiome composition. We show by simulation that the proposed model outperforms competitors across various settings and particular improvement in recall for small magnitude and low prevalence covariates. 
When applied to the topsoil data, the proposed method identifies several factors that affect microbiome composition.}, journal={ENVIRONMETRICS}, author={Cui, Can and Singh, Susheela P. and Staicu, Ana-Maria and Reich, Brian J.}, year={2021}, month={May} } @article{xu_laber_staicu_lascelles_2021, title={Novel approach to modeling high-frequency activity data to assess therapeutic effects of analgesics in chronic pain conditions}, volume={11}, ISSN={["2045-2322"]}, DOI={10.1038/s41598-021-87304-w}, abstractNote={AbstractOsteoarthritis (OA) is a chronic condition often associated with pain, affecting approximately fourteen percent of the population, and increasing in prevalence. A globally aging population have made treating OA-associated pain as well as maintaining mobility and activity a public health priority. OA affects all mammals, and the use of spontaneous animal models is one promising approach for improving translational pain research and the development of effective treatment strategies. Accelerometers are a common tool for collecting high-frequency activity data on animals to study the effects of treatment on pain related activity patterns. There has recently been increasing interest in their use to understand treatment effects in human pain conditions. However, activity patterns vary widely across subjects; furthermore, the effects of treatment may manifest in higher or lower activity counts or in subtler ways like changes in the frequency of certain types of activities. We use a zero inflated Poisson hidden semi-Markov model to characterize activity patterns and subsequently derive estimators of the treatment effect in terms of changes in activity levels or frequency of activity type. 
We demonstrate the application of our model, and its advance over traditional analysis methods, using data from a naturally occurring feline OA-associated pain model.}, number={1}, journal={SCIENTIFIC REPORTS}, author={Xu, Zekun and Laber, Eric and Staicu, Ana-Maria and Lascelles, B. Duncan X.}, year={2021}, month={Apr} } @article{roy_reich_guinness_shinohara_staicu_2021, title={Spatial Shrinkage Via the Product Independent Gaussian Process Prior}, volume={6}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2021.1923512}, abstractNote={Abstract We study the problem of sparse signal detection on a spatial domain. We propose a novel approach to model continuous signals that are sparse and piecewise-smooth as the product of independent Gaussian (PING) processes with a smooth covariance kernel. The smoothness of the PING process is ensured by the smoothness of the covariance kernels of the Gaussian components in the product, and sparsity is controlled by the number of components. The bivariate kurtosis of the PING process implies that more components in the product results in the thicker tail and sharper peak at zero. We develop an efficient computation algorithm based on spectral methods. The simulation results demonstrate superior estimation using the PING prior over Gaussian process prior for different image regressions. We apply our method to a longitudinal magnetic resonance imaging dataset to detect the regions that are affected by multiple sclerosis computation in this domain. Supplementary materials for this article are available online.}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Roy, Arkaprava and Reich, Brian J. and Guinness, Joseph and Shinohara, Russell T. 
and Staicu, Ana-Maria}, year={2021}, month={Jun} } @inbook{xu_laber_staicu_2020, place={Cham, Switzerland}, series={Emerging Topics of Statistics and Biostatistics Book Series}, title={Hierarchical continuous time hidden Markov model, with application in zero-inflated accelerometer data}, ISBN={978-3-030-33416-1}, DOI={10.1007/978-3-030-33416-1_7}, abstractNote={Wearable devices including accelerometers are increasingly being used to collect high-frequency human activity data in situ. There is tremendous potential to use such data to inform medical decision making and public health policies. However, modeling such data is challenging as they are high-dimensional, heterogeneous, and subject to informative missingness, e.g., zero readings when the device is removed by the participant. We propose a flexible and extensible continuous-time hidden Markov model to extract meaningful activity patterns from human accelerometer data. To facilitate estimation with massive data we derive an efficient learning algorithm that exploits the hierarchical structure of the parameters indexing the proposed model. We also propose a bootstrap procedure for interval estimation. The proposed methods are illustrated using data from the 2003–2004 and 2005–2006 National Health and Nutrition Examination Survey.}, booktitle={Statistical Modeling for Biomedical Research: Contemporary Topics and Voices in the Field}, publisher={Springer}, author={Xu, Z. and Laber, E.B. and Staicu, A.}, editor={Zhao, Y. 
and Chen, D.GEditors}, year={2020}, pages={125–142}, collection={Emerging Topics of Statistics and Biostatistics Book Series} } @article{stallrich_islam_staicu_crouch_pan_huang_2020, title={OPTIMAL EMG PLACEMENT FOR A ROBOTIC PROSTHESIS CONTROLLER WITH SEQUENTIAL, ADAPTIVE FUNCTIONAL ESTIMATION (SAFE)}, volume={14}, ISSN={["1932-6157"]}, url={http://dx.doi.org/10.1214/20-aoas1324}, DOI={10.1214/20-AOAS1324}, abstractNote={Robotic hand prostheses require a controller to decode muscle contraction information, such as electromyogram (EMG) signals, into the user’s desired hand movement. State-of-the-art decoders demand extensive training, require data from a large number of EMG sensors, and are prone to poor predictions. Biomechanical models of a single movement degree-of-freedom tell us that relatively few muscles, and hence fewer EMG sensors, are needed to predict movement. We propose a novel decoder based on a dynamic, functional linear model with velocity or acceleration as its response and the recent past EMG signals as functional covariates. The effect of each EMG signal varies with the recent position to account for biomechanical features of hand movement, increasing the predictive capability of a single EMG signal compared to existing decoders. The effects are estimated with a multi-stage, adaptive estimation procedure we call Sequential Adaptive Functional Estimation (SAFE). Starting with 16 potential EMG sensors, our method correctly identifies the few EMG signals that are known to be important for an able-bodied subject. 
Furthermore, the estimated effects are interpretable and can significantly improve understanding and development of robotic hand prostheses.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, publisher={Institute of Mathematical Statistics}, author={Stallrich, Jonathan and Islam, Md Nazmul and Staicu, Ana-Maria and Crouch, Dustin and Pan, Lizhi and Huang, He}, year={2020}, month={Sep}, pages={1164–1181} } @article{hazra_reich_staicu_2020, title={A multivariate spatial skew-t process for joint modeling of extreme precipitation indexes}, volume={31}, ISSN={["1099-095X"]}, DOI={10.1002/env.2602}, abstractNote={AbstractTo study trends in extreme precipitation across the United States over the years 1951–2017, we analyze 10 climate indexes that represent extreme precipitation, such as annual maximum of daily precipitation and annual maximum of consecutive five‐day average precipitation. We consider the gridded data produced by the CLIMDEX project (http://www.climdex.org/gewocs.html), constructed using daily precipitation data. These indexes exhibit spatial and mutual dependence. In this paper, we propose a multivariate spatial skew‐t process for joint modeling of extreme precipitation indexes and discuss its theoretical properties. The model framework allows Bayesian inference while maintaining a computational time that is competitive with common multivariate geostatistical approaches. In a numerical study, we find that the proposed model outperforms several simpler alternatives in terms of various model selection criteria. We apply the proposed model to estimate the average decadal change in the extreme precipitation indexes throughout the United States and find several significant local changes.}, number={3}, journal={ENVIRONMETRICS}, author={Hazra, Arnab and Reich, Brian J. 
and Staicu, Ana-Maria}, year={2020}, month={May} } @article{singh_staicu_dunn_fierer_reich_2019, title={A nonparametric spatial test to identify factors that shape a microbiome}, volume={13}, ISSN={1932-6157}, url={http://dx.doi.org/10.1214/19-aoas1262}, DOI={10.1214/19-aoas1262}, abstractNote={The advent of high-throughput sequencing technologies has made data from DNA material readily available, leading to a surge of microbiome-related research establishing links between markers of microbiome health and specific outcomes. However, to harness the power of microbial communities we must understand not only how they affect us, but also how they can be influenced to improve outcomes. This area has been dominated by methods that reduce community composition to summary metrics, which can fail to fully exploit the complexity of community data. Recently, methods have been developed to model the abundance of taxa in a community, but they can be computationally intensive and do not account for spatial effects underlying microbial settlement. These spatial effects are particularly relevant in the microbiome setting because we expect communities that are close together to be more similar than those that are far apart. In this paper, we propose a flexible Bayesian spike-and-slab variable selection model for presence-absence indicators that accounts for spatial dependence and cross-dependence between taxa while reducing dimensionality in both directions. We show by simulation that in the presence of spatial dependence, popular distance-based hypothesis testing methods fail to preserve their advertised size, and the proposed method improves variable selection. Finally, we present an application of our method to an indoor fungal community found with homes across the contiguous United States.}, number={4}, journal={The Annals of Applied Statistics}, publisher={Institute of Mathematical Statistics}, author={Singh, Susheela P. and Staicu, Ana-Maria and Dunn, Robert R. 
and Fierer, Noah and Reich, Brian J.}, year={2019}, month={Dec}, pages={2341–2362} } @article{park_li_benavides_heugten_staicu_2019, title={Conditional Analysis for Mixed Covariates, with Application to Feed Intake of Lactating Sows}, volume={2019}, ISSN={["1687-9538"]}, DOI={10.1155/2019/3743762}, abstractNote={We propose a novel modeling framework to study the effect of covariates of various types on the conditional distribution of the response. The methodology accommodates flexible model structure, allows for joint estimation of the quantiles at all levels, and provides a computationally efficient estimation algorithm. Extensive numerical investigation confirms good performance of the proposed method. The methodology is motivated by and applied to a lactating sow study, where the primary interest is to understand how the dynamic change of minute-by-minute temperature in the farrowing rooms within a day (functional covariate) is associated with low quantiles of feed intake of lactating sows, while accounting for other sow-specific information (vector covariate).}, journal={JOURNAL OF PROBABILITY AND STATISTICS}, author={Park, S. Y. and Li, C. and Benavides, S. M. Mendoza and Heugten, E. and Staicu, A. M.}, year={2019}, month={Jul} } @article{staicu_islam_dumitru_heugten_2019, title={Longitudinal dynamic functional regression}, volume={69}, ISBN={1467-9876}, ISSN={0035-9254 1467-9876}, url={http://dx.doi.org/10.1111/rssc.12376}, DOI={10.1111/rssc.12376}, abstractNote={SummaryThe paper develops a parsimonious modelling framework to study the time-varying association between scalar outcomes and functional predictors observed at many instances, in longitudinal studies. The methods enable us to reconstruct the full trajectory of the response and are applicable to Gaussian and non-Gaussian responses. 
The idea is to model the time-varying functional predictors by using orthogonal basis functions and to expand the time-varying regression coefficient by using the same basis. Numerical investigation through simulation studies and data analysis show excellent performance in terms of accurate prediction and efficient computations, when compared with existing alternatives. The methods are inspired and applied to an animal science application, where of interest is to study the association between the feed intake of lactating sows and the minute-by-minute temperature throughout the 21 days of their lactation period. R code and an R illustration are provided.}, number={1}, journal={Journal of the Royal Statistical Society: Series C (Applied Statistics)}, publisher={Wiley}, author={Staicu, Ana‐Maria and Islam, Md Nazmul and Dumitru, Raluca and Heugten, Eric van}, year={2019}, month={Sep}, pages={25–46} } @article{tekbudak_alfaro-córdoba_maity_staicu_2018, title={A comparison of testing methods in scalar-on-function regression}, volume={103}, ISSN={1863-8171 1863-818X}, url={http://dx.doi.org/10.1007/S10182-018-00337-X}, DOI={10.1007/S10182-018-00337-X}, abstractNote={A scalar-response functional model describes the association between a scalar response and a set of functional covariates. An important problem in the functional data literature is to test nullity or linearity of the effect of the functional covariate in the context of scalar-on-function regression. This article provides an overview of the existing methods for testing both the null hypotheses that there is no relationship and that there is a linear relationship between the functional covariate and scalar response, and a comprehensive numerical comparison of their performance. The methods are compared for a variety of realistic scenarios: when the functional covariate is observed at dense or sparse grids and measurements include noise or not. 
Finally, the methods are illustrated on the Tecator data set.}, number={3}, journal={AStA Advances in Statistical Analysis}, publisher={Springer Science and Business Media LLC}, author={Tekbudak, Merve Yasemin and Alfaro-Córdoba, Marcela and Maity, Arnab and Staicu, Ana-Maria}, year={2018}, month={Oct}, pages={411–436} } @article{king_staicu_davis_reich_eder_2018, title={A functional data analysis of spatiotemporal trends and variation in fine particulate matter}, volume={184}, ISSN={["1873-2844"]}, DOI={10.1016/j.atmosenv.2018.04.001}, abstractNote={In this paper we illustrate the application of modern functional data analysis methods to study the spatiotemporal variability of particulate matter components across the United States. The approach models the pollutant annual profiles in a way that describes the dynamic behavior over time and space. This new technique allows us to predict yearly profiles for locations and years at which data are not available and also offers dimension reduction for easier visualization of the data. Additionally it allows us to study changes of pollutant levels annually or for a particular season. We apply our method to daily concentrations of two particular components of PM2.5 measured by two networks of monitoring sites across the United States from 2003 to 2015. Our analysis confirms existing findings and additionally reveals new trends in the change of the pollutants across seasons and years that may not be as easily determined from other common approaches such as Kriging.}, journal={ATMOSPHERIC ENVIRONMENT}, author={King, Meredith C. and Staicu, Ana-Maria and Davis, Jerry M. and Reich, Brian J. 
and Eder, Brian}, year={2018}, month={Jul}, pages={233–243} } @article{park_xiao_willbur_staicu_jumbe_2018, title={A joint design for functional data with application to scheduling ultrasound scans}, volume={122}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2018.01.009}, abstractNote={A joint design for sampling functional data is proposed to achieve optimal prediction of both functional data and a scalar outcome. The motivating application is fetal growth, where the objective is to determine the optimal times to collect ultrasound measurements in order to recover fetal growth trajectories and to predict child birth outcomes. The joint design is formulated using an optimization criterion and implemented in a pilot study. Performance of the proposed design is evaluated via simulation study and application to fetal ultrasound data.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, publisher={Elsevier BV}, author={Park, So Young and Xiao, Luo and Willbur, Jayson D. and Staicu, Ana-Maria and Jumbe, N. L'ntshotshole}, year={2018}, month={Jun}, pages={101–114} } @article{chen_xiao_staicu_2019, title={A smoothing-based goodness-of-fit test of covariance for functional data}, volume={75}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13005}, abstractNote={Abstract Functional data methods are often applied to longitudinal data as they provide a more flexible way to capture dependence across repeated observations. However, there is no formal testing procedure to determine if functional methods are actually necessary. We propose a goodness-of-fit test for comparing parametric covariance functions against general nonparametric alternatives for both irregularly observed longitudinal data and densely observed functional data. We consider a smoothing-based test statistic and approximate its null distribution using a bootstrap procedure. 
We focus on testing a quadratic polynomial covariance induced by a linear mixed effects model and the method can be used to test any smooth parametric covariance function. Performance and versatility of the proposed test is illustrated through a simulation study and three data applications.}, number={2}, journal={BIOMETRICS}, publisher={Wiley}, author={Chen, Stephanie T. and Xiao, Luo and Staicu, Ana-Maria}, year={2019}, month={Jun}, pages={562–571} } @article{kim_maity_staicu_2018, title={Additive nonlinear functional concurrent model}, volume={11}, ISSN={1938-7989 1938-7997}, url={http://dx.doi.org/10.4310/sii.2018.v11.n4.a11}, DOI={10.4310/sii.2018.v11.n4.a11}, abstractNote={We propose a flexible regression model to study the association between a functional response and multiple functional covariates that are observed on the same domain. Specifically, we relate the mean of the current response to current values of the covariates by a sum of smooth unknown bivariate functions, where each of the functions depends on the current value of the covariate and the time point itself. In this framework, we develop estimation methodology that accommodates realistic scenarios where the covariates are sampled with or without error on a sparse and irregular design, and prediction that accounts for unknown model correlation structure. We also discuss the problem of testing the null hypothesis that the covariate has no association with the response. The proposed methods are evaluated numerically through simulations and two real data applications.}, number={4}, journal={Statistics and Its Interface}, publisher={International Press of Boston}, author={Kim, Janet S. and Maity, Arnab and Staicu, Ana-Maria}, year={2018}, pages={669–685} } @article{geden_staicu_feng_2018, title={Reduced Target Facilitation and Increased Distractor Suppression During Mind Wandering}, volume={65}, ISSN={["2190-5142"]}, DOI={10.1027/1618-3169/a000417}, abstractNote={ Abstract. 
The perceptual decoupling hypothesis suggests a general mechanism that while mind wandering, our attention is detached from our environment, resulting in diminished processing of external stimuli. This study focused on examining two possible specific mechanisms: the global suppression of all external stimuli, and a combination of reduced target facilitation and increased distractor suppression. An attentional capture task was used in which certain trials measured distractor suppression effects and others assessed target facilitation effects. The global suppression account predicts negative impacts on both types of trials, while the combined mechanisms of reduced target facilitation and increased distractor suppression suggest that only target-present trials would be affected. Results showed no cost of mind wandering on target-absent trials, but significant distractor suppression and target facilitation effects during mind wandering on target-present trials. These findings suggest that rather than perceptual decoupling globally suppressing all stimuli, it is more selective, falling in line with evidence on strong top-down modulation.},
  number = {6},
  journal = {Experimental Psychology},
  author = {Geden, Michael and Staicu, Ana-Maria and Feng, Jing},
  year = {2018},
  month = nov,
  pages = {345--352},
}

@article{hazra_reich_reich_shinohara_staicu_2019,
  title = {A spatio-temporal model for longitudinal image-on-image regression},
  volume = {11},
  ISSN = {1867-1772},
  DOI = {10.1007/s12561-017-9206-z},
  abstractNote = {Neurologists and radiologists often use magnetic resonance imaging (MRI) in the management of subjects with multiple sclerosis (MS) because it is sensitive to inflammatory and demyelinative changes in the white matter of the brain and spinal cord. Two conventional modalities used for identifying lesions are T1-weighted (T1) and T2-weighted fluid-attenuated inversion recovery (FLAIR) imaging, which are used clinically and in research studies.
Magnetization transfer ratio (MTR), which is available only in research settings, is an advanced MRI modality that has been used extensively for measuring disease-related demyelination both in white matter lesions as well across normal-appearing white matter. Acquiring MTR is not standard in clinical practice, due to the increased scan time and cost. Hence, prediction of MTR based on the modalities T1 and FLAIR could have great impact on the availability of these promising measures for improved patient management. We propose a spatio-temporal regression model for image response and image predictors that are acquired longitudinally, with images being co-registered within the subject but not across subjects. The model is additive, with the response at a voxel being dependent on the available covariates not only through the current voxel but also on the imaging information from the voxels within a neighboring spatial region as well as their temporal gradients. We propose a dynamic Bayesian estimation procedure that updates the parameters of the subject-specific regression model as data accummulates. To bypass the computational challenges associated with a Bayesian approach for high-dimensional imaging data, we propose an approximate Bayesian inference technique. We assess the model fitting and the prediction performance using longitudinally acquired MRI images from 46 MS patients.},
  number = {1},
  journal = {Statistics in Biosciences},
  author = {Hazra, A. and Reich, Brian J. and Reich, Daniel S. and Shinohara, Russell T.
and Staicu, Ana-Maria},
  year = {2019},
  pages = {22--46},
}

@article{kim_staicu_maity_carroll_ruppert_2018,
  title = {Additive Function-on-Function Regression},
  volume = {27},
  ISSN = {1537-2715},
  url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85045617567&partnerID=MN8TOARS},
  DOI = {10.1080/10618600.2017.1356730},
  abstractNote = {We study additive function-on-function regression where the mean response at a particular time point depends on the time point itself, as well as the entire covariate trajectory. We develop a computationally efficient estimation methodology based on a novel combination of spline bases with an eigenbasis to represent the trivariate kernel function. We discuss prediction of a new response trajectory, propose an inference procedure that accounts for total variability in the predicted response curves, and construct pointwise prediction intervals. The estimation/inferential procedure accommodates realistic scenarios, such as correlated error structure as well as sparse and/or irregular designs. We investigate our methodology in finite sample size through simulations and two real data applications. Supplementary material for this article is available online.},
  number = {1},
  journal = {Journal of Computational and Graphical Statistics},
  author = {Kim, Janet S. and Staicu, Ana-Maria and Maity, Arnab and Carroll, Raymond J. and Ruppert, David},
  year = {2018},
  pages = {234--244},
}

@article{reich_guinness_vandekar_shinohara_staicu_2018,
  title = {Fully {Bayesian} spectral methods for imaging data},
  volume = {74},
  ISSN = {1541-0420},
  DOI = {10.1111/biom.12782},
  abstractNote = {Medical imaging data with thousands of spatially correlated data points are common in many fields. Methods that account for spatial correlation often require cumbersome matrix evaluations which are prohibitive for data of this size, and thus current work has either used low-rank approximations or analyzed data in blocks.
We propose a method that accounts for nonstationarity, functional connectivity of distant regions of interest, and local signals, and can be applied to large multi-subject datasets using spectral methods combined with Markov Chain Monte Carlo sampling. We illustrate using simulated data that properly accounting for spatial dependence improves precision of estimates and yields valid statistical inference. We apply the new approach to study associations between cortical thickness and Alzheimer's disease, and find several regions of the cortex where patients with Alzheimer's disease are thinner on average than healthy controls.},
  number = {2},
  journal = {Biometrics},
  author = {Reich, Brian J. and Guinness, Joseph and Vandekar, Simon N. and Shinohara, Russell T. and Staicu, Ana-Maria},
  year = {2018},
  month = jun,
  pages = {645--652},
}

@article{laber_staicu_2018,
  title = {Functional Feature Construction for Individualized Treatment Regimes},
  volume = {113},
  ISSN = {1537-274X},
  DOI = {10.1080/01621459.2017.1321545},
  abstractNote = {Evidence-based personalized medicine formalizes treatment selection as an individualized treatment regime that maps up-to-date patient information into the space of possible treatments. Available patient information may include static features such race, gender, family history, genetic and genomic information, as well as longitudinal information including the emergence of comorbidities, waxing and waning of symptoms, side-effect burden, and adherence. Dynamic information measured at multiple time points before treatment assignment should be included as input to the treatment regime. However, subject longitudinal measurements are typically sparse, irregularly spaced, noisy, and vary in number across subjects. Existing estimators for treatment regimes require equal information be measured on each subject and thus standard practice is to summarize longitudinal subject information into a scalar, ad hoc summary during data preprocessing.
This reduction of the longitudinal information to a scalar feature precedes estimation of a treatment regime and is therefore not informed by subject outcomes, treatments, or covariates. Furthermore, we show that this reduction requires more stringent causal assumptions for consistent estimation than are necessary. We propose a data-driven method for constructing maximally prescriptive yet interpretable features that can be used with standard methods for estimating optimal treatment regimes. In our proposed framework, we treat the subject longitudinal information as a realization of a stochastic process observed with error at discrete time points. Functionals of this latent process are then combined with outcome models to estimate an optimal treatment regime. The proposed methodology requires weaker causal assumptions than Q-learning with an ad hoc scalar summary and is consistent for the optimal treatment regime. Supplementary materials for this article are available online.},
  number = {523},
  journal = {Journal of the American Statistical Association},
  author = {Laber, Eric B. and Staicu, Ana-Maria},
  year = {2018},
  pages = {1219--1227},
}

@article{staicu_reid_2017,
  title = {Interview with {Nancy Reid}},
  volume = {85},
  number = {3},
  journal = {International Statistical Review},
  author = {Staicu, Ana-Maria and Reid, Nancy},
  year = {2017},
  pages = {381--403},
}

@article{kang_reich_staicu_2018,
  title = {Scalar-on-image regression via the soft-thresholded {Gaussian} process},
  volume = {105},
  ISSN = {1464-3510},
  DOI = {10.1093/biomet/asx075},
  abstractNote = {This work concerns spatial variable selection for scalar‐on‐image regression. We propose a new class of Bayesian nonparametric models and develop an efficient posterior computational algorithm. The proposed soft‐thresholded Gaussian process provides large prior support over the class of piecewise‐smooth, sparse, and continuous spatially varying regression coefficient functions.
In addition, under some mild regularity conditions the soft‐thresholded Gaussian process prior leads to the posterior consistency for parameter estimation and variable selection for scalar‐on‐image regression, even when the number of predictors is larger than the sample size. The proposed method is compared to alternatives via simulation and applied to an electroencephalography study of alcoholism.},
  number = {1},
  journal = {Biometrika},
  author = {Kang, Jian and Reich, Brian J. and Staicu, Ana-Maria},
  year = {2018},
  month = mar,
  pages = {165--184},
}

@article{park_staicu_xiao_crainiceanu_2017,
  title = {Simple fixed-effects inference for complex functional models},
  volume = {19},
  ISSN = {1465-4644 1468-4357},
  url = {http://dx.doi.org/10.1093/biostatistics/kxx026},
  DOI = {10.1093/biostatistics/kxx026},
  abstractNote = {We propose simple inferential approaches for the fixed effects in complex functional mixed effects models. We estimate the fixed effects under the independence of functional residuals assumption and then bootstrap independent units (e.g. subjects) to conduct inference on the fixed effects parameters. Simulations show excellent coverage probability of the confidence intervals and size of tests for the fixed effects model parameters.
Methods are motivated by and applied to the Baltimore Longitudinal Study of Aging, though they are applicable to other studies that collect correlated functional data.},
  number = {2},
  journal = {Biostatistics},
  publisher = {Oxford University Press (OUP)},
  author = {Park, So Young and Staicu, Ana-Maria and Xiao, Luo and Crainiceanu, Ciprian M.},
  year = {2017},
  month = jun,
  pages = {137--152},
}

@article{gruen_alfaro-córdoba_thomson_worth_staicu_lascelles_2017,
  title = {The Use of Functional Data Analysis to Evaluate Activity in a Spontaneous Model of Degenerative Joint Disease Associated Pain in Cats},
  volume = {12},
  ISSN = {1932-6203},
  url = {http://dx.doi.org/10.1371/journal.pone.0169576},
  DOI = {10.1371/journal.pone.0169576},
  abstractNote = {Introduction and objectives Accelerometry is used as an objective measure of physical activity in humans and veterinary species. In cats, one important use of accelerometry is in the study of therapeutics designed to treat degenerative joint disease (DJD) associated pain, where it serves as the most widely applied objective outcome measure. These analyses have commonly used summary measures, calculating the mean activity per-minute over days and comparing between treatment periods. While this technique has been effective, information about the pattern of activity in cats is lost. In this study, functional data analysis was applied to activity data from client-owned cats with (n = 83) and without (n = 15) DJD. Functional data analysis retains information about the pattern of activity over the 24-hour day, providing insight into activity over time. We hypothesized that 1) cats without DJD would have higher activity counts and intensity of activity than cats with DJD; 2) that activity counts and intensity of activity in cats with DJD would be inversely correlated with total radiographic DJD burden and total orthopedic pain score; and 3) that activity counts and intensity would have a different pattern on weekends versus weekdays.
Results and conclusions Results showed marked inter-cat variability in activity. Cats exhibited a bimodal pattern of activity with a sharp peak in the morning and broader peak in the evening. Results further showed that this pattern was different on weekends than weekdays, with the morning peak being shifted to the right (later). Cats with DJD showed different patterns of activity from cats without DJD, though activity and intensity were not always lower; instead both the peaks and troughs of activity were less extreme than those of the cats without DJD. Functional data analysis provides insight into the pattern of activity in cats, and an alternative method for analyzing accelerometry data that incorporates fluctuations in activity across the day.},
  number = {1},
  journal = {PLOS ONE},
  publisher = {Public Library of Science (PLoS)},
  author = {Gruen, Margaret E. and Alfaro-Córdoba, Marcela and Thomson, Andrea E. and Worth, Alicia C. and Staicu, Ana-Maria and Lascelles, B. Duncan X.},
  editor = {Harezlak, Jaroslaw},
  year = {2017},
  month = jan,
  pages = {e0169576},
}

% Key carries a "b" suffix: geden_staicu_feng_2018 is already used by the
% Experimental Psychology article, and a repeated key makes the later entry unreachable.
@article{geden_staicu_feng_2018b,
  title = {The impacts of perceptual load and driving duration on mind wandering in driving},
  volume = {57},
  ISSN = {1369-8478},
  url = {http://dx.doi.org/10.1016/J.TRF.2017.07.004},
  DOI = {10.1016/J.TRF.2017.07.004},
  abstractNote = {A significant portion of the risk of driver distraction comes from the cognitive consequences of attention deviating from the current task. While distraction can be due to external stimulations such as flashing billboards or a ringing phone, simply engaging in internally-generated task-unrelated thoughts (i.e., mind wandering) could raise one's crash risk as well. Compared to the extensive efforts in recent years to understand the mechanisms of external distraction, relatively little is known about internal distraction such as mind wandering.
This study investigated how perceptual load and driving duration can impact both the rate of mind wandering and its costs on drivers' performance in vehicular control. Generalized additive mixed effects models were used to estimate these effects in both a lower perceptual load scenario and a higher perceptual load scenario in simulated driving. Our study found that, under a higher perceptual load, participants' minds wandered less often. Significant nonlinear effects for driving duration were found on vehicular control during mind wandering for both perceptual load conditions, while the effect of driving duration was linear for on-task periods. These results suggest that, while mind wandering, individuals' driving performance fluctuates greatly, which has significant implications on driving safety for individual drivers and overall traffic flow.},
  journal = {Transportation Research Part F: Traffic Psychology and Behaviour},
  publisher = {Elsevier BV},
  author = {Geden, Michael and Staicu, Ana-Maria and Feng, Jing},
  year = {2018},
  month = aug,
  pages = {75--83},
}

@article{pomann_staicu_lobaton_mejia_dewey_reich_sweeney_shinohara_2016,
  title = {A Lag Functional Linear Model for Prediction of Magnetization Transfer Ratio in Multiple Sclerosis Lesions},
  volume = {10},
  ISSN = {1932-6157},
  url = {http://dx.doi.org/10.1214/16-aoas981},
  DOI = {10.1214/16-aoas981},
  abstractNote = {We propose a lag functional linear model to predict a response using multiple functional predictors observed at discrete grids with noise. Two procedures are proposed to estimate the regression parameter functions: (1) an approach that ensures smoothness for each value of time using generalized cross-validation; and (2) a global smoothing approach using a restricted maximum likelihood framework. Numerical studies are presented to analyze predictive accuracy in many realistic scenarios.
The methods are employed to estimate a magnetic resonance imaging (MRI)-based measure of tissue damage (the magnetization transfer ratio, or MTR) in multiple sclerosis (MS) lesions, a disease that causes damage to the myelin sheaths around axons in the central nervous system. Our method of estimation of MTR within lesions is useful retrospectively in research applications where MTR was not acquired, as well as in clinical practice settings where acquiring MTR is not currently part of the standard of care. The model facilitates the use of commonly acquired imaging modalities to estimate MTR within lesions, and outperforms cross-sectional models that do not account for temporal patterns of lesion development and repair.},
  number = {4},
  journal = {Annals of Applied Statistics},
  publisher = {Institute of Mathematical Statistics},
  author = {Pomann, Gina-Maria and Staicu, Ana-Maria and Lobaton, Edgar J. and Mejia, Amanda F. and Dewey, Blake E. and Reich, Daniel S. and Sweeney, Elizabeth M. and Shinohara, Russell T.},
  year = {2016},
  month = dec,
  pages = {2325--2348},
}

@article{zhang_roell_truong_tanguay_reif_2017,
  title = {A data-driven weighting scheme for multivariate phenotypic endpoints recapitulates zebrafish developmental cascades},
  volume = {314},
  ISSN = {1096-0333},
  url = {https://dx.doi.org/10.1016/j.taap.2016.11.010},
  DOI = {10.1016/j.taap.2016.11.010},
  abstractNote = {Zebrafish have become a key alternative model for studying health effects of environmental stressors, partly due to their genetic similarity to humans, fast generation time, and the efficiency of generating high-dimensional systematic data. Studies aiming to characterize adverse health effects in zebrafish typically include several phenotypic measurements (endpoints). While there is a solid biomedical basis for capturing a comprehensive set of endpoints, making summary judgments regarding health effects requires thoughtful integration across endpoints.
Here, we introduce a Bayesian method to quantify the informativeness of 17 distinct zebrafish endpoints as a data-driven weighting scheme for a multi-endpoint summary measure, called weighted Aggregate Entropy (wAggE). We implement wAggE using high-throughput screening (HTS) data from zebrafish exposed to five concentrations of all 1060 ToxCast chemicals. Our results show that our empirical weighting scheme provides better performance in terms of the Receiver Operating Characteristic (ROC) curve for identifying significant morphological effects and improves robustness over traditional curve-fitting approaches. From a biological perspective, our results suggest that developmental cascade effects triggered by chemical exposure can be recapitulated by analyzing the relationships among endpoints. Thus, wAggE offers a powerful approach for analysis of multivariate phenotypes that can reveal underlying etiological processes.},
  journal = {Toxicology and Applied Pharmacology},
  publisher = {Elsevier BV},
  author = {Zhang, Guozhu and Roell, Kyle R. and Truong, Lisa and Tanguay, Robert L. and Reif, David M.},
  year = {2017},
  month = jan,
  pages = {109--117},
}

@article{gertheiss_goldsmith_staicu_2017,
  title = {A note on modeling sparse exponential-family functional response curves},
  volume = {105},
  ISSN = {1872-7352},
  DOI = {10.1016/j.csda.2016.07.010},
  abstractNote = {Non-Gaussian functional data are considered and modeling through functional principal components analysis (FPCA) is discussed. The direct extension of popular FPCA techniques to the generalized case incorrectly uses a marginal mean estimate for a model that has an inherently conditional interpretation, and thus leads to biased estimates of population and subject-level effects. The methods proposed address this shortcoming by using either a two-stage or joint estimation strategy. The performance of all methods is compared numerically in simulations.
An application to ambulatory heart rate monitoring is used to further illustrate the distinctions between approaches.},
  journal = {Computational Statistics \& Data Analysis},
  author = {Gertheiss, Jan and Goldsmith, Jeff and Staicu, Ana-Maria},
  year = {2017},
  month = jan,
  pages = {46--52},
}

@article{pomann_staicu_ghosh_2016,
  title = {A two-sample distribution-free test for functional data with application to a diffusion tensor imaging study of multiple sclerosis},
  volume = {65},
  ISSN = {1467-9876},
  url = {https://europepmc.org/articles/PMC4812165},
  DOI = {10.1111/rssc.12130},
  abstractNote = {Motivated by an imaging study, the paper develops a non-parametric testing procedure for testing the null hypothesis that two samples of curves observed at discrete grids and with noise have the same underlying distribution. The objective is to compare formally white matter tract profiles between healthy individuals and multiple-sclerosis patients, as assessed by conventional diffusion tensor imaging measures. We propose to decompose the curves by using functional principal component analysis of a mixture process, which we refer to as marginal functional principal component analysis. This approach reduces the dimension of the testing problem in a way that enables the use of traditional non-parametric univariate testing procedures. The procedure is computationally efficient and accommodates different sampling designs. Numerical studies are presented to validate the size and power properties of the test in many realistic scenarios. In these cases, the test proposed has been found to be more powerful than its primary competitor.
Application to the diffusion tensor imaging data reveals that all the tracts studied are associated with multiple sclerosis and the choice of the diffusion tensor image measurement is important when assessing axonal disruption.},
  number = {3},
  journal = {Journal of the Royal Statistical Society Series C-Applied Statistics},
  author = {Pomann, Gina-Maria and Staicu, Ana-Maria and Ghosh, Sujit},
  year = {2016},
  month = apr,
  pages = {395--414},
}

@article{kong_staicu_maity_2016,
  title = {Classical testing in functional linear models},
  volume = {28},
  ISSN = {1029-0311},
  url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84988336276&partnerID=MN8TOARS},
  DOI = {10.1080/10485252.2016.1231806},
  abstractNote = {We extend four tests common in classical regression – Wald, score, likelihood ratio and F tests – to functional linear regression, for testing the null hypothesis, that there is no association between a scalar response and a functional covariate. Using functional principal component analysis, we re-express the functional linear model as a standard linear model, where the effect of the functional covariate can be approximated by a finite linear combination of the functional principal component scores. In this setting, we consider application of the four traditional tests. The proposed testing procedures are investigated theoretically for densely observed functional covariates when the number of principal components diverges. Using the theoretical distribution of the tests under the alternative hypothesis, we develop a procedure for sample size calculation in the context of functional linear regression.
The four tests are further compared numerically for both densely and sparsely observed noisy functional data in simulation experiments and using two real data applications.},
  number = {4},
  journal = {Journal of Nonparametric Statistics},
  author = {Kong, Dehan and Staicu, Ana-Maria and Maity, Arnab},
  year = {2016},
  month = dec,
  pages = {813--838},
}

@article{usset_staicu_maity_2016,
  title = {Interaction models for functional regression},
  volume = {94},
  ISSN = {0167-9473},
  url = {http://dx.doi.org/10.1016/J.CSDA.2015.08.020},
  DOI = {10.1016/J.CSDA.2015.08.020},
  abstractNote = {A functional regression model with a scalar response and multiple functional predictors is proposed that accommodates two-way interactions in addition to their main effects. The proposed estimation procedure models the main effects using penalized regression splines, and the interaction effect by a tensor product basis. Extensions to generalized linear models and data observed on sparse grids or with measurement error are presented. A hypothesis testing procedure for the functional interaction effect is described. The proposed method can be easily implemented through existing software. Numerical studies show that fitting an additive model in the presence of interaction leads to both poor estimation performance and lost prediction power, while fitting an interaction model where there is in fact no interaction leads to negligible losses.
The methodology is illustrated on the AneuRisk65 study data.},
  journal = {Computational Statistics \& Data Analysis},
  publisher = {Elsevier BV},
  author = {Usset, Joseph and Staicu, Ana-Maria and Maity, Arnab},
  year = {2016},
  month = feb,
  pages = {317--329},
}

@article{wrobel_park_staicu_goldsmith_2016,
  title = {Interactive graphics for functional data analyses},
  volume = {5},
  ISSN = {2049-1573},
  DOI = {10.1002/sta4.109},
  abstractNote = {Although there are established graphics that accompany the most common functional data analyses, generating these graphics for each dataset and analysis can be cumbersome and time‐consuming. Often, the barriers to visualization inhibit useful exploratory data analyses and prevent the development of intuition for a method and its application to a particular dataset. The refund.shiny package was developed to address these issues for several of the most common functional data analyses. After conducting an analysis, the plot_shiny() function is used to generate an interactive visualization environment that contains several distinct graphics, many of which are updated in response to user input. These visualizations reduce the burden of exploratory analyses and can serve as a useful tool for the communication of results to non‐statisticians. Copyright © 2016 John Wiley & Sons, Ltd.},
  number = {1},
  journal = {Stat},
  author = {Wrobel, Julia and Park, So Young and Staicu, Ana-Maria and Goldsmith, Jeff},
  year = {2016},
  pages = {108--118},
}

@article{zhang_staicu_maity_2016,
  title = {Testing for additivity in non-parametric regression},
  volume = {44},
  ISSN = {1708-945X},
  url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84982980889&partnerID=MN8TOARS},
  DOI = {10.1002/cjs.11295},
  abstractNote = {This article discusses a novel approach for testing for additivity in non‐parametric regression. We represent the model using a linear mixed model framework and equivalently rewrite the original testing problem as testing for a subset of zero variance components.
We propose two testing procedures: the restricted likelihood ratio test and the generalized F test. We develop the finite sample null distribution of the restricted likelihood ratio test and generalized F test using the spectral decomposition of the restricted likelihood ratio and the residual sum of squares, respectively. The null distribution is non‐standard and we provide a fast algorithm to simulate from the null distribution of the tests. We show, through numerical investigation, that the proposed testing procedures outperform the available alternatives and apply the methods to a diabetes data set. The Canadian Journal of Statistics 44: 445–462; 2016 © 2016 Statistical Society of Canada},
  number = {4},
  journal = {Canadian Journal of Statistics-Revue Canadienne de Statistique},
  publisher = {Wiley-Blackwell},
  author = {Zhang, Yichi and Staicu, Ana-Maria and Maity, Arnab},
  year = {2016},
  month = dec,
  pages = {445--462},
}

@article{usset_maity_staicu_schwartzman_2015,
  title = {Glacier Terminus Estimation from {Landsat} Image Intensity Profiles},
  volume = {20},
  ISSN = {1085-7117 1537-2693},
  url = {http://dx.doi.org/10.1007/S13253-015-0207-4},
  DOI = {10.1007/S13253-015-0207-4},
  number = {2},
  journal = {Journal of Agricultural, Biological, and Environmental Statistics},
  publisher = {Springer Science and Business Media LLC},
  author = {Usset, Joseph and Maity, Arnab and Staicu, Ana-Maria and Schwartzman, Armin},
  year = {2015},
  month = may,
  pages = {279--298},
}

@article{park_staicu_2015,
  title = {Longitudinal functional data analysis},
  volume = {4},
  ISSN = {2049-1573},
  url = {http://dx.doi.org/10.1002/STA4.89},
  DOI = {10.1002/STA4.89},
  abstractNote = {We consider dependent functional data that are correlated because of a longitudinal‐based design: each subject is observed at repeated times and at each time, a functional observation (curve) is recorded. We propose a novel parsimonious modelling framework for repeatedly observed functional observations that allows to extract low‐dimensional features.
The proposed methodology accounts for the longitudinal design, is designed to study the dynamic behaviour of the underlying process, allows prediction of full future trajectory and is computationally fast. Theoretical properties of this framework are studied, and numerical investigations confirm excellent behaviour in finite samples. The proposed method is motivated by and applied to a diffusion tensor imaging study of multiple sclerosis. Copyright © 2015 John Wiley & Sons, Ltd.},
  number = {1},
  journal = {Stat},
  publisher = {Wiley},
  author = {Park, So Young and Staicu, Ana-Maria},
  year = {2015},
  month = feb,
  pages = {212--226},
}

@article{gertheiss_maier_hessel_staicu_2015,
  title = {Marginal Functional Regression Models for Analyzing the Feeding Behavior of Pigs},
  volume = {20},
  ISSN = {1537-2693},
  DOI = {10.1007/s13253-015-0212-7},
  abstractNote = {We observe a group of pigs over a period of about 100 days. Using high frequency radio frequency identification, it is recorded when each pig is feeding, leading to very dense binary functional data for each pig and day. One aim of the data analysis is to find pig-specific feeding profiles showing us the typical feeding pattern of each pig. For modeling the data, we use a marginal functional logistic regression approach, allowing us to model the densely observed binary measurements by assuming an underlying smooth subject-specific profile. The method also allows to incorporate additional covariates such as temperature and humidity that may influence the pigs' behavior. To account for correlation of measurements, we use robust standard errors and corresponding pointwise confidence intervals. Before analyzing the feeding behavior of pigs, the method employed is evaluated in simulation studies.
As our approach is rather general, it may also be applied to other types of generalized functional data with similar characteristics as the pig data.},
  number = {3},
  journal = {Journal of Agricultural, Biological, and Environmental Statistics},
  author = {Gertheiss, Jan and Maier, Verena and Hessel, Engel F. and Staicu, Ana-Maria},
  year = {2015},
  month = sep,
  pages = {353--370},
}

@article{pomann_sweeney_reich_staicu_shinohara_2015,
  title = {Scan-stratified case-control sampling for modeling blood-brain barrier integrity in multiple sclerosis},
  volume = {34},
  ISSN = {1097-0258},
  DOI = {10.1002/sim.6520},
  abstractNote = {Multiple sclerosis (MS) is an immune‐mediated neurological disease that causes morbidity and disability. In patients with MS, the accumulation of lesions in the white matter of the brain is associated with disease progression and worse clinical outcomes. Breakdown of the blood–brain barrier in newer lesions is indicative of more active disease‐related processes and is a primary outcome considered in clinical trials of treatments for MS. Such abnormalities in active MS lesions are evaluated in vivo using contrast‐enhanced structural MRI, during which patients receive an intravenous infusion of a costly magnetic contrast agent. In some instances, the contrast agents can have toxic effects. Recently, local image regression techniques have been shown to have modest performance for assessing the integrity of the blood–brain barrier based on imaging without contrast agents. These models have centered on the problem of cross‐sectional classification in which patients are imaged at a single study visit and pre‐contrast images are used to predict post‐contrast imaging. In this paper, we extend these methods to incorporate historical imaging information, and we find the proposed model to exhibit improved performance.
We further develop scan‐stratified case‐control sampling techniques that reduce the computational burden of local image regression models, while respecting the low proportion of the brain that exhibits abnormal vascular permeability. Copyright © 2015 John Wiley & Sons, Ltd.}, number={20}, journal={STATISTICS IN MEDICINE}, author={Pomann, Gina-Maria and Sweeney, Elizabeth M. and Reich, Daniel S. and Staicu, Ana-Maria and Shinohara, Russell T.}, year={2015}, month={Sep}, pages={2872–2880} } @article{staicu_lu_2014, title={Analysis of AneuRisk65 data: Classification and curve registration}, volume={8}, ISSN={["1935-7524"]}, DOI={10.1214/14-ejs938c}, abstractNote={Abstract: This paper concerns the relationship between the geometry of the Inner Carotid Artery, as described by its centerline curvature and its radius, and the location of the aneurysm for the AneuRisk65 data. Fisher Rao curve registration is used to align the curvature of the artery, and this alignment is then used to register both the curvature and the radius profiles. Based on this alignment, interesting results are found regarding the discrepancy between the arteries of patients with aneurysms at or after the terminal bifurcation (upper group) and the arteries of subjects with aneurysms before bifurcation, or without aneurysms (lower-no group).}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Staicu, Ana-Maria and Lu, Xiaosun}, year={2014}, pages={1914–1919} } @article{scheipl_staicu_greven_2015, title={Functional additive mixed models}, volume={24}, DOI={10.1080/10618600.2014.901914}, abstractNote={We propose an extensive framework for additive regression models for correlated functional responses, allowing for multiple partially nested or crossed functional random effects with flexible correlation structures for, for example, spatial, temporal, or longitudinal functional data. 
Additionally, our framework includes linear and nonlinear effects of functional and scalar covariates that may vary smoothly over the index of the functional response. It accommodates densely or sparsely observed functional responses and predictors which may be observed with additional error and includes both spline-based and functional principal component-based terms. Estimation and inference in this framework is based on standard additive mixed models, allowing us to take advantage of established methods and robust, flexible algorithms. We provide easy-to-use open source software in the pffr() function for the R package refund. Simulations show that the proposed method recovers relevant effects reliably, handles small sample sizes well, and also scales to larger datasets. Applications with spatially and longitudinally observed functional data demonstrate the flexibility in modeling and interpretability of results of our approach.}, number={2}, journal={Journal of Computational and Graphical Statistics}, author={Scheipl, F. and Staicu, Ana-Maria and Greven, S.}, year={2015}, pages={477–501} } @article{zhao_bell_maity_staicu_joubert_london_wu_2015, title={Global Analysis of Methylation Profiles From High Resolution CpG Data}, volume={39}, ISSN={["1098-2272"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84921023434&partnerID=MN8TOARS}, DOI={10.1002/gepi.21874}, abstractNote={ABSTRACTNew high throughput technologies are now enabling simultaneous epigenetic profiling of DNA methylation at hundreds of thousands of CpGs across the genome. A problem of considerable practical interest is identification of large scale, global changes in methylation that are associated with environmental variables, clinical outcomes, or other experimental conditions. However, there has been little statistical research on methods for global methylation analysis using technologies with individual CpG resolution. 
To address this critical gap in the literature, we develop a new strategy for global analysis of methylation profiles using a functional regression approach wherein we approximate either the density or the cumulative distribution function (CDF) of the methylation values for each individual using B‐spline basis functions. The spline coefficients for each individual are allowed to summarize the individual's overall methylation profile. We then test for association between the overall distribution and a continuous or dichotomous outcome variable using a variance component score test that naturally accommodates the correlation between spline coefficients. Simulations indicate that our proposed approach has desirable power while protecting type I error. The method was applied to detect methylation differences, both genome wide and at LINE1 elements, between the blood samples from rheumatoid arthritis patients and healthy controls and to detect the epigenetic changes of human hepatocarcinogenesis in the context of alcohol abuse and hepatitis C virus infection. A free implementation of our methods in the R language is available in the Global Analysis of Methylation Profiles (GAMP) package at http://research.fhcrc.org/wu/en.html.}, number={2}, journal={GENETIC EPIDEMIOLOGY}, author={Zhao, Ni and Bell, Douglas A. and Maity, Arnab and Staicu, Ana-Maria and Joubert, Bonnie R. and London, Stephanie J. and Wu, Michael C.}, year={2015}, month={Feb}, pages={53–64} } @article{li_staicu_bondell_2015, title={Incorporating covariates in skewed functional data models}, volume={16}, DOI={10.1093/biostatistics/kxu055}, abstractNote={We introduce a class of covariate-adjusted skewed functional models (cSFM) designed for functional data exhibiting location-dependent marginal distributions. We propose a semi-parametric copula model for the pointwise marginal distributions, which are allowed to depend on covariates, and the functional dependence, which is assumed covariate invariant. 
The proposed cSFM framework provides a unifying platform for pointwise quantile estimation and trajectory prediction. We consider a computationally feasible procedure that handles densely as well as sparsely observed functional data. The methods are examined numerically using simulations and is applied to a new tractography study of multiple sclerosis. Furthermore, the methodology is implemented in the R package cSFM, which is publicly available on CRAN.}, number={3}, journal={Biostatistics (Oxford, England)}, author={Li, M. and Staicu, Ana-Maria and Bondell, H. D.}, year={2015}, pages={413–426} } @article{staicu_li_crainiceanu_ruppert_2014, title={Likelihood Ratio Tests for Dependent Data with Applications to Longitudinal and Functional Data Analysis}, volume={41}, ISSN={["1467-9469"]}, DOI={10.1111/sjos.12075}, abstractNote={ABSTRACTThis paper introduces a general framework for testing hypotheses about the structure of the mean function of complex functional processes. Important particular cases of the proposed framework are as follows: (1) testing the null hypothesis that the mean of a functional process is parametric against a general alternative modelled by penalized splines; and (2) testing the null hypothesis that the means of two possibly correlated functional processes are equal or differ by only a simple parametric function. A global pseudo‐likelihood ratio test is proposed, and its asymptotic distribution is derived. The size and power properties of the test are confirmed in realistic simulation scenarios. Finite‐sample power results indicate that the proposed test is much more powerful than competing alternatives. Methods are applied to testing the equality between the means of normalized δ‐power of sleep electroencephalograms of subjects with sleep‐disordered breathing and matched controls.}, number={4}, journal={SCANDINAVIAN JOURNAL OF STATISTICS}, author={Staicu, Ana-Maria and Li, Yingxing and Crainiceanu, Ciprian M. 
and Ruppert, David}, year={2014}, month={Dec}, pages={932–949} } @article{ivanescu_staicu_scheipl_greven_2015, title={Penalized function-on-function regression}, volume={30}, ISSN={["1613-9658"]}, DOI={10.1007/s00180-014-0548-4}, abstractNote={A general framework for smooth regression of a functional response on one or multiple functional predictors is proposed. Using the mixed model representation of penalized regression expands the scope of function-on-function regression to many realistic scenarios. In particular, the approach can accommodate a densely or sparsely sampled functional response as well as multiple functional predictors that are observed on the same or different domains than the functional response, on a dense or sparse grid, and with or without noise. It also allows for seamless integration of continuous or categorical covariates and provides approximate confidence intervals as a by-product of the mixed model inference. The proposed methods are accompanied by easy to use and robust software implemented in the pffr function of the R package refund. Methodological developments are general, but were inspired by and applied to a diffusion tensor imaging brain tractography dataset.}, number={2}, journal={COMPUTATIONAL STATISTICS}, author={Ivanescu, Andrada E. and Staicu, Ana-Maria and Scheipl, Fabian and Greven, Sonja}, year={2015}, month={Jun}, pages={539–568} } @article{staicu_lahiri_carroll_2015, title={Significance tests for functional data with complex dependence structure}, volume={156}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2014.08.006}, abstractNote={We propose an L2-norm based global testing procedure for the null hypothesis that multiple group mean functions are equal, for functional data with complex dependence structure. 
Specifically, we consider the setting of functional data with a multilevel structure of the form groups-clusters or subjects-units, where the unit-level profiles are spatially correlated within the cluster, and the cluster-level data are independent. Orthogonal series expansions are used to approximate the group mean functions and the test statistic is estimated using the basis coefficients. The asymptotic null distribution of the test statistic is developed, under mild regularity conditions. To our knowledge this is the first work that studies hypothesis testing, when data have such complex multilevel functional and spatial structure. Two small-sample alternatives, including a novel block bootstrap for functional data, are proposed, and their performance is examined in simulation studies. The paper concludes with an illustration of a motivating experiment.}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Staicu, Ana-Maria and Lahiri, Soumen N. and Carroll, Raymond J.}, year={2015}, month={Jan}, pages={1–13} } @article{serban_staicu_carroll_2013, title={Multilevel Cross-Dependent Binary Longitudinal Data}, volume={69}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12083}, abstractNote={Summary We provide insights into new methodology for the analysis of multilevel binary data observed longitudinally, when the repeated longitudinal measurements are correlated. The proposed model is logistic functional regression conditioned on three latent processes describing the within- and between-variability, and describing the cross-dependence of the repeated longitudinal measurements. We estimate the model components without employing mixed-effects modeling but assuming an approximation to the logistic link function. 
The primary objectives of this article are to highlight the challenges in the estimation of the model components, to compare two approximations to the logistic regression function, linear and exponential, and to discuss their advantages and limitations. The linear approximation is computationally efficient whereas the exponential approximation applies for rare events functional data. Our methods are inspired by and applied to a scientific experiment on spectral backscatter from long range infrared light detection and ranging (LIDAR) data. The models are general and relevant to many new binary functional data sets, with or without dependence between repeated functional measurements.}, number={4}, journal={BIOMETRICS}, author={Serban, Nicoleta and Staicu, Ana-Maria and Carroll, Raymond J.}, year={2013}, month={Dec}, pages={903–913} } @article{gertheiss_maity_staicu_2013, title={Variable selection in generalized functional linear models}, volume={2}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/sta4.20}, DOI={10.1002/sta4.20}, abstractNote={Modern research data, where a large number of functional predictors is collected on few subjects are becoming increasingly common. In this paper we propose a variable selection technique, when the predictors are functional and the response is scalar. Our approach is based on adopting a generalized functional linear model framework and using a penalized likelihood method that simultaneously controls the sparsity of the model and the smoothness of the corresponding coefficient functions by adequate penalization. The methodology is characterized by high predictive accuracy, and yields interpretable models, while retaining computational efficiency. The proposed method is investigated numerically in finite samples, and applied to a diffusion tensor imaging tractography data set and a chemometric data set. 
Copyright © 2013 John Wiley & Sons Ltd}, number={1}, journal={Stat}, publisher={Wiley}, author={Gertheiss, Jan and Maity, Arnab and Staicu, Ana-Maria}, year={2013}, month={May}, pages={86–101} } @article{crainiceanu_staicu_ray_punjabi_2012, title={Bootstrap-based inference on the difference in the means of two correlated functional processes}, volume={31}, ISSN={["1097-0258"]}, DOI={10.1002/sim.5439}, abstractNote={We propose nonparametric inference methods on the mean difference between two correlated functional processes. We compare methods that (1) incorporate different levels of smoothing of the mean and covariance; (2) preserve the sampling design; and (3) use parametric and nonparametric estimation of the mean functions. We apply our method to estimating the mean difference between average normalized δ power of sleep electroencephalograms for 51 subjects with severe sleep apnea and 51 matched controls in the first 4 ;h after sleep onset. We obtain data from the Sleep Heart Health Study, the largest community cohort study of sleep. Although methods are applied to a single case study, they can be applied to a large number of studies that have correlated functional data. Copyright © 2012 John Wiley & Sons, Ltd.}, number={26}, journal={STATISTICS IN MEDICINE}, author={Crainiceanu, Ciprian M. and Staicu, Ana-Maria and Ray, Shubankar and Punjabi, Naresh}, year={2012}, month={Nov}, pages={3223–3240} } @article{jiang_serban_2012, title={Clustering Random Curves Under Spatial Interdependence With Application to Service Accessibility}, volume={54}, ISSN={0040-1706 1537-2723}, url={http://dx.doi.org/10.1080/00401706.2012.657106}, DOI={10.1080/00401706.2012.657106}, abstractNote={Service accessibility is defined as the access of a community to the nearby site locations in a service network consisting of multiple geographically distributed service sites. 
Leveraging new statistical methods, this article estimates and classifies service accessibility patterns varying over a large geographic area (Georgia) and over a period of 16 years. The focus of this study is on financial services but it generally applies to any other service operation. To this end, we introduce a model-based method for clustering random time-varying functions that are spatially interdependent. The underlying clustering model is nonparametric with spatially correlated errors. We also assume that the clustering membership is a realization from a Markov random field. Under these model assumptions, we borrow information across functions corresponding to nearby spatial locations resulting in enhanced estimation accuracy of the cluster effects and of the cluster membership as shown in a simulation study. Supplementary materials including the estimation algorithm, additional maps of the data, and the C++ computer programs for analyzing the data in our case study are available online.}, number={2}, journal={Technometrics}, publisher={Informa UK Limited}, author={Jiang, Huijing and Serban, Nicoleta}, year={2012}, month={May}, pages={108–119} } @article{crainiceanu_staicu_2012, title={Comment}, volume={54}, ISSN={["0040-1706"]}, DOI={10.1080/00401706.2011.649821}, abstractNote={We would like to congratulate the authors for their excellent and thought-provoking article. The article provides new insights into and methods for clustering spatially indexed timedependent curves. To provide a context for our discussion, we first clarify how the model in the article by Jiang and Serban (henceforth, JS) interacts with data. For data Yij observed in spatial cluster sj at time ti , the underlying model is Yij = fsj (ti) + σ ij , (1)}, number={2}, journal={TECHNOMETRICS}, author={Crainiceanu, Ciprian M. 
and Staicu, Ana-Maria}, year={2012}, month={May}, pages={120–122} } @article{mclean_hooker_staicu_scheipl_ruppert_2014, title={Functional Generalized Additive Models}, volume={23}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2012.729985}, abstractNote={We introduce the functional generalized additive model (FGAM), a novel regression model for association studies between a scalar response and a functional predictor. We model the link-transformed mean response as the integral with respect to t of F{X(t), t} where F( ·, ·) is an unknown regression function and X(t) is a functional covariate. Rather than having an additive model in a finite number of principal components as by Müller and Yao (2008), our model incorporates the functional predictor directly and thus our model can be viewed as the natural functional extension of generalized additive models. We estimate F( ·, ·) using tensor-product B-splines with roughness penalties. A pointwise quantile transformation of the functional predictor is also considered to ensure each tensor-product B-spline has observed data on its support. The methods are evaluated using simulated data and their predictive performance is compared with other competing scalar-on-function regression alternatives. We illustrate the usefulness of our approach through an application to brain tractography, where X(t) is a signal from diffusion tensor imaging at position, t, along a tract in the brain. In one example, the response is disease-status (case or control) and in a second example, it is the score on a cognitive test. The FGAM is implemented in R in the refund package. There are additional supplementary materials available online.}, number={1}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={McLean, Mathew W. 
and Hooker, Giles and Staicu, Ana-Maria and Scheipl, Fabian and Ruppert, David}, year={2014}, month={Mar}, pages={249–269} } @article{staicu_crainiceanu_reich_ruppert_2011, title={Modeling Functional Data with Spatially Heterogeneous Shape Characteristics}, volume={68}, ISSN={0006-341X}, url={http://dx.doi.org/10.1111/j.1541-0420.2011.01669.x}, DOI={10.1111/j.1541-0420.2011.01669.x}, abstractNote={Summary We propose a novel class of models for functional data exhibiting skewness or other shape characteristics that vary with spatial or temporal location. We use copulas so that the marginal distributions and the dependence structure can be modeled independently. Dependence is modeled with a Gaussian or t‐copula, so that there is an underlying latent Gaussian process. We model the marginal distributions using the skew t family. The mean, variance, and shape parameters are modeled nonparametrically as functions of location. A computationally tractable inferential framework for estimating heterogeneous asymmetric or heavy‐tailed marginal distributions is introduced. This framework provides a new set of tools for increasingly complex data collected in medical and public health studies. Our methods were motivated by and are illustrated with a state‐of‐the‐art study of neuronal tracts in multiple sclerosis patients and healthy controls. Using the tools we have developed, we were able to find those locations along the tract most affected by the disease. However, our methods are general and highly relevant to many functional data sets. In addition to the application to one‐dimensional tract profiles illustrated here, higher‐dimensional extensions of the methodology could have direct applications to other biological data including functional and structural magnetic resonance imaging (MRI).}, number={2}, journal={Biometrics}, publisher={Wiley}, author={Staicu, Ana-Maria and Crainiceanu, Ciprian M. and Reich, Daniel S. 
and Ruppert, David}, year={2011}, month={Nov}, pages={331–343} } @article{staicu_crainiceanu_carroll_2010, title={Fast methods for spatially correlated multilevel functional data}, volume={11}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxp058}, abstractNote={We propose a new methodological framework for the analysis of hierarchical functional data when the functions at the lowest level of the hierarchy are correlated. For small data sets, our methodology leads to a computational algorithm that is orders of magnitude more efficient than its closest competitor (seconds versus hours). For large data sets, our algorithm remains fast and has no current competitors. Thus, in contrast to published methods, we can now conduct routine simulations, leave-one-out analyses, and nonparametric bootstrap sampling. Our methods are inspired by and applied to data obtained from a state-of-the-art colon carcinogenesis scientific experiment. However, our models are general and will be relevant to many new data sets where the object of inference are functions or images that remain dependent even after conditioning on the subject on which they are measured. Supplementary materials are available at Biostatistics online.}, number={2}, journal={BIOSTATISTICS}, author={Staicu, Ana-Maria and Crainiceanu, Ciprian M. and Carroll, Raymond J.}, year={2010}, month={Apr}, pages={177–194} } @article{staicu_2010, title={On the equivalence of prospective and retrospective likelihood methods in case-control studies}, volume={97}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/asq054}, abstractNote={We present new approaches to analyzing case-control studies using prospective likelihood methods. In the classical framework, we extend the equality of the profile likelihoods to the Barndorff-Nielsen modified profile likelihoods for prospective and retrospective models. This enables simple and accurate approximate conditional inference for stratified case-control studies of moderate stratum size. 
In the Bayesian framework, we provide sufficient conditions on priors for the prospective model parameters to yield a prospective marginal posterior density equal to its retrospective counterpart. Our results extend the prospective-retrospective equivalence in the Bayesian paradigm with a more general class of priors than has previously been investigated. Copyright 2010, Oxford University Press.}, number={4}, journal={BIOMETRIKA}, author={Staicu, Ana-Maria}, year={2010}, month={Dec}, pages={990–996} } @article{fraser_fraser_staicu_2010, title={Second order ancillary: A differential view from continuity}, volume={16}, ISSN={["1573-9759"]}, DOI={10.3150/10-bej248}, abstractNote={Second order approximate ancillaries have evolved as the primary ingredient for recent likelihood development in statistical inference. This uses quantile functions rather than the equivalent distribution functions, and the intrinsic ancillary contour is given explicitly as the plug-in estimate of the vector quantile function. The derivation uses a Taylor expansion of the full quantile function, and the linear term gives a tangent to the observed ancillary contour. For the scalar parameter case, there is a vector field that integrates to give the ancillary contours, but for the vector case, there are multiple vector fields and the Frobenius conditions for mutual consistency may not hold. We demonstrate, however, that the conditions hold in a restricted way and that this verifies the second order ancillary contours in moderate deviations. The methodology can generate an appropriate exact ancillary when such exists or an approximate ancillary for the numerical or Monte Carlo calculation of p-values and confidence quantiles. Examples are given, including nonlinear regression and several enigmatic examples from the literature.}, number={4}, journal={BERNOULLI}, author={Fraser, Ailana M. and Fraser, D. A. S. 
and Staicu, Ana-Maria}, year={2010}, month={Nov}, pages={1208–1223} } @article{crainiceanu_staicu_di_2009, title={Generalized Multilevel Functional Regression}, volume={104}, ISSN={["1537-274X"]}, DOI={10.1198/jasa.2009.tm08564}, abstractNote={We introduce Generalized Multilevel Functional Linear Models (GMFLMs), a novel statistical framework for regression models where exposure has a multilevel functional structure. We show that GMFLMs are, in fact, generalized multilevel mixed models. Thus, GMFLMs can be analyzed using the mixed effects inferential machinery and can be generalized within a well-researched statistical framework. We propose and compare two methods for inference: (1) a two-stage frequentist approach; and (2) a joint Bayesian analysis. Our methods are motivated by and applied to the Sleep Heart Health Study, the largest community cohort study of sleep. However, our methods are general and easy to apply to a wide spectrum of emerging biological and medical datasets. Supplemental materials for this article are available online.}, number={488}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Crainiceanu, Ciprian M. and Staicu, Ana-Maria and Di, Chong-Zhi}, year={2009}, month={Dec}, pages={1550–1561} } @article{staicu_2009, title={Higher-order approximations for interval estimation in binomial settings}, volume={139}, ISSN={0378-3758}, url={http://dx.doi.org/10.1016/j.jspi.2009.03.021}, DOI={10.1016/j.jspi.2009.03.021}, abstractNote={In this paper we revisit the classical problem of interval estimation for one-binomial parameter and for the log odds ratio of two binomial parameters. We examine the confidence intervals provided by two versions of the modified log likelihood root: the usual Barndorff-Nielsen's r* and a Bayesian version of the r* test statistic. For the one-binomial problem, this work updates the findings of Brown et al. [2003. Interval estimation in exponential families. Statistica Sinica 13, 19–49; 2002. 
Confidence intervals for a binomial proportion and asymptotic expansion. The Annals of Statistics 30, 160–201] and Cai [2005. One-sided confidence intervals in discrete distributions. Journal of Statistical Planning and Inference 131, 63–88] to higher-order methods. For the log odds ratio of two binomial parameters we show via Edgeworth expansion that both versions of the r* statistics give confidence intervals which nearly completely eliminate the systematic bias in the unconditional smooth coverage probability. We also give expansions for the length of the confidence intervals.}, number={10}, journal={Journal of Statistical Planning and Inference}, publisher={Elsevier BV}, author={Staicu, Ana-Maria}, year={2009}, month={Oct}, pages={3393–3404} } @article{staicu_fraser_2010, title={The second order ancillary is rotation based}, volume={140}, ISSN={0378-3758}, url={http://dx.doi.org/10.1016/j.jspi.2009.09.011}, DOI={10.1016/j.jspi.2009.09.011}, abstractNote={Abstract This paper concerns the approximate ancillary needed for higher order asymptotic likelihood inference. The existence of an exact or approximate ancillary is the key to such inference. The exact ancillary is, however, only available for transformation model and an approximate ancillary is typically very difficult to find. Fraser (1988) revealed that the high order inference can be obtained by using only the tangent directions to the second order ancillary, thus avoiding its specification. The present work discuses the second order ancillary that has the observed contour tangent to the sensitivity directions v = ∂ y / ∂ θ | { y 0 ; θ ^ ( y 0 ) } corresponding to a pivot and derived by keeping the pivotal variable fixed at the observed fitted value; these directions are always available ( Fraser and Reid, 1995 , Fraser and Reid, 2001 ). For scalar parameter case, such an approximate ancillary is well defined and can be described as a rotation. 
Our approach provides insights into the second order ancillary outlined by the sensitivity directions in the vector parameter case.}, number={3}, journal={Journal of Statistical Planning and Inference}, publisher={Elsevier BV}, author={Staicu, Ana-Maria and Fraser, Donald A.S.}, year={2010}, month={Mar}, pages={831–836} } @article{staicu_reid_2008, title={On probability matching priors}, volume={36}, ISSN={0319-5724 1708-945X}, url={http://dx.doi.org/10.1002/cjs.5550360408}, DOI={10.1002/cjs.5550360408}, abstractNote={AbstractFirst‐order probability matching priors are priors for which Bayesian and frequentist inference, in the form of posterior quantiles, or confidence intervals, agree to a second order of approximation. The authors show that the matching priors developed by Peers (1965) and Tibshirani (1989) are readily and uniquely implemented in a third‐order approximation to the posterior marginal density. The authors further show how strong orthogonality of parameters simplifies the arguments. Several examples illustrate their results.}, number={4}, journal={Canadian Journal of Statistics}, publisher={Wiley}, author={Staicu, Ana-Maria and Reid, Nancy M.}, year={2008}, month={Dec}, pages={613–622} }