% Bibliography database (BibTeX).
% Conventions used below:
%   - one field per line, brace-delimited values, fields in a fixed order;
%   - citation keys are ASCII and contain no whitespace;
%   - month uses the predefined three-letter macros (jan ... dec), unquoted;
%   - page ranges use "--"; a truncated author list ends with "and others";
%   - abstractNote is a non-standard field kept for reference only.

% Key changed: the exported key contained a space ("thorp jr"), which BibTeX
% cannot parse as part of a citation key.
@article{rose_bentley_maity_maguire_planchart_spasojevic_liu_thorp_jr_hoyo_2024,
  author       = {Rose, Deborah K. and Bentley, Loren and Maity, Arnab and Maguire, Rachel L. and Planchart, Antonio and Spasojevic, Ivan and Liu, Andy J. and Thorp, Jr, John and Hoyo, Cathrine},
  title        = {Association between {F2}-isoprostanes and self-reported stressors in pregnant {Americans} of {African} and {European} ancestry},
  journal      = {Heliyon},
  volume       = {10},
  number       = {3},
  year         = {2024},
  month        = feb,
  issn         = {2405-8440},
  doi          = {10.1016/j.heliyon.2024.e25578},
  abstractNote = {Background: Poor birth outcomes such as preterm birth/delivery disproportionately affect African Americans compared to White individuals. Reasons for this disparity are likely multifactorial, and include prenatal psychosocial stressors, and attendant increased lipid peroxidation; however, empirical data linking psychosocial stressors during pregnancy to oxidative status are limited. Methods: We used established scales to measure five psychosocial stressors. Maternal adverse childhood experiences, financial stress, social support, anxiety, and depression were measured among 50 African American and White pregnant women enrolled in the Stress and Health in Pregnancy cohort. Liquid chromatography‐tandem mass spectrometry was used to measure biomarkers of oxidative stress (four urinary F2-isoprostane isomers), to estimate oxidative status. Linear regression models were used to evaluate associations between psychosocial stressors, prenatal oxidative status and preterm birth. Results: After adjusting for maternal obesity, gestational diabetes, and cigarette smoking, African American women with higher oxidative status were more likely to report higher maternal adverse childhood experience scores (β = 0.16, se = 1.07, p-value = 0.024) and depression scores (β = 0.05, se = 0.02, p = 0.014). Higher oxidative status was also associated with lower gestational age at birth (β = −0.13, se = 0.06, p = 0.04) in this population. These associations were not apparent in Whites. However, none of the cross-product terms for race/ethnicity and social stressors reached statistical significance (p > 0.05). Conclusion: While the small sample size limits inference, our novel data suggest that psychosocial stressors may contribute significantly to oxidative stress during pregnancy, and preterm birth or delivery African Americans. If replicated in larger studies, these findings would support oxidative stress reduction using established dietary or pharmacological approaches present a potential avenue to mitigate adverse effects of psychosocial stressors on birth outcomes.},
}

@article{ghosal_maity_2023,
  author       = {Ghosal, Rahul and Maity, Arnab},
  title        = {Variable selection in function-on-scalar single-index model via the alternating direction method of multipliers},
  journal      = {TEST},
  volume       = {9},
  year         = {2023},
  month        = sep,
  issn         = {1863-8260},
  doi          = {10.1007/s11749-023-00884-9},
}

@article{ghosal_maity_2022,
  author       = {Ghosal, Rahul and Maity, Arnab},
  title        = {A Score Based Test for Functional Linear Concurrent Regression},
  journal      = {Econometrics and Statistics},
  volume       = {21},
  pages        = {114--130},
  year         = {2022},
  month        = jan,
  issn         = {2452-3062},
  doi          = {10.1016/j.ecosta.2021.05.003},
  abstractNote = {A novel method for testing the null hypothesis of no effect of a covariate on the response is proposed in functional linear concurrent regression. An equivalent random effects formulation of the functional regression model is established under which the testing problem reduces to testing for zero variance component for random effects. For this purpose, a one-sided score test approach is used, which is an extension of the classical score test. Theoretical justification is provided as to why the proposed testing procedure has the correct levels (asymptotically) under null using standard assumptions. Using numerical simulations, the testing method is shown to have the desired type I error rate and higher power compared to a bootstrapped F test currently existing in the literature. The model and the testing procedure give good performances even when the data are sparsely observed, and the functional covariate is contaminated with noise. Applications of the proposed testing method are demonstrated on gait data and a study of child mortality.},
}

@article{li_wang_maity_staicu_2022,
  author       = {Li, Meng and Wang, Kehui and Maity, Arnab and Staicu, Ana-Maria},
  title        = {Inference in functional linear quantile regression},
  journal      = {Journal of Multivariate Analysis},
  volume       = {190},
  year         = {2022},
  month        = jul,
  issn         = {0047-259X},
  doi          = {10.1016/j.jmva.2022.104985},
  abstractNote = {In this paper, we study statistical inference in functional quantile regression for scalar response and a functional covariate. Specifically, we consider a functional linear quantile regression model where the effect of the covariate on the quantile of the response is modeled through the inner product between the functional covariate and an unknown smooth regression parameter function that varies with the level of quantile. The objective is to test that the regression parameter is constant across several quantile levels of interest. The parameter function is estimated by combining ideas from functional principal component analysis and quantile regression. An adjusted Wald testing procedure is proposed for this hypothesis of interest, and its chi-square asymptotic null distribution is derived. The testing procedure is investigated numerically in simulations involving sparse and noisy functional covariates and in a capital bike share data application. The proposed approach is easy to implement and the R code is published online at https://github.com/xylimeng/fQR-testing.},
}

@article{gonzalez-nahm_marchesoni_maity_maguire_house_tucker_atkinson_murphy_hoyo_2022,
  author       = {Gonzalez-Nahm, Sarah and Marchesoni, Joddy and Maity, Arnab and Maguire, Rachel L. and House, John S. and Tucker, Rachel and Atkinson, Tamara and Murphy, Susan K. and Hoyo, Cathrine},
  title        = {Maternal {Mediterranean} Diet Adherence and Its Associations with Maternal Prenatal Stressors and Child Growth},
  journal      = {Current Developments in Nutrition},
  volume       = {6},
  number       = {11},
  year         = {2022},
  month        = nov,
  issn         = {2475-2991},
  doi          = {10.1093/cdn/nzac146},
  abstractNote = {ABSTRACT Background Psychosocial and physiologic stressors, such as depression and obesity, during pregnancy can have negative consequences, such as increased systemic inflammation, contributing to chronic disease for both mothers and their unborn children. These conditions disproportionately affect racial/ethnic minorities. The effects of recommended dietary patterns in mitigating the effects of these stressors remain understudied. Objectives We aimed to evaluate the relations between maternal Mediterranean diet adherence (MDA) and maternal and offspring outcomes during the first decade of life in African Americans, Hispanics, and Whites. Methods This study included 929 mother–child dyads from the NEST (Newborn Epigenetics STudy), a prospective cohort study. FFQs were used to estimate MDA in pregnant women. Weight and height were measured in children between birth and age 8 y. Multivariable linear regression models were used to examine associations between maternal MDA, inflammatory cytokines, and pregnancy and postnatal outcomes. Results More than 55% of White women reported high MDA during the periconceptional period compared with 22% of Hispanic and 18% of African American women (P < 0.05). Higher MDA was associated with lower likelihood of depressive mood (β = −0.45; 95% CI: −0.90, −0.18; P = 0.02) and prepregnancy obesity (β = −0.29; 95% CI: −0.57, −0.0002; P = 0.05). Higher MDA was also associated with lower body size at birth, which was maintained to ages 3–5 and 6–8 y—this association was most apparent in White children (3–5 y: β = −2.9, P = 0.02; 6–8 y: β = −3.99, P = 0.01). Conclusions If replicated in larger studies, our data suggest that MDA provides a potent avenue by which effects of prenatal stressors on maternal and fetal outcomes can be mitigated to reduce ethnic disparities in childhood obesity.},
}

@article{alam_maity_sinha_rizopoulos_sattar_2021,
  author       = {Alam, Khurshid and Maity, Arnab and Sinha, Sanjoy K. and Rizopoulos, Dimitris and Sattar, Abdus},
  title        = {Joint modeling of longitudinal continuous, longitudinal ordinal, and time-to-event outcomes},
  journal      = {Lifetime Data Analysis},
  volume       = {27},
  number       = {1},
  pages        = {64--90},
  year         = {2021},
  month        = jan,
  issn         = {1572-9249},
  doi          = {10.1007/s10985-020-09511-3},
  abstractNote = {In this paper, we propose an innovative method for jointly analyzing survival data and longitudinally measured continuous and ordinal data. We use a random effects accelerated failure time model for survival outcomes, a linear mixed model for continuous longitudinal outcomes and a proportional odds mixed model for ordinal longitudinal outcomes, where these outcome processes are linked through a set of association parameters. A primary objective of this study is to examine the effects of association parameters on the estimators of joint models. The model parameters are estimated by the method of maximum likelihood. The finite-sample properties of the estimators are studied using Monte Carlo simulations. The empirical study suggests that the degree of association among the outcome processes influences the bias, efficiency, and coverage probability of the estimators. Our proposed joint model estimators are approximately unbiased and produce smaller mean squared errors as compared to the estimators obtained from separate models. This work is motivated by a large multicenter study, referred to as the Genetic and Inflammatory Markers of Sepsis (GenIMS) study. We apply our proposed method to the GenIMS data analysis.},
}

@article{mehrotra_maity_2021,
  author       = {Mehrotra, Suchit and Maity, Arnab},
  title        = {Simultaneous variable selection, clustering, and smoothing in function-on-scalar regression},
  journal      = {Canadian Journal of Statistics-Revue Canadienne de Statistique},
  volume       = {11},
  year         = {2021},
  month        = nov,
  issn         = {1708-945X},
  doi          = {10.1002/cjs.11668},
  abstractNote = {We address the problem of multicollinearity in a function‐on‐scalar regression model by using a prior that simultaneously selects, clusters, and smooths functional effects. Our methodology groups the effects of highly correlated predictors, performing dimension reduction without dropping relevant predictors from the model. We validate our approach via a simulation study, showing superior performance relative to existing dimension‐reduction approaches described in the function‐on‐scalar literature. We also demonstrate the use of our model on a data set of age‐specific fertility rates from the United Nations Gender Information database.},
}

@article{ghosal_maity_2021,
  author       = {Ghosal, Rahul and Maity, Arnab},
  title        = {Variable selection in nonlinear function-on-scalar regression},
  journal      = {Biometrics},
  volume       = {9},
  year         = {2021},
  month        = sep,
  issn         = {1541-0420},
  doi          = {10.1111/biom.13564},
}

% Key changed: this entry originally reused the key ghosal_maity_2021 (see the
% Biometrics entry directly above), so it was a repeated entry. It now has its
% own key.
@article{ghosal_maity_2021b,
  author       = {Ghosal, Rahul and Maity, Arnab},
  title        = {Variable selection in nonparametric functional concurrent regression},
  journal      = {Canadian Journal of Statistics-Revue Canadienne de Statistique},
  volume       = {9},
  year         = {2021},
  month        = sep,
  issn         = {1708-945X},
  doi          = {10.1002/cjs.11654},
  abstractNote = {We develop a new method for variable selection in nonparametric functional concurrent regression. The commonly used functional linear concurrent model (FLCM) is far too restrictive in assuming linearity of the covariate effects, which is not necessarily true in many real‐world applications. The nonparametric functional concurrent model (NPFCM), on the other hand, is much more flexible and can capture complex dynamic relationships present between the response and the covariates. We extend the classically used variable selection methods, e.g., group LASSO, group SCAD and group MCP, to perform variable selection in NPFCM. We show via numerical simulations that the proposed variable selection method with the non‐convex penalties can identify the true functional predictors with minimal false‐positive rate and negligible false‐negative rate. The proposed method also provides better out‐of‐sample prediction accuracy compared to the FLCM in the presence of nonlinear effects of the functional predictors. The proposed method's application is demonstrated by identifying the influential predictor variables in two real data studies: a dietary calcium absorption study, and some bike‐sharing data.},
}

@article{martinez_maity_yolken_sullivan_tzeng_2020,
  author       = {Martinez, Kara and Maity, Arnab and Yolken, Robert H. and Sullivan, Patrick F. and Tzeng, Jung-Ying},
  title        = {Robust kernel association testing ({RobKAT})},
  journal      = {Genetic Epidemiology},
  volume       = {44},
  number       = {3},
  pages        = {272--282},
  year         = {2020},
  month        = apr,
  issn         = {1098-2272},
  doi          = {10.1002/gepi.22280},
  url          = {https://doi.org/10.1002/gepi.22280},
}

@article{ghosal_maity_clark_longo_2020,
  author       = {Ghosal, Rahul and Maity, Arnab and Clark, Timothy and Longo, Stefano B.},
  title        = {Variable selection in functional linear concurrent regression},
  journal      = {Journal of the Royal Statistical Society Series C-Applied Statistics},
  volume       = {69},
  number       = {3},
  pages        = {565--587},
  year         = {2020},
  month        = jun,
  issn         = {1467-9876},
  doi          = {10.1111/rssc.12408},
}

@article{zhao_zhang_clark_maity_wu_2019,
  author       = {Zhao, N. and Zhang, H. and Clark, J. J. and Maity, A. and Wu, M. C.},
  title        = {Composite kernel machine regression based on likelihood ratio test for joint testing of genetic and gene--environment interaction effect},
  journal      = {Biometrics},
  volume       = {75},
  number       = {2},
  pages        = {625--637},
  year         = {2019},
  month        = jun,
  doi          = {10.1111/biom.13003},
  url          = {http://dx.doi.org/10.1111/biom.13003},
}

% Key changed: the exported key contained a space ("et al."), which BibTeX
% cannot parse as part of a citation key.
@article{szatkiewicz_marceau_yilmaz_bulik_crowley_mattheisen_sullivan_lu_maity_tzeng_et_al._2019,
  author       = {Szatkiewicz, Jin and Marceau, Rachel and Yilmaz, Zeynep and Bulik, Cynthia and Crowley, James and Mattheisen, Manuel and Sullivan, Patrick and Lu, Wenbin and Maity, Arnab and Tzeng, Jung-Ying and others},
  title        = {Variance Component Test for Cross-Disorder Pathway Analysis},
  journal      = {European Neuropsychopharmacology},
  volume       = {29},
  pages        = {1204--1205},
  year         = {2019},
  issn         = {1873-7862},
  doi          = {10.1016/j.euroneuro.2018.08.252},
}

% Key changed: the exported key contained a non-ASCII letter. The old key is
% kept as a biblatex "ids" alias; classic BibTeX ignores that field.
@article{tekbudak_alfaro-cordoba_maity_staicu_2018,
  ids          = {tekbudak_alfaro-córdoba_maity_staicu_2018},
  author       = {Tekbudak, Merve Yasemin and Alfaro-C{\'o}rdoba, Marcela and Maity, Arnab and Staicu, Ana-Maria},
  title        = {A comparison of testing methods in scalar-on-function regression},
  journal      = {AStA Advances in Statistical Analysis},
  volume       = {103},
  number       = {3},
  pages        = {411--436},
  year         = {2018},
  month        = oct,
  publisher    = {Springer Science and Business Media LLC},
  issn         = {1863-8171 1863-818X},
  doi          = {10.1007/s10182-018-00337-x},
  url          = {http://dx.doi.org/10.1007/S10182-018-00337-X},
  abstractNote = {A scalar-response functional model describes the association between a scalar response and a set of functional covariates. An important problem in the functional data literature is to test nullity or linearity of the effect of the functional covariate in the context of scalar-on-function regression. This article provides an overview of the existing methods for testing both the null hypotheses that there is no relationship and that there is a linear relationship between the functional covariate and scalar response, and a comprehensive numerical comparison of their performance. The methods are compared for a variety of realistic scenarios: when the functional covariate is observed at dense or sparse grids and measurements include noise or not. Finally, the methods are illustrated on the Tecator data set.},
}

@article{kim_staicu_maity_carroll_ruppert_2018,
  author       = {Kim, Janet S. and Staicu, Ana-Maria and Maity, Arnab and Carroll, Raymond J. and Ruppert, David},
  title        = {Additive Function-on-Function Regression},
  journal      = {Journal of Computational and Graphical Statistics},
  volume       = {27},
  number       = {1},
  pages        = {234--244},
  year         = {2018},
  issn         = {1537-2715},
  doi          = {10.1080/10618600.2017.1356730},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85045617567&partnerID=MN8TOARS},
  abstractNote = {ABSTRACT We study additive function-on-function regression where the mean response at a particular time point depends on the time point itself, as well as the entire covariate trajectory. We develop a computationally efficient estimation methodology based on a novel combination of spline bases with an eigenbasis to represent the trivariate kernel function. We discuss prediction of a new response trajectory, propose an inference procedure that accounts for total variability in the predicted response curves, and construct pointwise prediction intervals. The estimation/inferential procedure accommodates realistic scenarios, such as correlated error structure as well as sparse and/or irregular designs. We investigate our methodology in finite sample size through simulations and two real data applications. Supplementary material for this article is available online.},
}

@article{kim_maity_staicu_2018,
  author       = {Kim, Janet S. and Maity, Arnab and Staicu, Ana-Maria},
  title        = {Additive nonlinear functional concurrent model},
  journal      = {Statistics and Its Interface},
  volume       = {11},
  number       = {4},
  pages        = {669--685},
  year         = {2018},
  publisher    = {International Press of Boston},
  issn         = {1938-7989 1938-7997},
  doi          = {10.4310/SII.2018.v11.n4.a11},
  url          = {http://dx.doi.org/10.4310/sii.2018.v11.n4.a11},
  abstractNote = {We propose a flexible regression model to study the association between a functional response and multiple functional covariates that are observed on the same domain. Specifically, we relate the mean of the current response to current values of the covariates by a sum of smooth unknown bivariate functions, where each of the functions depends on the current value of the covariate and the time point itself. In this framework, we develop estimation methodology that accommodates realistic scenarios where the covariates are sampled with or without error on a sparse and irregular design, and prediction that accounts for unknown model correlation structure. We also discuss the problem of testing the null hypothesis that the covariate has no association with the response. The proposed methods are evaluated numerically through simulations and two real data applications.},
}

@article{bandyopadhyay_maity_2018,
  author       = {Bandyopadhyay, S. and Maity, A.},
  title        = {Asymptotic theory for varying coefficient regression models with dependent data},
  journal      = {Annals of the Institute of Statistical Mathematics},
  volume       = {70},
  number       = {4},
  pages        = {745--759},
  year         = {2018},
  doi          = {10.1007/s10463-017-0607-z},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85014934996&partnerID=MN8TOARS},
}

@article{paul_maity_maiti_2018,
  author       = {Paul, Erina and Maity, Arnab Kumar and Maiti, Raju},
  title        = {{Bayesian} comparative study on binary time series},
  journal      = {Journal of Statistical Computation and Simulation},
  volume       = {88},
  number       = {14},
  pages        = {2811--2826},
  year         = {2018},
  month        = jun,
  publisher    = {Informa UK Limited},
  issn         = {0094-9655 1563-5163},
  doi          = {10.1080/00949655.2018.1488256},
  url          = {http://dx.doi.org/10.1080/00949655.2018.1488256},
  abstractNote = {ABSTRACT In this paper, we consider the Bayesian analysis of binary time series with different priors, namely normal, Students' t, and Jeffreys prior, and compare the results with the frequentist methods through some simulation experiments and one real data on daily rainfall in inches at Mount Washington, NH. Among Bayesian methods, our results show that the Jeffreys prior perform better in most of the situations for both the simulation and the rainfall data. Furthermore, among weakly informative priors considered, Student's t prior with 7 degrees of freedom fits the data most adequately.},
}

@article{maity_pradhan_das_2018,
  author       = {Maity, Arnab Kumar and Pradhan, Vivek and Das, Ujjwal},
  title        = {Bias Reduction in Logistic Regression with Missing Responses When the Missing Data Mechanism is Nonignorable},
  journal      = {The American Statistician},
  volume       = {73},
  number       = {4},
  pages        = {340--349},
  year         = {2018},
  month        = jul,
  publisher    = {Informa UK Limited},
  issn         = {0003-1305 1537-2731},
  doi          = {10.1080/00031305.2017.1407359},
  url          = {http://dx.doi.org/10.1080/00031305.2017.1407359},
  abstractNote = {ABSTRACT In logistic regression with nonignorable missing responses, Ibrahim and Lipsitz proposed a method for estimating regression parameters. It is known that the regression estimates obtained by using this method are biased when the sample size is small. Also, another complexity arises when the iterative estimation process encounters separation in estimating regression coefficients. In this article, we propose a method to improve the estimation of regression coefficients. In our likelihood-based method, we penalize the likelihood by multiplying it by a noninformative Jeffreys prior as a penalty term. The proposed method reduces bias and is able to handle the issue of separation. Simulation results show substantial bias reduction for the proposed method as compared to the existing method. Analyses using real world data also support the simulation findings. An R package called brlrmr is developed implementing the proposed method and the Ibrahim and Lipsitz method.},
}

@article{davenport_maity_baladandayuthapani_2018,
  author       = {Davenport, Clemontina A. and Maity, Arnab and Baladandayuthapani, Veerabhadran},
  title        = {Functional interaction-based nonlinear models with application to multiplatform genomics data},
  journal      = {Statistics in Medicine},
  volume       = {37},
  number       = {18},
  pages        = {2715--2733},
  year         = {2018},
  month        = aug,
  issn         = {1097-0258},
  doi          = {10.1002/sim.7671},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85046549815&partnerID=MN8TOARS},
  abstractNote = {Functional regression allows for a scalar response to be dependent on a functional predictor; however, not much work has been done when a scalar exposure that interacts with the functional covariate is introduced. In this paper, we present 2 functional regression models that account for this interaction and propose 2 novel estimation procedures for the parameters in these models. These estimation methods allow for a noisy and/or sparsely observed functional covariate and are easily extended to generalized exponential family responses. We compute standard errors of our estimators, which allows for further statistical inference and hypothesis testing. We compare the performance of the proposed estimators to each other and to one found in the literature via simulation and demonstrate our methods using a real data example.},
}

@article{maity_zhao_sullivan_tzeng_2018,
  author       = {Maity, Arnab and Zhao, Jing and Sullivan, Patrick F. and Tzeng, Jung-Ying},
  title        = {Inference on phenotype-specific effects of genes using multivariate kernel machine regression},
  journal      = {Genetic Epidemiology},
  volume       = {42},
  number       = {1},
  pages        = {64--79},
  year         = {2018},
  month        = feb,
  publisher    = {Wiley-Blackwell},
  issn         = {1098-2272},
  doi          = {10.1002/gepi.22096},
  url          = {https://doi.org/10.1002/gepi.22096},
}

@article{luo_maity_wu_smith_duan_li_tzeng_2018,
  author       = {Luo, Yiwen and Maity, Arnab and Wu, Michael C. and Smith, Chris and Duan, Qing and Li, Yun and Tzeng, Jung-Ying},
  title        = {On the substructure controls in rare variant analysis: Principal components or variance components?},
  journal      = {Genetic Epidemiology},
  volume       = {42},
  number       = {3},
  pages        = {276--287},
  year         = {2018},
  month        = apr,
  issn         = {1098-2272},
  doi          = {10.1002/gepi.22102},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85039155216&partnerID=MN8TOARS},
}

% Entry type changed from misc to article: the record carries journal, volume,
% number and pages. The abstractNote text is the exported author/affiliation
% block, kept verbatim.
@article{kong_maity_hsu_tzeng_2018,
  author       = {Kong, Dehan and Maity, Arnab and Hsu, Fang-Chi and Tzeng, Jung-Ying},
  title        = {Rejoinder to ``A note on testing and estimation in marker-set association study using semiparametric quantile regression kernel machine''},
  journal      = {Biometrics},
  volume       = {74},
  number       = {2},
  pages        = {767--768},
  year         = {2018},
  month        = jun,
  issn         = {1541-0420},
  doi          = {10.1111/biom.12786},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85032786553&partnerID=MN8TOARS},
  abstractNote = {Dehan Kong , Arnab Maity, Fang-Chi Hsu, and Jung-Ying Tzeng Department of Statistical Sciences, University of Toronto, Ontario, Canada Department of Statistics, North Carolina State University, North Carolina, U.S.A. Department of Biostatistical Sciences, Wake Forest University, North Carolina, U.S.A. Department of Statistics and Bioinformatics Research Center North Carolina State University, North Carolina, U.S.A. Department of Statistics, National Cheng-Kung University, Taiwan ∗email: kongdehan@utstat.toronto.edu},
}

@article{davenport_maity_sullivan_tzeng_2017,
  author       = {Davenport, Clemontina A. and Maity, Arnab and Sullivan, Patrick F. and Tzeng, Jung-Ying},
  title        = {A Powerful Test for {SNP} Effects on Multivariate Binary Outcomes Using Kernel Machine Regression},
  journal      = {Statistics in Biosciences},
  volume       = {10},
  number       = {1},
  pages        = {117--138},
  year         = {2017},
  month        = mar,
  publisher    = {Springer Science and Business Media LLC},
  issn         = {1867-1764 1867-1772},
  doi          = {10.1007/s12561-017-9189-9},
  url          = {http://dx.doi.org/10.1007/S12561-017-9189-9},
  abstractNote = {Evaluating multiple binary outcomes is common in genetic studies of complex diseases. These outcomes are often correlated because they are collected from the same individual and they may share common marker effects. In this paper, we propose a procedure to test for effect of a single nucleotide polymorphism-set on multiple, possibly correlated, binary responses. We develop a score-based test using a non-parametric modeling framework that jointly models the global effect of the marker set. We account for the non-linear effects and potentially complicated interaction between markers using reproducing kernels. Our testing procedure only requires estimation under the null hypothesis and we use multivariate generalized estimating equations to estimate the model components to account for the correlation among the outcomes. We evaluate finite sample performance of our test via simulation study and demonstrate our methods using the Clinical Antipsychotic Trials of Intervention Effectiveness antibody study data and the CoLaus study data.},
}

@article{zhan_tong_zhao_maity_wu_chen_2017,
  author       = {Zhan, Xiang and Tong, Xingwei and Zhao, Ni and Maity, Arnab and Wu, Michael C. and Chen, Jun},
  title        = {A small-sample multivariate kernel machine test for microbiome association studies},
  journal      = {Genetic Epidemiology},
  volume       = {41},
  number       = {3},
  pages        = {210--220},
  year         = {2017},
  month        = apr,
  issn         = {1098-2272},
  doi          = {10.1002/gepi.22030},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85014518911&partnerID=MN8TOARS},
  abstractNote = {High‐throughput sequencing technologies have enabled large‐scale studies of the role of the human microbiome in health conditions and diseases. Microbial community level association test, as a critical step to establish the connection between overall microbiome composition and an outcome of interest, has now been routinely performed in many studies. However, current microbiome association tests all focus on a single outcome. It has become increasingly common for a microbiome study to collect multiple, possibly related, outcomes to maximize the power of discovery. As these outcomes may share common mechanisms, jointly analyzing these outcomes can amplify the association signal and improve statistical power to detect potential associations. We propose the multivariate microbiome regression‐based kernel association test (MMiRKAT) for testing association between multiple continuous outcomes and overall microbiome composition, where the kernel used in MMiRKAT is based on Bray‐Curtis or UniFrac distance. MMiRKAT directly regresses all outcomes on the microbiome profiles via a semiparametric kernel machine regression framework, which allows for covariate adjustment and evaluates the association via a variance‐component score test. Because most of the current microbiome studies have small sample sizes, a novel small‐sample correction procedure is implemented in MMiRKAT to correct for the conservativeness of the association test when the sample size is small or moderate. The proposed method is assessed via simulation studies and an application to a real data set examining the association between host gene expression and mucosal microbiome composition. We demonstrate that MMiRKAT is more powerful than large sample based multivariate kernel association test, while controlling the type I error. A free implementation of MMiRKAT in R language is available at http://research.fhcrc.org/wu/en.html.},
}

% Key changed: the exported key contained a space ("et al."), which BibTeX
% cannot parse as part of a citation key.
@article{luo_mccullough_tzeng_darrah_vengosh_maguire_maity_samuel-hodge_murphy_mendez_et_al._2017,
  author       = {Luo, Y. and McCullough, L. E. and Tzeng, J.-Y. and Darrah, T. and Vengosh, A. and Maguire, R. L. and Maity, Arnab and Samuel-Hodge, C. and Murphy, S. K. and Mendez, M. A. and others},
  title        = {Maternal blood cadmium, lead and arsenic levels, nutrient combinations, and offspring birthweight},
  journal      = {BMC Public Health},
  volume       = {17},
  number       = {1},
  year         = {2017},
  doi          = {10.1186/s12889-017-4225-8},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85018564741&partnerID=MN8TOARS},
  abstractNote = {Cadmium (Cd), lead (Pb) and arsenic (As) are common environmental contaminants that have been associated with lower birthweight. Although some essential metals may mitigate exposure, data are inconsistent. This study sought to evaluate the relationship between toxic metals, nutrient combinations and birthweight among 275 mother-child pairs. Non-essential metals, Cd, Pb, As, and essential metals, iron (Fe), zinc (Zn), selenium (Se), copper (Cu), calcium (Ca), magnesium (Mg), and manganese (Mn) were measured in maternal whole blood obtained during the first trimester using inductively coupled plasma mass spectrometry. Folate concentrations were measured by microbial assay. Birthweight was obtained from medical records. We used quantile regression to evaluate the association between toxic metals and nutrients due to their underlying wedge-shaped relationship. Ordinary linear regression was used to evaluate associations between birth weight and toxic metals. After multivariate adjustment, the negative association between Pb or Cd and a combination of Fe, Se, Ca and folate was robust, persistent and dose-dependent (p < 0.05). However, a combination of Zn, Cu, Mn and Mg was positively associated with Pb and Cd levels. While prenatal blood Cd and Pb were also associated with lower birthweight. Fe, Se, Ca and folate did not modify these associations. Small sample size and cross-sectional design notwithstanding, the robust and persistent negative associations between some, but not all, nutrient combinations with these ubiquitous environmental contaminants suggest that only some recommended nutrient combinations may mitigate toxic metal exposure in chronically exposed populations. Larger longitudinal studies are required to confirm these findings.},
}

% Merged: the export listed this work twice under the same key maity_2017
% (once as an article, once as a misc record holding a subset of the fields).
% The single entry below keeps every field; the journal name is the full form
% taken from the duplicate record.
@article{maity_2017,
  author       = {Maity, Arnab},
  title        = {Nonparametric functional concurrent regression models},
  journal      = {Wiley Interdisciplinary Reviews: Computational Statistics},
  volume       = {9},
  number       = {2},
  year         = {2017},
  month        = mar,
  doi          = {10.1002/wics.1394},
  url          = {https://doi.org/10.1002/wics.1394},
  abstractNote = {Function‐on‐function regression refers to the situation where both independent and dependent variables in a regression model are of functional nature. Functional concurrent regression is a specific type of function‐on‐function regression that relates the response function at a specific point to the covariate value at that point and the point itself. Standard functional concurrent models are linear (a linear combination of the covariates is used), and often criticized due to their linearity assumption and lack of flexibility. This gives rise to nonparametric functional concurrent regression that models the response function at a specific point using a multivariate nonparametric function of both the point and the covariate value at that point. Such models allow for much more flexibility and predictive accuracy, especially when the underlying relationship is nonlinear. In the past decade, several methods have been proposed to perform estimation, prediction and inference in the nonparametric concurrent models using various methods such as spline smoothing, Gaussian process regression and local polynomial kernel regression. Such models have been shown to be useful tools in functional regression as well as stepping stone for further development. WIREs Comput Stat 2017, 9:e1394. doi: 10.1002/wics.1394},
}

@article{terry_zhang_maity_arshad_karmaus_2017,
  author       = {Terry, William and Zhang, Hongmei and Maity, Arnab and Arshad, Hasan and Karmaus, Wilfried},
  title        = {Unified variable selection in semi-parametric models},
  journal      = {Statistical Methods in Medical Research},
  volume       = {26},
  number       = {6},
  pages        = {2821--2831},
  year         = {2017},
  month        = dec,
  publisher    = {SAGE Publications},
  issn         = {1477-0334},
  doi          = {10.1177/0962280215610928},
  abstractNote = {We propose a Bayesian variable selection method in semi-parametric models with applications to genetic and epigenetic data (e.g., single nucleotide polymorphisms and DNA methylation, respectively). The data are individually standardized to reduce heterogeneity and facilitate simultaneous selection of categorical (single nucleotide polymorphisms) and continuous (DNA methylation) variables. The Gaussian reproducing kernel is applied to the transformed data to evaluate joint effect of the variables, which may include complex interactions between, e.g., single nucleotide polymorphisms and DNA methylation. Indicator variables are introduced to the model for the purpose of variable selection. The method is demonstrated and evaluated using simulations under different scenarios. We apply the method to identify informative DNA methylation sites and single nucleotide polymorphisms in a set of genes based on their joint effect on allergic sensitization. The selected single nucleotide polymorphisms and methylation sites have the potential to serve as early markers for allergy prediction, and consequently benefit medical and clinical research to prevent allergy before its manifestation.},
}

@article{kong_staicu_maity_2016,
  author       = {Kong, Dehan and Staicu, Ana-Maria and Maity, Arnab},
  title        = {Classical testing in functional linear models},
  journal      = {Journal of Nonparametric Statistics},
  volume       = {28},
  number       = {4},
  pages        = {813--838},
  year         = {2016},
  month        = dec,
  issn         = {1029-0311},
  doi          = {10.1080/10485252.2016.1231806},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84988336276&partnerID=MN8TOARS},
  abstractNote = {ABSTRACT We extend four tests common in classical regression – Wald, score, likelihood ratio and F tests – to functional linear regression, for testing the null hypothesis, that there is no association between a scalar response and a functional covariate. Using functional principal component analysis, we re-express the functional linear model as a standard linear model, where the effect of the functional covariate can be approximated by a finite linear combination of the functional principal component scores. In this setting, we consider application of the four traditional tests. The proposed testing procedures are investigated theoretically for densely observed functional covariates when the number of principal components diverges. Using the theoretical distribution of the tests under the alternative hypothesis, we develop a procedure for sample size calculation in the context of functional linear regression. The four tests are further compared numerically for both densely and sparsely observed noisy functional data in simulation experiments and using two real data applications.},
}

@article{usset_staicu_maity_2016,
  author       = {Usset, Joseph and Staicu, Ana-Maria and Maity, Arnab},
  title        = {Interaction models for functional regression},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {94},
  pages        = {317--329},
  year         = {2016},
  month        = feb,
  publisher    = {Elsevier BV},
  issn         = {0167-9473},
  doi          = {10.1016/j.csda.2015.08.020},
  url          = {http://dx.doi.org/10.1016/J.CSDA.2015.08.020},
  abstractNote = {A functional regression model with a scalar response and multiple functional predictors is proposed that accommodates two-way interactions in addition to their main effects. The proposed estimation procedure models the main effects using penalized regression splines, and the interaction effect by a tensor product basis. Extensions to generalized linear models and data observed on sparse grids or with measurement error are presented. A hypothesis testing procedure for the functional interaction effect is described. The proposed method can be easily implemented through existing software. Numerical studies show that fitting an additive model in the presence of interaction leads to both poor estimation performance and lost prediction power, while fitting an interaction model where there is in fact no interaction leads to negligible losses. The methodology is illustrated on the AneuRisk65 study data.},
}

% The next entry continues past the end of the reviewed span; its visible part
% is left exactly as exported.
@inbook{tzeng_maity_2016, title={Marker-set Approaches for Assessing Gene-Environment Interactions at Gene Level}, booktitle={Statistical Approaches to Gene x Environment Interactions for Complex Phenotypes}, author={Tzeng, J.Y. 
and Maity, A.}, year={2016} } @article{kong_maity_hsu_tzeng_biometrics_2016, title={Testing and estimation in marker-set association study using semiparametric quantile regression kernel machine}, volume={72}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84978986071&partnerID=MN8TOARS}, DOI={10.1111/biom.12438}, abstractNote={Summary}, number={2}, journal={BIOMETRICS}, author={Kong, D. and Maity, A. and Hsu, F. C. and Tzeng, J. Y.}, year={2016}, month={Jun}, pages={364–371} } @article{zhang_staicu_maity_2016, title={Testing for additivity in non-parametric regression}, volume={44}, ISSN={["1708-945X"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84982980889&partnerID=MN8TOARS}, DOI={10.1002/cjs.11295}, abstractNote={Abstract}, number={4}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, publisher={Wiley-Blackwell}, author={Zhang, Yichi and Staicu, Ana-Maria and Maity, Arnab}, year={2016}, month={Dec}, pages={445–462} } @article{zhang_maity_arshad_holloway_karmaus_lawson_lee_macnab_2016, title={Variable selection in semi-parametric models}, volume={25}, ISSN={["1477-0334"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84983741909&partnerID=MN8TOARS}, DOI={10.1177/0962280213499679}, abstractNote={We propose Bayesian variable selection methods in semi-parametric models in the framework of partially linear Gaussian and problit regressions. Reproducing kernels are utilized to evaluate possibly non-linear joint effect of a set of variables. Indicator variables are introduced into the reproducing kernels for the inclusion or exclusion of a variable. Different scenarios based on posterior probabilities of including a variable are proposed to select important variables. Simulations are used to demonstrate and evaluate the methods. 
It was found that the proposed methods can efficiently select the correct variables regardless of the feature of the effects, linear or non-linear in an unknown form. The proposed methods are applied to two real data sets to identify cytosine phosphate guanine methylation sites associated with maternal smoking and cytosine phosphate guanine sites associated with cotinine levels with creatinine levels adjusted. The selected methylation sites have the potential to advance our understanding of the underlying mechanism for the impact of smoking exposure on health outcomes, and consequently benefit medical research in disease intervention.}, number={4}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Zhang, H. and Maity, Arnab and Arshad, H. and Holloway, J. and Karmaus, W. and Lawson, A. B. and Lee, D. and MacNab, Y.}, year={2016}, month={Aug}, pages={1736–1752} } @article{wang_maity_luo_neely_tzeng_2015, title={Complete Effect-Profile Assessment in Association Studies With Multiple Genetic and Multiple Environmental Factors}, volume={39}, ISSN={["1098-2272"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84921048438&partnerID=MN8TOARS}, DOI={10.1002/gepi.21877}, abstractNote={ABSTRACT}, number={2}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Wang, Zhi and Maity, Arnab and Luo, Yiwen and Neely, Megan L. 
and Tzeng, Jung-Ying}, year={2015}, month={Feb}, pages={122–133} } @article{usset_maity_staicu_schwartzman_2015, title={Glacier Terminus Estimation from Landsat Image Intensity Profiles}, volume={20}, ISSN={1085-7117 1537-2693}, url={http://dx.doi.org/10.1007/S13253-015-0207-4}, DOI={10.1007/s13253-015-0207-4}, number={2}, journal={Journal of Agricultural, Biological, and Environmental Statistics}, publisher={Springer Science and Business Media LLC}, author={Usset, Joseph and Maity, Arnab and Staicu, Ana-Maria and Schwartzman, Armin}, year={2015}, month={May}, pages={279–298} } @article{zhao_bell_maity_staicu_joubert_london_wu_2015, title={Global Analysis of Methylation Profiles From High Resolution CpG Data}, volume={39}, ISSN={["1098-2272"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84921023434&partnerID=MN8TOARS}, DOI={10.1002/gepi.21874}, abstractNote={ABSTRACT}, number={2}, journal={GENETIC EPIDEMIOLOGY}, author={Zhao, Ni and Bell, Douglas A. and Maity, Arnab and Staicu, Ana-Maria and Joubert, Bonnie R. and London, Stephanie J. and Wu, Michael C.}, year={2015}, month={Feb}, pages={53–64} } @article{wang_maity_hsiao_voora_kaddurah-daouk_tzeng_2015, title={Module-based association analysis for omics data with network structure.}, volume={10}, url={http://europepmc.org/abstract/med/25822417}, DOI={10.1371/journal.pone.0122309}, abstractNote={Module-based analysis (MBA) aims to evaluate the effect of a group of biological elements sharing common features, such as SNPs in the same gene or metabolites in the same pathways, and has become an attractive alternative to traditional single bio-element approaches. Because bio-elements regulate and interact with each other as part of network, incorporating network structure information can more precisely model the biological effects, enhance the ability to detect true associations, and facilitate our understanding of the underlying biological mechanisms. 
However, most MBA methods ignore the network structure information, which depicts the interaction and regulation relationship among basic functional units in biology system. We construct the connectivity kernel and the topology kernel to capture the relationship among bio-elements in a module, and use a kernel machine framework to evaluate the joint effect of bio-elements. Our proposed kernel machine approach directly incorporates network structure so to enhance the study efficiency; it can assess interactions among modules, account covariates, and is computational efficient. Through simulation studies and real data application, we demonstrate that the proposed network-based methods can have markedly better power than the approaches ignoring network information under a range of scenarios.}, number={3}, journal={PLoS ONE}, author={Wang, Z. and Maity, A. and Hsiao, C. K. and Voora, D. and Kaddurah-Daouk, R. and Tzeng, J. Y.}, year={2015}, pages={e0122309} } @article{davenport_maity_wu_2015, title={Parametrically guided estimation in nonparametric varying coefficient models with quasi-likelihood}, volume={27}, ISSN={["1029-0311"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84929278394&partnerID=MN8TOARS}, DOI={10.1080/10485252.2015.1026903}, abstractNote={Varying coefficient models (VCMs) allow us to generalise standard linear regression models to incorporate complex covariate effects by modelling the regression coefficients as functions of another covariate. For nonparametric varying coefficients, we can borrow the idea of parametrically guided estimation to improve asymptotic bias. In this paper, we develop a guided estimation procedure for the nonparametric VCMs. Asymptotic properties are established for the guided estimators and a method of bandwidth selection via bias-variance tradeoff is proposed. 
We compare the performance of the guided estimator with that of the unguided estimator via both simulation and real data examples.}, number={2}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Davenport, Clemontina A. and Maity, Arnab and Wu, Yichao}, year={2015}, month={Apr}, pages={195–213} } @article{urrutia_lee_maity_zhao_shen_li_wu_2015, title={Rare variant testing across methods and thresholds using the multi-kernel sequence kernel association test (MK-SKAT)}, volume={8}, ISSN={["1938-7997"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84945274985&partnerID=MN8TOARS}, DOI={10.4310/sii.2015.v8.n4.a8}, abstractNote={Analysis of rare genetic variants has focused on region-based analysis wherein a subset of the variants within a genomic region is tested for association with a complex trait. Two important practical challenges have emerged. First, it is difficult to choose which test to use. Second, it is unclear which group of variants within a region should be tested. Both depend on the unknown true state of nature. Therefore, we develop the Multi-Kernel SKAT (MK-SKAT) which tests across a range of rare variant tests and groupings. Specifically, we demonstrate that several popular rare variant tests are special cases of the sequence kernel association test which compares pair-wise similarity in trait value to similarity in the rare variant genotypes between subjects as measured through a kernel function. Choosing a particular test is equivalent to choosing a kernel. Similarly, choosing which group of variants to test also reduces to choosing a kernel. Thus, MK-SKAT uses perturbation to test across a range of kernels. 
Simulations and real data analyses show that our framework controls type I error while maintaining high power across settings: MK-SKAT loses power when compared to the kernel for a particular scenario but has much greater power than poor choices.}, number={4}, journal={STATISTICS AND ITS INTERFACE}, author={Urrutia, Eugene and Lee, Seunggeun and Maity, Arnab and Zhao, Ni and Shen, Judong and Li, Yun and Wu, Michael C.}, year={2015}, pages={495–505} } @article{maity_williams_ryan_missmer_coull_hauser_2014, title={Analysis of in vitro fertilization data with multiple outcomes using discrete time-to-event analysis}, volume={33}, ISSN={["1097-0258"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84898005340&partnerID=MN8TOARS}, DOI={10.1002/sim.6050}, abstractNote={In vitro fertilization (IVF) is an increasingly common method of assisted reproductive technology. Because of the careful observation and follow‐up required as part of the procedure, IVF studies provide an ideal opportunity to identify and assess clinical and demographic factors along with environmental exposures that may impact successful reproduction. A major challenge in analyzing data from IVF studies is handling the complexity and multiplicity of outcome, resulting from both multiple opportunities for pregnancy loss within a single IVF cycle in addition to multiple IVF cycles. To date, most evaluations of IVF studies do not make use of full data because of its complex structure. In this paper, we develop statistical methodology for analysis of IVF data with multiple cycles and possibly multiple failure types observed for each individual. We develop a general analysis framework based on a generalized linear modeling formulation that allows implementation of various types of models including shared frailty models, failure‐specific frailty models, and transitional models, using standard software. 
We apply our methodology to data from an IVF study conducted at the Brigham and Women's Hospital, Massachusetts. We also summarize the performance of our proposed methods on the basis of a simulation study. Copyright © 2013 John Wiley & Sons, Ltd.}, number={10}, journal={STATISTICS IN MEDICINE}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Williams, Paige L. and Ryan, Louise and Missmer, Stacey A. and Coull, Brent A. and Hauser, Russ}, year={2014}, month={May}, pages={1738–1749} } @article{carmona_sofer_hutchinson_cantone_coull_maity_vokonas_lin_schwartz_baccarelli_et_al_2014, title={Short-Term airborne particulate matter exposure alters the epigenetic landscape of human genes associated with the mitogen-Activated protein kinase network: A cross-sectional study}, volume={13}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84988698028&partnerID=MN8TOARS}, DOI={10.1186/1476-069X-13-94}, abstractNote={Exposure to air particulate matter is known to elevate blood biomarkers of inflammation and to increase cardiopulmonary morbidity and mortality. Major components of airborne particulate matter typically include black carbon from traffic and sulfates from coal-burning power plants. DNA methylation is thought to be sensitive to these environmental toxins and possibly mediate environmental effects on clinical outcomes via regulation of gene networks. The underlying mechanisms may include epigenetic modulation of major inflammatory pathways, yet the details remain unclear. We sought to elucidate how short-term exposure to air pollution components, singly and/or in combination, alter blood DNA methylation in certain inflammation-associated gene networks, MAPK and NF-κB, which may transmit the environmental signal(s) and influence the inflammatory pathway in vivo. 
To this end, we utilized a custom-integrated workflow-molecular processing, pollution surveillance, biostatical analysis, and bioinformatic visualization-to map novel human (epi)gene pathway-environment interactions. Specifically, out of 84 MAPK pathway genes considered, we identified 11 whose DNA methylation status was highly associated with black carbon exposure, after adjusting for potential confounders-age, sulfate exposure, smoking, blood cell composition, and blood pressure. Moreover, after adjusting for these confounders, multi-pollutant analysis of synergistic DNA methylations significantly associated with sulfate and BC exposures yielded 14 MAPK genes. No associations were found with the NF-κB pathway. Exposure to short-term air pollution components thus resulted in quantifiable epigenetic changes in the promoter areas of MAPK pathway genes. Bioinformatic mapping of single- vs. multi-exposure-associated epigenetic changes suggests that these alterations might affect biological pathways in nuanced ways that are not simply additive or fully predictable via individual-level exposure assessments.}, number={1}, journal={Environmental Health: A Global Access Science Source}, author={Carmona, J. J. and Sofer, T. and Hutchinson, J. and Cantone, L. and Coull, B. and Maity, A. and Vokonas, P. and Lin, X. and Schwartz, J. and Baccarelli, A. A. and others}, year={2014}, pages={94} } @incollection{liu_maity_lin_wright_christiani_2013, title={Design and Analysis Issues in Gene and Environment Studies}, volume={11}, DOI={10.1201/b16304-14}, booktitle={Exploring Connections Between Genetic Mechanisms and Disease Expression}, publisher={Apple Academic Press}, author={Liu, Chen-Yu and Maity, Arnab and Lin, Xihong and Wright, Robert and Christiani, David}, year={2013}, pages={339–370} } @article{sofer_baccarelli_cantone_coull_maity_lin_schwartz_2013, title={Exposure to airborne particulate matter is associated with methylation pattern in the asthma pathway}, volume={5}, ISSN={["1750-192X"]}, 
url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84876099049&partnerID=MN8TOARS}, DOI={10.2217/epi.13.16}, abstractNote={ Background: Asthma exacerbation and other respiratory symptoms are associated with exposure to air pollution. Since environment affects gene methylation, it is hypothesized that asthmatic responses to pollution are mediated through methylation. Materials & methods: We study the possibility that airborne particulate matter affects gene methylation in the asthma pathway. We measured methylation array data in clinic visits of 141 subjects from the Normative Aging Study. Black carbon and sulfate measures from a central monitoring site were recorded and 30-day averages were calculated for each clinic visit. Gene-specific methylation scores were calculated for the genes in the asthma pathway, and the association between the methylation in the asthma pathway and the pollution measures was analyzed using sparse Canonical Correlation Analysis. Results: The analysis found that exposures to black carbon and sulfate were significantly associated with the methylation pattern in the asthma pathway (p-values 0.05 and 0.02, accordingly). Specific genes that contributed to this association were identified. Conclusion: These results suggest that the effect of air pollution on asthmatic and respiratory responses may be mediated through gene methylation. 
}, number={2}, journal={EPIGENOMICS}, publisher={Future Medicine Ltd}, author={Sofer, Tamar and Baccarelli, Andrea and Cantone, Laura and Coull, Brent and Maity, Arnab and Lin, Xihong and Schwartz, Joel}, year={2013}, month={Apr}, pages={147–154} } @article{wu_maity_lee_simmons_harmon_lin_engel_molldrem_armistead_2013, title={Kernel Machine SNP-Set Testing Under Multiple Candidate Kernels}, volume={37}, ISSN={["0741-0395"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84875658304&partnerID=MN8TOARS}, DOI={10.1002/gepi.21715}, abstractNote={ABSTRACT}, number={3}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Wu, Michael C. and Maity, Arnab and Lee, Seunggeun and Simmons, Elizabeth M. and Harmon, Quaker E. and Lin, Xinyi and Engel, Stephanie M. and Molldrem, Jeffrey J. and Armistead, Paul M.}, year={2013}, month={Apr}, pages={267–275} } @article{xun_cao_mallick_maity_carroll_2013, title={Parameter Estimation of Partial Differential Equation Models}, volume={108}, ISSN={["1537-274X"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84890083793&partnerID=MN8TOARS}, DOI={10.1080/01621459.2013.794730}, abstractNote={Partial differential equation (PDE) models are commonly used to model complex dynamic systems in applied sciences such as biology and finance. The forms of these PDE models are usually proposed by experts based on their prior knowledge and understanding of the dynamic system. Parameters in PDE models often have interesting scientific interpretations, but their values are often unknown and need to be estimated from the measurements of the dynamic system in the presence of measurement errors. Most PDEs used in practice have no analytic solutions, and can only be solved with numerical methods. Currently, methods for estimating PDE parameters require repeatedly solving PDEs numerically under thousands of candidate parameter values, and thus the computational load is high. 
In this article, we propose two methods to estimate parameters in PDE models: a parameter cascading method and a Bayesian approach. In both methods, the underlying dynamic process modeled with the PDE model is represented via basis function expansion. For the parameter cascading method, we develop two nested levels of optimization to estimate the PDE parameters. For the Bayesian method, we develop a joint model for data and the PDE and develop a novel hierarchical model allowing us to employ Markov chain Monte Carlo (MCMC) techniques to make posterior inference. Simulation studies show that the Bayesian method and parameter cascading method are comparable, and both outperform other available methods in terms of estimation accuracy. The two methods are demonstrated by estimating parameters in a PDE model from long-range infrared light detection and ranging data. Supplementary materials for this article are available online.}, number={503}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, publisher={Informa UK Limited}, author={Xun, Xiaolei and Cao, Jiguo and Mallick, Bani and Maity, Arnab and Carroll, Raymond J.}, year={2013}, month={Sep}, pages={1009–1020} } @article{fan_maity_wang_wu_2013, title={Parametrically guided generalised additive models with application to mergers and acquisitions data}, volume={25}, ISSN={["1048-5252"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84875865626&partnerID=MN8TOARS}, DOI={10.1080/10485252.2012.735233}, abstractNote={Generalised nonparametric additive models present a flexible way to evaluate the effects of several covariates on a general outcome of interest via a link function. In this modelling framework, one assumes that the effect of each of the covariates is nonparametric and additive. However, in practice, often there is prior information available about the shape of the regression functions, possibly from pilot studies or exploratory analysis. 
In this paper, we consider such situations and propose an estimation procedure where the prior information is used as a parametric guide to fit the additive model. Specifically, we first posit a parametric family for each of the regression functions using the prior information (parametric guides). After removing these parametric trends, we then estimate the remainder of the nonparametric functions using a nonparametric generalised additive model and form the final estimates by adding back the parametric trend. We investigate the asymptotic properties of the estimates and show that when a good guide is chosen, the asymptotic variance of the estimates can be reduced significantly while keeping the asymptotic variance same as the unguided estimator. We observe the performance of our method via a simulation study and demonstrate our method by applying to a real data set on mergers and acquisitions.}, number={1}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, publisher={Informa UK Limited}, author={Fan, Jianqing and Maity, Arnab and Wang, Yihui and Wu, Yichao}, year={2013}, month={Mar}, pages={109–128} } @article{gertheiss_maity_staicu_2013, title={Variable selection in generalized functional linear models}, volume={2}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/sta4.20}, DOI={10.1002/sta4.20}, abstractNote={Modern research data, where a large number of functional predictors is collected on few subjects are becoming increasingly common. In this paper we propose a variable selection technique, when the predictors are functional and the response is scalar. Our approach is based on adopting a generalized functional linear model framework and using a penalized likelihood method that simultaneously controls the sparsity of the model and the smoothness of the corresponding coefficient functions by adequate penalization. The methodology is characterized by high predictive accuracy, and yields interpretable models, while retaining computational efficiency. 
The proposed method is investigated numerically in finite samples, and applied to a diffusion tensor imaging tractography data set and a chemometric data set. Copyright © 2013 John Wiley & Sons Ltd}, number={1}, journal={Stat}, publisher={Wiley}, author={Gertheiss, Jan and Maity, Arnab and Staicu, Ana-Maria}, year={2013}, month={May}, pages={86–101} } @article{maity_2012, title={A powerful test for comparing multiple regression functions}, volume={24}, ISSN={["1048-5252"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84865271146&partnerID=MN8TOARS}, DOI={10.1080/10485252.2012.677842}, abstractNote={In this article, we address the important problem of comparison of two or more population regression functions. Recently, Pardo-Fernández, Van Keilegom and González-Manteiga [2007, ‘Testing for Equality of k Regression Curves’, Statistica Sinica, 17, 1115–1137] developed test statistics for simple nonparametric regression models: Y ij =θ j (Z ij )+σ j (Z ij )ε ij , based on empirical distributions of the errors in each population j=1, …, J. In this article, we propose a test for equality of the θ j (·) based on the concept of generalised likelihood ratio type statistics. We also generalise our test for other nonparametric regression set-ups, for example, nonparametric logistic regression, where the log-likelihood for population j is any general smooth function ℒ{Y j , θ j (Z j )}. We describe a resampling procedure to obtain the critical values of the test. In addition, we present a simulation study to evaluate the performance of the proposed test and compare our results to those in Pardo-Fernández et al. 
[2007, ‘Testing for Equality of k Regression Curves’, Statistica Sinica, 17, 1115–1137].}, number={3}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, publisher={Informa UK Limited}, author={Maity, Arnab}, year={2012}, pages={563–576} } @article{mahalingaiah_missmer_maity_williams_meeker_berry_ehrlich_perry_cramer_hauser_et_al_2012, title={Association of Hexachlorobenzene (HCB), Dichlorodiphenyltrichloroethane (DDT), and Dichlorodiphenyldichloroethylene (DDE) with in Vitro Fertilization (IVF) Outcomes}, volume={120}, ISSN={["1552-9924"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84856736612&partnerID=MN8TOARS}, DOI={10.1289/ehp.1103696}, abstractNote={Background: Hexachlorobenzene (HCB), dichlorodiphenyltrichloroethane (DDT), and dichlorodiphenyldichloroethylene (DDE) are persistent chlorinated pesticides with endocrine activity that may adversely affect the early stages of human reproduction. Objective: Our goal was to determine the association of serum levels of HCB, DDT, and DDE with implantation failure, chemical pregnancy, and spontaneous abortion in women undergoing in vitro fertilization (IVF) from 1994 to 2003. Methods: Levels of HCB and congeners of DDT and DDE were measured in serum collected during the follicular phase. Multivariable-adjusted statistical models accommodating multiple outcomes and multiple cycles per woman were used to estimate the relation between serum pesticide levels and IVF outcomes. Results: A total of 720 women with a mean ± SD age 35.4 ± 4.2 years at enrollment contributed 774 IVF cycles. All samples had detectable levels of HCB, DDT, and DDE, with median levels of 0.087 ng/g serum for HCB, 1.12 ng/g serum for total DDT, and 1.04 ng/g serum for p,p´-DDE. 
Compared with the lowest quartile (Q1) of HCB, the lipid- and multivariable-adjusted odds ratio (OR) for failed implantation was significantly elevated for those with higher HCB quartiles [Q2–Q4; adjusted ORs: for Q2, 1.71; 95% confidence interval (CI): 1.03, 2.82; for Q3, 2.30; 95% CI: 1.39, 3.81; for Q4, 2.32; 95% CI: 1.38, 3.90] and showed a significantly increasing trend (p = 0.001). No statistically significant associations were observed between DDT/DDE and IVF outcomes or between HCB and chemical pregnancy or spontaneous abortion. Conclusions: Serum HCB concentrations were on average lower than that of the general U.S. population and associated with failed implantation among women undergoing IVF.}, number={2}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, publisher={Environmental Health Perspectives}, author={Mahalingaiah, Shruthi and Missmer, Stacey A. and Maity, Arnab and Williams, Paige L. and Meeker, John D. and Berry, Katharine and Ehrlich, Shelley and Perry, Melissa J. and Cramer, Daniel W. and Hauser, Russ and others}, year={2012}, month={Feb}, pages={316–320} } @article{liu_maity_lin_wright_christiani_2012, title={Design and analysis issues in gene and environment studies}, volume={11}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84872769763&partnerID=MN8TOARS}, DOI={10.1186/1476-069X-11-93}, abstractNote={Abstract}, number={1}, journal={Environmental Health: A Global Access Science Source}, author={Liu, C.-Y. and Maity, A. and Lin, X. and Wright, R. O. and Christiani, D. C.}, year={2012} } @misc{liu_maity_lin_wright_christiani_2012a, title={Design and analysis issues in gene and environment studies}, volume={11}, journal={Environmental Health}, author={Liu, C. Y. and Maity, A. and Lin, X. H. and Wright, R. O. and Christiani, D. 
C.}, year={2012} } @article{sofer_maity_coull_baccarelli_schwartz_lin_2012, title={Multivariate Gene Selection and Testing in Studying the Exposure Effects on a Gene Set}, volume={4}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84870393737&partnerID=MN8TOARS}, DOI={10.1007/s12561-012-9072-7}, abstractNote={Studying the association between a gene set (e.g., pathway) and exposures using multivariate regression methods is of increasing importance in genomic studies. Such an analysis is often more powerful and interpretable than individual-gene analysis. Since many genes in a gene set are likely not affected by exposures, one is often interested in identifying a subset of genes in the gene set that are affected by exposures. This allows for better understanding of the underlying biological mechanism and for pursuing further biological investigation of these genes. The selected subset of “signal” genes also provides an attractive vehicle for a more powerful test for the association between the gene set and exposures. We propose two computationally simple Canonical Correlation Analysis (CCA) based variable selection methods: Sparse Outcome Selection (SOS) CCA and step CCA, to jointly select a subset of genes in a gene set that are associated with exposures. Several model selection criteria, such as BIC and the new Correlation Information Criterion (CIC), are proposed and compared. We also develop a global test procedure for testing the exposure effects on the whole gene set, accounting for gene selection. Through simulation studies, we show that the proposed methods improve upon an existing method when the genes are correlated and are more computationally efficient. 
We apply the proposed methods to the analysis of the Normative Aging DNA methylation Study to examine the effects of airborne particulate matter exposures on DNA methylations in a genetic pathway.}, number={2}, journal={Statistics in Biosciences}, publisher={Springer Science + Business Media}, author={Sofer, T. and Maity, A. and Coull, B. and Baccarelli, A. A. and Schwartz, J. and Lin, X.}, year={2012}, pages={319--338} } @article{maity_sullivan_tzeng_2012, title={Multivariate Phenotype Association Analysis by Marker-Set Kernel Machine Regression}, volume={36}, ISSN={1098-2272}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84867539542&partnerID=MN8TOARS}, DOI={10.1002/gepi.21663}, abstractNote={Genetic studies of complex diseases often collect multiple phenotypes relevant to the disorders. As these phenotypes can be correlated and share common genetic mechanisms, jointly analyzing these traits may bring more power to detect genes influencing individual or multiple phenotypes. Given the advancement brought by the multivariate phenotype approaches and the multimarker kernel machine regression, we construct a multivariate regression based on kernel machine to facilitate the joint evaluation of multimarker effects on multiple phenotypes. The kernel machine serves as a powerful dimension‐reduction tool to capture complex effects among markers. The multivariate framework incorporates the potentially correlated multidimensional phenotypic information and accommodates common or different environmental covariates for each trait. We derive the multivariate kernel machine test based on a score‐like statistic, and conduct simulations to evaluate the validity and efficacy of the method. We also study the performance of the commonly adapted strategies for kernel machine analysis on multiple phenotypes, including the multiple univariate kernel machine tests with original phenotypes or with their principal components.
Our results suggest that none of these approaches has the uniformly best power, and the optimal test depends on the magnitude of the phenotype correlation and the effect patterns. However, the multivariate test retains to be a reasonable approach when the multiple phenotypes have none or mild correlations, and gives the best power once the correlation becomes stronger or when there exist genes that affect more than one phenotype. We illustrate the utility of the multivariate kernel machine method through the Clinical Antipsychotic Trials of Intervention Effectiveness antibody study.}, number={7}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Sullivan, Patrick E. and Tzeng, Jung-Ying}, year={2012}, month=nov, pages={686--695} } @article{maity_huang_2012, title={Partially linear varying coefficient models stratified by a functional covariate}, volume={82}, ISSN={1879-2103}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84864042756&partnerID=MN8TOARS}, DOI={10.1016/j.spl.2012.06.002}, abstractNote={We consider the problem of estimation in semiparametric varying coefficient models where the covariate modifying the varying coefficients is functional and is modeled nonparametrically. We develop a kernel-based estimator of the nonparametric component and a profiling estimator of the parametric component of the model, and derive their asymptotic properties. Specifically, we show the consistency of the nonparametric functional estimates and derive the asymptotic expansion of the estimates of the parametric component.
We illustrate the performance of our methodology using a simulation study and a real data application.}, number={10}, journal={STATISTICS \& PROBABILITY LETTERS}, publisher={Elsevier BV}, author={Maity, Arnab and Huang, Jianhua Z.}, year={2012}, month=oct, pages={1807--1814} } @article{he_zhang_maity_zou_hussey_karmaus_2012, title={Power of a reproducing kernel-based method for testing the joint effect of a set of single-nucleotide polymorphisms}, volume={140}, ISSN={1573-6857}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84873408116&partnerID=MN8TOARS}, DOI={10.1007/s10709-012-9690-5}, abstractNote={This study explored a semi-parametric method built upon reproducing kernels for estimating and testing the joint effect of a set of single nucleotide polymorphisms (SNPs). The kernel adopted is the identity-by-state kernel that measures SNP similarity between subjects. In this article, through simulations we first assessed its statistical power under different situations. It was found that in addition to the effect of sample size, the testing power was impacted by the strength of association between SNPs and the outcome of interest, and by the SNP similarity among the subjects. A quadratic relationship between SNP similarity and testing power was identified, and this relationship was further affected by sample sizes. Next we applied the method to a SNP-lung function data set to estimate and test the joint effect of a set of SNPs on forced vital capacity, one type of lung function measure.
The findings were then connected to the patterns observed in simulation studies and further explored via variable importance indices of each SNP inferred from a variable selection procedure.}, number={10--12}, journal={GENETICA}, publisher={Springer Science + Business Media}, author={He, Hong and Zhang, Hongmei and Maity, Arnab and Zou, Yubo and Hussey, James and Karmaus, Wilfried}, year={2012}, month=dec, pages={421--427} } @article{maity_sherman_2012, title={Testing for spatial isotropy under general designs}, volume={142}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84856023088&partnerID=MN8TOARS}, DOI={10.1016/j.jspi.2011.11.013}, abstractNote={Spatial modeling is typically composed of a specification of a mean function and a model for the correlation structure. A common assumption on the spatial correlation is that it is isotropic. This means that the correlation between any two observations depends only on the distance between those sites and not on their relative orientation. The assumption of isotropy is often made due to a simpler interpretation of correlation behavior and to an easier estimation problem under an assumed isotropy. The assumption of isotropy, however, can have serious deleterious effects when not appropriate. In this paper we formulate a test of isotropy for spatial observations located according to a general class of stochastic designs. Distribution theory of our test statistic is derived and we carry out extensive simulations which verify the efficacy of our approach.
We apply our methodology to a data set on longleaf pine trees from an oldgrowth forest in the southern United States.}, number={5}, journal={Journal of Statistical Planning and Inference}, publisher={Elsevier BV}, author={Maity, Arnab and Sherman, Michael}, year={2012}, pages={1081--1091} } @article{bandyopadhyay_maity_2011, title={Analysis of {Sabine} river flow data using semiparametric spline modeling}, volume={399}, ISSN={1879-2707}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79952008745&partnerID=MN8TOARS}, DOI={10.1016/j.jhydrol.2011.01.006}, abstractNote={In this article, a modeling approach for the mean annual flow in different segments of Sabine river, as released in the NHDPlus data in 2007, as a function of five predictor variables is described. Modeling flow is extremely complex and the deterministic flow models are widely used for that purpose. The justification for using these deterministic models comes from the fact that the flow is governed by some explicitly stated physical laws. In contrast, in this article, this complex issue is addressed from a completely statistical point of view. A semiparametric model is proposed to analyze the spatial distribution of the mean annual flow of Sabine river. Semiparametric additive models allow explicit consideration of the linear and nonlinear relations with relevant explanatory variables.
We use a conditionally specified Gaussian model for the estimation of the univariate conditional distributions of flow to incorporate auxiliary information and this formulation does not require the target variable to be independent.}, number={3--4}, journal={JOURNAL OF HYDROLOGY}, publisher={Elsevier BV}, author={Bandyopadhyay, Soutir and Maity, Arnab}, year={2011}, month=mar, pages={274--280} } @article{maity_apanasovich_2011, title={Estimation via corrected scores in general semiparametric regression models with error-prone covariates}, volume={5}, ISSN={1935-7524}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84859832500&partnerID=MN8TOARS}, DOI={10.1214/11-ejs647}, abstractNote={This paper considers the problem of estimation in a general semiparametric regression model when error-prone covariates are modeled parametrically while covariates measured without error are modeled nonparametrically. To account for the effects of measurement error, we apply a correction to a criterion function. The specific form of the correction proposed allows Monte Carlo simulations in problems for which the direct calculation of a corrected criterion is difficult. Therefore, in contrast to methods that require solving integral equations of possibly multiple dimensions, as in the case of multiple error-prone covariates, we propose methodology which offers a simple implementation. The resulting methods are functional, they make no assumptions about the distribution of the mismeasured covariates. We utilize profile kernel and backfitting estimation methods and derive the asymptotic distribution of the resulting estimators. Through numerical studies we demonstrate the applicability of proposed methods to Poisson, logistic and multivariate Gaussian partially linear models. We show that the performance of our methods is similar to a computationally demanding alternative.
Finally, we demonstrate the practical value of our methods when applied to Nevada Test Site (NTS) Thyroid Disease Study data.}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Maity, Arnab and Apanasovich, Tatiyana V.}, year={2011}, pages={1424--1449} } @article{sherman_maity_wang_2011, title={Inferences for the ratio: {Fieller's} interval, log ratio, and large sample based confidence intervals}, volume={95}, ISSN={1863-8171}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79960996265&partnerID=MN8TOARS}, DOI={10.1007/s10182-011-0162-5}, number={3}, journal={ASTA-ADVANCES IN STATISTICAL ANALYSIS}, publisher={Springer Science + Business Media}, author={Sherman, Michael and Maity, Arnab and Wang, Suojin}, year={2011}, month=sep, pages={313--323} } @article{maity_lin_2011, title={Powerful Tests for Detecting a Gene Effect in the Presence of Possible Gene-Gene Interactions Using Garrote Kernel Machines}, volume={67}, ISSN={1541-0420}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79955121487&partnerID=MN8TOARS}, DOI={10.1111/j.1541-0420.2011.01598.x}, abstractNote={We propose in this article a powerful testing procedure for detecting a gene effect on a continuous outcome in the presence of possible gene–gene interactions (epistasis) in a gene set, e.g., a genetic pathway or network. Traditional tests for this purpose require a large number of degrees of freedom by testing the main effect and all the corresponding interactions under a parametric assumption, and hence suffer from low power. In this article, we propose a powerful kernel machine based test. Specifically, our test is based on a garrote kernel method and is constructed as a score test. Here, the term garrote refers to an extra nonnegative parameter that is multiplied to the covariate of interest so that our score test can be formulated in terms of this nonnegative parameter.
A key feature of the proposed test is that it is flexible and developed for both parametric and nonparametric models within a unified framework, and is more powerful than the standard test by accounting for the correlation among genes and hence often uses a much smaller degrees of freedom. We investigate the theoretical properties of the proposed test. We evaluate its finite sample performance using simulation studies, and apply the method to the Michigan prostate cancer gene expression data.}, number={4}, journal={BIOMETRICS}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Lin, Xihong}, year={2011}, month=dec, pages={1271--1284} } @article{perry_chen_mcauliffe_maity_deloid_2011, title={Semi-Automated Scoring of Triple-probe {FISH} in Human Sperm: Methods and Further Validation}, volume={79A}, ISSN={1552-4930}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79960558287&partnerID=MN8TOARS}, DOI={10.1002/cyto.a.21078}, number={8}, journal={CYTOMETRY PART A}, publisher={Wiley-Blackwell}, author={Perry, Melissa J. and Chen, Xing and McAuliffe, Megan E. and Maity, Arnab and Deloid, Glen M.}, year={2011}, month=aug, pages={661--666} } @article{meeker_maity_missmer_williams_mahalingaiah_ehrlich_berry_altshul_perry_cramer_et_al_2011, title={Serum Concentrations of Polychlorinated Biphenyls in Relation to in Vitro Fertilization Outcomes}, volume={119}, ISSN={0091-6765}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79960065763&partnerID=MN8TOARS}, DOI={10.1289/ehp.1002922}, abstractNote={Background: Human exposure to polychlorinated biphenyls (PCBs) remains widespread. PCBs have been associated with adverse reproductive health outcomes including reduced fecundability and increased risk of pregnancy loss, although the human data remain largely inconclusive.
Objective: Our goal was to explore the relationship between serum PCB concentrations and early pregnancy loss among a large cohort of women undergoing in vitro fertilization (IVF) between 1994 and 2003. Methods: Concentrations of 57 PCB congeners were measured in serum samples collected during 827 IVF/intracytoplasmic sperm injection cycles from 765 women. Joint statistical models that accommodate multiple outcomes and multiple cycles per woman were used to assess the relationship between serum PCB quartiles and implantation failure, chemical pregnancies (human chorionic gonadotropin level > 5.0 mIU/mL) that did not result in clinical pregnancy, or spontaneous abortion, while also adjusting for confounders. Results: PCB-153 was the congener present in the highest concentration (median, 46.2 ng/g lipid). Increasing quartiles of PCB-153 and the sum of all measured PCB congeners (ΣPCBs) were associated with significantly elevated dose-dependent odds of failed implantation. Adjusted odds ratios (95% confidence interval) for highest versus lowest quartile were 2.0 (1.2–3.4) for PCB-153 and 1.7 (1.0–2.9) for ΣPCBs. There were suggestive trends for increased odds of implantation failure for PCB-118 and cytochrome P450–inducing congeners (p-values for trend = 0.06). No statistically significant associations between PCBs and chemical pregnancy or spontaneous abortion were found. Conclusions: Serum PCB concentrations at levels similar to the U.S. general population were associated with failed implantation among women undergoing IVF. These findings may help explain previous reports of reduced fecundability among women exposed to PCBs.}, number={7}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, publisher={Environmental Health Perspectives}, author={Meeker, John D. and Maity, Arnab and Missmer, Stacey A. and Williams, Paige L. and Mahalingaiah, Shruthi and Ehrlich, Shelley and Berry, Katharine F. and Altshul, Larisa and Perry, Melissa J. and Cramer, Daniel W. 
and others}, year={2011}, month=jul, pages={1010--1016} } @article{wei_carroll_maity_2011, title={Testing for constant nonparametric effects in general semiparametric regression models with interactions}, volume={81}, ISSN={1879-2103}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79955146434&partnerID=MN8TOARS}, DOI={10.1016/j.spl.2010.11.002}, abstractNote={We consider the problem of testing for a constant nonparametric effect in a general semiparametric regression model when there is a potential for interaction between the parametrically and nonparametrically modeled variables. The work was originally motivated by a unique testing problem in genetic epidemiology (Chatterjee et al., 2006) that involved a typical generalized linear model but with an additional term reminiscent of the Tukey 1-degree-of-freedom formulation, and their interest was in testing for main effects of the genetic variables, while gaining statistical power by allowing for a possible interaction between genes and the environment. Later work (Maity et al., 2009) involved the possibility of modeling the environmental variable nonparametrically, but they focused on whether there was a parametric main effect for the genetic variables. In this paper, we consider the complementary problem, where the interest is in testing for the main effect of the nonparametrically modeled environmental variable. We derive a generalized likelihood ratio test for this hypothesis, show how to implement it, and provide evidence that our method can improve statistical power when compared to standard partially linear models with main effects only. We use the method for the primary purpose of analyzing data from a case-control study of colorectal adenoma.}, number={7}, journal={STATISTICS \& PROBABILITY LETTERS}, publisher={Elsevier BV}, author={Wei, Jiawei and Carroll, Raymond J.
and Maity, Arnab}, year={2011}, month=jul, pages={717--723} } @inproceedings{schwartz_sofer_maity_lin_baccarelli_2010, title={Particulate Air Pollution Modifies Methylation Of {NFKb} Pathways}, volume={5}, DOI={10.1164/ajrccm-conference.2010.181.1_meetingabstracts.a4008}, booktitle={C16. GENETICS OF LUNG DISEASE AND GENE: ENVIRONMENT INTERACTIONS}, publisher={American Thoracic Society}, author={Schwartz, Joel and Sofer, Tamar and Maity, Arnab and Lin, Xihong and Baccarelli, Andrea}, year={2010} } @incollection{dey_ghosh_mallick_2010, address={Boca Raton}, title={Proportional Hazards Regression Using {Bayesian} Kernel Machines}, url={http://www.crcnetbase.com/doi/book/10.1201/EBK1420070170}, DOI={10.1201/EBK1420070170}, booktitle={Bayesian Modeling in Bioinformatics}, publisher={CRC Press}, author={Dey, D. and Ghosh, S. and Mallick, B. K.}, year={2010} } @article{zhou_huang_martinez_maity_baladandayuthapani_carroll_2010, title={Reduced Rank Mixed Effects Models for Spatially Correlated Hierarchical Functional Data}, volume={105}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-77952558527&partnerID=MN8TOARS}, DOI={10.1198/jasa.2010.tm08737}, abstractNote={Hierarchical functional data are widely seen in complex studies where subunits are nested within units, which in turn are nested within treatment groups. We propose a general framework of functional mixed effects model for such data: within-unit and within-subunit variations are modeled through two separate sets of principal components; the subunit level functions are allowed to be correlated. Penalized splines are used to model both the mean functions and the principal components functions, where roughness penalties are used to regularize the spline fit. An expectation–maximization (EM) algorithm is developed to fit the model, while the specific covariance structure of the model is utilized for computational efficiency to avoid storage and inversion of large matrices.
Our dimension reduction with principal components provides an effective solution to the difficult tasks of modeling the covariance kernel of a random function and modeling the correlation between functions. The proposed methodology is illustrated using simulations and an empirical dataset from a colon carcinogenesis study. Supplemental materials are available online.}, number={489}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Zhou, Lan and Huang, Jianhua Z. and Martinez, Josue G. and Maity, Arnab and Baladandayuthapani, Veerabhadran and Carroll, Raymond J.}, year={2010}, pages={390--400} } @article{carroll_maity_mammen_yu_2009, title={Efficient Semiparametric Marginal Estimation for the Partially Linear Additive Model for Longitudinal/Clustered Data}, volume={1}, DOI={10.1007/s12561-009-9000-7}, abstractNote={We consider the efficient estimation of a regression parameter in a partially linear additive nonparametric regression model from repeated measures data when the covariates are multivariate. To date, while there is some literature in the scalar covariate case, the problem has not been addressed in the multivariate additive model case. Ours represents a first contribution in this direction. As part of this work, we first describe the behavior of nonparametric estimators for additive models with repeated measures when the underlying model is not additive. These results are critical when one considers variants of the basic additive model. We apply them to the partially linear additive repeated-measures model, deriving an explicit consistent estimator of the parametric component; if the errors are in addition Gaussian, the estimator is semiparametric efficient. We also apply our basic methods to a unique testing problem that arises in genetic epidemiology; in combination with a projection argument we develop an efficient and easily computed testing scheme.
Simulations and an empirical example from nutritional epidemiology illustrate our methods.}, number={1}, journal={Statistics in Biosciences}, publisher={Springer Science + Business Media}, author={Carroll, Raymond and Maity, Arnab and Mammen, Enno and Yu, Kyusang}, year={2009}, pages={10--31} } @article{carroll_maity_mammen_yu_2009b, title={Nonparametric Additive Regression for Repeatedly Measured Data}, volume={96}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-66249142283&partnerID=MN8TOARS}, DOI={10.1093/biomet/asp015}, abstractNote={We develop an easily computed smooth backfitting algorithm for additive model fitting in repeated measures problems. Our methodology easily copes with various settings, such as when some covariates are the same over repeated response measurements. We allow for a working covariance matrix for the regression errors, showing that our method is most efficient when the correct covariance matrix is used. The component functions achieve the known asymptotic variance lower bound for the scalar argument case. Smooth backfitting also leads directly to design-independent biases in the local linear case. Simulations show our estimator has smaller variance than the usual kernel estimator. This is also illustrated by an example from nutritional epidemiology. Copyright 2009, Oxford University Press.}, number={2}, journal={Biometrika}, author={Carroll, R. J. and Maity, A. and Mammen, E. and Yu, K.}, year={2009}, pages={383--398} } @article{apanasovich_carroll_maity_2009, title={{SIMEX} and standard error estimation in semiparametric measurement error models}, volume={3}, DOI={10.1214/08-ejs341}, abstractNote={SIMEX is a general-purpose technique for measurement error correction. There is a substantial literature on the application and theory of SIMEX for purely parametric problems, as well as for purely non-parametric regression problems, but there is neither application nor theory for semiparametric problems.
Motivated by an example involving radiation dosimetry, we develop the basic theory for SIMEX in semiparametric problems using kernel-based estimation methods. This includes situations that the mismeasured variable is modeled purely parametrically, purely non-parametrically, or that the mismeasured variable has components that are modeled both parametrically and nonparametrically. Using our asymptotic expansions, easily computed standard error formulae are derived, as are the bias properties of the nonparametric estimator. The standard error method represents a new method for estimating variability of nonparametric estimators in semiparametric problems, and we show in both simulations and in our example that it improves dramatically on first order methods. We find that for estimating the parametric part of the model, standard bandwidth choices of order $O(n^{-1/5})$ are sufficient to ensure asymptotic normality, and undersmoothing is not required. SIMEX has the property that it fits misspecified models, namely ones that ignore the measurement error. Our work thus also more generally describes the behavior of kernel-based methods in misspecified semiparametric problems.}, journal={Electronic Journal of Statistics}, publisher={Institute of Mathematical Statistics - care of Project Euclid}, author={Apanasovich, Tatiyana V. and Carroll, Raymond J. and Maity, Arnab}, year={2009}, pages={318--348} } @article{maity_carroll_mammen_chatterjee_2009, title={Testing in semiparametric models with interaction, with applications to gene-environment interactions}, volume={71}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-58149340050&partnerID=MN8TOARS}, DOI={10.1111/j.1467-9868.2008.00671.x}, number={1}, journal={Journal of the Royal Statistical Society. Series B: Statistical Methodology}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Carroll, Raymond J.
and Mammen, Enno and Chatterjee, Nilanjan}, year={2009}, pages={75--96} } @phdthesis{efficient_inference_in_general_semiparametric_regression_models_2008, title={Efficient inference in general semiparametric regression models}, author={Maity, Arnab}, school={Texas A\&M University}, url={http://oaktrust.library.tamu.edu/handle/1969.1/ETD-TAMU-3075}, year={2008}, internal-note={title restored from the original citation key; author and school inferred from the repository URL and the surrounding entries -- verify against the repository record} } @article{maity_2008, title={Efficient estimation of population quantiles in general semiparametric regression models}, volume={78}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-53249086611&partnerID=MN8TOARS}, DOI={10.1016/j.spl.2008.03.022}, abstractNote={The problem of quantile estimation in general semiparametric regression models is considered. We derive plug-in kernel-based estimators, investigate their asymptotic distribution and establish the semiparametric efficiency of these estimators under mild assumptions. We apply our methodology in an example in nutritional epidemiology. The generalization to the important case where responses are missing at random is also addressed.}, number={16}, journal={Statistics and Probability Letters}, publisher={Elsevier BV}, author={Maity, Arnab}, year={2008}, pages={2744--2750} } @article{maity_apanasovich_carroll_2008, title={Estimation of population-level summaries in general semiparametric repeated measures regression models}, DOI={10.1214/193940307000000095}, abstractNote={This paper considers a wide family of semiparametric repeated measures regression models, in which the main interest is on estimating population-level quantities such as mean, variance, probabilities etc. Examples of our framework include generalized linear models for clustered/longitudinal data, among many others. We derive plug-in kernel-based estimators of the population level quantities and derive their asymptotic distribution. An example involving estimation of the survival function of hemoglobin measures in the Kenya hemoglobin study data is presented to demonstrate our methodology.
This paper is about semiparametric regression models with repeated measures when the primary goal is to estimate a population quantity such as mean, variance, probability, etc. We will construct estimators of these quantities which utilize the underlying semiparametric structure of the model and derive their limiting distribution. The work is motivated by the following example: the Kenya hemoglobin data. The goal is to study the changes of hemoglobin over time during the first year of birth. The data set consists of 68 families with 2 children per family. For each child, 4 repeated measures are taken over time in the first year since birth: the time of visit varied from child to child. The factors include mother's age at child birth, child sex and placental parasitemia density (PDEN), a marker of malaria that could affect hemoglobin. To model these data, Lin and Carroll (2) considered a semiparametric model where the mother's age effect is modeled nonparametrically and (sex, PDEN) is modeled parametrically. The model is given by the repeated measures partially linear model $Y_{ijk} = X_{ijk}^{T}\beta_0 + \theta_0(Z_{ij}) + \epsilon_{ijk}$,}, journal={Collections}, publisher={Institute of Mathematical Statistics - care of Project Euclid}, author={Maity, Arnab and Apanasovich, Tatiyana V. and Carroll, Raymond J.}, year={2008}, pages={123--137} } @article{maity_sherman_2008, title={On adaptive linear regression}, volume={35}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-54049132878&partnerID=MN8TOARS}, DOI={10.1080/02664760802382475}, abstractNote={Ordinary least squares (OLS) is omnipresent in regression modeling. Occasionally, least absolute deviations (LAD) or other methods are used as an alternative when there are outliers. Although some data adaptive estimators have been proposed, they are typically difficult to implement. In this paper, we propose an easy to compute adaptive estimator which is simply a linear combination of OLS and LAD.
We demonstrate large sample normality of our estimator and show that its performance is close to best for both light-tailed (e.g. normal and uniform) and heavy-tailed (e.g. double exponential and $t_3$) error distributions. We demonstrate this through three simulation studies and illustrate our method on state public expenditures and lutenizing hormone data sets. We conclude that our method is general and easy to use, which gives good efficiency across a wide range of error distributions.}, number={12}, journal={Journal of Applied Statistics}, publisher={Informa UK Limited}, author={Maity, Arnab and Sherman, Michael}, year={2008}, pages={1409--1422} } @article{carroll_maity_2007, title={Comments on: Nonparametric inference with generalized likelihood ratio tests}, volume={16}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-36448987181&partnerID=MN8TOARS}, DOI={10.1007/s11749-007-0085-3}, number={3}, journal={Test}, publisher={Springer Science + Business Media}, author={Carroll, Raymond J. and Maity, Arnab}, year={2007}, pages={456--458} } @article{maity_ma_carroll_2007, title={Efficient estimation of population-level summaries in general semiparametric regression models}, volume={102}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-33947277465&partnerID=MN8TOARS}, DOI={10.1198/016214506000001103}, abstractNote={This article considers a wide class of semiparametric regression models in which interest focuses on population-level quantities that combine both the parametric and the nonparametric parts of the model. Special cases in this approach include generalized partially linear models, generalized partially linear single-index models, structural measurement error models, and many others. For estimating the parametric part of the model efficiently, profile likelihood kernel estimation methods are well established in the literature.
Here our focus is on estimating general population-level quantities that combine the parametric and nonparametric parts of the model (e.g., population mean, probabilities, etc.). We place this problem in a general context, provide a general kernel-based methodology, and derive the asymptotic distributions of estimates of these population-level quantities, showing that in many cases the estimates are semiparametric efficient. For estimating the population mean with no missing data, we show that the sample mean is semiparametric efficient for canonical exponential families, but not in general. We apply the methods to a problem in nutritional epidemiology, where estimating the distribution of usual intake is of primary interest and semiparametric methods are not available. Extensions to the case of missing response data are also discussed.}, number={477}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Maity, A. and Ma, Y. and Carroll, R. J.}, year={2007}, pages={123--139} } @article{maity_sherman_2006, title={The two-sample t test with one variance unknown}, volume={60}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-33744525717&partnerID=MN8TOARS}, DOI={10.1198/000313006X108567}, abstractNote={We consider the situation in two-sample testing when one variance is assumed to be known and the other variance is considered unknown. This situation arises, for example, when one is interested in comparing a standard treatment with a new treatment. Although this situation occurs relatively infrequently, our example discusses the important tool of moment matching and makes the classic two-sample Satterthwaite t approximation transparent.}, number={2}, journal={American Statistician}, author={Maity, A.
and Sherman, M.}, year={2006}, pages={163--166} } @article{a_perturbation_technique_for_sample_moment_matching_in_kernel_density_estimation_2005, title={A perturbation technique for sample moment matching in kernel density estimation}, url={http://dx.doi.org/10.1177/0008068320050510}, DOI={10.1177/0008068320050510}, journal={Calcutta Statistical Association Bulletin}, year={2005}, month=mar, internal-note={title restored from the original citation key; the export carried no author field -- add the authors from the DOI record before citing} }