@article{rose_bentley_maity_maguire_planchart_spasojevic_liu_thorp jr_hoyo_2024, title={Association between F2-isoprostanes and self-reported stressors in pregnant americans of African and European ancestry}, volume={10}, ISSN={["2405-8440"]}, DOI={10.1016/j.heliyon.2024.e25578}, abstractNote={BackgroundPoor birth outcomes such as preterm birth/delivery disproportionately affect African Americans compared to White individuals. Reasons for this disparity are likely multifactorial, and include prenatal psychosocial stressors, and attendant increased lipid peroxidation; however, empirical data linking psychosocial stressors during pregnancy to oxidative status are limited.MethodsWe used established scales to measure five psychosocial stressors. Maternal adverse childhood experiences, financial stress, social support, anxiety, and depression were measured among 50 African American and White pregnant women enrolled in the Stress and Health in Pregnancy cohort. Liquid chromatography‐tandem mass spectrometry was used to measure biomarkers of oxidative stress (four urinary F2-isoprostane isomers), to estimate oxidative status. Linear regression models were used to evaluate associations between psychosocial stressors, prenatal oxidative status and preterm birth.ResultsAfter adjusting for maternal obesity, gestational diabetes, and cigarette smoking, African American women with higher oxidative status were more likely to report higher maternal adverse childhood experience scores (β = 0.16, se = 1.07, p-value = 0.024) and depression scores (β = 0.05, se = 0.02, p = 0.014). Higher oxidative status was also associated with lower gestational age at birth (β = −0.13, se = 0.06, p = 0.04) in this population. These associations were not apparent in Whites. 
However, none of the cross-product terms for race/ethnicity and social stressors reached statistical significance (p > 0.05).ConclusionWhile the small sample size limits inference, our novel data suggest that psychosocial stressors may contribute significantly to oxidative stress during pregnancy, and preterm birth or delivery African Americans. If replicated in larger studies, these findings would support oxidative stress reduction using established dietary or pharmacological approaches present a potential avenue to mitigate adverse effects of psychosocial stressors on birth outcomes.}, number={3}, journal={HELIYON}, author={Rose, Deborah K. and Bentley, Loren and Maity, Arnab and Maguire, Rachel L. and Planchart, Antonio and Spasojevic, Ivan and Liu, Andy J. and Thorp Jr, John and Hoyo, Cathrine}, year={2024}, month={Feb} } @article{ghosal_maity_2023, title={Variable selection in function-on-scalar single-index model via the alternating direction method of multipliers}, volume={9}, ISSN={["1863-8260"]}, DOI={10.1007/s11749-023-00884-9}, journal={TEST}, author={Ghosal, Rahul and Maity, Arnab}, year={2023}, month={Sep} } @article{li_wang_maity_staicu_2022, title={Inference in functional linear quantile regression}, volume={190}, ISSN={["0047-259X"]}, DOI={10.1016/j.jmva.2022.104985}, abstractNote={In this paper, we study statistical inference in functional quantile regression for scalar response and a functional covariate. Specifically, we consider a functional linear quantile regression model where the effect of the covariate on the quantile of the response is modeled through the inner product between the functional covariate and an unknown smooth regression parameter function that varies with the level of quantile. The objective is to test that the regression parameter is constant across several quantile levels of interest. The parameter function is estimated by combining ideas from functional principal component analysis and quantile regression. 
An adjusted Wald testing procedure is proposed for this hypothesis of interest, and its chi-square asymptotic null distribution is derived. The testing procedure is investigated numerically in simulations involving sparse and noisy functional covariates and in a capital bike share data application. The proposed approach is easy to implement and the R code is published online at https://github.com/xylimeng/fQR-testing.}, journal={JOURNAL OF MULTIVARIATE ANALYSIS}, author={Li, Meng and Wang, Kehui and Maity, Arnab and Staicu, Ana-Maria}, year={2022}, month={Jul} } @article{gonzalez-nahm_marchesoni_maity_maguire_house_tucker_atkinson_murphy_hoyo_2022, title={Maternal Mediterranean Diet Adherence and Its Associations with Maternal Prenatal Stressors and Child Growth}, volume={6}, ISSN={["2475-2991"]}, url={https://doi.org/10.1093/cdn/nzac146}, DOI={10.1093/cdn/nzac146}, abstractNote={ABSTRACT Background Psychosocial and physiologic stressors, such as depression and obesity, during pregnancy can have negative consequences, such as increased systemic inflammation, contributing to chronic disease for both mothers and their unborn children. These conditions disproportionately affect racial/ethnic minorities. The effects of recommended dietary patterns in mitigating the effects of these stressors remain understudied. Objectives We aimed to evaluate the relations between maternal Mediterranean diet adherence (MDA) and maternal and offspring outcomes during the first decade of life in African Americans, Hispanics, and Whites. Methods This study included 929 mother–child dyads from the NEST (Newborn Epigenetics STudy), a prospective cohort study. FFQs were used to estimate MDA in pregnant women. Weight and height were measured in children between birth and age 8 y. Multivariable linear regression models were used to examine associations between maternal MDA, inflammatory cytokines, and pregnancy and postnatal outcomes. 
Results More than 55% of White women reported high MDA during the periconceptional period compared with 22% of Hispanic and 18% of African American women (P < 0.05). Higher MDA was associated with lower likelihood of depressive mood (β = −0.45; 95% CI: −0.90, −0.18; P = 0.02) and prepregnancy obesity (β = −0.29; 95% CI: −0.57, −0.0002; P = 0.05). Higher MDA was also associated with lower body size at birth, which was maintained to ages 3–5 and 6–8 y—this association was most apparent in White children (3–5 y: β = −2.9, P = 0.02; 6–8 y: β = −3.99, P = 0.01). Conclusions If replicated in larger studies, our data suggest that MDA provides a potent avenue by which effects of prenatal stressors on maternal and fetal outcomes can be mitigated to reduce ethnic disparities in childhood obesity.}, number={11}, journal={CURRENT DEVELOPMENTS IN NUTRITION}, author={Gonzalez-Nahm, Sarah and Marchesoni, Joddy and Maity, Arnab and Maguire, Rachel L. and House, John S. and Tucker, Rachel and Atkinson, Tamara and Murphy, Susan K. and Hoyo, Cathrine}, year={2022}, month={Nov} } @article{ghosal_maity_2022, title={A Score Based Test for Functional Linear Concurrent Regression}, volume={21}, ISSN={["2452-3062"]}, DOI={10.1016/j.ecosta.2021.05.003}, abstractNote={A novel method for testing the null hypothesis of no effect of a covariate on the response is proposed in functional linear concurrent regression. An equivalent random effects formulation of the functional regression model is established under which the testing problem reduces to testing for zero variance component for random effects. For this purpose, a one-sided score test approach is used, which is an extension of the classical score test. Theoretical justification is provided as to why the proposed testing procedure has the correct levels (asymptotically) under null using standard assumptions. 
Using numerical simulations, the testing method is shown to have the desired type I error rate and higher power compared to a bootstrapped F test currently existing in the literature. The model and the testing procedure give good performances even when the data are sparsely observed, and the functional covariate is contaminated with noise. Applications of the proposed testing method are demonstrated on gait data and a study of child mortality.}, journal={ECONOMETRICS AND STATISTICS}, author={Ghosal, Rahul and Maity, Arnab}, year={2022}, month={Jan}, pages={114–130} } @article{mehrotra_maity_2021, title={Simultaneous variable selection, clustering, and smoothing in function-on-scalar regression}, volume={11}, ISSN={["1708-945X"]}, DOI={10.1002/cjs.11668}, abstractNote={We address the problem of multicollinearity in a function‐on‐scalar regression model by using a prior that simultaneously selects, clusters, and smooths functional effects. Our methodology groups the effects of highly correlated predictors, performing dimension reduction without dropping relevant predictors from the model. We validate our approach via a simulation study, showing superior performance relative to existing dimension‐reduction approaches described in the function‐on‐scalar literature. We also demonstrate the use of our model on a data set of age‐specific fertility rates from the United Nations Gender Information database.}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, author={Mehrotra, Suchit and Maity, Arnab}, year={2021}, month={Nov} } @article{ghosal_maity_2021, title={Variable selection in nonlinear function-on-scalar regression}, volume={9}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13564}, abstractNote={AbstractWe develop a new method for variable selection in a nonlinear additive function‐on‐scalar regression (FOSR) model. 
Existing methods for variable selection in FOSR have focused on the linear effects of scalar predictors, which can be a restrictive assumption in the presence of multiple continuously measured covariates. We propose a computationally efficient approach for variable selection in existing linear FOSR using functional principal component scores of the functional response and extend this framework to a nonlinear additive function‐on‐scalar model. The proposed method provides a unified and flexible framework for variable selection in FOSR, allowing nonlinear effects of the covariates. Numerical analysis using simulation study illustrates the advantages of the proposed method over existing variable selection methods in FOSR even when the underlying covariate effects are all linear. The proposed procedure is demonstrated on accelerometer data from the 2003–2004 cohorts of the National Health and Nutrition Examination Survey (NHANES) in understanding the association between diurnal patterns of physical activity and demographic, lifestyle, and health characteristics of the participants.}, journal={BIOMETRICS}, author={Ghosal, Rahul and Maity, Arnab}, year={2021}, month={Sep} } @article{ghosal_maity_2021, title={Variable selection in nonparametric functional concurrent regression}, volume={9}, ISSN={["1708-945X"]}, DOI={10.1002/cjs.11654}, abstractNote={We develop a new method for variable selection in nonparametric functional concurrent regression. The commonly used functional linear concurrent model (FLCM) is far too restrictive in assuming linearity of the covariate effects, which is not necessarily true in many real‐world applications. The nonparametric functional concurrent model (NPFCM), on the other hand, is much more flexible and can capture complex dynamic relationships present between the response and the covariates. We extend the classically used variable selection methods, e.g., group LASSO, group SCAD and group MCP, to perform variable selection in NPFCM. 
We show via numerical simulations that the proposed variable selection method with the non‐convex penalties can identify the true functional predictors with minimal false‐positive rate and negligible false‐negative rate. The proposed method also provides better out‐of‐sample prediction accuracy compared to the FLCM in the presence of nonlinear effects of the functional predictors. The proposed method's application is demonstrated by identifying the influential predictor variables in two real data studies: a dietary calcium absorption study, and some bike‐sharing data.}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, author={Ghosal, Rahul and Maity, Arnab}, year={2021}, month={Sep} } @article{alam_maity_sinha_rizopoulos_sattar_2021, title={Joint modeling of longitudinal continuous, longitudinal ordinal, and time-to-event outcomes}, volume={27}, ISSN={["1572-9249"]}, DOI={10.1007/s10985-020-09511-3}, abstractNote={In this paper, we propose an innovative method for jointly analyzing survival data and longitudinally measured continuous and ordinal data. We use a random effects accelerated failure time model for survival outcomes, a linear mixed model for continuous longitudinal outcomes and a proportional odds mixed model for ordinal longitudinal outcomes, where these outcome processes are linked through a set of association parameters. A primary objective of this study is to examine the effects of association parameters on the estimators of joint models. The model parameters are estimated by the method of maximum likelihood. The finite-sample properties of the estimators are studied using Monte Carlo simulations. The empirical study suggests that the degree of association among the outcome processes influences the bias, efficiency, and coverage probability of the estimators. Our proposed joint model estimators are approximately unbiased and produce smaller mean squared errors as compared to the estimators obtained from separate models. 
This work is motivated by a large multicenter study, referred to as the Genetic and Inflammatory Markers of Sepsis (GenIMS) study. We apply our proposed method to the GenIMS data analysis.}, number={1}, journal={LIFETIME DATA ANALYSIS}, author={Alam, Khurshid and Maity, Arnab and Sinha, Sanjoy K. and Rizopoulos, Dimitris and Sattar, Abdus}, year={2021}, month={Jan}, pages={64–90} } @article{martinez_maity_yolken_sullivan_tzeng_2020, title={Robust kernel association testing (RobKAT)}, volume={44}, ISSN={["1098-2272"]}, url={https://doi.org/10.1002/gepi.22280}, DOI={10.1002/gepi.22280}, abstractNote={AbstractTesting the association between single‐nucleotide polymorphism (SNP) effects and a response is often carried out through kernel machine methods based on least squares, such as the sequence kernel association test (SKAT). However, these least‐squares procedures are designed for a normally distributed conditional response, which may not apply. Other robust procedures such as the quantile regression kernel machine (QRKM) restrict the choice of the loss function and only allow inference on conditional quantiles. We propose a general and robust kernel association test with a flexible choice of the loss function, no distributional assumptions, and has SKAT and QRKM as special cases. We evaluate our proposed robust association test (RobKAT) across various data distributions through a simulation study. When errors are normally distributed, RobKAT controls type I error and shows comparable power with SKAT. In all other distributional settings investigated, our robust test has similar or greater power than SKAT. Finally, we apply our robust testing method to data from the Clinical Antipsychotic Trials of Intervention Effectiveness (CATIE) clinical trial to detect associations between selected genes including the major histocompatibility complex (MHC) region on chromosome six and neurotropic herpesvirus antibody levels in schizophrenia patients. 
RobKAT detected significant association with four SNP sets (HST1H2BJ, MHC, POM12L2, and SLC17A1), three of which were undetected by SKAT.}, number={3}, journal={GENETIC EPIDEMIOLOGY}, author={Martinez, Kara and Maity, Arnab and Yolken, Robert H. and Sullivan, Patrick F. and Tzeng, Jung-Ying}, year={2020}, month={Apr}, pages={272–282} } @article{ghosal_maity_clark_longo_2020, title={Variable selection in functional linear concurrent regression}, volume={69}, ISSN={["1467-9876"]}, DOI={10.1111/rssc.12408}, abstractNote={SummaryWe propose a novel method for variable selection in functional linear concurrent regression. Our research is motivated by a fisheries footprint study where the goal is to identify important time-varying sociostructural drivers influencing patterns of seafood consumption, and hence the fisheries footprint, over time, as well as estimating their dynamic effects. We develop a variable-selection method in functional linear concurrent regression extending the classically used scalar-on-scalar variable-selection methods like the lasso, smoothly clipped absolute deviation (SCAD) and minimax concave penalty (MCP). We show that in functional linear concurrent regression the variable-selection problem can be addressed as a group lasso, and their natural extension: the group SCAD or a group MCP problem. Through simulations, we illustrate that our method, particularly with the group SCAD or group MCP, can pick out the relevant variables with high accuracy and has minuscule false positive and false negative rate even when data are observed sparsely, are contaminated with noise and the error process is highly non-stationary. 
We also demonstrate two real data applications of our method in studies of dietary calcium absorption and fisheries footprint in the selection of influential time-varying covariates.}, number={3}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS}, author={Ghosal, Rahul and Maity, Arnab and Clark, Timothy and Longo, Stefano B.}, year={2020}, month={Jun}, pages={565–587} } @article{szatkiewicz_marceau_yilmaz_bulik_crowley_mattheisen_sullivan_lu_maity_tzeng_et al._2019, title={VARIANCE COMPONENT TEST FOR CROSS-DISORDER PATHWAY ANALYSIS}, volume={29}, ISSN={["1873-7862"]}, DOI={10.1016/j.euroneuro.2018.08.252}, journal={EUROPEAN NEUROPSYCHOPHARMACOLOGY}, author={Szatkiewicz, Jin and Marceau, Rachel and Yilmaz, Zeynep and Bulik, Cynthia and Crowley, James and Mattheisen, Manuel and Sullivan, Patrick and Lu, Wenbin and Maity, Arnab and Tzeng, Jung-Ying and et al.}, year={2019}, pages={1204–1205} } @article{tekbudak_alfaro-córdoba_maity_staicu_2018, title={A comparison of testing methods in scalar-on-function regression}, volume={103}, ISSN={1863-8171 1863-818X}, url={http://dx.doi.org/10.1007/S10182-018-00337-X}, DOI={10.1007/s10182-018-00337-x}, abstractNote={A scalar-response functional model describes the association between a scalar response and a set of functional covariates. An important problem in the functional data literature is to test nullity or linearity of the effect of the functional covariate in the context of scalar-on-function regression. This article provides an overview of the existing methods for testing both the null hypotheses that there is no relationship and that there is a linear relationship between the functional covariate and scalar response, and a comprehensive numerical comparison of their performance. The methods are compared for a variety of realistic scenarios: when the functional covariate is observed at dense or sparse grids and measurements include noise or not. 
Finally, the methods are illustrated on the Tecator data set.}, number={3}, journal={AStA Advances in Statistical Analysis}, publisher={Springer Science and Business Media LLC}, author={Tekbudak, Merve Yasemin and Alfaro-Córdoba, Marcela and Maity, Arnab and Staicu, Ana-Maria}, year={2018}, month={Oct}, pages={411–436} } @article{kim_maity_staicu_2018, title={Additive nonlinear functional concurrent model}, volume={11}, ISSN={1938-7989 1938-7997}, url={http://dx.doi.org/10.4310/sii.2018.v11.n4.a11}, DOI={10.4310/SII.2018.v11.n4.a11}, abstractNote={We propose a flexible regression model to study the association between a functional response and multiple functional covariates that are observed on the same domain. Specifically, we relate the mean of the current response to current values of the covariates by a sum of smooth unknown bivariate functions, where each of the functions depends on the current value of the covariate and the time point itself. In this framework, we develop estimation methodology that accommodates realistic scenarios where the covariates are sampled with or without error on a sparse and irregular design, and prediction that accounts for unknown model correlation structure. We also discuss the problem of testing the null hypothesis that the covariate has no association with the response. The proposed methods are evaluated numerically through simulations and two real data applications.}, number={4}, journal={Statistics and Its Interface}, publisher={International Press of Boston}, author={Kim, Janet S. 
and Maity, Arnab and Staicu, Ana-Maria}, year={2018}, pages={669–685} } @article{paul_maity_maiti_2018, title={Bayesian comparative study on binary time series}, volume={88}, ISSN={0094-9655 1563-5163}, url={http://dx.doi.org/10.1080/00949655.2018.1488256}, DOI={10.1080/00949655.2018.1488256}, abstractNote={ABSTRACT In this paper, we consider the Bayesian analysis of binary time series with different priors, namely normal, Students' t, and Jeffreys prior, and compare the results with the frequentist methods through some simulation experiments and one real data on daily rainfall in inches at Mount Washington, NH. Among Bayesian methods, our results show that the Jeffreys prior perform better in most of the situations for both the simulation and the rainfall data. Furthermore, among weakly informative priors considered, Student's t prior with 7 degrees of freedom fits the data most adequately.}, number={14}, journal={Journal of Statistical Computation and Simulation}, publisher={Informa UK Limited}, author={Paul, Erina and Maity, Arnab Kumar and Maiti, Raju}, year={2018}, month={Jun}, pages={2811–2826} } @article{zhao_zhang_clark_maity_wu_2019, title={Composite kernel machine regression based on likelihood ratio test for joint testing of genetic and gene–environment interaction effect}, volume={75}, url={http://dx.doi.org/10.1111/biom.13003}, DOI={10.1111/biom.13003}, abstractNote={Abstract Most common human diseases are a result from the combined effect of genes, the environmental factors, and their interactions such that including gene–environment (GE) interactions can improve power in gene mapping studies. The standard strategy is to test the SNPs, one-by-one, using a regression model that includes both the SNP effect and the GE interaction. However, the SNP-by-SNP approach has serious limitations, such as the inability to model epistatic SNP effects, biased estimation, and reduced power. 
Thus, in this article, we develop a kernel machine regression framework to model the overall genetic effect of a SNP-set, considering the possible GE interaction. Specifically, we use a composite kernel to specify the overall genetic effect via a nonparametric function andwe model additional covariates parametrically within the regression framework. The composite kernel is constructed as a weighted average of two kernels, one corresponding to the genetic main effect and one corresponding to the GE interaction effect. We propose a likelihood ratio test (LRT) and a restricted likelihood ratio test (RLRT) for statistical significance. We derive a Monte Carlo approach for the finite sample distributions of LRT and RLRT statistics. Extensive simulations and real data analysis show that our proposed method has correct type I error and can have higher power than score-based approaches under many situations.}, number={2}, journal={Biometrics}, author={Zhao, N. and Zhang, H. and Clark, J.J. and Maity, A. and Wu, M.C.}, year={2019}, month={Jun}, pages={625–637} } @article{davenport_maity_baladandayuthapani_2018, title={Functional interaction-based nonlinear models with application to multiplatform genomics data}, volume={37}, ISSN={["1097-0258"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85046549815&partnerID=MN8TOARS}, DOI={10.1002/sim.7671}, abstractNote={Functional regression allows for a scalar response to be dependent on a functional predictor; however, not much work has been done when a scalar exposure that interacts with the functional covariate is introduced. In this paper, we present 2 functional regression models that account for this interaction and propose 2 novel estimation procedures for the parameters in these models. These estimation methods allow for a noisy and/or sparsely observed functional covariate and are easily extended to generalized exponential family responses. 
We compute standard errors of our estimators, which allows for further statistical inference and hypothesis testing. We compare the performance of the proposed estimators to each other and to one found in the literature via simulation and demonstrate our methods using a real data example.}, number={18}, journal={STATISTICS IN MEDICINE}, author={Davenport, Clemontina A. and Maity, Arnab and Baladandayuthapani, Veerabhadran}, year={2018}, month={Aug}, pages={2715–2733} } @article{maity_zhao_sullivan_tzeng_2018, title={Inference on phenotype-specific effects of genes using multivariate kernel machine regression}, volume={42}, ISSN={["1098-2272"]}, url={https://doi.org/10.1002/gepi.22096}, DOI={10.1002/gepi.22096}, abstractNote={ABSTRACTWe consider the problem of assessing the joint effect of a set of genetic markers on multiple, possibly correlated phenotypes of interest. We develop a kernel machine based multivariate regression framework, where the joint effect of the marker set on each of the phenotypes is modeled using prespecified kernel functions with unknown variance components. Unlike most existing methods that mainly focus on the global association between the marker set and the phenotype set, we develop estimation and testing procedures to study phenotype‐specific associations. Specifically, we develop an estimation method based on the penalized likelihood approach to estimate phenotype‐specific effects and their corresponding standard errors while accounting for possible correlation among the phenotypes. We develop testing procedures for the association of the marker set with any subset of phenotypes using a score‐based variance components testing method. 
We assess the performance of our proposed methodology via a simulation study and demonstrate the utility of the proposed method using the Clinical Antipsychotic Trials of Intervention Effectiveness (CATIE) data.}, number={1}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Zhao, Jing and Sullivan, Patrick F. and Tzeng, Jung-Ying}, year={2018}, month={Feb}, pages={64–79} } @article{davenport_maity_sullivan_tzeng_2017, title={A Powerful Test for SNP Effects on Multivariate Binary Outcomes Using Kernel Machine Regression}, volume={10}, ISSN={1867-1764 1867-1772}, url={http://dx.doi.org/10.1007/S12561-017-9189-9}, DOI={10.1007/s12561-017-9189-9}, abstractNote={Evaluating multiple binary outcomes is common in genetic studies of complex diseases. These outcomes are often correlated because they are collected from the same individual and they may share common marker effects. In this paper, we propose a procedure to test for effect of a single nucleotide polymorphism-set on multiple, possibly correlated, binary responses. We develop a score-based test using a non-parametric modeling framework that jointly models the global effect of the marker set. We account for the non-linear effects and potentially complicated interaction between markers using reproducing kernels. Our testing procedure only requires estimation under the null hypothesis and we use multivariate generalized estimating equations to estimate the model components to account for the correlation among the outcomes. We evaluate finite sample performance of our test via simulation study and demonstrate our methods using the Clinical Antipsychotic Trials of Intervention Effectiveness antibody study data and the CoLaus study data.}, number={1}, journal={Statistics in Biosciences}, publisher={Springer Science and Business Media LLC}, author={Davenport, Clemontina A. and Maity, Arnab and Sullivan, Patrick F. 
and Tzeng, Jung-Ying}, year={2017}, month={Mar}, pages={117–138} } @article{kim_staicu_maity_carroll_ruppert_2018, title={Additive Function-on-Function Regression}, volume={27}, ISSN={["1537-2715"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85045617567&partnerID=MN8TOARS}, DOI={10.1080/10618600.2017.1356730}, abstractNote={ABSTRACT We study additive function-on-function regression where the mean response at a particular time point depends on the time point itself, as well as the entire covariate trajectory. We develop a computationally efficient estimation methodology based on a novel combination of spline bases with an eigenbasis to represent the trivariate kernel function. We discuss prediction of a new response trajectory, propose an inference procedure that accounts for total variability in the predicted response curves, and construct pointwise prediction intervals. The estimation/inferential procedure accommodates realistic scenarios, such as correlated error structure as well as sparse and/or irregular designs. We investigate our methodology in finite sample size through simulations and two real data applications. Supplementary material for this article is available online.}, number={1}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Kim, Janet S. and Staicu, Ana-Maria and Maity, Arnab and Carroll, Raymond J. and Ruppert, David}, year={2018}, pages={234–244} } @article{bandyopadhyay_maity_2018, title={Asymptotic theory for varying coefficient regression models with dependent data}, volume={70}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85014934996&partnerID=MN8TOARS}, DOI={10.1007/s10463-017-0607-z}, number={4}, journal={Annals of the Institute of Statistical Mathematics}, author={Bandyopadhyay, S. 
and Maity, A.}, year={2018}, pages={745–759} } @article{maity_pradhan_das_2018, title={Bias Reduction in Logistic Regression with Missing Responses When the Missing Data Mechanism is Nonignorable}, volume={73}, ISSN={0003-1305 1537-2731}, url={http://dx.doi.org/10.1080/00031305.2017.1407359}, DOI={10.1080/00031305.2017.1407359}, abstractNote={ABSTRACT In logistic regression with nonignorable missing responses, Ibrahim and Lipsitz proposed a method for estimating regression parameters. It is known that the regression estimates obtained by using this method are biased when the sample size is small. Also, another complexity arises when the iterative estimation process encounters separation in estimating regression coefficients. In this article, we propose a method to improve the estimation of regression coefficients. In our likelihood-based method, we penalize the likelihood by multiplying it by a noninformative Jeffreys prior as a penalty term. The proposed method reduces bias and is able to handle the issue of separation. Simulation results show substantial bias reduction for the proposed method as compared to the existing method. Analyses using real world data also support the simulation findings. 
An R package called brlrmr is developed implementing the proposed method and the Ibrahim and Lipsitz method.}, number={4}, journal={The American Statistician}, publisher={Informa UK Limited}, author={Maity, Arnab Kumar and Pradhan, Vivek and Das, Ujjwal}, year={2018}, month={Jul}, pages={340–349} } @article{luo_mccullough_tzeng_darrah_vengosh_maguire_maity_samuel-hodge_murphy_mendez_et al._2017, title={Maternal blood cadmium, lead and arsenic levels, nutrient combinations, and offspring birthweight}, volume={17}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85018564741&partnerID=MN8TOARS}, DOI={10.1186/s12889-017-4225-8}, abstractNote={Cadmium (Cd), lead (Pb) and arsenic (As) are common environmental contaminants that have been associated with lower birthweight. Although some essential metals may mitigate exposure, data are inconsistent. This study sought to evaluate the relationship between toxic metals, nutrient combinations and birthweight among 275 mother-child pairs.Non-essential metals, Cd, Pb, As, and essential metals, iron (Fe), zinc (Zn), selenium (Se), copper (Cu), calcium (Ca), magnesium (Mg), and manganese (Mn) were measured in maternal whole blood obtained during the first trimester using inductively coupled plasma mass spectrometry. Folate concentrations were measured by microbial assay. Birthweight was obtained from medical records. We used quantile regression to evaluate the association between toxic metals and nutrients due to their underlying wedge-shaped relationship. Ordinary linear regression was used to evaluate associations between birth weight and toxic metals.After multivariate adjustment, the negative association between Pb or Cd and a combination of Fe, Se, Ca and folate was robust, persistent and dose-dependent (p < 0.05). However, a combination of Zn, Cu, Mn and Mg was positively associated with Pb and Cd levels. While prenatal blood Cd and Pb were also associated with lower birthweight. 
Fe, Se, Ca and folate did not modify these associations. Small sample size and cross-sectional design notwithstanding, the robust and persistent negative associations between some, but not all, nutrient combinations with these ubiquitous environmental contaminants suggest that only some recommended nutrient combinations may mitigate toxic metal exposure in chronically exposed populations. Larger longitudinal studies are required to confirm these findings.}, number={1}, journal={BMC Public Health}, author={Luo, Y. and McCullough, L. E. and Tzeng, J.-Y. and Darrah, T. and Vengosh, A. and Maguire, R. L. and Maity, Arnab and Samuel-Hodge, C. and Murphy, S. K. and Mendez, M. A. and others}, year={2017} }
@article{maity_2017, title={Nonparametric functional concurrent regression models}, volume={9}, url={https://doi.org/10.1002/wics.1394}, DOI={10.1002/wics.1394}, abstractNote={Function‐on‐function regression refers to the situation where both independent and dependent variables in a regression model are of functional nature. Functional concurrent regression is a specific type of function‐on‐function regression that relates the response function at a specific point to the covariate value at that point and the point itself. Standard functional concurrent models are linear (a linear combination of the covariates is used), and often criticized due to their linearity assumption and lack of flexibility. This gives rise to nonparametric functional concurrent regression that models the response function at a specific point using a multivariate nonparametric function of both the point and the covariate value at that point. Such models allow for much more flexibility and predictive accuracy, especially when the underlying relationship is nonlinear. 
In the past decade, several methods have been proposed to perform estimation, prediction and inference in the nonparametric concurrent models using various methods such as spline smoothing, Gaussian process regression and local polynomial kernel regression. Such models have been shown to be useful tools in functional regression as well as stepping stone for further development. WIREs Comput Stat 2017, 9:e1394. doi: 10.1002/wics.1394 This article is categorized under: Statistical and Graphical Methods of Data Analysis > Nonparametric Methods }, number={2}, journal={WIREs Computational Statistics}, author={Maity, Arnab}, year={2017}, month=mar }
% Duplicate record of maity_2017 above (same title, volume, number and year, fewer fields).
% Disabled by dropping the leading at-sign so the citation key is defined only once.
misc{maity_2017, title={Nonparametric functional concurrent regression models}, volume={9}, number={2}, journal={Wiley Interdisciplinary Reviews: Computational Statistics}, author={Maity, A.}, year={2017} }
@article{luo_maity_wu_smith_duan_li_tzeng_2018, title={On the substructure controls in rare variant analysis: Principal components or variance components?}, volume={42}, ISSN={1098-2272}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85039155216&partnerID=MN8TOARS}, DOI={10.1002/gepi.22102}, abstractNote={Abstract Recent studies showed that population substructure (PS) can have more complex impact on rare variant tests and that similarity‐based collapsing tests (e.g., SKAT) may suffer more severely by PS than burden‐based tests. In this work, we evaluate the performance of SKAT coupling with principal components (PC) or variance components (VC) based PS correction methods. We consider confounding effects caused by PS including stratified populations, admixed populations, and spatially distributed nongenetic risk; we investigate which types of variants (e.g., common, less frequent, rare, or all variants) should be used to effectively control for confounding effects. 
We found that (i) PC‐based methods can account for confounding effects in most scenarios except for admixture, although the number of sufficient PCs depends on the PS complexity and the type of variants used. (ii) PCs based on all variants (i.e., common + less frequent + rare) tend to require equal or fewer sufficient PCs and often achieve higher power than PCs based on other variant types. (iii) VC‐based methods can effectively adjust for confounding in all scenarios (even for admixture), though the type of variants should be used to construct VC may vary. (iv) VC based on all variants works consistently in all scenarios, though its power may be sometimes lower than VC based on other variant types. Given that the best‐performed method and which variants to use depend on the underlying unknown confounding mechanisms, a robust strategy is to perform SKAT analyses using VC‐based methods based on all variants.}, number={3}, journal={GENETIC EPIDEMIOLOGY}, author={Luo, Yiwen and Maity, Arnab and Wu, Michael C. and Smith, Chris and Duan, Qing and Li, Yun and Tzeng, Jung-Ying}, year={2018}, month=apr, pages={276--287} }
% Typed as an article (was misc) so that journal, volume, number and pages are used by standard styles.
@article{kong_maity_hsu_tzeng_2018, title={Rejoinder to ``A note on testing and estimation in marker-set association study using semiparametric quantile regression kernel machine''}, volume={74}, ISSN={1541-0420}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85032786553&partnerID=MN8TOARS}, DOI={10.1111/biom.12786}, abstractNote={Dehan Kong , Arnab Maity, Fang-Chi Hsu, and Jung-Ying Tzeng Department of Statistical Sciences, University of Toronto, Ontario, Canada Department of Statistics, North Carolina State University, North Carolina, U.S.A. Department of Biostatistical Sciences, Wake Forest University, North Carolina, U.S.A. Department of Statistics and Bioinformatics Research Center North Carolina State University, North Carolina, U.S.A. 
Department of Statistics, National Cheng-Kung University, Taiwan ∗email: kongdehan@utstat.toronto.edu}, number={2}, journal={BIOMETRICS}, author={Kong, Dehan and Maity, Arnab and Hsu, Fang-Chi and Tzeng, Jung-Ying}, year={2018}, month=jun, pages={767--768} }
@article{zhan_tong_zhao_maity_wu_chen_2017, title={A small-sample multivariate kernel machine test for microbiome association studies}, volume={41}, ISSN={1098-2272}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85014518911&partnerID=MN8TOARS}, DOI={10.1002/gepi.22030}, abstractNote={High‐throughput sequencing technologies have enabled large‐scale studies of the role of the human microbiome in health conditions and diseases. Microbial community level association test, as a critical step to establish the connection between overall microbiome composition and an outcome of interest, has now been routinely performed in many studies. However, current microbiome association tests all focus on a single outcome. It has become increasingly common for a microbiome study to collect multiple, possibly related, outcomes to maximize the power of discovery. As these outcomes may share common mechanisms, jointly analyzing these outcomes can amplify the association signal and improve statistical power to detect potential associations. We propose the multivariate microbiome regression‐based kernel association test (MMiRKAT) for testing association between multiple continuous outcomes and overall microbiome composition, where the kernel used in MMiRKAT is based on Bray‐Curtis or UniFrac distance. MMiRKAT directly regresses all outcomes on the microbiome profiles via a semiparametric kernel machine regression framework, which allows for covariate adjustment and evaluates the association via a variance‐component score test. 
Because most of the current microbiome studies have small sample sizes, a novel small‐sample correction procedure is implemented in MMiRKAT to correct for the conservativeness of the association test when the sample size is small or moderate. The proposed method is assessed via simulation studies and an application to a real data set examining the association between host gene expression and mucosal microbiome composition. We demonstrate that MMiRKAT is more powerful than large sample based multivariate kernel association test, while controlling the type I error. A free implementation of MMiRKAT in R language is available at http://research.fhcrc.org/wu/en.html.}, number={3}, journal={GENETIC EPIDEMIOLOGY}, author={Zhan, Xiang and Tong, Xingwei and Zhao, Ni and Maity, Arnab and Wu, Michael C. and Chen, Jun}, year={2017}, month=apr, pages={210--220} }
@article{kong_staicu_maity_2016, title={Classical testing in functional linear models}, volume={28}, ISSN={1029-0311}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84988336276&partnerID=MN8TOARS}, DOI={10.1080/10485252.2016.1231806}, abstractNote={ABSTRACT We extend four tests common in classical regression – Wald, score, likelihood ratio and F tests – to functional linear regression, for testing the null hypothesis, that there is no association between a scalar response and a functional covariate. Using functional principal component analysis, we re-express the functional linear model as a standard linear model, where the effect of the functional covariate can be approximated by a finite linear combination of the functional principal component scores. In this setting, we consider application of the four traditional tests. The proposed testing procedures are investigated theoretically for densely observed functional covariates when the number of principal components diverges. 
Using the theoretical distribution of the tests under the alternative hypothesis, we develop a procedure for sample size calculation in the context of functional linear regression. The four tests are further compared numerically for both densely and sparsely observed noisy functional data in simulation experiments and using two real data applications.}, number={4}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Kong, Dehan and Staicu, Ana-Maria and Maity, Arnab}, year={2016}, month=dec, pages={813--838} }
% Chapter title plus booktitle: typed as incollection (was inbook) so that booktitle is honoured by classic styles.
@incollection{tzeng_maity_2016, title={Marker-set Approaches for Assessing Gene-Environment Interactions at Gene Level}, booktitle={Statistical Approaches to Gene x Environment Interactions for Complex Phenotypes}, author={Tzeng, J. Y. and Maity, A.}, year={2016} }
@article{zhang_staicu_maity_2016, title={Testing for additivity in non-parametric regression}, volume={44}, ISSN={1708-945X}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84982980889&partnerID=MN8TOARS}, DOI={10.1002/cjs.11295}, abstractNote={Abstract This article discusses a novel approach for testing for additivity in non‐parametric regression. We represent the model using a linear mixed model framework and equivalently rewrite the original testing problem as testing for a subset of zero variance components. We propose two testing procedures: the restricted likelihood ratio test and the generalized F test. We develop the finite sample null distribution of the restricted likelihood ratio test and generalized F test using the spectral decomposition of the restricted likelihood ratio and the residual sum of squares, respectively. The null distribution is non‐standard and we provide a fast algorithm to simulate from the null distribution of the tests. We show, through numerical investigation, that the proposed testing procedures outperform the available alternatives and apply the methods to a diabetes data set. 
The Canadian Journal of Statistics 44: 445–462; 2016 © 2016 Statistical Society of Canada}, number={4}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, publisher={Wiley-Blackwell}, author={Zhang, Yichi and Staicu, Ana-Maria and Maity, Arnab}, year={2016}, month=dec, pages={445--462} }
@article{usset_maity_staicu_schwartzman_2015, title={Glacier Terminus Estimation from {Landsat} Image Intensity Profiles}, volume={20}, ISSN={1085-7117 1537-2693}, url={http://dx.doi.org/10.1007/S13253-015-0207-4}, DOI={10.1007/s13253-015-0207-4}, number={2}, journal={Journal of Agricultural, Biological, and Environmental Statistics}, publisher={Springer Science and Business Media LLC}, author={Usset, Joseph and Maity, Arnab and Staicu, Ana-Maria and Schwartzman, Armin}, year={2015}, month=may, pages={279--298} }
@article{usset_staicu_maity_2016, title={Interaction models for functional regression}, volume={94}, ISSN={0167-9473}, url={http://dx.doi.org/10.1016/J.CSDA.2015.08.020}, DOI={10.1016/j.csda.2015.08.020}, abstractNote={A functional regression model with a scalar response and multiple functional predictors is proposed that accommodates two-way interactions in addition to their main effects. The proposed estimation procedure models the main effects using penalized regression splines, and the interaction effect by a tensor product basis. Extensions to generalized linear models and data observed on sparse grids or with measurement error are presented. A hypothesis testing procedure for the functional interaction effect is described. The proposed method can be easily implemented through existing software. Numerical studies show that fitting an additive model in the presence of interaction leads to both poor estimation performance and lost prediction power, while fitting an interaction model where there is in fact no interaction leads to negligible losses. 
The methodology is illustrated on the AneuRisk65 study data.}, journal={Computational Statistics \& Data Analysis}, publisher={Elsevier BV}, author={Usset, Joseph and Staicu, Ana-Maria and Maity, Arnab}, year={2016}, month=feb, pages={317--329} }
@article{wang_maity_hsiao_voora_kaddurah-daouk_tzeng_2015, title={Module-based association analysis for omics data with network structure}, volume={10}, url={http://europepmc.org/abstract/med/25822417}, DOI={10.1371/journal.pone.0122309}, abstractNote={Module-based analysis (MBA) aims to evaluate the effect of a group of biological elements sharing common features, such as SNPs in the same gene or metabolites in the same pathways, and has become an attractive alternative to traditional single bio-element approaches. Because bio-elements regulate and interact with each other as part of network, incorporating network structure information can more precisely model the biological effects, enhance the ability to detect true associations, and facilitate our understanding of the underlying biological mechanisms. However, most MBA methods ignore the network structure information, which depicts the interaction and regulation relationship among basic functional units in biology system. We construct the connectivity kernel and the topology kernel to capture the relationship among bio-elements in a module, and use a kernel machine framework to evaluate the joint effect of bio-elements. Our proposed kernel machine approach directly incorporates network structure so to enhance the study efficiency; it can assess interactions among modules, account covariates, and is computational efficient. 
Through simulation studies and real data application, we demonstrate that the proposed network-based methods can have markedly better power than the approaches ignoring network information under a range of scenarios.}, number={3}, journal={PLoS ONE}, author={Wang, Z. and Maity, A. and Hsiao, C. K. and Voora, D. and Kaddurah-Daouk, R. and Tzeng, J. Y.}, year={2015}, pages={e0122309} }
@article{davenport_maity_wu_2015, title={Parametrically guided estimation in nonparametric varying coefficient models with quasi-likelihood}, volume={27}, ISSN={1029-0311}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84929278394&partnerID=MN8TOARS}, DOI={10.1080/10485252.2015.1026903}, abstractNote={Varying coefficient models (VCMs) allow us to generalise standard linear regression models to incorporate complex covariate effects by modelling the regression coefficients as functions of another covariate. For nonparametric varying coefficients, we can borrow the idea of parametrically guided estimation to improve asymptotic bias. In this paper, we develop a guided estimation procedure for the nonparametric VCMs. Asymptotic properties are established for the guided estimators and a method of bandwidth selection via bias-variance tradeoff is proposed. We compare the performance of the guided estimator with that of the unguided estimator via both simulation and real data examples.}, number={2}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, author={Davenport, Clemontina A. 
and Maity, Arnab and Wu, Yichao}, year={2015}, month=apr, pages={195--213} }
@article{urrutia_lee_maity_zhao_shen_li_wu_2015, title={Rare variant testing across methods and thresholds using the multi-kernel sequence kernel association test ({MK-SKAT})}, volume={8}, ISSN={1938-7997}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84945274985&partnerID=MN8TOARS}, DOI={10.4310/sii.2015.v8.n4.a8}, abstractNote={Analysis of rare genetic variants has focused on region-based analysis wherein a subset of the variants within a genomic region is tested for association with a complex trait. Two important practical challenges have emerged. First, it is difficult to choose which test to use. Second, it is unclear which group of variants within a region should be tested. Both depend on the unknown true state of nature. Therefore, we develop the Multi-Kernel SKAT (MK-SKAT) which tests across a range of rare variant tests and groupings. Specifically, we demonstrate that several popular rare variant tests are special cases of the sequence kernel association test which compares pair-wise similarity in trait value to similarity in the rare variant genotypes between subjects as measured through a kernel function. Choosing a particular test is equivalent to choosing a kernel. Similarly, choosing which group of variants to test also reduces to choosing a kernel. Thus, MK-SKAT uses perturbation to test across a range of kernels. 
Simulations and real data analyses show that our framework controls type I error while maintaining high power across settings: MK-SKAT loses power when compared to the kernel for a particular scenario but has much greater power than poor choices.}, number={4}, journal={STATISTICS AND ITS INTERFACE}, author={Urrutia, Eugene and Lee, Seunggeun and Maity, Arnab and Zhao, Ni and Shen, Judong and Li, Yun and Wu, Michael C.}, year={2015}, pages={495--505} }
% The citation key is kept as exported; the author list no longer includes the journal name "Biometrics" as a fifth author.
@article{kong_maity_hsu_tzeng_biometrics_2016, title={Testing and estimation in marker-set association study using semiparametric quantile regression kernel machine}, volume={72}, ISSN={1541-0420}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84978986071&partnerID=MN8TOARS}, DOI={10.1111/biom.12438}, abstractNote={Summary We consider quantile regression for partially linear models where an outcome of interest is related to covariates and a marker set (e.g., gene or pathway). The covariate effects are modeled parametrically and the marker set effect of multiple loci is modeled using kernel machine. We propose an efficient algorithm to solve the corresponding optimization problem for estimating the effects of covariates and also introduce a powerful test for detecting the overall effect of the marker set. Our test is motivated by traditional score test, and borrows the idea of permutation test. 
Our estimation and testing procedures are evaluated numerically and applied to assess genetic association of change in fasting homocysteine level using the Vitamin Intervention for Stroke Prevention Trial data.}, number={2}, journal={BIOMETRICS}, author={Kong, Dehan and Maity, Arnab and Hsu, Fang-Chi and Tzeng, Jung-Ying}, year={2016}, month=jun, pages={364--371} }
@article{terry_zhang_maity_arshad_karmaus_2017, title={Unified variable selection in semi-parametric models}, volume={26}, ISSN={1477-0334}, DOI={10.1177/0962280215610928}, abstractNote={ We propose a Bayesian variable selection method in semi-parametric models with applications to genetic and epigenetic data (e.g., single nucleotide polymorphisms and DNA methylation, respectively). The data are individually standardized to reduce heterogeneity and facilitate simultaneous selection of categorical (single nucleotide polymorphisms) and continuous (DNA methylation) variables. The Gaussian reproducing kernel is applied to the transformed data to evaluate joint effect of the variables, which may include complex interactions between, e.g., single nucleotide polymorphisms and DNA methylation. Indicator variables are introduced to the model for the purpose of variable selection. The method is demonstrated and evaluated using simulations under different scenarios. We apply the method to identify informative DNA methylation sites and single nucleotide polymorphisms in a set of genes based on their joint effect on allergic sensitization. The selected single nucleotide polymorphisms and methylation sites have the potential to serve as early markers for allergy prediction, and consequently benefit medical and clinical research to prevent allergy before its manifestation. 
}, number={6}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, publisher={SAGE Publications}, author={Terry, William and Zhang, Hongmei and Maity, Arnab and Arshad, Hasan and Karmaus, Wilfried}, year={2017}, month=dec, pages={2821--2831} }
@article{wang_maity_luo_neely_tzeng_2015, title={Complete Effect-Profile Assessment in Association Studies With Multiple Genetic and Multiple Environmental Factors}, volume={39}, ISSN={1098-2272}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84921048438&partnerID=MN8TOARS}, DOI={10.1002/gepi.21877}, abstractNote={ABSTRACT Studying complex diseases in the post genome‐wide association studies (GWAS) era has led to developing methods that consider factor‐sets rather than individual genetic/environmental factors (i.e., Multi‐G‐Multi‐E studies), and mining for potential gene‐environment (G×E) interactions has proven to be an invaluable aid in both discovery and deciphering underlying biological mechanisms. Current approaches for examining effect profiles in Multi‐G‐Multi‐E analyses are either underpowered due to large degrees of freedom, ill‐suited for detecting G×E interactions due to imprecise modeling of the G and E effects, or lack of capacity for modeling interactions between two factor‐sets (e.g., existing methods focus primarily on a single E factor). In this work, we illustrate the issues encountered in constructing kernels for investigating interactions between two factor‐sets, and propose a simple yet intuitive solution to construct the G×E kernel that retains the ease‐of‐interpretation of classic regression. We also construct a series of kernel machine (KM) score tests to evaluate the complete effect profile (i.e., the G, E, and G×E effects individually or in combination). We show, via simulations and a data application, that the proposed KM methods outperform the classic and PC regressions across a range of scenarios, including varying effect size, effect structure, and interaction complexity. 
The largest power gain was observed when the underlying effect structure involved complex G×E interactions; however, the proposed methods have consistent, powerful performance when the effect profile is simple or complex, suggesting that the proposed method could be a useful tool for exploratory or confirmatory G×E analysis.}, number={2}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Wang, Zhi and Maity, Arnab and Luo, Yiwen and Neely, Megan L. and Tzeng, Jung-Ying}, year={2015}, month=feb, pages={122--133} }
@article{zhao_bell_maity_staicu_joubert_london_wu_2015, title={Global Analysis of Methylation Profiles From High Resolution {CpG} Data}, volume={39}, ISSN={1098-2272}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84921023434&partnerID=MN8TOARS}, DOI={10.1002/gepi.21874}, abstractNote={ABSTRACT New high throughput technologies are now enabling simultaneous epigenetic profiling of DNA methylation at hundreds of thousands of CpGs across the genome. A problem of considerable practical interest is identification of large scale, global changes in methylation that are associated with environmental variables, clinical outcomes, or other experimental conditions. However, there has been little statistical research on methods for global methylation analysis using technologies with individual CpG resolution. To address this critical gap in the literature, we develop a new strategy for global analysis of methylation profiles using a functional regression approach wherein we approximate either the density or the cumulative distribution function (CDF) of the methylation values for each individual using B‐spline basis functions. The spline coefficients for each individual are allowed to summarize the individual's overall methylation profile. 
We then test for association between the overall distribution and a continuous or dichotomous outcome variable using a variance component score test that naturally accommodates the correlation between spline coefficients. Simulations indicate that our proposed approach has desirable power while protecting type I error. The method was applied to detect methylation differences, both genome wide and at LINE1 elements, between the blood samples from rheumatoid arthritis patients and healthy controls and to detect the epigenetic changes of human hepatocarcinogenesis in the context of alcohol abuse and hepatitis C virus infection. A free implementation of our methods in the R language is available in the Global Analysis of Methylation Profiles (GAMP) package at http://research.fhcrc.org/wu/en.html.}, number={2}, journal={GENETIC EPIDEMIOLOGY}, author={Zhao, Ni and Bell, Douglas A. and Maity, Arnab and Staicu, Ana-Maria and Joubert, Bonnie R. and London, Stephanie J. and Wu, Michael C.}, year={2015}, month=feb, pages={53--64} }
% Key below: the exported key contained "et al." with a space, which is not a valid citation key; normalised to et_al.
@article{carmona_sofer_hutchinson_cantone_coull_maity_vokonas_lin_schwartz_baccarelli_et_al_2014, title={Short-Term airborne particulate matter exposure alters the epigenetic landscape of human genes associated with the mitogen-Activated protein kinase network: A cross-sectional study}, volume={13}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84988698028&partnerID=MN8TOARS}, DOI={10.1186/1476-069X-13-94}, abstractNote={Exposure to air particulate matter is known to elevate blood biomarkers of inflammation and to increase cardiopulmonary morbidity and mortality. Major components of airborne particulate matter typically include black carbon from traffic and sulfates from coal-burning power plants. DNA methylation is thought to be sensitive to these environmental toxins and possibly mediate environmental effects on clinical outcomes via regulation of gene networks. 
The underlying mechanisms may include epigenetic modulation of major inflammatory pathways, yet the details remain unclear. We sought to elucidate how short-term exposure to air pollution components, singly and/or in combination, alter blood DNA methylation in certain inflammation-associated gene networks, MAPK and NF-κB, which may transmit the environmental signal(s) and influence the inflammatory pathway in vivo. To this end, we utilized a custom-integrated workflow—molecular processing, pollution surveillance, biostatical analysis, and bioinformatic visualization—to map novel human (epi)gene pathway-environment interactions. Specifically, out of 84 MAPK pathway genes considered, we identified 11 whose DNA methylation status was highly associated with black carbon exposure, after adjusting for potential confounders—age, sulfate exposure, smoking, blood cell composition, and blood pressure. Moreover, after adjusting for these confounders, multi-pollutant analysis of synergistic DNA methylations significantly associated with sulfate and BC exposures yielded 14 MAPK genes. No associations were found with the NF-κB pathway. Exposure to short-term air pollution components thus resulted in quantifiable epigenetic changes in the promoter areas of MAPK pathway genes. Bioinformatic mapping of single- vs. 
multi-exposure-associated epigenetic changes suggests that these alterations might affect biological pathways in nuanced ways that are not simply additive or fully predictable via individual-level exposure assessments.}, number={1}, journal={Environmental Health: A Global Access Science Source}, author={Carmona, J. J. and Sofer, T. and Hutchinson, J. and Cantone, L. and Coull, B. and Maity, A. and Vokonas, P. and Lin, X. and Schwartz, J. and Baccarelli, A. A. and others}, year={2014}, pages={94} }
@article{maity_williams_ryan_missmer_coull_hauser_2014, title={Analysis of in vitro fertilization data with multiple outcomes using discrete time-to-event analysis}, volume={33}, ISSN={1097-0258}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84898005340&partnerID=MN8TOARS}, DOI={10.1002/sim.6050}, abstractNote={In vitro fertilization (IVF) is an increasingly common method of assisted reproductive technology. Because of the careful observation and follow‐up required as part of the procedure, IVF studies provide an ideal opportunity to identify and assess clinical and demographic factors along with environmental exposures that may impact successful reproduction. A major challenge in analyzing data from IVF studies is handling the complexity and multiplicity of outcome, resulting from both multiple opportunities for pregnancy loss within a single IVF cycle in addition to multiple IVF cycles. To date, most evaluations of IVF studies do not make use of full data because of its complex structure. In this paper, we develop statistical methodology for analysis of IVF data with multiple cycles and possibly multiple failure types observed for each individual. We develop a general analysis framework based on a generalized linear modeling formulation that allows implementation of various types of models including shared frailty models, failure‐specific frailty models, and transitional models, using standard software. 
We apply our methodology to data from an IVF study conducted at the Brigham and Women's Hospital, Massachusetts. We also summarize the performance of our proposed methods on the basis of a simulation study. Copyright © 2013 John Wiley & Sons, Ltd.}, number={10}, journal={STATISTICS IN MEDICINE}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Williams, Paige L. and Ryan, Louise and Missmer, Stacey A. and Coull, Brent A. and Hauser, Russ}, year={2014}, month=may, pages={1738--1749} }
% Typed as incollection (was article): the container is a book from a book publisher, so its title is stored in booktitle.
@incollection{liu_maity_lin_wright_christiani_2013, title={Design and Analysis Issues in Gene and Environment Studies}, volume={11}, DOI={10.1201/b16304-14}, abstractNote={PAOLO ROMANIA, ALICE BERTAINA, GIORGIA BRACAGLIA, FRANCO LOCATELLI, DORIANA FRUCI, and ROSSELLA ROTAINTRODUCTIONEpigenetic chromatin remodeling plays a pivotal role in normal mammalian development and post-natal tissue homeostasis. Indeed, lineage specification and cellular differentiation, which underlie embryo development and morphogenesis from a single pluripotent stem cell, are epigenetically regulated processes. The final result is the "plasticity" of an individual genotype that, through the activation of molecular cascades, timely and sequentially controlled, produces different phenotypes in response to different microenvironments. In the last 10 years, special attention has been paid to the non-protein coding portion of the genome such as non-coding small RNAs, among which are microRNAs (miRNAs), considered to be major regulators of developmental pathways [1-8]. Of note, chromatin remodeling and miRNA pathways have been shown to be interconnected and able to regulate each other. 
To date, it is recognized that the deregulation of the epigenetic-and miRNA-dependent control of gene expression underlies tumorigenesis.}, internal-note={NOTE(review): abstractNote names Romania et al. as authors and discusses miRNA, so it appears to belong to a different chapter; the volume number may also not refer to this book -- verify both against the publisher record}, booktitle={Exploring Connections Between Genetic Mechanisms and Disease Expression}, publisher={Apple Academic Press}, author={Liu, Chen-Yu and Maity, Arnab and Lin, Xihong and Wright, Robert and Christiani, David}, year={2013}, pages={339--370} }
@article{sofer_baccarelli_cantone_coull_maity_lin_schwartz_2013, title={Exposure to airborne particulate matter is associated with methylation pattern in the asthma pathway}, volume={5}, ISSN={1750-192X}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84876099049&partnerID=MN8TOARS}, DOI={10.2217/epi.13.16}, abstractNote={ Background: Asthma exacerbation and other respiratory symptoms are associated with exposure to air pollution. Since environment affects gene methylation, it is hypothesized that asthmatic responses to pollution are mediated through methylation. Materials & methods: We study the possibility that airborne particulate matter affects gene methylation in the asthma pathway. We measured methylation array data in clinic visits of 141 subjects from the Normative Aging Study. Black carbon and sulfate measures from a central monitoring site were recorded and 30-day averages were calculated for each clinic visit. Gene-specific methylation scores were calculated for the genes in the asthma pathway, and the association between the methylation in the asthma pathway and the pollution measures was analyzed using sparse Canonical Correlation Analysis. Results: The analysis found that exposures to black carbon and sulfate were significantly associated with the methylation pattern in the asthma pathway (p-values 0.05 and 0.02, accordingly). Specific genes that contributed to this association were identified. Conclusion: These results suggest that the effect of air pollution on asthmatic and respiratory responses may be mediated through gene methylation. 
}, number={2}, journal={EPIGENOMICS}, publisher={Future Medicine Ltd}, author={Sofer, Tamar and Baccarelli, Andrea and Cantone, Laura and Coull, Brent and Maity, Arnab and Lin, Xihong and Schwartz, Joel}, year={2013}, month=apr, pages={147--154} }
@article{wu_maity_lee_simmons_harmon_lin_engel_molldrem_armistead_2013, title={Kernel Machine {SNP}-Set Testing Under Multiple Candidate Kernels}, volume={37}, ISSN={0741-0395}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84875658304&partnerID=MN8TOARS}, DOI={10.1002/gepi.21715}, abstractNote={ABSTRACT Joint testing for the cumulative effect of multiple single‐nucleotide polymorphisms grouped on the basis of prior biological knowledge has become a popular and powerful strategy for the analysis of large‐scale genetic association studies. The kernel machine (KM)‐testing framework is a useful approach that has been proposed for testing associations between multiple genetic variants and many different types of complex traits by comparing pairwise similarity in phenotype between subjects to pairwise similarity in genotype, with similarity in genotype defined via a kernel function. An advantage of the KM framework is its flexibility: choosing different kernel functions allows for different assumptions concerning the underlying model and can allow for improved power. In practice, it is difficult to know which kernel to use a priori because this depends on the unknown underlying trait architecture and selecting the kernel which gives the lowest P‐value can lead to inflated type I error. Therefore, we propose practical strategies for KM testing when multiple candidate kernels are present based on constructing composite kernels and based on efficient perturbation procedures. We demonstrate through simulations and real data applications that the procedures protect the type I error rate and can lead to substantially improved power over poor choices of kernels and only modest differences in power vs. 
using the best candidate kernel.}, number={3}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Wu, Michael C. and Maity, Arnab and Lee, Seunggeun and Simmons, Elizabeth M. and Harmon, Quaker E. and Lin, Xinyi and Engel, Stephanie M. and Molldrem, Jeffrey J. and Armistead, Paul M.}, year={2013}, month=apr, pages={267--275} }
@article{xun_cao_mallick_maity_carroll_2013, title={Parameter Estimation of Partial Differential Equation Models}, volume={108}, ISSN={1537-274X}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84890083793&partnerID=MN8TOARS}, DOI={10.1080/01621459.2013.794730}, abstractNote={Partial differential equation (PDE) models are commonly used to model complex dynamic systems in applied sciences such as biology and finance. The forms of these PDE models are usually proposed by experts based on their prior knowledge and understanding of the dynamic system. Parameters in PDE models often have interesting scientific interpretations, but their values are often unknown and need to be estimated from the measurements of the dynamic system in the presence of measurement errors. Most PDEs used in practice have no analytic solutions, and can only be solved with numerical methods. Currently, methods for estimating PDE parameters require repeatedly solving PDEs numerically under thousands of candidate parameter values, and thus the computational load is high. In this article, we propose two methods to estimate parameters in PDE models: a parameter cascading method and a Bayesian approach. In both methods, the underlying dynamic process modeled with the PDE model is represented via basis function expansion. For the parameter cascading method, we develop two nested levels of optimization to estimate the PDE parameters. For the Bayesian method, we develop a joint model for data and the PDE and develop a novel hierarchical model allowing us to employ Markov chain Monte Carlo (MCMC) techniques to make posterior inference. 
Simulation studies show that the Bayesian method and parameter cascading method are comparable, and both outperform other available methods in terms of estimation accuracy. The two methods are demonstrated by estimating parameters in a PDE model from long-range infrared light detection and ranging data. Supplementary materials for this article are available online.}, number={503}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, publisher={Informa UK Limited}, author={Xun, Xiaolei and Cao, Jiguo and Mallick, Bani and Maity, Arnab and Carroll, Raymond J.}, year={2013}, month={Sep}, pages={1009–1020} } @article{gertheiss_maity_staicu_2013, title={Variable selection in generalized functional linear models}, volume={2}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/sta4.20}, DOI={10.1002/sta4.20}, abstractNote={Modern research data, where a large number of functional predictors is collected on few subjects are becoming increasingly common. In this paper we propose a variable selection technique, when the predictors are functional and the response is scalar. Our approach is based on adopting a generalized functional linear model framework and using a penalized likelihood method that simultaneously controls the sparsity of the model and the smoothness of the corresponding coefficient functions by adequate penalization. The methodology is characterized by high predictive accuracy, and yields interpretable models, while retaining computational efficiency. The proposed method is investigated numerically in finite samples, and applied to a diffusion tensor imaging tractography data set and a chemometric data set. 
Copyright © 2013 John Wiley & Sons Ltd}, number={1}, journal={Stat}, publisher={Wiley}, author={Gertheiss, Jan and Maity, Arnab and Staicu, Ana-Maria}, year={2013}, month={May}, pages={86–101} } @article{zhang_maity_arshad_holloway_karmaus_lawson_lee_macnab_2016, title={Variable selection in semi-parametric models}, volume={25}, ISSN={["1477-0334"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84983741909&partnerID=MN8TOARS}, DOI={10.1177/0962280213499679}, abstractNote={We propose Bayesian variable selection methods in semi-parametric models in the framework of partially linear Gaussian and probit regressions. Reproducing kernels are utilized to evaluate possibly non-linear joint effect of a set of variables. Indicator variables are introduced into the reproducing kernels for the inclusion or exclusion of a variable. Different scenarios based on posterior probabilities of including a variable are proposed to select important variables. Simulations are used to demonstrate and evaluate the methods. It was found that the proposed methods can efficiently select the correct variables regardless of the feature of the effects, linear or non-linear in an unknown form. The proposed methods are applied to two real data sets to identify cytosine phosphate guanine methylation sites associated with maternal smoking and cytosine phosphate guanine sites associated with cotinine levels with creatinine levels adjusted. The selected methylation sites have the potential to advance our understanding of the underlying mechanism for the impact of smoking exposure on health outcomes, and consequently benefit medical research in disease intervention.}, number={4}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Zhang, H. and Maity, Arnab and Arshad, H. and Holloway, J. and Karmaus, W. and Lawson, A. B. and Lee, D. 
and MacNab, Y.}, year={2016}, month={Aug}, pages={1736–1752} } @article{maity_2012, title={A powerful test for comparing multiple regression functions}, volume={24}, ISSN={["1048-5252"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84865271146&partnerID=MN8TOARS}, DOI={10.1080/10485252.2012.677842}, abstractNote={In this article, we address the important problem of comparison of two or more population regression functions. Recently, Pardo-Fernández, Van Keilegom and González-Manteiga [2007, ‘Testing for Equality of k Regression Curves’, Statistica Sinica, 17, 1115–1137] developed test statistics for simple nonparametric regression models: $Y_{ij}=\theta_j(Z_{ij})+\sigma_j(Z_{ij})\varepsilon_{ij}$, based on empirical distributions of the errors in each population $j=1,\ldots,J$. In this article, we propose a test for equality of the $\theta_j(\cdot)$ based on the concept of generalised likelihood ratio type statistics. We also generalise our test for other nonparametric regression set-ups, for example, nonparametric logistic regression, where the log-likelihood for population $j$ is any general smooth function $\mathcal{L}\{Y_j,\theta_j(Z_j)\}$. We describe a resampling procedure to obtain the critical values of the test. In addition, we present a simulation study to evaluate the performance of the proposed test and compare our results to those in Pardo-Fernández et al. [2007, ‘Testing for Equality of k Regression Curves’, Statistica Sinica, 17, 1115–1137].}, number={3}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, publisher={Informa UK Limited}, author={Maity, Arnab}, year={2012}, pages={563–576} } @article{liu_maity_lin_wright_christiani_2012, title={Design and analysis issues in gene and environment studies}, volume={11}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84872769763&partnerID=MN8TOARS}, DOI={10.1186/1476-069X-11-93}, abstractNote={AbstractBoth nurture (environmental) and nature (genetic factors) play an important role in human disease etiology. 
Traditionally, these effects have been thought of as independent. This perspective is ill informed for non-mendelian complex disorders which result as an interaction between genetics and environment. To understand health and disease we must study how nature and nurture interact. Recent advances in human genomics and high-throughput biotechnology make it possible to study large numbers of genetic markers and gene products simultaneously to explore their interactions with environment. The purpose of this review is to discuss design and analytic issues for gene-environment interaction studies in the “-omics” era, with a focus on environmental and genetic epidemiological studies. We present an expanded environmental genomic disease paradigm. We discuss several study design issues for gene-environmental interaction studies, including confounding and selection bias, measurement of exposures and genotypes. We discuss statistical issues in studying gene-environment interactions in different study designs, such as choices of statistical models, assumptions regarding biological factors, and power and sample size considerations, especially in genome-wide gene-environment studies. Future research directions are also discussed.}, number={1}, journal={Environmental Health: A Global Access Science Source}, author={Liu, C.-Y. and Maity, A. and Lin, X. and Wright, R. O. and Christiani, D. C.}, year={2012} } @misc{liu_maity_lin_wright_christiani_2012b, title={Design and analysis issues in gene and environment studies}, volume={11}, journal={Environmental Health}, author={Liu, C. Y. and Maity, A. and Lin, X. H. and Wright, R. O. and Christiani, D. 
C.}, year={2012} } @article{sofer_maity_coull_baccarelli_schwartz_lin_2012, title={Multivariate Gene Selection and Testing in Studying the Exposure Effects on a Gene Set}, volume={4}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84870393737&partnerID=MN8TOARS}, DOI={10.1007/s12561-012-9072-7}, abstractNote={Studying the association between a gene set (e.g., pathway) and exposures using multivariate regression methods is of increasing importance in genomic studies. Such an analysis is often more powerful and interpretable than individual gene analysis. Since many genes in a gene set are likely not affected by exposures, one is often interested in identifying a subset of genes in the gene set that are affected by exposures. This allows for better understanding of the underlying biological mechanism and for pursuing further biological investigation of these genes. The selected subset of "signal" genes also provides an attractive vehicle for a more powerful test for the association between the gene set and exposures. We propose two computationally simple Canonical Correlation Analysis (CCA) based variable selection methods: Sparse Outcome Selection (SOS) CCA and step CCA, to jointly select a subset of genes in a gene set that are associated with exposures. Several model selection criteria, such as BIC and the new Correlation Information Criterion (CIC), are proposed and compared. We also develop a global test procedure for testing the exposure effects on the whole gene set, accounting for gene selection. Through simulation studies, we show that the proposed methods improve upon an existing method when the genes are correlated and are more computationally efficient. 
We apply the proposed methods to the analysis of the Normative Aging DNA methylation Study to examine the effects of airborne particulate matter exposures on DNA methylations in a genetic pathway.}, number={2}, journal={Statistics in Biosciences}, publisher={Springer Science + Business Media}, author={Sofer, T. and Maity, A. and Coull, B. and Baccarelli, A. A. and Schwartz, J. and Lin, X.}, year={2012}, pages={319–338} } @article{maity_sullivan_tzeng_2012, title={Multivariate Phenotype Association Analysis by Marker-Set Kernel Machine Regression}, volume={36}, ISSN={["1098-2272"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84867539542&partnerID=MN8TOARS}, DOI={10.1002/gepi.21663}, abstractNote={Genetic studies of complex diseases often collect multiple phenotypes relevant to the disorders. As these phenotypes can be correlated and share common genetic mechanisms, jointly analyzing these traits may bring more power to detect genes influencing individual or multiple phenotypes. Given the advancement brought by the multivariate phenotype approaches and the multimarker kernel machine regression, we construct a multivariate regression based on kernel machine to facilitate the joint evaluation of multimarker effects on multiple phenotypes. The kernel machine serves as a powerful dimension‐reduction tool to capture complex effects among markers. The multivariate framework incorporates the potentially correlated multidimensional phenotypic information and accommodates common or different environmental covariates for each trait. We derive the multivariate kernel machine test based on a score‐like statistic, and conduct simulations to evaluate the validity and efficacy of the method. We also study the performance of the commonly adapted strategies for kernel machine analysis on multiple phenotypes, including the multiple univariate kernel machine tests with original phenotypes or with their principal components. 
Our results suggest that none of these approaches has the uniformly best power, and the optimal test depends on the magnitude of the phenotype correlation and the effect patterns. However, the multivariate test retains to be a reasonable approach when the multiple phenotypes have none or mild correlations, and gives the best power once the correlation becomes stronger or when there exist genes that affect more than one phenotype. We illustrate the utility of the multivariate kernel machine method through the Clinical Antipsychotic Trials of Intervention Effectiveness antibody study.}, number={7}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Sullivan, Patrick E. and Tzeng, Jung-Ying}, year={2012}, month={Nov}, pages={686–695} } @article{fan_maity_wang_wu_2013, title={Parametrically guided generalised additive models with application to mergers and acquisitions data}, volume={25}, ISSN={["1048-5252"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84875865626&partnerID=MN8TOARS}, DOI={10.1080/10485252.2012.735233}, abstractNote={Generalised nonparametric additive models present a flexible way to evaluate the effects of several covariates on a general outcome of interest via a link function. In this modelling framework, one assumes that the effect of each of the covariates is nonparametric and additive. However, in practice, often there is prior information available about the shape of the regression functions, possibly from pilot studies or exploratory analysis. In this paper, we consider such situations and propose an estimation procedure where the prior information is used as a parametric guide to fit the additive model. Specifically, we first posit a parametric family for each of the regression functions using the prior information (parametric guides). 
After removing these parametric trends, we then estimate the remainder of the nonparametric functions using a nonparametric generalised additive model and form the final estimates by adding back the parametric trend. We investigate the asymptotic properties of the estimates and show that when a good guide is chosen, the asymptotic variance of the estimates can be reduced significantly while keeping the asymptotic variance same as the unguided estimator. We observe the performance of our method via a simulation study and demonstrate our method by applying to a real data set on mergers and acquisitions.}, number={1}, journal={JOURNAL OF NONPARAMETRIC STATISTICS}, publisher={Informa UK Limited}, author={Fan, Jianqing and Maity, Arnab and Wang, Yihui and Wu, Yichao}, year={2013}, month={Mar}, pages={109–128} } @article{maity_huang_2012, title={Partially linear varying coefficient models stratified by a functional covariate}, volume={82}, ISSN={["1879-2103"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84864042756&partnerID=MN8TOARS}, DOI={10.1016/j.spl.2012.06.002}, abstractNote={We consider the problem of estimation in semiparametric varying coefficient models where the covariate modifying the varying coefficients is functional and is modeled nonparametrically. We develop a kernel-based estimator of the nonparametric component and a profiling estimator of the parametric component of the model, and derive their asymptotic properties. Specifically, we show the consistency of the nonparametric functional estimates and derive the asymptotic expansion of the estimates of the parametric component. 
We illustrate the performance of our methodology using a simulation study and a real data application.}, number={10}, journal={STATISTICS & PROBABILITY LETTERS}, publisher={Elsevier BV}, author={Maity, Arnab and Huang, Jianhua Z.}, year={2012}, month={Oct}, pages={1807–1814} } @article{he_zhang_maity_zou_hussey_karmaus_2012, title={Power of a reproducing kernel-based method for testing the joint effect of a set of single-nucleotide polymorphisms}, volume={140}, ISSN={["1573-6857"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84873408116&partnerID=MN8TOARS}, DOI={10.1007/s10709-012-9690-5}, abstractNote={This study explored a semi-parametric method built upon reproducing kernels for estimating and testing the joint effect of a set of single nucleotide polymorphisms (SNPs). The kernel adopted is the identity-by-state kernel that measures SNP similarity between subjects. In this article, through simulations we first assessed its statistical power under different situations. It was found that in addition to the effect of sample size, the testing power was impacted by the strength of association between SNPs and the outcome of interest, and by the SNP similarity among the subjects. A quadratic relationship between SNP similarity and testing power was identified, and this relationship was further affected by sample sizes. Next we applied the method to a SNP-lung function data set to estimate and test the joint effect of a set of SNPs on forced vital capacity, one type of lung function measure. 
The findings were then connected to the patterns observed in simulation studies and further explored via variable importance indices of each SNP inferred from a variable selection procedure.}, number={10-12}, journal={GENETICA}, publisher={Springer Science + Business Media}, author={He, Hong and Zhang, Hongmei and Maity, Arnab and Zou, Yubo and Hussey, James and Karmaus, Wilfried}, year={2012}, month={Dec}, pages={421–427} } @article{bandyopadhyay_maity_2011, title={Analysis of Sabine river flow data using semiparametric spline modeling}, volume={399}, ISSN={["1879-2707"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79952008745&partnerID=MN8TOARS}, DOI={10.1016/j.jhydrol.2011.01.006}, abstractNote={In this article, a modeling approach for the mean annual flow in different segments of Sabine river, as released in the NHDPlus data in 2007, as a function of five predictor variables is described. Modeling flow is extremely complex and the deterministic flow models are widely used for that purpose. The justification for using these deterministic models comes from the fact that the flow is governed by some explicitly stated physical laws. In contrast, in this article, this complex issue is addressed from a completely statistical point of view. A semiparametric model is proposed to analyze the spatial distribution of the mean annual flow of Sabine river. Semiparametric additive models allow explicit consideration of the linear and nonlinear relations with relevant explanatory variables. 
We use a conditionally specified Gaussian model for the estimation of the univariate conditional distributions of flow to incorporate auxiliary information and this formulation does not require the target variable to be independent.}, number={3-4}, journal={JOURNAL OF HYDROLOGY}, publisher={Elsevier BV}, author={Bandyopadhyay, Soutir and Maity, Arnab}, year={2011}, month={Mar}, pages={274–280} } @article{mahalingaiah_missmer_maity_williams_meeker_berry_ehrlich_perry_cramer_hauser_et_al._2012, title={Association of Hexachlorobenzene (HCB), Dichlorodiphenyltrichloroethane (DDT), and Dichlorodiphenyldichloroethylene (DDE) with in Vitro Fertilization (IVF) Outcomes}, volume={120}, ISSN={["1552-9924"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84856736612&partnerID=MN8TOARS}, DOI={10.1289/ehp.1103696}, abstractNote={Background: Hexachlorobenzene (HCB), dichlorodiphenyltrichloroethane (DDT), and dichlorodiphenyldichloroethylene (DDE) are persistent chlorinated pesticides with endocrine activity that may adversely affect the early stages of human reproduction. Objective: Our goal was to determine the association of serum levels of HCB, DDT, and DDE with implantation failure, chemical pregnancy, and spontaneous abortion in women undergoing in vitro fertilization (IVF) from 1994 to 2003. Methods: Levels of HCB and congeners of DDT and DDE were measured in serum collected during the follicular phase. Multivariable-adjusted statistical models accommodating multiple outcomes and multiple cycles per woman were used to estimate the relation between serum pesticide levels and IVF outcomes. Results: A total of 720 women with a mean ± SD age 35.4 ± 4.2 years at enrollment contributed 774 IVF cycles. All samples had detectable levels of HCB, DDT, and DDE, with median levels of 0.087 ng/g serum for HCB, 1.12 ng/g serum for total DDT, and 1.04 ng/g serum for p,p´-DDE. 
Compared with the lowest quartile (Q1) of HCB, the lipid- and multivariable-adjusted odds ratio (OR) for failed implantation was significantly elevated for those with higher HCB quartiles [Q2–Q4; adjusted ORs: for Q2, 1.71; 95% confidence interval (CI): 1.03, 2.82; for Q3, 2.30; 95% CI: 1.39, 3.81; for Q4, 2.32; 95% CI: 1.38, 3.90] and showed a significantly increasing trend (p = 0.001). No statistically significant associations were observed between DDT/DDE and IVF outcomes or between HCB and chemical pregnancy or spontaneous abortion. Conclusions: Serum HCB concentrations were on average lower than that of the general U.S. population and associated with failed implantation among women undergoing IVF.}, number={2}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, publisher={Environmental Health Perspectives}, author={Mahalingaiah, Shruthi and Missmer, Stacey A. and Maity, Arnab and Williams, Paige L. and Meeker, John D. and Berry, Katharine and Ehrlich, Shelley and Perry, Melissa J. and Cramer, Daniel W. and Hauser, Russ and others}, year={2012}, month={Feb}, pages={316–320} } @article{maity_apanasovich_2011, title={Estimation via corrected scores in general semiparametric regression models with error-prone covariates}, volume={5}, ISSN={["1935-7524"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84859832500&partnerID=MN8TOARS}, DOI={10.1214/11-ejs647}, abstractNote={This paper considers the problem of estimation in a general semiparametric regression model when error-prone covariates are modeled parametrically while covariates measured without error are modeled nonparametrically. To account for the effects of measurement error, we apply a correction to a criterion function. The specific form of the correction proposed allows Monte Carlo simulations in problems for which the direct calculation of a corrected criterion is difficult. 
Therefore, in contrast to methods that require solving integral equations of possibly multiple dimensions, as in the case of multiple error-prone covariates, we propose methodology which offers a simple implementation. The resulting methods are functional, they make no assumptions about the distribution of the mismeasured covariates. We utilize profile kernel and backfitting estimation methods and derive the asymptotic distribution of the resulting estimators. Through numerical studies we demonstrate the applicability of proposed methods to Poisson, logistic and multivariate Gaussian partially linear models. We show that the performance of our methods is similar to a computationally demanding alternative. Finally, we demonstrate the practical value of our methods when applied to Nevada Test Site (NTS) Thyroid Disease Study data.}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Maity, Arnab and Apanasovich, Tatiyana V.}, year={2011}, pages={1424–1449} } @article{sherman_maity_wang_2011, title={Inferences for the ratio: Fieller's interval, log ratio, and large sample based confidence intervals}, volume={95}, ISSN={["1863-8171"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79960996265&partnerID=MN8TOARS}, DOI={10.1007/s10182-011-0162-5}, number={3}, journal={ASTA-ADVANCES IN STATISTICAL ANALYSIS}, publisher={Springer Science + Business Media}, author={Sherman, Michael and Maity, Arnab and Wang, Suojin}, year={2011}, month={Sep}, pages={313–323} } @article{maity_lin_2011, title={Powerful Tests for Detecting a Gene Effect in the Presence of Possible Gene-Gene Interactions Using Garrote Kernel Machines}, volume={67}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79955121487&partnerID=MN8TOARS}, DOI={10.1111/j.1541-0420.2011.01598.x}, abstractNote={Summary We propose in this article a powerful testing procedure for detecting a gene effect on a continuous outcome in the presence of possible gene–gene interactions 
(epistasis) in a gene set, e.g., a genetic pathway or network. Traditional tests for this purpose require a large number of degrees of freedom by testing the main effect and all the corresponding interactions under a parametric assumption, and hence suffer from low power. In this article, we propose a powerful kernel machine based test. Specifically, our test is based on a garrote kernel method and is constructed as a score test. Here, the term garrote refers to an extra nonnegative parameter that is multiplied to the covariate of interest so that our score test can be formulated in terms of this nonnegative parameter. A key feature of the proposed test is that it is flexible and developed for both parametric and nonparametric models within a unified framework, and is more powerful than the standard test by accounting for the correlation among genes and hence often uses a much smaller degrees of freedom. We investigate the theoretical properties of the proposed test. We evaluate its finite sample performance using simulation studies, and apply the method to the Michigan prostate cancer gene expression data.}, number={4}, journal={BIOMETRICS}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Lin, Xihong}, year={2011}, month={Dec}, pages={1271–1284} } @article{perry_chen_mcauliffe_maity_deloid_2011, title={Semi-Automated Scoring of Triple-probe FISH in Human Sperm: Methods and Further Validation}, volume={79A}, ISSN={["1552-4930"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79960558287&partnerID=MN8TOARS}, DOI={10.1002/cyto.a.21078}, abstractNote={AbstractAlthough the frequency and consequence of sperm chromosomal abnormalities are considerable, few epidemiologic studies in large samples have been conducted to investigate etiologic risk factors. This is, in part, attributable to the labor intensive demands of manual sperm fluorescence in situ hybridization (FISH) scoring. 
As part of an epidemiologic study investigating environmental risk factors for aneuploidy among men attending a hospital‐based fertility clinic, a semi‐automated method of slide scoring was further validated and used to estimate sex chromosome sperm disomy frequency in a large number of samples. Multiprobe FISH for chromosomes X, Y, and 18 was used to determine sex chromosome disomy in sperm nuclei. Semi‐automated scoring methods were used to quantify X disomy (sperm FISH genotype XX18), Y disomy (YY18), and XY disomy (XY18). The semi‐automated results were compared with the results from manual scoring in 10 slides. The semi‐automated method was then used to estimate sex chromosome disomy frequency in 60 men. Of 10 slides scored, significant differences between the manual and semi‐automated results were seen primarily in one slide that was of poor quality because of over swollen nuclei. Among 60 men analyzed using the semi‐automated method, median total sex chromosome disomy frequency was 1.65%, which is higher than seen among normal men but within range with reports from fertility clinic populations. These results further validate that semi‐automated methods can be used to score sperm disomy with results comparable to manual methods. This is the largest study to date to provide estimates of sex chromosome disomy among men attending fertility clinics. These methods should be replicated in larger clinic populations to arrive at stable estimates of aneuploidy frequency in men who are members of subfertile couples. © 2011 International Society for Advancement of Cytometry}, number={8}, journal={CYTOMETRY PART A}, publisher={Wiley-Blackwell}, author={Perry, Melissa J. and Chen, Xing and McAuliffe, Megan E. 
and Maity, Arnab and Deloid, Glen M.}, year={2011}, month={Aug}, pages={661–666} } @article{meeker_maity_missmer_williams_mahalingaiah_ehrlich_berry_altshul_perry_cramer_et_al._2011, title={Serum Concentrations of Polychlorinated Biphenyls in Relation to in Vitro Fertilization Outcomes}, volume={119}, ISSN={["0091-6765"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79960065763&partnerID=MN8TOARS}, DOI={10.1289/ehp.1002922}, abstractNote={Background: Human exposure to polychlorinated biphenyls (PCBs) remains widespread. PCBs have been associated with adverse reproductive health outcomes including reduced fecundability and increased risk of pregnancy loss, although the human data remain largely inconclusive. Objective: Our goal was to explore the relationship between serum PCB concentrations and early pregnancy loss among a large cohort of women undergoing in vitro fertilization (IVF) between 1994 and 2003. Methods: Concentrations of 57 PCB congeners were measured in serum samples collected during 827 IVF/intracytoplasmic sperm injection cycles from 765 women. Joint statistical models that accommodate multiple outcomes and multiple cycles per woman were used to assess the relationship between serum PCB quartiles and implantation failure, chemical pregnancies (human chorionic gonadotropin level > 5.0 mIU/mL) that did not result in clinical pregnancy, or spontaneous abortion, while also adjusting for confounders. Results: PCB-153 was the congener present in the highest concentration (median, 46.2 ng/g lipid). Increasing quartiles of PCB-153 and the sum of all measured PCB congeners (ΣPCBs) were associated with significantly elevated dose-dependent odds of failed implantation. Adjusted odds ratios (95% confidence interval) for highest versus lowest quartile were 2.0 (1.2–3.4) for PCB-153 and 1.7 (1.0–2.9) for ΣPCBs. 
There were suggestive trends for increased odds of implantation failure for PCB-118 and cytochrome P450–inducing congeners (p-values for trend = 0.06). No statistically significant associations between PCBs and chemical pregnancy or spontaneous abortion were found. Conclusions: Serum PCB concentrations at levels similar to the U.S. general population were associated with failed implantation among women undergoing IVF. These findings may help explain previous reports of reduced fecundability among women exposed to PCBs.}, number={7}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, publisher={Environmental Health Perspectives}, author={Meeker, John D. and Maity, Arnab and Missmer, Stacey A. and Williams, Paige L. and Mahalingaiah, Shruthi and Ehrlich, Shelley and Berry, Katharine F. and Altshul, Larisa and Perry, Melissa J. and Cramer, Daniel W. and others}, year={2011}, month={Jul}, pages={1010–1016} } @article{maity_sherman_2012, title={Testing for spatial isotropy under general designs}, volume={142}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84856023088&partnerID=MN8TOARS}, DOI={10.1016/j.jspi.2011.11.013}, abstractNote={Spatial modeling is typically composed of a specification of a mean function and a model for the correlation structure. A common assumption on the spatial correlation is that it is isotropic. This means that the correlation between any two observations depends only on the distance between those sites and not on their relative orientation. The assumption of isotropy is often made due to a simpler interpretation of correlation behavior and to an easier estimation problem under an assumed isotropy. The assumption of isotropy, however, can have serious deleterious effects when not appropriate. In this paper we formulate a test of isotropy for spatial observations located according to a general class of stochastic designs. 
Distribution theory of our test statistic is derived and we carry out extensive simulations which verify the efficacy of our approach. We apply our methodology to a data set on longleaf pine trees from an oldgrowth forest in the southern United States.}, number={5}, journal={Journal of Statistical Planning and Inference}, publisher={Elsevier BV}, author={Maity, Arnab and Sherman, Michael}, year={2012}, pages={1081–1091} } @inproceedings{schwartz_sofer_maity_lin_baccarelli_2010, title={Particulate Air Pollution Modifies Methylation Of NFKb Pathways}, volume={5}, DOI={10.1164/ajrccm-conference.2010.181.1_meetingabstracts.a4008}, booktitle={C16. GENETICS OF LUNG DISEASE AND GENE: ENVIRONMENT INTERACTIONS}, publisher={American Thoracic Society}, author={Schwartz, Joel and Sofer, Tamar and Maity, Arnab and Lin, Xihong and Baccarelli, Andrea}, year={2010} } @inbook{dey_ghosh_mallick_2010, place={Boca Raton}, title={Proportional Hazards Regression Using Bayesian Kernel Machines}, url={http://www.crcnetbase.com/doi/book/10.1201/EBK1420070170.}, DOI={10.1201/EBK1420070170.}, booktitle={Bayesian Modeling in Bioinformatics}, publisher={CRC Press}, author={Dey, D. and Ghosh, S. and Mallick, B.K.}, year={2010} } @article{zhou_huang_martinez_maity_baladandayuthapani_carroll_2010, title={Reduced Rank Mixed Effects Models for Spatially Correlated Hierarchical Functional Data}, volume={105}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-77952558527&partnerID=MN8TOARS}, DOI={10.1198/jasa.2010.tm08737}, abstractNote={Hierarchical functional data are widely seen in complex studies where subunits are nested within units, which in turn are nested within treatment groups. We propose a general framework of functional mixed effects model for such data: within-unit and within-subunit variations are modeled through two separate sets of principal components; the subunit level functions are allowed to be correlated. 
Penalized splines are used to model both the mean functions and the principal components functions, where roughness penalties are used to regularize the spline fit. An expectation–maximization (EM) algorithm is developed to fit the model, while the specific covariance structure of the model is utilized for computational efficiency to avoid storage and inversion of large matrices. Our dimension reduction with principal components provides an effective solution to the difficult tasks of modeling the covariance kernel of a random function and modeling the correlation between functions. The proposed methodology is illustrated using simulations and an empirical dataset from a colon carcinogenesis study. Supplemental materials are available online.}, number={489}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Zhou, Lan and Huang, Jianhua Z. and Martinez, Josue G. and Maity, Arnab and Baladandayuthapani, Veerabhadran and Carroll, Raymond J.}, year={2010}, pages={390–400} } @article{wei_carroll_maity_2011, title={Testing for constant nonparametric effects in general semiparametric regression models with interactions}, volume={81}, ISSN={["1879-2103"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-79955146434&partnerID=MN8TOARS}, DOI={10.1016/j.spl.2010.11.002}, abstractNote={We consider the problem of testing for a constant nonparametric effect in a general semi-parametric regression model when there is the potential for interaction between the parametrically and nonparametrically modeled variables. 
The work was originally motivated by a unique testing problem in genetic epidemiology (Chatterjee, et al., 2006) that involved a typical generalized linear model but with an additional term reminiscent of the Tukey one-degree-of-freedom formulation, and their interest was in testing for main effects of the genetic variables, while gaining statistical power by allowing for a possible interaction between genes and the environment. Later work (Maity, et al., 2009) involved the possibility of modeling the environmental variable nonparametrically, but they focused on whether there was a parametric main effect for the genetic variables. In this paper, we consider the complementary problem, where the interest is in testing for the main effect of the nonparametrically modeled environmental variable. We derive a generalized likelihood ratio test for this hypothesis, show how to implement it, and provide evidence that our method can improve statistical power when compared to standard partially linear models with main effects only. We use the method for the primary purpose of analyzing data from a case-control study of colorectal adenoma.}, number={7}, journal={STATISTICS & PROBABILITY LETTERS}, publisher={Elsevier BV}, author={Wei, Jiawei and Carroll, Raymond J. and Maity, Arnab}, year={2011}, month={Jul}, pages={717–723} } @article{carroll_maity_mammen_yu_2009, title={Efficient Semiparametric Marginal Estimation for the Partially Linear Additive Model for Longitudinal/Clustered Data}, volume={1}, DOI={10.1007/s12561-009-9000-7}, abstractNote={We consider the efficient estimation of a regression parameter in a partially linear additive nonparametric regression model from repeated measures data when the covariates are multivariate. To date, while there is some literature in the scalar covariate case, the problem has not been addressed in the multivariate additive model case. Ours represents a first contribution in this direction. 
As part of this work, we first describe the behavior of nonparametric estimators for additive models with repeated measures when the underlying model is not additive. These results are critical when one considers variants of the basic additive model. We apply them to the partially linear additive repeated-measures model, deriving an explicit consistent estimator of the parametric component; if the errors are in addition Gaussian, the estimator is semiparametric efficient. We also apply our basic methods to a unique testing problem that arises in genetic epidemiology; in combination with a projection argument we develop an efficient and easily computed testing scheme. Simulations and an empirical example from nutritional epidemiology illustrate our methods.}, number={1}, journal={Stat Biosci}, publisher={Springer Science + Business Media}, author={Carroll, Raymond and Maity, Arnab and Mammen, Enno and Yu, Kyusang}, year={2009}, pages={10–31} } @article{carroll_maity_mammen_yu_2009, title={Nonparametric Additive Regression for Repeatedly Measured Data}, volume={96}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-66249142283&partnerID=MN8TOARS}, DOI={10.1093/biomet/asp015}, abstractNote={We develop an easily computed smooth backfitting algorithm for additive model fitting in repeated measures problems. Our methodology easily copes with various settings, such as when some covariates are the same over repeated response measurements. We allow for a working covariance matrix for the regression errors, showing that our method is most efficient when the correct covariance matrix is used. The component functions achieve the known asymptotic variance lower bound for the scalar argument case. Smooth backfitting also leads directly to design-independent biases in the local linear case. Simulations show our estimator has smaller variance than the usual kernel estimator. This is also illustrated by an example from nutritional epidemiology. 
Copyright 2009, Oxford University Press.}, number={2}, journal={Biometrika}, author={Carroll, R.J. and Maity, A. and Mammen, E. and Yu, K.}, year={2009}, pages={383–398} } @article{apanasovich_carroll_maity_2009, title={SIMEX and standard error estimation in semiparametric measurement error models}, volume={3}, DOI={10.1214/08-ejs341}, abstractNote={SIMEX is a general-purpose technique for measurement error correction. There is a substantial literature on the application and theory of SIMEX for purely parametric problems, as well as for purely non-parametric regression problems, but there is neither application nor theory for semiparametric problems. Motivated by an example involving radiation dosimetry, we develop the basic theory for SIMEX in semiparametric problems using kernel-based estimation methods. This includes situations that the mismeasured variable is modeled purely parametrically, purely non-parametrically, or that the mismeasured variable has components that are modeled both parametrically and nonparametrically. Using our asymptotic expansions, easily computed standard error formulae are derived, as are the bias properties of the nonparametric estimator. The standard error method represents a new method for estimating variability of nonparametric estimators in semiparametric problems, and we show in both simulations and in our example that it improves dramatically on first order methods.We find that for estimating the parametric part of the model, standard bandwidth choices of order O(n(-1/5)) are sufficient to ensure asymptotic normality, and undersmoothing is not required. SIMEX has the property that it fits misspecified models, namely ones that ignore the measurement error. 
Our work thus also more generally describes the behavior of kernel-based methods in misspecified semiparametric problems.}, number={0}, journal={Electronic Journal of Statistics}, publisher={Institute of Mathematical Statistics - care of Project Euclid}, author={Apanasovich, Tatiyana V. and Carroll, Raymond J. and Maity, Arnab}, year={2009}, pages={318–348} } @phdthesis{efficient inference in general semiparametric regression models_2008, url={http://oaktrust.library.tamu.edu/handle/1969.1/ETD-TAMU-3075}, year={2008} } @article{maity_2008, title={Efficient estimation of population quantiles in general semiparametric regression models}, volume={78}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-53249086611&partnerID=MN8TOARS}, DOI={10.1016/j.spl.2008.03.022}, abstractNote={The problem of quantile estimation in general semiparametric regression models is considered. We derive plug-in kernel-based estimators, investigate their asymptotic distribution and establish the semiparametric efficiency of these estimators under mild assumptions. We apply our methodology in an example in nutritional epidemiology. The generalization to the important case where responses are missing at random is also addressed.}, number={16}, journal={Statistics and Probability Letters}, publisher={Elsevier BV}, author={Maity, Arnab}, year={2008}, pages={2744–2750} } @article{maity_apanasovich_carroll_2008, title={Estimation of population-level summaries in general semiparametric repeated measures regression models}, DOI={10.1214/193940307000000095}, abstractNote={This paper considers a wide family of semiparametric repeated measures regression models, in which the main interest is on estimating population-level quantities such as mean, variance, probabilities etc. Examples of our framework include generalized linear models for clustered/longitudinal data, among many others. 
We derive plug-in kernel-based estimators of the population level quantities and derive their asymptotic distribution. An exam- ple involving estimation of the survival function of hemoglobin measures in the Kenya hemoglobin study data is presented to demonstrate our methodology. This paper is about semiparametric regression models with repeated measures when the primary goal is to estimate a population quantity such as mean, variance, prob- ability, etc. We will construct estimators of these quantities which utilize the under- lying semiparametric structure of the model and derive their limiting distribution. The work is motivated by the following example: the Kenya hemoglobin data. The goal is to study the changes of hemoglobin over time during the first year of birth. The data set consists of 68 families with 2 children per family. For each child, 4 repeated measures are taken over time in the first year since birth: the time of visit varied from child to child. The factors include mother's age at child birth, child sex and placental parasitemia density (PDEN), a marker of malaria that could affect hemoglobin. To model these data, Lin and Carroll (2) considered a semiparametric model where the mother's age effect is modeled nonparametrically and (sex, PDEN) is modeled parametrically. The model is given by the repeated measures partially linear model Yijk = X T ijkβ0 + θ0(Zij) + ǫijk,}, journal={Collections}, publisher={Institute of Mathematical Statistics - care of Project Euclid}, author={Maity, Arnab and Apanasovich, Tatiyana V. and Carroll, Raymond J.}, year={2008}, pages={123–137} } @article{maity_sherman_2008, title={On adaptive linear regression}, volume={35}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-54049132878&partnerID=MN8TOARS}, DOI={10.1080/02664760802382475}, abstractNote={Ordinary least squares (OLS) is omnipresent in regression modeling. 
Occasionally, least absolute deviations (LAD) or other methods are used as an alternative when there are outliers. Although some data adaptive estimators have been proposed, they are typically difficult to implement. In this paper, we propose an easy to compute adaptive estimator which is simply a linear combination of OLS and LAD. We demonstrate large sample normality of our estimator and show that its performance is close to best for both light-tailed (e.g. normal and uniform) and heavy-tailed (e.g. double exponential and t 3) error distributions. We demonstrate this through three simulation studies and illustrate our method on state public expenditures and lutenizing hormone data sets. We conclude that our method is general and easy to use, which gives good efficiency across a wide range of error distributions.}, number={12}, journal={Journal of Applied Statistics}, publisher={Informa UK Limited}, author={Maity, Arnab and Sherman, Michael}, year={2008}, pages={1409–1422} } @article{maity_carroll_mammen_chatterjee_2009, title={Testing in semiparametric models with interaction, with applications to gene-environment interactions}, volume={71}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-58149340050&partnerID=MN8TOARS}, DOI={10.1111/j.1467-9868.2008.00671.x}, abstractNote={SummaryMotivated from the problem of testing for genetic effects on complex traits in the presence of gene–environment interaction, we develop score tests in general semiparametric regression problems that involves Tukey style 1 degree-of-freedom form of interaction between parametrically and non-parametrically modelled covariates. We find that the score test in this type of model, as recently developed by Chatterjee and co-workers in the fully parametric setting, is biased and requires undersmoothing to be valid in the presence of non-parametric components. 
Moreover, in the presence of repeated outcomes, the asymptotic distribution of the score test depends on the estimation of functions which are defined as solutions of integral equations, making implementation difficult and computationally taxing. We develop profiled score statistics which are unbiased and asymptotically efficient and can be performed by using standard bandwidth selection methods. In addition, to overcome the difficulty of solving functional equations, we give easy interpretations of the target functions, which in turn allow us to develop estimation procedures that can be easily implemented by using standard computational methods. We present simulation studies to evaluate type I error and power of the method proposed compared with a naive test that does not consider interaction. Finally, we illustrate our methodology by analysing data from a case–control study of colorectal adenoma that was designed to investigate the association between colorectal adenoma and the candidate gene NAT2 in relation to smoking history.}, number={1}, journal={Journal of the Royal Statistical Society. Series B: Statistical Methodology}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Carroll, Raymond J. and Mammen, Enno and Chatterjee, Nilanjan}, year={2009}, pages={75–96} } @article{carroll_maity_2007, title={Comments on: Nonparametric inference with generalized likelihood ratio tests}, volume={16}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-36448987181&partnerID=MN8TOARS}, DOI={10.1007/s11749-007-0085-3}, number={3}, journal={Test}, publisher={Springer Science + Business Media}, author={Carroll, Raymond J. 
and Maity, Arnab}, year={2007}, pages={456–458} } @article{maity_ma_carroll_2007, title={Efficient estimation of population-level summaries in general semiparametric regression models}, volume={102}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-33947277465&partnerID=MN8TOARS}, DOI={10.1198/016214506000001103}, abstractNote={This article considers a wide class of semiparametric regression models in which interest focuses on population-level quantities that combine both the parametric and the nonparametric parts of the model. Special cases in this approach include generalized partially linear models, generalized partially linear single-index models, structural measurement error models, and many others. For estimating the parametric part of the model efficiently, profile likelihood kernel estimation methods are well established in the literature. Here our focus is on estimating general population-level quantities that combine the parametric and nonparametric parts of the model (e.g., population mean, probabilities, etc.). We place this problem in a general context, provide a general kernel-based methodology, and derive the asymptotic distributions of estimates of these population-level quantities, showing that in many cases the estimates are semiparametric efficient. For estimating the population mean with no missing data, we show that the sample mean is semiparametric efficient for canonical exponential families, but not in general. We apply the methods to a problem in nutritional epidemiology, where estimating the distribution of usual intake is of primary interest and semiparametric methods are not available. Extensions to the case of missing response data are also discussed.}, number={477}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Maity, A. and Ma, Y. 
and Carroll, R.J.}, year={2007}, pages={123–139} } @article{maity_sherman_2006, title={The two-sample t test with one variance unknown}, volume={60}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-33744525717&partnerID=MN8TOARS}, DOI={10.1198/000313006X108567}, abstractNote={We consider the situation in two-sample testing when one variance is assumed to be known and the other variance is considered unknown. This situation arises, for example, when one is interested in comparing a standard treatment with a new treatment. Although this situation occurs relatively infrequently, our example discusses the important tool of moment matching and makes the classic two-sample Satterthwaite t approximation transparent.}, number={2}, journal={American Statistician}, author={Maity, A. and Sherman, M.}, year={2006}, pages={163–166} } @article{a perturbation technique for sample moment matching in kernel density estimation_2005, url={http://dx.doi.org/10.1177/0008068320050510}, DOI={10.1177/0008068320050510}, abstractNote={ Summary The fundamental idea of kernel smoothing technique can be recognized as one-parameter data perturbation with a smooth density. The usual kernel density estimates might not match arbitrary sample moments calculated from the unsmoothed data. A technique based on two-parameter data perturbation is developed for sample moment matching in kernel density estimation. It is shown that the moments calculated from the resulting tuned kernel density estimate can be made arbitrarily close to the raw sample moments. Moreover, the pointwise rate of MISE of the resulting density estimates remains optimal. Relevant simulation studies are carried out to demonstrate the usefulness and other features of this technique. }, journal={Calcutta Statistical Association Bulletin}, year={2005}, month={Mar} }