@article{cosco_wells_zhang_goodell_monsur_xu_moore_2022, title={Hands-on childcare garden intervention: A randomized controlled trial to assess effects on fruit and vegetable identification, liking, and consumption among children aged 3-5 years in {North Carolina}}, volume={13}, ISSN={1664-1078}, DOI={10.3389/fpsyg.2022.993637}, abstractNote={Gardening at childcare centers may have a potent influence on young children’s learning about fruits and vegetables and their development of healthy dietary behaviors. This randomized controlled trial examined the effect of a garden intervention on fruit and vegetable (FV) identification, FV liking, and FV consumption among 3–5-year-old children enrolled in childcare centers in Wake County, North Carolina, USA. Eligible childcare centers (serving primarily low-income families) were randomly selected and then randomly assigned to one of three groups: (1) intervention; (2) waitlist-control that served as a control in year 1 and received the intervention in year 2; or (3) no-intervention control. From the 15 participating childcare centers, 285 children aged 3–5 years were consented by their parents or guardians to participate. The intervention comprised six standardized, raised, mulched garden beds, planted with warm-season annual vegetables and fruits, and perennial fruits. A Gardening Activity Guide describing 12 age-appropriate, sequential gardening activities was distributed for teachers to lead hands-on gardening activities during the growing season. Data were gathered between Spring 2018 and Fall 2019. FV identification and liking were measured using an age-appropriate tablet-enabled protocol. FV consumption was measured by weighing each child’s fruit and vegetable snack tray before and after tasting sessions. 
Compared to children receiving no-intervention, children who received the garden intervention showed a greater increase in accurate identification of both fruits and vegetables as well as consumption of both fruit and vegetables during the tasting sessions. Consistent with prior research, the effects on fruit consumption were greater than on vegetable consumption. There was no significant effect of the garden intervention on children’s FV liking. Garden interventions implemented early in life foster learning about FV and promote healthy eating. Early exposure to gardening may yield a return on investment throughout the lifecourse, impacting healthy diet and associated health outcomes, which are particularly important within disadvantaged communities where children’s health is challenged by a host of risk factors. Clinical Trials Registration #NCT04864574 (clinicaltrials.gov).}, journal={Frontiers in Psychology}, author={Cosco, Nilda G. and Wells, Nancy M. and Zhang, Daowen and Goodell, L. Suzanne and Monsur, Muntazar and Xu, Tong and Moore, Robin C.}, year={2022}, month=nov } @article{cosco_wells_monsur_goodell_zhang_xu_hales_moore_2021, title={Research Design, Protocol, and Participant Characteristics of {COLEAFS}: A Cluster Randomized Controlled Trial of a Childcare Garden Intervention}, volume={18}, ISSN={1660-4601}, DOI={10.3390/ijerph182413066}, abstractNote={Childcare garden interventions may be an effective strategy to increase fruit and vegetable (FV) consumption and physical activity among young children. The objective of this paper is to describe the research design, protocol, outcome measures, and baseline characteristics of participants in the Childcare Outdoor Learning Environments as Active Food Systems (“COLEAFS”) study, a cluster randomized controlled trial (RCT) examining the effect of a garden intervention on outcomes related to diet and physical activity. 
Fifteen childcare centers in low-income areas were randomly assigned to intervention (to receive garden intervention in Year 1), waitlist control (to receive garden intervention in Year 2), and control group (no intervention). The garden intervention comprised six raised beds planted with warm-season vegetables and fruits, and a garden activity booklet presenting 12 gardening activities. FV knowledge and FV liking were measured using a tablet-enabled protocol. FV consumption was measured by weighing FV before and after a snack session. Physical activity was measured using Actigraph GT3x+ worn by children for three consecutive days while at the childcare center. Of the 543 eligible children from the 15 childcare centers, 250 children aged 3–5 years received parental consent, assented, and participated in baseline data collection. By employing an RCT to examine the effect of a garden intervention on diet and physical activity among young children attending childcare centers within low-income communities, this study offers compelling research design and methods, addresses a critical gap in the empirical literature, and is a step toward evidence-based regulations to promote early childhood healthy habits.}, number={24}, journal={International Journal of Environmental Research and Public Health}, author={Cosco, Nilda Graciela and Wells, Nancy M. and Monsur, Muntazar and Goodell, Lora Suzanne and Zhang, Daowen and Xu, Tong and Hales, Derek and Moore, Robin Clive}, year={2021}, month=dec } @article{liu_shih_strawderman_zhang_johnson_chai_2019, title={Statistical Analysis of Zero-Inflated Nonnegative Continuous Data: A Review}, volume={34}, ISSN={2168-8745}, DOI={10.1214/18-STS681}, abstractNote={Zero-inflated nonnegative continuous (or semicontinuous) data arise frequently in biomedical, economical, and ecological studies. 
Examples include substance abuse, medical costs, medical care utilization, biomarkers (e.g., CD4 cell counts, coronary artery calcium scores), single cell gene expression rates, and (relative) abundance of microbiome. Such data are often characterized by the presence of a large portion of zero values and positive continuous values that are skewed to the right and heteroscedastic. Both of these features suggest that no simple parametric distribution may be suitable for modeling such type of outcomes. In this paper, we review statistical methods for analyzing zero-inflated nonnegative outcome data. We will start with the cross-sectional setting, discussing ways to separate zero and positive values and introducing flexible models to characterize right skewness and heteroscedasticity in the positive values. We will then present models of correlated zero-inflated nonnegative continuous data, using random effects to tackle the correlation on repeated measures from the same subject and that across different parts of the model. We will also discuss expansion to related topics, for example, zero-inflated count and survival data, nonlinear covariate effects, and joint models of longitudinal zero-inflated nonnegative continuous data and survival. Finally, we will present applications to three real datasets (i.e., microbiome, medical costs, and alcohol drinking) to illustrate these methods. Example code will be provided to facilitate applications of these methods.}, number={2}, journal={Statistical Science}, author={Liu, Lei and Shih, Ya-Chen Tina and Strawderman, Robert L. and Zhang, Daowen and Johnson, Bankole A. and Chai, Haitao}, year={2019}, month=may, pages={253--279} } @article{zhang_sun_pieper_2016, title={Bivariate Mixed Effects Analysis of Clustered Data with Large Cluster Sizes}, volume={8}, ISSN={1867-1772}, DOI={10.1007/s12561-015-9140-x}, abstractNote={Linear mixed effects models are widely used to analyze a clustered response variable. 
Motivated by a recent study to examine and compare the hospital length of stay (LOS) between patients undertaking percutaneous coronary intervention (PCI) and coronary artery bypass graft (CABG) from several international clinical trials, we proposed a bivariate linear mixed effects model for the joint modeling of clustered PCI and CABG LOS's where each clinical trial is considered a cluster. Due to the large number of patients in some trials, commonly used commercial statistical software for fitting (bivariate) linear mixed models failed to run since it could not allocate enough memory to invert large dimensional matrices during the optimization process. We consider ways to circumvent the computational problem in the maximum likelihood (ML) inference and restricted maximum likelihood (REML) inference. Particularly, we developed an expected and maximization (EM) algorithm for the REML inference and presented an ML implementation using existing software. The new REML EM algorithm is easy to implement and computationally stable and efficient. With this REML EM algorithm, we could analyze the LOS data and obtained meaningful results.}, number={2}, journal={Statistics in Biosciences}, author={Zhang, Daowen and Sun, Jie Lena and Pieper, Karen}, year={2016}, month=oct, pages={220--233} } @article{chen_liu_shih_zhang_severini_2016, title={A flexible model for correlated medical costs, with application to medical expenditure panel survey data}, volume={35}, ISSN={1097-0258}, DOI={10.1002/sim.6743}, abstractNote={We propose a flexible model for correlated medical cost data with several appealing features. First, the mean function is partially linear. Second, the distributional form for the response is not specified. Third, the covariance structure of correlated medical costs has a semiparametric form. We use extended generalized estimating equations to simultaneously estimate all parameters of interest. 
B‐splines are used to estimate unknown functions, and a modification to Akaike information criterion is proposed for selecting knots in spline bases. We apply the model to correlated medical costs in the Medical Expenditure Panel Survey dataset. Simulation studies are conducted to assess the performance of our method. Copyright © 2015 John Wiley & Sons, Ltd.}, number={6}, journal={Statistics in Medicine}, author={Chen, Jinsong and Liu, Lei and Shih, Ya-Chen T. and Zhang, Daowen and Severini, Thomas A.}, year={2016}, month=mar, pages={883--894} } @article{zhao_marceau_zhang_tzeng_2015, title={Assessing gene-environment interactions for common and rare variants with binary traits using gene-trait similarity regression}, volume={199}, number={3}, journal={Genetics}, author={Zhao, G. L. and Marceau, R. and Zhang, D. W. and Tzeng, J. Y.}, year={2015}, pages={695-}, internal-note={End page is missing from the source record; verify the page range against the publisher before citing} } @article{bernhardt_zhang_wang_2015, title={A fast {EM} algorithm for fitting joint models of a binary response and multiple longitudinal covariates subject to detection limits}, volume={85}, ISSN={1872-7352}, DOI={10.1016/j.csda.2014.11.011}, abstractNote={Joint modeling techniques have become a popular strategy for studying the association between a response and one or more longitudinal covariates. Motivated by the GenIMS study, where it is of interest to model the event of survival using censored longitudinal biomarkers, a joint model is proposed for describing the relationship between a binary outcome and multiple longitudinal covariates subject to detection limits. A fast, approximate EM algorithm is developed that reduces the dimension of integration in the E-step of the algorithm to one, regardless of the number of random effects in the joint model. Numerical studies demonstrate that the proposed approximate EM algorithm leads to satisfactory parameter and variance estimates in situations with and without censoring on the longitudinal covariates. 
The approximate EM algorithm is applied to analyze the GenIMS data set.}, journal={Computational Statistics \& Data Analysis}, author={Bernhardt, Paul W. and Zhang, Daowen and Wang, Huixia Judy}, year={2015}, month=may, pages={37--53} } @article{wang_zhang_tzeng_2014, title={Pathway-Guided Identification of Gene-Gene Interactions}, volume={78}, ISSN={1469-1809}, DOI={10.1111/ahg.12080}, abstractNote={Assessing gene‐gene interactions (GxG) at the gene level can permit examination of epistasis at biologically functional units with amplified interaction signals from marker‐marker pairs. While current gene‐based GxG methods tend to be designed for two or a few genes, for complex traits, it is often common to have a list of many candidate genes to explore GxG. We propose a regression model with pathway‐guided regularization for detecting interactions among genes. Specifically, we use the principal components to summarize the SNP‐SNP interactions between a gene pair, and use an L1 penalty that incorporates adaptive weights based on biological guidance and trait supervision to identify important main and interaction effects. Our approach aims to combine biological guidance and data adaptiveness, and yields credible findings that may be likely to shed insights in order to formulate biological hypotheses for further molecular studies. The proposed approach can be used to explore the GxG with a list of many candidate genes and is applicable even when sample size is smaller than the number of predictors studied. We evaluate the utility of the proposed method using simulation and real data analysis. 
The results suggest improved performance over methods not utilizing pathway and trait guidance.}, number={6}, journal={Annals of Human Genetics}, author={Wang, Xin and Zhang, Daowen and Tzeng, Jung-Ying}, year={2014}, month=nov, pages={478--491} } @article{chen_liu_zhang_shih_2013, title={A flexible model for the mean and variance functions, with application to medical cost data}, volume={32}, ISSN={1097-0258}, DOI={10.1002/sim.5838}, abstractNote={Medical cost data are often skewed to the right and heteroscedastic, having a nonlinear relation with covariates. To tackle these issues, we consider an extension to generalized linear models by assuming nonlinear associations of covariates in the mean function and allowing the variance to be an unknown but smooth function of the mean. We make no further assumption on the distributional form. The unknown functions are described by penalized splines, and the estimation is carried out using nonparametric quasi‐likelihood. Simulation studies show the flexibility and advantages of our approach. We apply the model to the annual medical costs of heart failure patients in the clinical data repository at the University of Virginia Hospital System. Copyright © 2013 John Wiley & Sons, Ltd.}, number={24}, journal={Statistics in Medicine}, author={Chen, Jinsong and Liu, Lei and Zhang, Daowen and Shih, Ya-Chen T.}, year={2013}, month=oct, pages={4306--4318} } @inbook{torres_zhang_wang_2013, title={Constructing Conditional Reference Charts for Grip Strength Measured with Error}, ISBN={9781461478454 9781461478461}, ISSN={2194-1009 2194-1017}, url={http://dx.doi.org/10.1007/978-1-4614-7846-1_24}, DOI={10.1007/978-1-4614-7846-1_24}, abstractNote={Muscular strength, usually quantified through the grip strength, can be used in humans and animals as an indicator of neuromuscular function or to assess hand function in patients with trauma or congenital problems. 
Because grip strength cannot be accurately measured, several contaminated measurements are often taken on the same subject. A research interest in grip strength studies is estimating the conditional quantiles of the latent grip strength, which can be used to construct conditional grip strength charts. Current work in the literature often applies conventional quantile regression method using the subject-specific average of the repeated measurements as the response variable. We show that this approach suffers from model misspecification and often leads to biased estimates of the conditional quantiles of the latent grip strength. We propose a new semi-nonparametric estimation approach, which is able to account for measurement errors and allows the subject-specific random effects to follow a flexible distribution. We demonstrate through simulation studies that the proposed method leads to consistent and efficient estimates of the conditional quantiles of the latent response variable. The value of the proposed method is assessed by analyzing a grip strength data set on laboratory mice.}, booktitle={Springer Proceedings in Mathematics \& Statistics}, publisher={Springer New York}, author={Torres, Pedro A. and Zhang, Daowen and Wang, Huixia Judy}, year={2013}, pages={299--310} } @article{bernhardt_wang_zhang_2014, title={Flexible modeling of survival data with covariates subject to detection limits via multiple imputation}, volume={69}, ISSN={1872-7352}, DOI={10.1016/j.csda.2013.07.027}, abstractNote={Models for survival data generally assume that covariates are fully observed. However, in medical studies it is not uncommon for biomarkers to be censored at known detection limits. A computationally-efficient multiple imputation procedure for modeling survival data with covariates subject to detection limits is proposed. This procedure is developed in the context of an accelerated failure time model with a flexible seminonparametric error distribution. 
The consistency and asymptotic normality of the multiple imputation estimator are established and a consistent variance estimator is provided. An iterative version of the proposed multiple imputation algorithm that approximates the EM algorithm for maximum likelihood is also suggested. Simulation studies demonstrate that the proposed multiple imputation methods work well while alternative methods lead to estimates that are either biased or more variable. The proposed methods are applied to analyze the dataset from a recently-conducted GenIMS study.}, journal={Computational Statistics \& Data Analysis}, author={Bernhardt, Paul W. and Wang, Huixia Judy and Zhang, Daowen}, year={2014}, month=jan, pages={81--91} } @article{bernhardt_wang_zhang_2015, title={Statistical Methods for Generalized Linear Models with Covariates Subject to Detection Limits}, volume={7}, ISSN={1867-1764 1867-1772}, url={http://dx.doi.org/10.1007/S12561-013-9099-4}, DOI={10.1007/S12561-013-9099-4}, abstractNote={Censored observations are a common occurrence in biomedical data sets. Although a large amount of research has been devoted to estimation and inference for data with censored responses, very little research has focused on proper statistical procedures when predictors are censored. In this paper, we consider statistical methods for dealing with multiple predictors subject to detection limits within the context of generalized linear models. We investigate and adapt several conventional methods and develop a new multiple imputation approach for analyzing data sets with predictors censored due to detection limits. We establish the consistency and asymptotic normality of the proposed multiple imputation estimator and suggest a computationally simple and consistent variance estimator. 
We also demonstrate that the conditional mean imputation method often leads to inconsistent estimates in generalized linear models, while several other methods are either computationally intensive or lead to parameter estimates that are biased or more variable compared to the proposed multiple imputation estimator. In an extensive simulation study, we assess the bias and variability of different approaches within the context of a logistic regression model and compare variance estimation methods for the proposed multiple imputation estimator. Lastly, we apply several methods to analyze the data set from a recently-conducted GenIMS study.}, number={1}, journal={Statistics in Biosciences}, publisher={Springer Science and Business Media LLC}, author={Bernhardt, Paul W. and Wang, Huixia J. and Zhang, Daowen}, year={2015}, month=may, pages={68--89} } @article{yan_zhang_lu_grifo_liu_2012, title={A Semi-nonparametric Approach to Joint Modeling of A Primary Binary Outcome and Longitudinal Data Measured at Discrete Informative Times}, volume={4}, ISSN={1867-1764 1867-1772}, url={http://dx.doi.org/10.1007/S12561-011-9053-2}, DOI={10.1007/S12561-011-9053-2}, abstractNote={In a study conducted at the New York University Fertility Center, one of the scientific objectives is to investigate the relationship between the final pregnancy outcomes of participants receiving an in vitro fertilization (IVF) treatment and their β-human chorionic gonadotrophin (β-hCG) profiles. A common joint modeling approach to this objective is to use subject-specific normal random effects in a linear mixed model for longitudinal β-hCG data as predictors in a model (e.g., logistic model) for the final pregnancy outcome. Empirical data exploration indicates that the observation times for longitudinal β-hCG data may be informative and the distribution of random effects for longitudinal β-hCG data may not be normally distributed. 
We propose to introduce a third model in the joint model for the informative β-hCG observation times, and relax the normality distributional assumption of random effects using the semi-nonparametric (SNP) approach of Gallant and Nychka (Econometrica 55:363–390, 1987). An EM algorithm is developed for parameter estimation. Extensive simulation designed to evaluate the proposed method indicates that ignoring either informative observation times or distributional assumption of the random effects would lead to invalid and/or inefficient inference. Applying our new approach to the data reveals some interesting findings the traditional approach failed to discover.}, number={2}, journal={Statistics in Biosciences}, publisher={Springer Science and Business Media LLC}, author={Yan, Song and Zhang, Daowen and Lu, Wenbin and Grifo, James A. and Liu, Mengling}, year={2012}, month=jan, pages={213--234} } @article{chen_johnson_wang_o'quigley_isaac_zhang_liu_2012, title={Trajectory Analyses in Alcohol Treatment Research}, volume={36}, ISSN={0145-6008}, DOI={10.1111/j.1530-0277.2012.01748.x}, abstractNote={Background: Various statistical methods have been used for data analysis in alcohol treatment studies. Trajectory analyses can better capture differences in treatment effects and may provide insight on the optimal duration of future clinical trials and grace periods. This improves on the limitation of commonly used parametric (e.g., linear) methods that cannot capture nonlinear temporal trends in the data. Methods: We propose an exploratory approach, using more flexible smoothing mixed effects models, more accurately to characterize the temporal patterns of the drinking data. 
We estimated the trajectories of the treatment arms for data sets from 2 sources: a multisite topiramate study, and the Combined Pharmacotherapies (acamprosate and naltrexone) and Behavioral Interventions study. Results: Our methods illustrate that drinking outcomes of both the topiramate and placebo arms declined over the entire course of the trial but with a greater rate of decline for the topiramate arm. By the point‐wise confidence intervals, the heavy drinking probabilities for the topiramate arm might differ from those of the placebo arm as early as week 2. Furthermore, the heavy drinking probabilities of both arms seemed to stabilize at the end of the study. Overall, naltrexone was better than placebo in reducing drinking over time yet was not different from placebo for subjects receiving the combination of a brief medical management and an intensive combined behavioral intervention. Conclusions: The estimated trajectory plots clearly showed nonlinear temporal trends of the treatment with different medications on drinking outcomes and offered more detailed interpretation of the results. This trajectory analysis approach is proposed as a valid exploratory method for evaluating efficacy in pharmacotherapy trials in alcoholism.}, number={8}, journal={Alcoholism: Clinical and Experimental Research}, author={Chen, Jinsong and Johnson, Bankole A. 
and Wang, Xin-Qun and O'Quigley, John and Isaac, Maria and Zhang, Daowen and Liu, Lei}, year={2012}, month=aug, pages={1442--1448} } @article{tzeng_zhang_pongpanich_smith_mccarthy_sale_worrall_hsu_thomas_sullivan_2011, title={Studying Gene and Gene-Environment Effects of Uncommon and Common Variants on Continuous Traits: A Marker-Set Approach Using Gene-Trait Similarity Regression}, volume={89}, ISSN={1537-6605}, DOI={10.1016/j.ajhg.2011.07.007}, abstractNote={Genomic association analyses of complex traits demand statistical tools that are capable of detecting small effects of common and rare variants and modeling complex interaction effects and yet are computationally feasible. In this work, we introduce a similarity-based regression method for assessing the main genetic and interaction effects of a group of markers on quantitative traits. The method uses genetic similarity to aggregate information from multiple polymorphic sites and integrates adaptive weights that depend on allele frequencies to accommodate common and uncommon variants. Collapsing information at the similarity level instead of the genotype level avoids canceling signals that have the opposite etiological effects and is applicable to any class of genetic variants without the need for dichotomizing the allele types. To assess gene-trait associations, we regress trait similarities for pairs of unrelated individuals on their genetic similarities and assess association by using a score test whose limiting distribution is derived in this work. The proposed regression framework allows for covariates, has the capacity to model both main and interaction effects, can be applied to a mixture of different polymorphism types, and is computationally efficient. These features make it an ideal tool for evaluating associations between phenotype and marker sets defined by linkage disequilibrium (LD) blocks, genes, or pathways in whole-genome analysis.}, 
number={2}, journal={American Journal of Human Genetics}, author={Tzeng, Jung-Ying and Zhang, Daowen and Pongpanich, Monnat and Smith, Chris and McCarthy, Mark I. and Sale, Michele M. and Worrall, Bradford B. and Hsu, Fang-Chi and Thomas, Duncan C. 
and Sullivan, Patrick F.}, year={2011}, month=aug, pages={277--288} } @article{ni_zhang_zhang_2009, title={Automatic model selection for partially linear models}, volume={100}, ISSN={0047-259X}, DOI={10.1016/j.jmva.2009.06.009}, abstractNote={We propose and study a unified procedure for variable selection in partially linear models. A new type of double-penalized least squares is formulated, using the smoothing spline to estimate the nonparametric part and applying a shrinkage penalty on parametric components to achieve model parsimony. Theoretically we show that, with proper choices of the smoothing and regularization parameters, the proposed procedure can be as efficient as the oracle estimator [J. Fan, R. Li, Variable selection via nonconcave penalized likelihood and its oracle properties, Journal of American Statistical Association 96 (2001) 1348–1360]. We also study the asymptotic properties of the estimator when the number of parametric effects diverges with the sample size. Frequentist and Bayesian estimates of the covariance and confidence intervals are derived for the estimators. One great advantage of this procedure is its linear mixed model (LMM) representation, which greatly facilitates its implementation by using standard statistical software. Furthermore, the LMM framework enables one to treat the smoothing parameter as a variance component and hence conveniently estimate it together with other regression coefficients. 
Extensive numerical studies are conducted to demonstrate the effective performance of the proposed procedure.}, number={9}, journal={Journal of Multivariate Analysis}, author={Ni, Xiao and Zhang, Hao Helen and Zhang, Daowen}, year={2009}, month=oct, pages={2100--2111} } @article{tzeng_zhang_chang_thomas_davidian_2009, title={Gene-Trait Similarity Regression for Multimarker-Based Association Analysis}, volume={65}, ISSN={0006-341X}, url={http://dx.doi.org/10.1111/j.1541-0420.2008.01176.x}, DOI={10.1111/j.1541-0420.2008.01176.x}, abstractNote={Summary: We propose a similarity‐based regression method to detect associations between traits and multimarker genotypes. The model regresses similarity in traits for pairs of “unrelated” individuals on their haplotype similarities, and detects the significance by a score test for which the limiting distribution is derived. The proposed method allows for covariates, uses phase‐independent similarity measures to bypass the needs to impute phase information, and is applicable to traits of general types (e.g., quantitative and qualitative traits). We also show that the gene‐trait similarity regression is closely connected with random effects haplotype analysis, although commonly they are considered as separate modeling tools. This connection unites the classic haplotype sharing methods with the variance‐component approaches, which enables direct derivation of analytical properties of the sharing statistics even when the similarity regression model becomes analytically challenging.}, number={3}, journal={Biometrics}, publisher={Wiley}, author={Tzeng, Jung-Ying and Zhang, Daowen and Chang, Sheng-Mao and Thomas, Duncan C. 
and Davidian, Marie}, year={2009}, month=feb, pages={822--832} } @article{munana_zhang_patterson_2010, title={Placebo Effect in Canine Epilepsy Trials}, volume={24}, ISSN={0891-6640}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-73349107805&partnerID=MN8TOARS}, DOI={10.1111/j.1939-1676.2009.0407.x}, abstractNote={Background: The placebo effect is a well‐recognized phenomenon in human medicine; in contrast, little information exists on the effect of placebo administration in veterinary patients. Hypothesis: Nonpharmacologic therapeutic effects play a role in response rates identified in canine epilepsy trials. Animals: Thirty‐four dogs with epilepsy. Methods: Meta‐analysis of the 3 known prospective, placebo‐controlled canine epilepsy trials. The number of seizures per week was compiled for each dog throughout their participation in the trial. Log‐linear models were developed to evaluate seizure frequency during treatment and placebo relative to baseline. Results: Twenty‐two of 28 (79%) dogs in the study that received placebo demonstrated a decrease in seizure frequency compared with baseline, and 8 (29%) could be considered responders, with a 50% or greater reduction in seizures. For the 3 trials evaluated, the average reduction in seizures during placebo administration relative to baseline was 26% (P= .0018), 29% (P= .17), and 46% (P= .01). Conclusions and Clinical Importance: A positive response to placebo administration, manifesting as a decrease in seizure frequency, can be observed in epileptic dogs. This is of importance when evaluating open label studies in dogs that aim to assess efficacy of antiepileptic drugs, as the reported results might be overstated. Findings from this study highlight the need for more placebo‐controlled trials in veterinary medicine.}, number={1}, journal={Journal of Veterinary Internal Medicine}, author={Mu{\~n}ana, K. R. and Zhang, D. and Patterson, E. 
E.}, year={2010}, pages={166–170} } @article{zhang_quan_2009, title={Power and sample size calculation for log-rank test with a time lag in treatment effect}, volume={28}, ISSN={["0277-6715"]}, DOI={10.1002/sim.3501}, abstractNote={AbstractThe log‐rank test is the most powerful non‐parametric test for detecting a proportional hazards alternative and thus is the most commonly used testing procedure for comparing time‐to‐event distributions between different treatments in clinical trials. When the log‐rank test is used for the primary data analysis, the sample size calculation should also be based on the test to ensure the desired power for the study. In some clinical trials, the treatment effect may not manifest itself right after patients receive the treatment. Therefore, the proportional hazards assumption may not hold. Furthermore, patients may discontinue the study treatment prematurely and thus may have diluted treatment effect after treatment discontinuation. If a patient's treatment termination time is independent of his/her time‐to‐event of interest, the termination time can be treated as a censoring time in the final data analysis. Alternatively, we may keep collecting time‐to‐event data until study termination from those patients who discontinued the treatment and conduct an intent‐to‐treat analysis by including them in the original treatment groups. We derive formulas necessary to calculate the asymptotic power of the log‐rank test under this non‐proportional hazards alternative for the two data analysis strategies. Simulation studies indicate that the formulas provide accurate power for a variety of trial settings. A clinical trial example is used to illustrate the application of the proposed methods. 
Copyright © 2009 John Wiley & Sons, Ltd.}, number={5}, journal={STATISTICS IN MEDICINE}, author={Zhang, Daowen and Quan, Hui}, year={2009}, month={Feb}, pages={864–879} } @article{levine_zhang_harris_vaden_2010, title={The use of pooled vs serial urine samples to measure urine protein:creatinine ratios}, volume={39}, ISSN={["0275-6382"]}, DOI={10.1111/j.1939-165x.2009.00167.x}, abstractNote={Background: Evaluation of serial urine protein:creatinine (UPC) ratios is important in prognosticating chronic kidney disease and monitoring response to therapeutic interventions. Owing to random biologic variation in dogs with stable glomerular proteinuria, multiple determinations of UPC ratios often are recommended to reliably assess urine protein loss. This can be cost‐prohibitive.Objective: The purpose of this study was to evaluate agreement between the mean of 3 UPC ratios obtained on 3 separate urine samples per dog and a single UPC ratio obtained when aliquots of the separate samples were pooled and analyzed as 1 sample.Methods: Three separate urine samples were collected from each of 25 dogs, both client‐owned and members of a research colony. Protein and creatinine concentrations were measured in the supernatant of each sample using a biochemical analyzer, and the mean of the 3 UPC ratios was calculated. A 1.0 mL aliquot of each of the 3 samples from each dog was pooled to create a fourth sample for that dog, and the UPC ratio of the pooled sample was similarly determined. Agreement and correlation between the mean and pooled UPC ratios were assessed using Bland–Altman difference plots and regression analysis, respectively.Results: The UPC ratio in the pooled samples was highly correlated (r=.9998, P<.0001) with the mean UPC ratio of the 3 separate samples. 
Strong agreement between results was demonstrated; a UPC ratio from a pooled sample was at most ±20% different than the mean UPC ratio obtained from 3 separate samples.Conclusions: Measuring the UPC ratio in a pooled sample containing equal volumes of several different urine specimens from the same dog provides a reliable and cost‐effective alternative to assessing multiple UPC ratios on several specimens from the same dog.}, number={1}, journal={VETERINARY CLINICAL PATHOLOGY}, author={LeVine, Dana N. and Zhang, Daowen and Harris, Tonya and Vaden, Shelly L.}, year={2010}, month={Mar}, pages={53–56} } @article{ni_zhang_zhang_2010, title={Variable Selection for Semiparametric Mixed Models in Longitudinal Studies}, volume={66}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2009.01240.x}, abstractNote={SummaryWe propose a double‐penalized likelihood approach for simultaneous model selection and estimation in semiparametric mixed models for longitudinal data. Two types of penalties are jointly imposed on the ordinary log‐likelihood: the roughness penalty on the nonparametric baseline function and a nonconcave shrinkage penalty on linear coefficients to achieve model sparsity. Compared to existing estimation equation based approaches, our procedure provides valid inference for data with missing at random, and will be more efficient if the specified model is correct. Another advantage of the new procedure is its easy computation for both regression components and variance parameters. We show that the double‐penalized problem can be conveniently reformulated into a linear mixed model framework, so that existing software can be directly used to implement our method. For the purpose of model inference, we derive both frequentist and Bayesian variance estimation for estimated parametric and nonparametric components. Simulation is used to evaluate and compare the performance of our method to the existing ones. 
We then apply the new method to a real data set from a lactation study.}, number={1}, journal={BIOMETRICS}, author={Ni, Xiao and Zhang, Daowen and Zhang, Hao Helen}, year={2010}, month={Mar}, pages={79–88} } @article{sowers_eyvazzadeh_mcconnell_yosef_jannausch_zhang_harlow_randolph_2008, title={Anti-Mullerian hormone and inhibin B in the definition of ovarian aging and the menopause transition}, volume={93}, ISSN={["1945-7197"]}, DOI={10.1210/jc.2008-0567}, abstractNote={Context/Objective: The objective of the study was to determine whether anti-Mullerian hormone (AMH) and inhibin B are viable endocrine biomarkers for framing the menopause transition from initiation to the final menstrual period (FMP). Design: We assayed AMH, inhibin B, and FSH in 300 archival follicular phase specimens from 50 women with six consecutive annual visits commencing in 1993 when all women were in the pre- and perimenopausal menopause stages. Subsequently each woman had a documented FMP. The assay results were fitted as individual-woman profiles and then related to time to FMP and age at FMP as outcomes. Results: Based on annual values from six time points prior to the FMP, logAMH longitudinal profiles declined and were highly associated with a time point 5 yr prior to FMP [including both observed and values below detection (P < 0.0001 and P = 0.0001, respectively)]. Baseline AMH profiles were also associated with age at FMP (P = 0.035). Models of declining loginhibin B profiles (including both observed and values below detection) were associated with time to FMP (P < 0.0001 and P = 0.0003, respectively). There was no significant association of loginhibin B profiles with age at FMP. Conclusions: AMH, an endocrine marker that reflects the transition of resting primordial follicles to growing follicles, declined to a time point 5 yr prior to the FMP; this may represent a critical biological juncture in the menopause transition. 
Low and nondetectable levels inhibin B levels also were observed 4–5 yr prior to the FMP but were less predictive of time to FMP or age at FMP.}, number={9}, journal={JOURNAL OF CLINICAL ENDOCRINOLOGY \& METABOLISM}, author={Sowers, MaryFran R. and Eyvazzadeh, Aimee D. and McConnell, Daniel and Yosef, Matheos and Jannausch, Mary L. and Zhang, Daowen and Harlow, Sioban and Randolph, Jr., John F.}, year={2008}, month={Sep}, pages={3478–3483} } @article{quan_zhang_zhang_devlamynck_2007, title={Analysis of a binary composite endpoint with missing data in components}, volume={26}, ISSN={["0277-6715"]}, DOI={10.1002/sim.2893}, abstractNote={AbstractComposite endpoints are often used in clinical trials in order to increase the overall event rates, reduce the sizes of the trials and achieve desired power. For example, in a trial to study the effect of a treatment on the prevention of venous thromboembolic events after a major orthopaedic surgery of the lower limbs, the primary endpoint is usually a composite endpoint consisting of any deep vein thrombosis identified by systematic venography of lower limbs, symptomatic and well‐documented non‐fatal pulmonary embolism, and death from all causes. Just as any endpoints, missing data can occur in the components of the composite endpoint. If a patient has missing data on some of the components but not all the components, this patient may not have complete data but partial data for the composite endpoint. To be consistent with the intention‐to‐treat principle, the patient should not be discarded from the analysis. In this research, we propose an approach for the analysis of a composite endpoint with missing data in components. The main idea is to first derive the probabilities of all possible study outcomes based on the appropriate model and then to construct the overall rate for the composite endpoint. Simulations are conducted to compare the approach with several naïve methods. 
A data example is used to illustrate the application of the approach. Copyright © 2007 John Wiley & Sons, Ltd.}, number={26}, journal={STATISTICS IN MEDICINE}, author={Quan, Hui and Zhang, Daowen and Zhang, Ji and Devlamynck, Laure}, year={2007}, month={Nov}, pages={4703–4718} } @article{tzeng_zhang_2007, title={Haplotype-based association analysis via variance-components score test}, volume={81}, ISSN={["0002-9297"]}, DOI={10.1086/521558}, abstractNote={Haplotypes provide a more informative format of polymorphisms for genetic association analysis than do individual single-nucleotide polymorphisms. However, the practical efficacy of haplotype-based association analysis is challenged by a trade-off between the benefits of modeling abundant variation and the cost of the extra degrees of freedom. To reduce the degrees of freedom, several strategies have been considered in the literature. They include (1) clustering evolutionarily close haplotypes, (2) modeling the level of haplotype sharing, and (3) smoothing haplotype effects by introducing a correlation structure for haplotype effects and studying the variance components (VC) for association. Although the first two strategies enjoy a fair extent of power gain, empirical evidence showed that VC methods may exhibit only similar or less power than the standard haplotype regression method, even in cases of many haplotypes. In this study, we report possible reasons that cause the underpowered phenomenon and show how the power of the VC strategy can be improved. We construct a score test based on the restricted maximum likelihood or the marginal likelihood function of the VC and identify its nontypical limiting distribution. Through simulation, we demonstrate the validity of the test and investigate the power performance of the VC approach and that of the standard haplotype regression approach. With suitable choices for the correlation structure, the proposed method can be directly applied to unphased genotypic data. 
Our method is applicable to a wide-ranging class of models and is computationally efficient and easy to implement. The broad coverage and the fast and easy implementation of this method make the VC strategy an effective tool for haplotype analysis, even in modern genomewide association studies.}, number={5}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Tzeng, Jung-Ying and Zhang, Daowen}, year={2007}, month={Nov}, pages={927–938} } @article{zhang_lin_sowers_2007, title={Two-stage functional mixed models for evaluating the effect of longitudinal covariate profiles on a scalar outcome}, volume={63}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2006.00713.x}, abstractNote={Summary The Daily Hormone Study, a substudy of the Study of Women's Health Across the Nation (SWAN) consisting of more than 600 pre‐ and perimenopausal women, includes a scalar measure of total hip bone mineral density (BMD) together with repeated measures of creatinine‐adjusted follicle stimulating hormone (FSH) assayed from daily urine samples collected over one menstrual cycle. It is of scientific interest to investigate the effect of the FSH time profile during a menstrual cycle on total hip BMD, adjusting for age and body mass index. The statistical analysis is challenged by several features of the data: (1) the covariate FSH is measured longitudinally and its effect on the scalar outcome BMD may be complex; (2) due to varying menstrual cycle lengths, subjects have unbalanced longitudinal measures of FSH; and (3) the longitudinal measures of FSH are subject to considerable among‐ and within‐subject variations and measurement errors. We propose a measurement error partial functional linear model, where repeated measures of FSH are modeled using a functional mixed effects model and the effect of the FSH time profile on BMD is modeled using a partial functional linear model by treating the unobserved true subject‐specific FSH time profile as a functional covariate. 
We develop a two‐stage nonparametric regression calibration method using period smoothing splines. Using the connection between smoothing splines and mixed models, we show that a key feature of our approach is that estimation at both stages can be conveniently cast into a unified mixed model framework. A simple testing procedure for constant functional covariate effect is also proposed. The proposed methods are evaluated using simulation studies and applied to the SWAN data.}, number={2}, journal={BIOMETRICS}, author={Zhang, Daowen and Lin, Xihong and Sowers, MaryFran}, year={2007}, month={Jun}, pages={351–362} } @article{li_zhang_davidian_2007, title={Likelihood and pseudo-likelihood methods for semiparametric joint models for a primary endpoint and longitudinal data}, volume={51}, ISSN={0167-9473}, url={http://dx.doi.org/10.1016/j.csda.2006.10.008}, DOI={10.1016/j.csda.2006.10.008}, abstractNote={Inference on the association between a primary endpoint and features of longitudinal profiles of a continuous response is of central interest in medical and public health research. Joint models that represent the association through shared dependence of the primary and longitudinal data on random effects are increasingly popular; however, existing inferential methods may be inefficient or sensitive to assumptions on the random effects distribution. We consider a semiparametric joint model that makes only mild assumptions on this distribution and develop likelihood-based inference on the association and distribution, which offers improved performance relative to existing methods that is insensitive to the true random effects distribution. 
Moreover, the estimated distribution can reveal interesting population features, as we demonstrate for a study of the association between longitudinal hormone levels and bone status in peri-menopausal women.}, number={12}, journal={Computational Statistics \& Data Analysis}, publisher={Elsevier BV}, author={Li, Erning and Zhang, Daowen and Davidian, Marie}, year={2007}, month={Aug}, pages={5776–5790} } @article{lin_zhang_davidian_2006, title={Smoothing spline-based score tests for proportional hazards models}, volume={62}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2005.00521.x}, abstractNote={Summary We propose “score‐type” tests for the proportional hazards assumption and for covariate effects in the Cox model using the natural smoothing spline representation of the corresponding nonparametric functions of time or covariate. The tests are based on the penalized partial likelihood and are derived by viewing the inverse of the smoothing parameter as a variance component and testing an equivalent null hypothesis that the variance component is zero. We show that the tests have a size close to the nominal level and good power against general alternatives, and we apply them to data from a cancer clinical trial.}, number={3}, journal={BIOMETRICS}, author={Lin, Jiang and Zhang, Daowen and Davidian, Marie}, year={2006}, month={Sep}, pages={803–812} } @article{sowers_crutchfield_richards_wilkin_furniss_jannausch_zhang_gross_2005, title={Sarcopenia is related to physical functioning and leg strength in middle-aged women}, volume={60}, ISSN={["1758-535X"]}, DOI={10.1093/gerona/60.4.486}, abstractNote={BACKGROUND In the aging process, loss of muscle is relatively continuous, but the initiation, timing, and amount of muscle loss that relate to functional compromise are poorly described. Also poorly understood is whether strength and functioning in aging are related to the amount of lean mass and its change as well as to the amount of fat mass and its change. 
METHODS The purpose of the study was to ascertain whether 3-year lean and fat mass change predicted functional status in 712 African American and Caucasian women, aged 34-58 years. Fat and lean mass were assessed with bioelectrical impedance. Lower leg strength (torque) was measured with a portable isometric chair, and two indices of physical functioning, walking velocity and double support (both feet touching the surface while walking), were measured with an instrumented gait mat. RESULTS Almost 9% of middle-aged women had at least a 6% loss (>2.5 kg) of lean mass over the 3-year observation period. Women who lost at least 2.5 kg of lean mass had slower walking velocity and less leg strength, although women who simultaneously gained more than 2.5 kg of fat mass (at least 7.5%) did not have less leg strength. Age was significantly associated with less velocity, less leg strength, and more time in double support. CONCLUSIONS Even in middle-aged women, there is loss of lean mass among almost 1 woman in 10, and this loss of lean mass (sarcopenia) is associated with greater compromise in physical functioning.}, number={4}, journal={JOURNALS OF GERONTOLOGY SERIES A-BIOLOGICAL SCIENCES AND MEDICAL SCIENCES}, author={Sowers, MR and Crutchfield, M and Richards, K and Wilkin, MK and Furniss, A and Jannausch, M and Zhang, DW and Gross, M}, year={2005}, month={Apr}, pages={486–490} } @article{li_zhang_davidian_2004, title={Conditional estimation for generalized linear models when covariates are subject-specific parameters in a mixed model for longitudinal measurements}, volume={60}, number={1}, journal={Biometrics}, author={Li, E. N. and Zhang, D. W. 
and Davidian, M.}, year={2004}, pages={1–7} } @article{zhang_choi_wanamaker_fenton_chin_malatrasi_turuspekov_walia_akhunov_kianian_et_al_2004, title={Construction and evaluation of cDNA libraries for large-scale expressed sequence tag sequencing in wheat (Triticum aestivum L.)}, volume={168}, ISSN={["1943-2631"]}, DOI={10.1534/genetics.104.034785}, abstractNote={Abstract A total of 37 original cDNA libraries and 9 derivative libraries enriched for rare sequences were produced from Chinese Spring wheat (Triticum aestivum L.), five other hexaploid wheat genotypes (Cheyenne, Brevor, TAM W101, BH1146, Butte 86), tetraploid durum wheat (T. turgidum L.), diploid wheat (T. monococcum L.), and two other diploid members of the grass tribe Triticeae (Aegilops speltoides Tausch and Secale cereale L.). The emphasis in the choice of plant materials for library construction was reproductive development subjected to environmental factors that ultimately affect grain quality and yield, but roots and other tissues were also included. Partial cDNA expressed sequence tags (ESTs) were examined by various measures to assess the quality of these libraries. All ESTs were processed to remove cloning system sequences and contaminants and then assembled using CAP3. 
Following these processing steps, this assembly yielded 101,107 sequences derived from 89,043 clones, which defined 16,740 contigs and 33,213 singletons, a total of 49,953 “unigenes.” Analysis of the distribution of these unigenes among the libraries led to the conclusion that the enrichment methods were effective in reducing the most abundant unigenes and to the observation that the most diverse libraries were from tissues exposed to environmental stresses including heat, drought, salinity, or low temperature.}, number={2}, journal={GENETICS}, author={Zhang, D and Choi, DW and Wanamaker, S and Fenton, RD and Chin, A and Malatrasi, M and Turuspekov, Y and Walia, H and Akhunov, ED and Kianian, P and others}, year={2004}, month={Oct}, pages={595–608} } @article{zhang_2004, title={Generalized linear mixed models with varying coefficients for longitudinal data}, volume={60}, number={1}, journal={Biometrics}, author={Zhang, D. W.}, year={2004}, pages={8–15} } @article{zhang_davidian_2004, title={Likelihood and conditional likelihood inference for generalized additive mixed models for clustered data}, volume={91}, ISSN={["0047-259X"]}, DOI={10.1016/j.jmva.2004.04.007}, abstractNote={Lin and Zhang (J. Roy. Statist. Soc. Ser. B 61 (1999) 381) proposed the generalized additive mixed model (GAMM) as a framework for analysis of correlated data, where normally distributed random effects are used to account for correlation in the data, and proposed to use double penalized quasi-likelihood (DPQL) to estimate the nonparametric functions in the model and marginal likelihood to estimate the smoothing parameters and variance components simultaneously. However, the normal distributional assumption for the random effects may not be realistic in many applications, and it is unclear how violation of this assumption affects ensuing inferences for GAMMs. 
For a particular class of GAMMs, we propose a conditional estimation procedure built on a conditional likelihood for the response given a sufficient statistic for the random effect, treating the random effect as a nuisance parameter, which thus should be robust to its distribution. In extensive simulation studies, we assess performance of this estimator under a range of conditions and use it as a basis for comparison to DPQL to evaluate the impact of violation of the normality assumption. The procedure is illustrated with application to data from the Multicenter AIDS Cohort Study (MACS).}, number={1}, journal={JOURNAL OF MULTIVARIATE ANALYSIS}, author={Zhang, DW and Davidian, M}, year={2004}, month={Oct}, pages={90–106} } @article{zhang_lin_2003, title={Hypothesis testing in semiparametric additive mixed models}, volume={4}, number={1}, journal={Biostatistics (Oxford, England)}, author={Zhang, D. W. and Lin, X. H.}, year={2003}, pages={57–74} } @article{chen_zhang_davidian_2002, title={A Monte Carlo EM algorithm for generalized linear mixed models with flexible random effects distribution}, volume={3}, number={3}, journal={Biostatistics (Oxford, England)}, author={Chen, J. L. and Zhang, D. W. and Davidian, M.}, year={2002}, pages={347–360} } @article{compare_trial_2002, title={Randomized comparison of platelet inhibition with abciximab, tirofiban and eptifibatide during percutaneous coronary intervention in acute coronary syndromes - the COMPARE trial}, volume={106}, number={12}, journal={Circulation (New York, N.Y. : 1950)}, internal-note={Key and title were repaired from an export that had written the title into the citation key; the author list is missing from the export and must be added from the publisher record.}, year={2002}, pages={1470–1476} } @article{zhang_davidian_2001, title={Linear mixed models with flexible distributions of random effects for longitudinal data}, volume={57}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2001.00795.x}, abstractNote={Summary. Normality of random effects is a routine assumption for the linear mixed model, but it may be unrealistic, obscuring important features of among‐individual variation. 
We relax this assumption by approximating the random effects density by the seminonparametric (SNP) representation of Gallant and Nychka (1987, Econometrica 55, 363–390), which includes normality as a special case and provides flexibility in capturing a broad range of nonnormal behavior, controlled by a user‐chosen tuning parameter. An advantage is that the marginal likelihood may be expressed in closed form, so inference may be carried out using standard optimization techniques. We demonstrate that standard information criteria may be used to choose the tuning parameter and detect departures from normality, and we illustrate the approach via simulation and using longitudinal data from the Framingham study.}, number={3}, journal={BIOMETRICS}, author={Zhang, DW and Davidian, M}, year={2001}, month={Sep}, pages={795–802} } @article{lin_zhang_2001, title={Semiparametric nonlinear mixed-effects models and their applications - Comment}, volume={96}, number={456}, journal={Journal of the American Statistical Association}, author={Lin, X. H. and Zhang, D. W.}, year={2001}, pages={1288–1291} } @article{lin_ryan_sammel_zhang_padungtod_xu_2000, title={A scaled linear mixed model for multiple outcomes}, volume={56}, DOI={10.1111/j.0006-341X.2000.00593.x}, abstractNote={Summary. We propose a scaled linear mixed model to assess the effects of exposure and other covariates on multiple continuous outcomes. The most general form of the model allows a different exposure effect for each outcome. An important special case is a model that represents the exposure effects using a common global measure that can be characterized in terms of effect sizes. Correlations among different outcomes within the same subject are accommodated using random effects. We develop two approaches to model fitting, including the maximum likelihood method and the working parameter method. 
A key feature of both methods is that they can be easily implemented by repeatedly calling software for fitting standard linear mixed models, e.g., SAS PROC MIXED. Compared to the maximum likelihood method, the working parameter method is easier to implement and yields fully efficient estimators of the parameters of interest. We illustrate the proposed methods by analyzing data from a study of the effects of occupational pesticide exposure on semen quality in a cohort of Chinese men.}, number={2}, journal={Biometrics}, author={Lin, X. H. and Ryan, L. and Sammel, M. and Zhang, D. W. and Padungtod, C. and Xu, X. P.}, year={2000}, pages={593–601} } @article{zhang_lin_sowers_2000, title={Semiparametric regression for periodic longitudinal hormone data from multiple menstrual cycles}, volume={56}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2000.00031.x}, abstractNote={Summary. We consider Semiparametric regression for periodic longitudinal data. Parametric fixed effects are used to model the covariate effects and a periodic nonparametric smooth function is used to model the time effect. The within–subject correlation is modeled using subject‐specific random effects and a random stochastic process with a periodic variance function. We use maximum penalized likelihood to estimate the regression coefficients and the periodic nonparametric time function, whose estimator is shown to be a periodic cubic smoothing spline. We use restricted maximum likelihood to simultaneously estimate the smoothing parameter and the variance components. We show that all model parameters can be easily obtained by fitting a linear mixed model. A common problem in the analysis of longitudinal data is to compare the time profiles of two groups, e.g., between treatment and placebo. We develop a scaled chi‐squared test for the equality of two nonparametric time functions. 
The proposed model and the test are illustrated by analyzing hormone data collected during two consecutive menstrual cycles and their performance is evaluated through simulations.}, number={1}, journal={BIOMETRICS}, author={Zhang, DW and Lin, XH and Sowers, MF}, year={2000}, month={Mar}, pages={31–39} } @article{lin_zhang_1999, title={Inference in generalized additive mixed models by using smoothing splines}, volume={61}, number={2}, journal={Journal of the Royal Statistical Society. Series B, Methodological}, author={Lin, X. H. and Zhang, D. W.}, year={1999}, pages={381–400} }