@article{wang_tzeng_huang_maguire_hoyo_allen_2023, title={Duration of exposure to epidural anesthesia at delivery, DNA methylation in umbilical cord blood and their association with offspring asthma in Non-Hispanic Black women}, volume={9}, ISSN={["2058-5888"]}, DOI={10.1093/eep/dvac026}, abstractNote={Abstract}, number={1}, journal={ENVIRONMENTAL EPIGENETICS}, author={Wang, Yaxu and Tzeng, Jung-Ying and Huang, Yueyang and Maguire, Rachel and Hoyo, Cathrine and Allen, Terrence K.}, year={2023}, month={Jan} } @article{clark_fu_liu_ho_wang_lee_chou_wang_tzeng_2023, title={The prediction of Alzheimer's disease through multi-trait genetic modeling}, volume={15}, ISSN={["1663-4365"]}, DOI={10.3389/fnagi.2023.1168638}, abstractNote={To better capture the polygenic architecture of Alzheimer’s disease (AD), we developed a joint genetic score, MetaGRS. We incorporated genetic variants for AD and 24 other traits from two independent cohorts, NACC (n = 3,174, training set) and UPitt (n = 2,053, validation set). One standard deviation increase in the MetaGRS is associated with about 57% increase in the AD risk [hazard ratio (HR) = 1.577, p = 7.17 E-56], showing little difference from the HR for AD GRS alone (HR = 1.579, p = 1.20E-56), suggesting similar utility of both models. We also conducted APOE-stratified analyses to assess the role of the e4 allele on risk prediction. Similar to that of the combined model, our stratified results did not show a considerable improvement of the MetaGRS. 
Our study showed that the prediction power of the MetaGRS significantly outperformed that of the reference model without any genetic information, but was effectively equivalent to the prediction power of the AD GRS.}, journal={FRONTIERS IN AGING NEUROSCIENCE}, author={Clark, Kaylyn and Fu, Wei and Liu, Chia-Lun and Ho, Pei-Chuan and Wang, Hui and Lee, Wan-Ping and Chou, Shin-Yi and Wang, Li-San and Tzeng, Jung-Ying}, year={2023}, month={Jul} } @article{jeng_hu_venkat_lu_tzeng_2023, title={Transfer learning with false negative control improves polygenic risk prediction}, volume={19}, ISSN={["1553-7404"]}, DOI={10.1371/journal.pgen.1010597}, abstractNote={Polygenic risk score (PRS) is a quantity that aggregates the effects of variants across the genome and estimates an individual’s genetic predisposition for a given trait. PRS analysis typically contains two input data sets: base data for effect size estimation and target data for individual-level prediction. Given the availability of large-scale base data, it becomes more common that the ancestral background of base and target data do not perfectly match. In this paper, we treat the GWAS summary information obtained in the base data as knowledge learned from a pre-trained model, and adopt a transfer learning framework to effectively leverage the knowledge learned from the base data that may or may not have similar ancestral background as the target samples to build prediction models for target individuals. Our proposed transfer learning framework consists of two main steps: (1) conducting false negative control (FNC) marginal screening to extract useful knowledge from the base data; and (2) performing joint model training to integrate the knowledge extracted from base data with the target training data for accurate trans-data prediction. 
This new approach can significantly enhance the computational and statistical efficiency of joint-model training, alleviate over-fitting, and facilitate more accurate trans-data prediction when heterogeneity level between target and base data sets is small or high.}, number={11}, journal={PLOS GENETICS}, author={Jeng, Xinge Jessie and Hu, Yifei and Venkat, Vaishnavi and Lu, Tzu-Pin and Tzeng, Jung-Ying}, year={2023}, month={Nov} } @article{huang_callahan_wu_holloway_brochu_lu_peng_tzeng_2022, title={Phylogeny-guided microbiome OTU-specific association test (POST)}, volume={10}, ISSN={["2049-2618"]}, DOI={10.1186/s40168-022-01266-3}, abstractNote={Abstract}, number={1}, journal={MICROBIOME}, author={Huang, Caizhi and Callahan, Benjamin John and Wu, Michael C. and Holloway, Shannon T. and Brochu, Hayden and Lu, Wenbin and Peng, Xinxia and Tzeng, Jung-Ying}, year={2022}, month={Jun} } @article{yu_lu_hsiao_lin_wu_tzeng_hsiao_2021, title={An Integrative Co-localization (INCO) Analysis for SNV and CNV Genomic Features With an Application to Taiwan Biobank Data}, volume={12}, ISSN={["1664-8021"]}, DOI={10.3389/fgene.2021.709555}, abstractNote={Genomic studies have been a major approach to elucidating disease etiology and to exploring potential targets for treatments of many complex diseases. Statistical analyses in these studies often face the challenges of multiplicity, weak signals, and the nature of dependence among genetic markers. This situation becomes even more complicated when multi-omics data are available. To integrate the data from different platforms, various integrative analyses have been adopted, ranging from the direct union or intersection operation on sets derived from different single-platform analysis to complex hierarchical multi-level models. The former ignores the biological relationship between molecules while the latter can be hard to interpret. 
We propose in this study an integrative approach that combines both single nucleotide variants (SNVs) and copy number variations (CNVs) in the same genomic unit to co-localize the concurrent effect and to deal with the sparsity due to rare variants. This approach is illustrated with simulation studies to evaluate its performance and is applied to low-density lipoprotein cholesterol and triglyceride measurements from Taiwan Biobank. The results show that the proposed method can more effectively detect the collective effect from both SNVs and CNVs compared to traditional methods. For the biobank analysis, the identified genetic regions including the gene VNN2 could be novel and deserve further investigation.}, journal={FRONTIERS IN GENETICS}, author={Yu, Qi-You and Lu, Tzu-Pin and Hsiao, Tzu-Hung and Lin, Ching-Heng and Wu, Chi-Yun and Tzeng, Jung-Ying and Hsiao, Chuhsing Kate}, year={2021}, month={Sep} } @article{chang_yang_lu_huang_huang_hung_miecznikowski_lu_tzeng_2021, title={Gene-set integrative analysis of multi-omics data using tensor-based association test}, volume={37}, ISSN={["1460-2059"]}, DOI={10.1093/bioinformatics/btab125}, abstractNote={Abstract}, number={16}, journal={BIOINFORMATICS}, author={Chang, Sheng-Mao and Yang, Meng and Lu, Wenbin and Huang, Yu-Jyun and Huang, Yueyang and Hung, Hung and Miecznikowski, Jeffrey C. and Lu, Tzu-Pin and Tzeng, Jung-Ying}, year={2021}, month={Aug}, pages={2259–2265} } @article{chi_ipsen_hsiao_lin_wang_lee_lu_tzeng_2021, title={SEAGLE: A Scalable Exact Algorithm for Large-Scale Set-Based Gene-Environment Interaction Tests in Biobank Data}, volume={12}, ISSN={["1664-8021"]}, DOI={10.3389/fgene.2021.710055}, abstractNote={The explosion of biobank data offers unprecedented opportunities for gene-environment interaction (GxE) studies of complex diseases because of the large sample sizes and the rich collection in genetic and non-genetic information. 
However, the extremely large sample size also introduces new computational challenges in G×E assessment, especially for set-based G×E variance component (VC) tests, which are a widely used strategy to boost overall G×E signals and to evaluate the joint G×E effect of multiple variants from a biologically meaningful unit (e.g., gene). In this work, we focus on continuous traits and present SEAGLE, aScalableExactAlGorithm forLarge-scale set-based G×Etests, to permit G×E VC tests for biobank-scale data. SEAGLE employs modern matrix computations to calculate the test statistic andp-value of the GxE VC test in a computationally efficient fashion, without imposing additional assumptions or relying on approximations. SEAGLE can easily accommodate sample sizes in the order of 105, is implementable on standard laptops, and does not require specialized computing equipment. We demonstrate the performance of SEAGLE using extensive simulations. We illustrate its utility by conducting genome-wide gene-based G×E analysis on the Taiwan Biobank data to explore the interaction of gene and physical activity status on body mass index.}, journal={FRONTIERS IN GENETICS}, author={Chi, Jocelyn T. and Ipsen, Ilse C. F. and Hsiao, Tzu-Hung and Lin, Ching-Heng and Wang, Li-San and Lee, Wan-Ping and Lu, Tzu-Pin and Tzeng, Jung-Ying}, year={2021}, month={Nov} } @article{brucker_lu_west_yu_hsiao_hsiao_lin_magnusson_sullivan_szatkiewicz_et al._2020, title={Association test using Copy Number Profile Curves (CONCUR) enhances power in rare copy number variant analysis}, volume={16}, ISSN={["1553-7358"]}, url={https://doi.org/10.1371/journal.pcbi.1007797}, DOI={10.1371/journal.pcbi.1007797}, abstractNote={Copy number variants (CNVs) are the gain or loss of DNA segments in the genome that can vary in dosage and length. CNVs comprise a large proportion of variation in human genomes and impact health conditions. 
To detect rare CNV associations, kernel-based methods have been shown to be a powerful tool due to their flexibility in modeling the aggregate CNV effects, their ability to capture effects from different CNV features, and their accommodation of effect heterogeneity. To perform a kernel association test, a CNV locus needs to be defined so that locus-specific effects can be retained during aggregation. However, CNV loci are arbitrarily defined and different locus definitions can lead to different performance depending on the underlying effect patterns. In this work, we develop a new kernel-based test called CONCUR (i.e., copy number profile curve-based association test) that is free from a definition of locus and evaluates CNV-phenotype associations by comparing individuals’ copy number profiles across the genomic regions. CONCUR is built on the proposed concepts of “copy number profile curves” to describe the CNV profile of an individual, and the “common area under the curve (cAUC) kernel” to model the multi-feature CNV effects. The proposed method captures the effects of CNV dosage and length, accounts for the numerical nature of copy numbers, and accommodates between- and within-locus etiological heterogeneity without the need to define artificial CNV loci as required in current kernel methods. In a variety of simulation settings, CONCUR shows comparable or improved power over existing approaches. Real data analyses suggest that CONCUR is well powered to detect CNV effects in the Swedish Schizophrenia Study and the Taiwan Biobank.}, number={5}, journal={PLOS COMPUTATIONAL BIOLOGY}, publisher={Public Library of Science (PLoS)}, author={Brucker, Amanda and Lu, Wenbin and West, Rachel Marceau and Yu, Qi-You and Hsiao, Chuhsing Kate and Hsiao, Tzu-Hung and Lin, Ching-Heng and Magnusson, Patrik K. E. and Sullivan, Patrick F. and Szatkiewicz, Jin P. 
and et al.}, editor={Ma, JianEditor}, year={2020}, month={May} } @article{jeng_rhyne_zhang_tzeng_2020, title={Effective SNP ranking improves the performance of eQTL mapping}, volume={44}, ISSN={["1098-2272"]}, DOI={10.1002/gepi.22293}, abstractNote={Abstract}, number={6}, journal={GENETIC EPIDEMIOLOGY}, author={Jeng, X. Jessie and Rhyne, Jacob and Zhang, Teng and Tzeng, Jung-Ying}, year={2020}, month={Sep}, pages={611–619} } @article{rhyne_jeng_chi_tzeng_2020, title={FastLORS: Joint modelling for expression quantitative trait loci mapping in R}, volume={9}, ISSN={["2049-1573"]}, url={https://doi.org/10.1002/sta4.265}, DOI={10.1002/sta4.265}, abstractNote={FastLORS is a software package that implements a new algorithm to solve sparse multivariate regression for expression quantitative trait loci (eQTLs) mapping. FastLORS solves the same optimization problem as LORS, an existing popular algorithm. The optimization problem is solved through inexact block coordinate descent with updates by proximal gradient steps, which reduces the computational cost compared with LORS. We apply LORS and FastLORS to a real dataset for eQTL mapping and demonstrate that FastLORS delivers comparable results with LORS in much less computing time.}, number={1}, journal={STAT}, publisher={Wiley}, author={Rhyne, Jacob and Jeng, X. Jessie and Chi, Eric C. and Tzeng, Jung-Ying}, year={2020} } @article{martinez_maity_yolken_sullivan_tzeng_2020, title={Robust kernel association testing (RobKAT)}, volume={44}, ISSN={["1098-2272"]}, url={https://doi.org/10.1002/gepi.22280}, DOI={10.1002/gepi.22280}, abstractNote={Abstract}, number={3}, journal={GENETIC EPIDEMIOLOGY}, author={Martinez, Kara and Maity, Arnab and Yolken, Robert H. and Sullivan, Patrick F. 
and Tzeng, Jung-Ying}, year={2020}, month={Apr}, pages={272–282} } @article{huang_tzeng_maguire_hoyo_allen_2020, title={The association between neuraxial anesthesia and the development of childhood asthma - a secondary analysis of the newborn epigenetics study cohort}, volume={36}, ISSN={["1473-4877"]}, DOI={10.1080/03007995.2020.1747417}, abstractNote={Abstract Objectives Childhood asthma is a common chronic illness that has been associated with mode of delivery. However, the effect of cesarean delivery alone does not fully account for the increased prevalence of childhood asthma. We tested the hypothesis that neuraxial anesthesia used for labor analgesia and cesarean delivery alters the risk of developing childhood asthma. Methods Within the Newborn Epigenetics Study birth cohort, 196 mother and child pairs with entries in the electronic anesthesia records were included. From these records, data on maternal anesthesia type, duration of exposure, and drugs administered peripartum were abstracted and combined with questionnaire-derived prenatal risk factors and medical records and questionnaire-derived asthma diagnosis data in children. Logistic regression models were used to evaluate associations between type of anesthesia, duration of anesthesia, and the development of asthma in males and females. Results We found that longer duration of epidural anesthesia was associated with a lower risk of asthma in male children (OR = 0.80; 95% CI = 0.66–0.95) for each hour of epidural exposure. Additionally, a unit increase in the composite dose of local anesthetics and opioid analgesics administered via the spinal route was associated with a lower risk of asthma in both male (OR = 0.59, 95% CI = 0.36–0.96) and female children (OR 0.26, 95% CI 0.09–0.82). Conclusion Our data suggest that peripartum exposure to neuraxial anesthesia may reduce the risk of childhood asthma primarily in males. 
Larger human studies and model systems with longer follow-up are required to elucidate these findings.}, number={6}, journal={CURRENT MEDICAL RESEARCH AND OPINION}, author={Huang, Yueyang and Tzeng, Jung-Ying and Maguire, Rachel and Hoyo, Cathrine and Allen, Terrence}, year={2020}, month={Jun}, pages={1025–1032} } @misc{yao_chung_lin_tsai_chang_yeh_tsai_liao_hua_lai_et al._2019, title={Genetic loci determining total immunoglobulin E levels from birth through adulthood}, volume={74}, ISSN={["1398-9995"]}, DOI={10.1111/all.13654}, abstractNote={ferentiation of the cutaneous microbiota in psoriasis. Microbiome. 2013;1:31. 6. Chng KR, Tay AS, Li C, et al. Whole metagenome profiling reveals skin microbiome‐dependent susceptibility to atopic dermatitis flare. Nat Microbiol. 2016;1:16106. 7. St Laurent G 3rd, Seilheimer B, Tackett M, et al. Deep sequencing transcriptome analysis of murine wound healing: effects of a multicomponent, Multitarget Natural Product Therapy‐Tr14. Front Mol Biosci. 2017;4:57. 8. Caley MP, Martins VL, O'Toole EA. Metalloproteinases and wound healing. Adv Wound Care (New Rochelle). 2015;4:225‐234. 9. Hoffmann AR, Patterson AP, Diesel A, et al. The skin microbiome in healthy and allergic dogs. PLoS One. 2014;9:e83197.}, number={3}, journal={ALLERGY}, author={Yao, Tsung-Chieh and Chung, Ren-Hua and Lin, Chung-Yen and Tsai, Pei-Chien and Chang, Wei-Chiao and Yeh, Kuo-Wei and Tsai, Ming-Han and Liao, Sui-Ling and Hua, Man-Chin and Lai, Shen-Hao and et al.}, year={2019}, month={Mar}, pages={621–625} } @article{west_lu_rotroff_kuenemann_chang_wu_wagner_buse_motsinger-reif_fourches_et al._2019, title={Identifying individual risk rare variants using protein structure guided local tests (POINT)}, volume={15}, ISSN={["1553-7358"]}, DOI={10.1371/journal.pcbi.1006722}, abstractNote={Rare variants are of increasing interest to genetic association studies because of their etiological contributions to human complex diseases. 
Due to the rarity of the mutant events, rare variants are routinely analyzed on an aggregate level. While aggregation analyses improve the detection of global-level signal, they are not able to pinpoint causal variants within a variant set. To perform inference on a localized level, additional information, e.g., biological annotation, is often needed to boost the information content of a rare variant. Following the observation that important variants are likely to cluster together on functional domains, we propose a protein structure guided local test (POINT) to provide variant-specific association information using structure-guided aggregation of signal. Constructed under a kernel machine framework, POINT performs local association testing by borrowing information from neighboring variants in the 3-dimensional protein space in a data-adaptive fashion. Besides merely providing a list of promising variants, POINT assigns each variant a p-value to permit variant ranking and prioritization. We assess the selection performance of POINT using simulations and illustrate how it can be used to prioritize individual rare variants in PCSK9, ANGPTL4 and CETP in the Action to Control Cardiovascular Risk in Diabetes (ACCORD) clinical trial data.}, number={2}, journal={PLOS COMPUTATIONAL BIOLOGY}, author={West, Rachel Marceau and Lu, Wenbin and Rotroff, Daniel M. and Kuenemann, Melaine A. and Chang, Sheng-Mao and Wu, Michael C. and Wagner, Michael J. and Buse, John B. and Motsinger-Reif, Alison A. 
and Fourches, Denis and et al.}, year={2019}, month={Feb} } @article{chang_tsai_tzeng_yeh_chen_lai_liao_hua_tsai_huang_et al._2019, title={Reference equations for spirometry in healthy Asian children aged 5 to 18 years in Taiwan}, volume={12}, ISSN={["1939-4551"]}, DOI={10.1016/j.waojou.2019.100074}, abstractNote={This study aimed to establish reference equations for spirometry in healthy Taiwanese children and assess the applicability of the Global Lung Function Initiative (GLI)-2012 equations to Taiwanese children.Spirometric data collected from 757 healthy Taiwanese children aged 5 to 18 years in a population-based cohort study. Prediction equations derived using linear regression and the generalized additive models for location, scale and shape (GAMLSS) method, respectively.The GLI-2012 South East Asian equations did not provide a close fit with mean ± standard error z-scores of -0.679 ± 0.030 (FVC), -0.186 ± 0.044 (FEV1), -0.875 ± 0.049 (FEV1/FVC ratio) and -2.189 ± 0.063 (FEF25-75) for girls; and 0.238 ± 0.059, -0.061 ± 0.053, -0.513 ± 0.059 and -1.896 ± 0.077 for boys. The proposed GAMLSS models took age, height, and weight into account. GAMLSS models for boys and girls captured the characteristics of spirometric data in the study population closely in contrast to the linear regression models and the GLI-2012 equations.This study provides up-to-date reference values for spirometry using GAMLSS modeling in healthy Taiwanese children aged 5 to 18 years. 
Our study provides evidence that the GLI-2012 reference equations are not properly matched to spirometric data in a contemporary Taiwanese child population, indicating the urgent need for an update of GLI reference values by inclusion of more data of non-Caucasian decent.}, number={11}, journal={WORLD ALLERGY ORGANIZATION JOURNAL}, author={Chang, Sheng-Mao and Tsai, Hui-Ju and Tzeng, Jung-Ying and Yeh, Kuo-Wei and Chen, Li-Chen and Lai, Shen-Hao and Liao, Sui-Ling and Hua, Man-Chin and Tsai, Ming-Han and Huang, Jing-Long and et al.}, year={2019}, month={Nov} } @article{szatkiewicz_marceau_yilmaz_bulik_crowley_mattheisen_sullivan_lu_maity_tzeng_et al._2019, title={VARIANCE COMPONENT TEST FOR CROSS-DISORDER PATHWAY ANALYSIS}, volume={29}, ISSN={["1873-7862"]}, DOI={10.1016/j.euroneuro.2018.08.252}, journal={EUROPEAN NEUROPSYCHOPHARMACOLOGY}, author={Szatkiewicz, Jin and Marceau, Rachel and Yilmaz, Zeynep and Bulik, Cynthia and Crowley, James and Mattheisen, Manuel and Sullivan, Patrick and Lu, Wenbin and Maity, Arnab and Tzeng, Jung-Ying and et al.}, year={2019}, pages={1204–1205} } @article{green_hoyo_mattingly_luo_tzeng_murphy_buchwalter_planchart_2018, title={Cadmium exposure increases the risk of juvenile obesity: a human and zebrafish comparative study}, volume={42}, ISSN={0307-0565 1476-5497}, url={http://dx.doi.org/10.1038/S41366-018-0036-Y}, DOI={10.1038/S41366-018-0036-Y}, abstractNote={Human obesity is a complex metabolic disorder disproportionately affecting people of lower socioeconomic strata, and ethnic minorities, especially African Americans and Hispanics. Although genetic predisposition and a positive energy balance are implicated in obesity, these factors alone do not account for the excess prevalence of obesity in lower socioeconomic populations. 
Therefore, environmental factors, including exposure to pesticides, heavy metals, and other contaminants, are agents widely suspected to have obesogenic activity, and they also are spatially correlated with lower socioeconomic status. Our study investigates the causal relationship between exposure to the heavy metal, cadmium (Cd), and obesity in a cohort of children and in a zebrafish model of adipogenesis. An extensive collection of first trimester maternal blood samples obtained as part of the Newborn Epigenetics Study (NEST) was analyzed for the presence of Cd, and these results were cross analyzed with the weight-gain trajectory of the children through age 5 years. Next, the role of Cd as a potential obesogen was analyzed in an in vivo zebrafish model. Our analysis indicates that the presence of Cd in maternal blood during pregnancy is associated with increased risk of juvenile obesity in the offspring, independent of other variables, including lead (Pb) and smoking status. Our results are recapitulated in a zebrafish model, in which exposure to Cd at levels approximating those observed in the NEST study is associated with increased adiposity. Our findings identify Cd as a potential human obesogen. Moreover, these observations are recapitulated in a zebrafish model, suggesting that the underlying mechanisms may be evolutionarily conserved, and that zebrafish may be a valuable model for uncovering pathways leading to Cd-mediated obesity in human populations.}, number={7}, journal={International Journal of Obesity}, publisher={Springer Science and Business Media LLC}, author={Green, Adrian J. and Hoyo, Cathrine and Mattingly, Carolyn J. and Luo, Yiwen and Tzeng, Jung-Ying and Murphy, Susan K. and Buchwalter, David B. 
and Planchart, Antonio}, year={2018}, month={Feb}, pages={1285–1295} } @article{jeng_zhang_tzeng_2018, title={Efficient Signal Inclusion With Genomic Applications}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2018.1518236}, DOI={10.1080/01621459.2018.1518236}, abstractNote={Abstract This article addresses the challenge of efficiently capturing a high proportion of true signals for subsequent data analyses when sample sizes are relatively limited with respect to data dimension. We propose the signal missing rate (SMR) as a new measure for false-negative control to account for the variability of false-negative proportion. Novel data-adaptive procedures are developed to control SMR without incurring many unnecessary false positives under dependence. We justify the efficiency and adaptivity of the proposed methods via theory and simulation. The proposed methods are applied to GWAS on human height to effectively remove irrelevant single nucleotide polymorphisms (SNPs) while retaining a high proportion of relevant SNPs for subsequent polygenic analysis. Supplementary materials for this article are available online.}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Jeng, X. Jessie and Zhang, Teng and Tzeng, Jung-Ying}, year={2018}, month={Sep}, pages={1–23} } @article{maity_zhao_sullivan_tzeng_2018, title={Inference on phenotype-specific effects of genes using multivariate kernel machine regression}, volume={42}, ISSN={["1098-2272"]}, url={https://doi.org/10.1002/gepi.22096}, DOI={10.1002/gepi.22096}, abstractNote={ABSTRACT}, number={1}, journal={GENETIC EPIDEMIOLOGY}, publisher={Wiley-Blackwell}, author={Maity, Arnab and Zhao, Jing and Sullivan, Patrick F. 
and Tzeng, Jung-Ying}, year={2018}, month={Feb}, pages={64–79} } @article{luo_maity_wu_smith_duan_li_tzeng_2018, title={On the substructure controls in rare variant analysis: Principal components or variance components?}, volume={42}, ISSN={["1098-2272"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85039155216&partnerID=MN8TOARS}, DOI={10.1002/gepi.22102}, abstractNote={Abstract}, number={3}, journal={GENETIC EPIDEMIOLOGY}, author={Luo, Yiwen and Maity, Arnab and Wu, Michael C. and Smith, Chris and Duan, Qing and Li, Yun and Tzeng, Jung-Ying}, year={2018}, month={Apr}, pages={276–287} } @article{wang_tzeng_wu_preisig_hsiao_2018, title={Reexamining Dis/Similarity-Based Tests for Rare-Variant Association with Case-Control Samples}, volume={209}, ISSN={["1943-2631"]}, DOI={10.1534/genetics.118.300769}, abstractNote={Abstract}, number={1}, journal={GENETICS}, author={Wang, Charlotte and Tzeng, Jung-Ying and Wu, Pei-Zhen and Preisig, Martin and Hsiao, Chuhsing Kate}, year={2018}, month={May}, pages={105–113} } @misc{kong_maity_hsu_tzeng_2018, title={Rejoinder to "A note on testing and estimation in marker-set association study using semiparametric quantile regression kernel machine"}, volume={74}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85032786553&partnerID=MN8TOARS}, DOI={10.1111/biom.12786}, abstractNote={Dehan Kong , Arnab Maity, Fang-Chi Hsu, and Jung-Ying Tzeng Department of Statistical Sciences, University of Toronto, Ontario, Canada Department of Statistics, North Carolina State University, North Carolina, U.S.A. Department of Biostatistical Sciences, Wake Forest University, North Carolina, U.S.A. Department of Statistics and Bioinformatics Research Center North Carolina State University, North Carolina, U.S.A. 
Department of Statistics, National Cheng-Kung University, Taiwan ∗email: kongdehan@utstat.toronto.edu}, number={2}, journal={BIOMETRICS}, author={Kong, Dehan and Maity, Arnab and Hsu, Fang-Chi and Tzeng, Jung-Ying}, year={2018}, month={Jun}, pages={767–768} } @article{davenport_maity_sullivan_tzeng_2017, title={A Powerful Test for SNP Effects on Multivariate Binary Outcomes Using Kernel Machine Regression}, volume={10}, ISSN={1867-1764 1867-1772}, url={http://dx.doi.org/10.1007/S12561-017-9189-9}, DOI={10.1007/S12561-017-9189-9}, abstractNote={Evaluating multiple binary outcomes is common in genetic studies of complex diseases. These outcomes are often correlated because they are collected from the same individual and they may share common marker effects. In this paper, we propose a procedure to test for effect of a single nucleotide polymorphism-set on multiple, possibly correlated, binary responses. We develop a score-based test using a non-parametric modeling framework that jointly models the global effect of the marker set. We account for the non-linear effects and potentially complicated interaction between markers using reproducing kernels. Our testing procedure only requires estimation under the null hypothesis and we use multivariate generalized estimating equations to estimate the model components to account for the correlation among the outcomes. We evaluate finite sample performance of our test via simulation study and demonstrate our methods using the Clinical Antipsychotic Trials of Intervention Effectiveness antibody study data and the CoLaus study data.}, number={1}, journal={Statistics in Biosciences}, publisher={Springer Science and Business Media LLC}, author={Davenport, Clemontina A. and Maity, Arnab and Sullivan, Patrick F. 
and Tzeng, Jung-Ying}, year={2017}, month={Mar}, pages={117–138} } @article{szatkiewicz_tzeng_magnusson_sullivan_2017, title={A new method for detecting associations with rare copy-number variants}, volume={27}, journal={European Neuropsychopharmacology}, author={Szatkiewicz, J. and Tzeng, J. Y. and Magnusson, P. and Sullivan, P.}, year={2017}, pages={S165–166} } @article{chang_tzeng_chen_2017, title={Fast Bayesian variable screenings for binary response regressions with small sample size}, volume={87}, ISSN={["1563-5163"]}, DOI={10.1080/00949655.2017.1341887}, abstractNote={ABSTRACT Screening procedures play an important role in data analysis, especially in high-throughput biological studies where the datasets consist of more covariates than independent subjects. In this article, a Bayesian screening procedure is introduced for the binary response models with logit and probit links. In contrast to many screening rules based on marginal information involving one or a few covariates, the proposed Bayesian procedure simultaneously models all covariates and uses closed-form screening statistics. Specifically, we use the posterior means of the regression coefficients as screening statistics; by imposing a generalized g-prior on the regression coefficients, we derive the analytical form of their posterior means and compute the screening statistics without Markov chain Monte Carlo implementation. We evaluate the utility of the proposed Bayesian screening method using simulations and real data analysis. When the sample size is small, the simulation results suggest improved performance with comparable computational cost.}, number={14}, journal={JOURNAL OF STATISTICAL COMPUTATION AND SIMULATION}, author={Chang, S. -M. and Tzeng, J. -Y. and Chen, R. 
-B.}, year={2017}, pages={2708–2723} } @article{luo_mccullough_tzeng_darrah_vengosh_maguire_maity_samuel-hodge_murphy_mendez_et al._2017, title={Maternal blood cadmium, lead and arsenic levels, nutrient combinations, and offspring birthweight}, volume={17}, journal={BMC Public Health}, author={Luo, Y. W. and McCullough, L. E. and Tzeng, J. Y. and Darrah, T. and Vengosh, A. and Maguire, R. L. and Maity, A. and Samuel-Hodge, C. and Murphy, S. K. and Mendez, M. A. and et al.}, year={2017} } @article{zhang_huang_xu_tzeng_conneely_guan_kang_li_2016, title={Across-Platform Imputation of DNA Methylation Levels Incorporating Nonlocal Information Using Penalized Functional Regression}, volume={40}, ISSN={["1098-2272"]}, DOI={10.1002/gepi.21969}, abstractNote={ABSTRACT}, number={4}, journal={GENETIC EPIDEMIOLOGY}, author={Zhang, Guosheng and Huang, Kuan-Chieh and Xu, Zheng and Tzeng, Jung-Ying and Conneely, Karen N. and Guan, Weihua and Kang, Jian and Li, Yun}, year={2016}, month={May}, pages={333–340} } @article{hung_lin_chen_wang_huang_tzeng_2016, title={Detection of Gene-Gene Interactions Using Multistage Sparse and Low-Rank Regression}, volume={72}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12374}, abstractNote={Summary}, number={1}, journal={BIOMETRICS}, author={Hung, Hung and Lin, Yu-Ting and Chen, Penweng and Wang, Chen-Chien and Huang, Su-Yun and Tzeng, Jung-Ying}, year={2016}, month={Mar}, pages={85–94} } @article{jeng_daye_lu_tzeng_2016, title={Rare variants association analysis in large-scale sequencing studies at the single locus level}, volume={12}, number={6}, journal={PLoS Computational Biology}, author={Jeng, X. J. and Daye, Z. J. and Lu, W. B. and Tzeng, J. 
Y.}, year={2016} } @article{kong_maity_hsu_tzeng_biometrics_2016, title={Testing and estimation in marker-set association study using semiparametric quantile regression kernel machine}, volume={72}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84978986071&partnerID=MN8TOARS}, DOI={10.1111/biom.12438}, abstractNote={Summary}, number={2}, journal={BIOMETRICS}, author={Kong, D. and Maity, A. and Hsu, F.C. and Tzeng, J.Y. and Biometrics}, year={2016}, month={Jun}, pages={364–371} } @article{marceau_lu_holloway_sale_worrall_williams_hsu_tzeng_2015, title={A Fast Multiple-Kernel Method With Applications to Detect Gene-Environment Interaction}, volume={39}, ISSN={["1098-2272"]}, DOI={10.1002/gepi.21909}, abstractNote={ABSTRACT}, number={6}, journal={GENETIC EPIDEMIOLOGY}, author={Marceau, Rachel and Lu, Wenbin and Holloway, Shannon and Sale, Michele M. and Worrall, Bradford B. and Williams, Stephen R. and Hsu, Fang-Chi and Tzeng, Jung-Ying}, year={2015}, month={Sep}, pages={456–468} } @article{tzeng_magnusson_sullivan_szatkiewicz_2015, title={A new method for detecting associations with rare copy-number variants}, volume={11}, number={10}, journal={PLoS Genetics}, author={Tzeng, J. Y. and Magnusson, P. K. E. and Sullivan, P. F. and Szatkiewicz, J. P.}, year={2015} } @article{neely_bondell_tzeng_2015, title={A penalized likelihood approach for investigating gene-drug interactions in pharmacogenetic studies}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12259}, abstractNote={Summary}, number={2}, journal={BIOMETRICS}, author={Neely, Megan L. and Bondell, Howard D. and Tzeng, Jung-Ying}, year={2015}, month={Jun}, pages={529–537} } @article{zhao_marceau_zhang_tzeng_2015, title={Assessing gene-environment interactions for common and rare variants with binary traits using gene-trait similarity regression}, volume={199}, number={3}, journal={Genetics}, author={Zhao, G. L. and Marceau, R. and Zhang, D. W. and Tzeng, J. 
Y.},
  year = {2015},
  pages = {695--710},
}
% NOTE(review): the export carried only a start page ("695-") for the record
% closed above; the end page 710 was filled in by hand. Confirm against the
% publisher record for Genetics 199(3).

@article{wang_maity_luo_neely_tzeng_2015,
  title = {Complete Effect-Profile Assessment in Association Studies With Multiple Genetic and Multiple Environmental Factors},
  volume = {39},
  issn = {1098-2272},
  url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84921048438&partnerID=MN8TOARS},
  doi = {10.1002/gepi.21877},
  number = {2},
  journal = {GENETIC EPIDEMIOLOGY},
  publisher = {Wiley-Blackwell},
  author = {Wang, Zhi and Maity, Arnab and Luo, Yiwen and Neely, Megan L. and Tzeng, Jung-Ying},
  year = {2015},
  month = feb,
  pages = {122--133},
}

@article{maier_moser_chen_ripke_coryell_potash_scheftner_shi_weissman_hultman_et al._2015,
  title = {Joint Analysis of Psychiatric Disorders Increases Accuracy of Risk Prediction for Schizophrenia, Bipolar Disorder, and Major Depressive Disorder},
  volume = {96},
  issn = {0002-9297},
  url = {http://dx.doi.org/10.1016/J.AJHG.2014.12.006},
  doi = {10.1016/J.AJHG.2014.12.006},
  abstractNote = {Genetic risk prediction has several potential applications in medical research and clinical practice and could be used, for example, to stratify a heterogeneous population of patients by their predicted genetic risk. However, for polygenic traits, such as psychiatric disorders, the accuracy of risk prediction is low. Here we use a multivariate linear mixed model and apply multi-trait genomic best linear unbiased prediction for genetic risk prediction. This method exploits correlations between disorders and simultaneously evaluates individual risk for each disorder. We show that the multivariate approach significantly increases the prediction accuracy for schizophrenia, bipolar disorder, and major depressive disorder in the discovery as well as in independent validation datasets. By grouping SNPs based on genome annotation and fitting multiple random effects, we show that the prediction accuracy could be further improved.
The gain in prediction accuracy of the multivariate approach is equivalent to an increase in sample size of 34% for schizophrenia, 68% for bipolar disorder, and 76% for major depressive disorders using single trait models. Because our approach can be readily applied to any number of GWAS datasets of correlated traits, it is a flexible and powerful tool to maximize prediction accuracy. With current sample size, risk predictors are not useful in a clinical setting but already are a valuable research tool, for example in experimental designs comparing cases with high and low polygenic risk.},
  number = {2},
  journal = {The American Journal of Human Genetics},
  publisher = {Elsevier BV},
  author = {Maier, Robert and Moser, Gerhard and Chen, Guo-Bo and Ripke, Stephan and Coryell, William and Potash, James B. and Scheftner, William A. and Shi, Jianxin and Weissman, Myrna M. and Hultman, Christina M. and others},
  year = {2015},
  month = feb,
  pages = {283--294},
}

@article{wang_maity_hsiao_voora_kaddurah-daouk_tzeng_2015,
  title = {Module-based association analysis for omics data with network structure},
  volume = {10},
  number = {3},
  journal = {PLoS One},
  author = {Wang, Z. and Maity, A. and Hsiao, C. K. and Voora, D. and Kaddurah-Daouk, R. and Tzeng, J. Y.},
  year = {2015},
}

@article{hu_sun_tzeng_perou_2015,
  title = {Proper Use of Allele-Specific Expression Improves Statistical Power for {cis-eQTL} Mapping with {RNA-Seq} Data},
  volume = {110},
  issn = {1537-274X},
  doi = {10.1080/01621459.2015.1038449},
  abstractNote = {Studies of expression quantitative trait loci (eQTLs) offer insight into the molecular mechanisms of loci that were found to be associated with complex diseases and the mechanisms can be classified into cis- and trans-acting regulation. At present, high-throughput RNA sequencing (RNA-seq) is rapidly replacing expression microarrays to assess gene expression abundance.
Unlike microarrays that only measure the total expression of each gene, RNA-seq also provides information on allele-specific expression (ASE), which can be used to distinguish cis-eQTLs from trans-eQTLs and, more importantly, enhance cis-eQTL mapping. However, assessing the cis-effect of a candidate eQTL on a gene requires knowledge of the haplotypes connecting the candidate eQTL and the gene, which can not be inferred with certainty. The existing two-stage approach that first phases the candidate eQTL against the gene and then treats the inferred phase as observed in the association analysis tends to attenuate the estimated cis-effect and reduce the power for detecting a cis-eQTL. In this article, we provide a maximum-likelihood framework for cis-eQTL mapping with RNA-seq data. Our approach integrates the inference of haplotypes and the association analysis into a single stage, and is thus unbiased and statistically powerful. We also develop a pipeline for performing a comprehensive scan of all local eQTLs for all genes in the genome by controlling for false discovery rate, and implement the methods in a computationally efficient software program. The advantages of the proposed methods over the existing ones are demonstrated through realistic simulation studies and an application to empirical breast cancer data from The Cancer Genome Atlas project. 
Supplementary materials for this article are available online.},
  number = {511},
  journal = {JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author = {Hu, Yi-Juan and Sun, Wei and Tzeng, Jung-Ying and Perou, Charles M.},
  year = {2015},
  month = sep,
  pages = {962--974},
}

@article{tzeng_2015,
  title = {Psychiatric genome-wide association study analyses implicate neuronal, immune and histone pathways},
  volume = {18},
  issn = {1097-6256 1546-1726},
  url = {http://dx.doi.org/10.1038/NN.3922},
  doi = {10.1038/NN.3922},
  abstractNote = {Better analytical methods are needed to extract biological meaning from genome-wide association studies (GWAS) of psychiatric disorders. Here the authors take GWAS data from over 60,000 subjects, including patients with schizophrenia, bipolar disorder and major depression, and identify common etiological pathways shared amongst them. Genome-wide association studies (GWAS) of psychiatric disorders have identified multiple genetic associations with such disorders, but better methods are needed to derive the underlying biological mechanisms that these signals indicate. We sought to identify biological pathways in GWAS data from over 60,000 participants from the Psychiatric Genomics Consortium. We developed an analysis framework to rank pathways that requires only summary statistics. We combined this score across disorders to find common pathways across three adult psychiatric disorders: schizophrenia, major depression and bipolar disorder. Histone methylation processes showed the strongest association, and we also found statistically significant evidence for associations with multiple immune and neuronal signaling pathways and with the postsynaptic density. Our study indicates that risk variants for psychiatric disorders aggregate in particular biological pathways and that these pathways are frequently shared between disorders.
Our results confirm known mechanisms and suggest several novel insights into the etiology of psychiatric disorders.},
  number = {2},
  journal = {Nature Neuroscience},
  publisher = {Springer Science and Business Media LLC},
  author = {Tzeng, Jung-Ying},
  year = {2015},
  month = jan,
  pages = {199--209},
}

% NOTE(review): the record closed above and the record below carry the same
% title, journal, volume, issue and pages but different author lists (one name
% vs. ten names plus "and others"). Presumably one work exported twice; confirm
% which author list is intended before citing either.
@article{o'dushlaine_rossin_lee_duncan_parikshak_newhouse_ripke_neale_purcell_posthuma_et al._2015,
  title = {Psychiatric genome-wide association study analyses implicate neuronal, immune and histone pathways},
  volume = {18},
  number = {2},
  journal = {Nature Neuroscience},
  author = {O'Dushlaine, C. and Rossin, L. and Lee, P. H. and Duncan, L. and Parikshak, N. N. and Newhouse, S. and Ripke, S. and Neale, B. M. and Purcell, S. M. and Posthuma, D. and others},
  year = {2015},
  pages = {199--209},
}

@article{wang_epstein_tzeng_2014,
  title = {Analysis of Gene-Gene Interactions Using Gene-Trait Similarity Regression},
  volume = {78},
  issn = {1423-0062},
  doi = {10.1159/000360161},
  abstractNote = {Objective: Gene-gene interactions (G×G) are important to study because of their extensiveness in biological systems and their potential in explaining missing heritability of complex traits. In this work, we propose a new similarity-based test to assess G×G at the gene level, which permits the study of epistasis at biologically functional units with amplified interaction signals. Methods: Under the framework of gene-trait similarity regression (SimReg), we propose a gene-based test for detecting G×G. SimReg uses a regression model to correlate trait similarity with genotypic similarity across a gene. Unlike existing gene-level methods based on leading principal components (PCs), SimReg summarizes all information on genotypic variation within a gene and can be used to assess the joint/interactive effects of two genes as well as the effect of one gene conditional on another.
Results: Using simulations and a real data application to the Warfarin study, we show that the SimReg G×G tests have satisfactory power and robustness under different genetic architecture when compared to existing gene-based interaction tests such as PC analysis or partial least squares. A genome-wide association study with approx. 20,000 genes may be completed on a parallel computing system in 2 weeks.},
  number = {1},
  journal = {HUMAN HEREDITY},
  author = {Wang, Xin and Epstein, Michael P. and Tzeng, Jung-Ying},
  year = {2014},
  pages = {17--26},
}

@article{tzeng_lu_hsu_2014,
  title = {Gene-Level Pharmacogenetic Analysis on Survival Outcomes Using Gene-Trait Similarity Regression},
  volume = {8},
  issn = {1932-6157},
  doi = {10.1214/14-aoas735},
  abstractNote = {Gene/pathway-based methods are drawing significant attention due to their usefulness in detecting rare and common variants that affect disease susceptibility. The biological mechanism of drug responses indicates that a gene-based analysis has even greater potential in pharmacogenetics. Motivated by a study from the Vitamin Intervention for Stroke Prevention (VISP) trial, we develop a gene-trait similarity regression for survival analysis to assess the effect of a gene or pathway on time-to-event outcomes. The similarity regression has a general framework that covers a range of survival models, such as the proportional hazards model and the proportional odds model. The inference procedure developed under the proportional hazards model is robust against model misspecification. We derive the equivalence between the similarity survival regression and a random effects model, which further unifies the current variance-component based methods. We demonstrate the effectiveness of the proposed method through simulation studies. In addition, we apply the method to the VISP trial data to identify the genes that exhibit an association with the risk of a recurrent stroke.
TCN2 gene was found to be associated with the recurrent stroke risk in the low-dose arm. This gene may impact recurrent stroke risk in response to cofactor therapy.},
  number = {2},
  journal = {ANNALS OF APPLIED STATISTICS},
  author = {Tzeng, Jung-Ying and Lu, Wenbin and Hsu, Fang-Chi},
  year = {2014},
  month = jun,
  pages = {1232--1255},
}

@article{wright_sullivan_brooks_zou_sun_xia_madar_jansen_chung_zhou_et al._2014,
  title = {Heritability and genomics of gene expression in peripheral blood},
  volume = {46},
  issn = {1061-4036 1546-1718},
  url = {http://dx.doi.org/10.1038/NG.2951},
  doi = {10.1038/NG.2951},
  abstractNote = {Fred Wright, Patrick Sullivan and colleagues present the results of a large expression QTL study of peripheral blood using a classic twin design with follow-up replication in independent samples. Their results enable a more precise estimate of the heritability of gene expression and provide a useful resource for exploring the genetic control of transcription. We assessed gene expression profiles in 2,752 twins, using a classic twin design to quantify expression heritability and quantitative trait loci (eQTLs) in peripheral blood. The most highly heritable genes (∼777) were grouped into distinct expression clusters, enriched in gene-poor regions, associated with specific gene function or ontology classes, and strongly associated with disease designation. The design enabled a comparison of twin-based heritability to estimates based on dizygotic identity-by-descent sharing and distant genetic relatedness. Consideration of sampling variation suggests that previous heritability estimates have been upwardly biased. Genotyping of 2,494 twins enabled powerful identification of eQTLs, which we further examined in a replication set of 1,895 unrelated subjects. A large number of non-redundant local eQTLs (6,756) met replication criteria, whereas a relatively small number of distant eQTLs (165) met quality control and replication standards.
Our results provide a new resource toward understanding the genetic control of transcription.},
  number = {5},
  journal = {Nature Genetics},
  publisher = {Springer Science and Business Media LLC},
  author = {Wright, Fred A and Sullivan, Patrick F and Brooks, Andrew I and Zou, Fei and Sun, Wei and Xia, Kai and Madar, Vered and Jansen, Rick and Chung, Wonil and Zhou, Yi-Hui and others},
  year = {2014},
  month = apr,
  pages = {430--437},
}

% The export contained a second, sparser record under exactly the same citation
% key as the entry closed above. A repeated key is an error for BibTeX/Biber, so
% that record is kept below only as an inert comment block (data preserved).
@comment{
  Disabled duplicate record:
  article{wright_sullivan_brooks_zou_sun_xia_madar_jansen_chung_zhou_et al._2014,
    title = {Heritability and genomics of gene expression in peripheral blood},
    volume = {46},
    number = {5},
    journal = {Nature Genetics},
    author = {Wright, F. A. and Sullivan, P. F. and Brooks, A. I. and Zou, F. and Sun, W. and Xia, K. and Madar, V. and Jansen, R. and Chung, W. I. and Zhou, Y. H. and et al.},
    year = {2014},
    pages = {430–437}
  }
}

@article{hu_tzeng_2014,
  title = {Integrative gene set analysis of multi-platform data with sample heterogeneity},
  volume = {30},
  issn = {1460-2059},
  doi = {10.1093/bioinformatics/btu060},
  number = {11},
  journal = {BIOINFORMATICS},
  author = {Hu, Jun and Tzeng, Jung-Ying},
  year = {2014},
  month = jun,
  pages = {1501--1507},
}

@article{wang_zhang_tzeng_2014,
  title = {Pathway-Guided Identification of Gene-Gene Interactions},
  volume = {78},
  issn = {1469-1809},
  doi = {10.1111/ahg.12080},
  abstractNote = {Assessing gene‐gene interactions (GxG) at the gene level can permit examination of epistasis at biologically functional units with amplified interaction signals from marker‐marker pairs. While current gene‐based GxG methods tend to be designed for two or a few genes, for complex traits, it is often common to have a list of many candidate genes to explore GxG. We propose a regression model with pathway‐guided regularization for detecting interactions among genes.
Specifically, we use the principal components to summarize the SNP‐SNP interactions between a gene pair, and use an L1 penalty that incorporates adaptive weights based on biological guidance and trait supervision to identify important main and interaction effects. Our approach aims to combine biological guidance and data adaptiveness, and yields credible findings that may be likely to shed insights in order to formulate biological hypotheses for further molecular studies. The proposed approach can be used to explore the GxG with a list of many candidate genes and is applicable even when sample size is smaller than the number of predictors studied. We evaluate the utility of the proposed method using simulation and real data analysis. The results suggest improved performance over methods not utilizing pathway and trait guidance.},
  number = {6},
  journal = {ANNALS OF HUMAN GENETICS},
  author = {Wang, Xin and Zhang, Daowen and Tzeng, Jung-Ying},
  year = {2014},
  month = nov,
  pages = {478--491},
}

@article{sullivan_daly_ripke_lewis_lin_wray_neale_levinson_breen_byrne_et al._2013,
  title = {A mega-analysis of genome-wide association studies for major depressive disorder},
  volume = {18},
  number = {4},
  journal = {Molecular Psychiatry},
  author = {Sullivan, P. F. and Daly, M. J. and Ripke, S. and Lewis, C. M. and Lin, D. Y. and Wray, N. R. and Neale, B. and Levinson, D. F. and Breen, G. and Byrne, E. M. and others},
  year = {2013},
  pages = {497--511},
}

@article{tzeng_2013,
  title = {Genetic relationship between five psychiatric disorders estimated from genome-wide {SNPs}},
  volume = {45},
  issn = {1061-4036 1546-1718},
  url = {http://dx.doi.org/10.1038/NG.2711},
  doi = {10.1038/NG.2711},
  abstractNote = {Naomi Wray and colleagues report an analysis of genome-wide association data sets from the Psychiatric Genomics Consortium for five psychiatric disorders.
They find that common variation explains 17–29% of the variance in liability and provide further support for a shared genetic etiology for these related psychiatric disorders. Most psychiatric disorders are moderately to highly heritable. The degree to which genetic variation is unique to individual disorders or shared across disorders is unclear. To examine shared genetic etiology, we use genome-wide genotype data from the Psychiatric Genomics Consortium (PGC) for cases and controls in schizophrenia, bipolar disorder, major depressive disorder, autism spectrum disorders (ASD) and attention-deficit/hyperactivity disorder (ADHD). We apply univariate and bivariate methods for the estimation of genetic variation within and covariation between disorders. SNPs explained 17–29% of the variance in liability. The genetic correlation calculated using common SNPs was high between schizophrenia and bipolar disorder (0.68 ± 0.04 s.e.), moderate between schizophrenia and major depressive disorder (0.43 ± 0.06 s.e.), bipolar disorder and major depressive disorder (0.47 ± 0.06 s.e.), and ADHD and major depressive disorder (0.32 ± 0.07 s.e.), low between schizophrenia and ASD (0.16 ± 0.06 s.e.) and non-significant for other pairs of disorders as well as between psychiatric disorders and the negative control of Crohn's disease. This empirical evidence of shared genetic etiology for psychiatric disorders can inform nosology and encourages the investigation of common pathophysiologies for related disorders.},
  number = {9},
  journal = {Nature Genetics},
  publisher = {Springer Science and Business Media LLC},
  author = {Tzeng, Jung-Ying},
  year = {2013},
  month = aug,
  pages = {984--994},
}

% NOTE(review): the record closed above and the record below carry the same
% title, journal, volume and issue but different author lists (one name vs. a
% multi-author list). Presumably one work exported twice; confirm which author
% list is intended before citing either.
@article{lee_ripke_neale_faraone_purcell_perlis_mowry_thapar_goddard_witte_et al._2013,
  title = {Genetic relationship between five psychiatric disorders estimated from genome-wide {SNPs}},
  volume = {45},
  number = {9},
  journal = {Nature Genetics},
  author = {Lee, S. H. and Ripke, S. and Neale, B. M. and Faraone, S. V.
and Purcell, S. M. and Perlis, R. H. and Mowry, B. J. and Thapar, A. and Goddard, M. E. and Witte, J. S. and others},
  year = {2013},
  pages = {984--994},
}
% The export carried only a start page ("984-") for the record closed above;
% the end page matches the fuller record of the same article in this file
% (key tzeng_2013, same title / volume 45 / number 9).

@article{maity_sullivan_tzeng_2012,
  title = {Multivariate Phenotype Association Analysis by Marker-Set Kernel Machine Regression},
  volume = {36},
  issn = {1098-2272},
  url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84867539542&partnerID=MN8TOARS},
  doi = {10.1002/gepi.21663},
  abstractNote = {Genetic studies of complex diseases often collect multiple phenotypes relevant to the disorders. As these phenotypes can be correlated and share common genetic mechanisms, jointly analyzing these traits may bring more power to detect genes influencing individual or multiple phenotypes. Given the advancement brought by the multivariate phenotype approaches and the multimarker kernel machine regression, we construct a multivariate regression based on kernel machine to facilitate the joint evaluation of multimarker effects on multiple phenotypes. The kernel machine serves as a powerful dimension‐reduction tool to capture complex effects among markers. The multivariate framework incorporates the potentially correlated multidimensional phenotypic information and accommodates common or different environmental covariates for each trait. We derive the multivariate kernel machine test based on a score‐like statistic, and conduct simulations to evaluate the validity and efficacy of the method. We also study the performance of the commonly adapted strategies for kernel machine analysis on multiple phenotypes, including the multiple univariate kernel machine tests with original phenotypes or with their principal components. Our results suggest that none of these approaches has the uniformly best power, and the optimal test depends on the magnitude of the phenotype correlation and the effect patterns.
However, the multivariate test retains to be a reasonable approach when the multiple phenotypes have none or mild correlations, and gives the best power once the correlation becomes stronger or when there exist genes that affect more than one phenotype. We illustrate the utility of the multivariate kernel machine method through the Clinical Antipsychotic Trials of Intervention Effectiveness antibody study.},
  number = {7},
  journal = {GENETIC EPIDEMIOLOGY},
  publisher = {Wiley-Blackwell},
  author = {Maity, Arnab and Sullivan, Patrick F. and Tzeng, Jung-Ying},
  year = {2012},
  month = nov,
  pages = {686--695},
}
% NOTE(review): the middle initial of the second author above was exported as
% "E."; it is written "F." here to agree with the other records of this
% co-author in the file ("Sullivan, Patrick F."). Confirm against the article.

@article{lee_tzeng_huang_hsiao_2011,
  title = {Combining an evolution-guided clustering algorithm and haplotype-based {LRT} in family association studies},
  volume = {12},
  journal = {BMC Genetics},
  author = {Lee, M. H. and Tzeng, J. Y. and Huang, S. Y. and Hsiao, C. K.},
  year = {2011},
}

@article{tzeng_zhang_pongpanich_smith_mccarthy_sale_worrall_hsu_thomas_sullivan_2011,
  title = {Studying Gene and Gene-Environment Effects of Uncommon and Common Variants on Continuous Traits: A Marker-Set Approach Using Gene-Trait Similarity Regression},
  volume = {89},
  issn = {1537-6605},
  doi = {10.1016/j.ajhg.2011.07.007},
  abstractNote = {Genomic association analyses of complex traits demand statistical tools that are capable of detecting small effects of common and rare variants and modeling complex interaction effects and yet are computationally feasible. In this work, we introduce a similarity-based regression method for assessing the main genetic and interaction effects of a group of markers on quantitative traits. The method uses genetic similarity to aggregate information from multiple polymorphic sites and integrates adaptive weights that depend on allele frequencies to accomodate common and uncommon variants.
Collapsing information at the similarity level instead of the genotype level avoids canceling signals that have the opposite etiological effects and is applicable to any class of genetic variants without the need for dichotomizing the allele types. To assess gene-trait associations, we regress trait similarities for pairs of unrelated individuals on their genetic similarities and assess association by using a score test whose limiting distribution is derived in this work. The proposed regression framework allows for covariates, has the capacity to model both main and interaction effects, can be applied to a mixture of different polymorphism types, and is computationally efficient. These features make it an ideal tool for evaluating associations between phenotype and marker sets defined by linkage disequilibrium (LD) blocks, genes, or pathways in whole-genome analysis.},
  number = {2},
  journal = {AMERICAN JOURNAL OF HUMAN GENETICS},
  author = {Tzeng, Jung-Ying and Zhang, Daowen and Pongpanich, Monnat and Smith, Chris and McCarthy, Mark I. and Sale, Michele M. and Worrall, Bradford B. and Hsu, Fang-Chi and Thomas, Duncan C. and Sullivan, Patrick F.},
  year = {2011},
  month = aug,
  pages = {277--288},
}

@article{pongpanich_sullivan_tzeng_2010,
  title = {A quality control algorithm for filtering {SNPs} in genome-wide association studies},
  volume = {26},
  issn = {1460-2059},
  doi = {10.1093/bioinformatics/btq272},
  number = {14},
  journal = {BIOINFORMATICS},
  author = {Pongpanich, Monnat and Sullivan, Patrick F. and Tzeng, Jung-Ying},
  year = {2010},
  month = jul,
  pages = {1731--1737},
}

@article{koehler_bondell_tzeng_2010,
  title = {Evaluating Haplotype Effects in Case-Control Studies via Penalized-Likelihood Approaches: Prospective or Retrospective Analysis?},
  volume = {34},
  issn = {1098-2272},
  doi = {10.1002/gepi.20545},
  number = {8},
  journal = {GENETIC EPIDEMIOLOGY},
  author = {Koehler, Megan L. and Bondell, Howard D.
and Tzeng, Jung-Ying},
  year = {2010},
  month = dec,
  pages = {892--911},
}

@article{tzeng_lu_farmen_liu_sullivan_2010,
  title = {Haplotype-Based Pharmacogenetic Analysis for Longitudinal Quantitative Traits in the Presence of Dropout},
  volume = {20},
  issn = {1520-5711},
  doi = {10.1080/10543400903572787},
  abstractNote = {We propose a variety of methods based on the generalized estimation equations to address the issues encountered in haplotype-based pharmacogenetic analysis, including analysis of longitudinal data with outcome-dependent dropouts, and evaluation of the high-dimensional haplotype and haplotype–drug interaction effects in an overall manner. We use the inverse probability weights to handle the outcome-dependent dropouts under the missing-at-random assumption, and incorporate the weighted L 1 penalty to select important main and interaction effects with high dimensionality. The proposed methods are easy to implement, computationally efficient, and provide an optimal balance between false positives and false negatives in detecting genetic effects.},
  number = {2},
  journal = {JOURNAL OF BIOPHARMACEUTICAL STATISTICS},
  author = {Tzeng, Jung-Ying and Lu, Wenbin and Farmen, Mark W. and Liu, Youfang and Sullivan, Patrick F.},
  year = {2010},
  pages = {334--350},
}

@article{liu_li_satten_allen_tzeng_2009,
  title = {A Regression-based Association Test for Case-control Studies that Uses Inferred Ancestral Haplotype Similarity},
  volume = {73},
  issn = {1469-1809},
  doi = {10.1111/j.1469-1809.2009.00536.x},
  journal = {ANNALS OF HUMAN GENETICS},
  author = {Liu, Youfang and Li, Yi-Ju and Satten, Glen A. and Allen, Andrew S.
and Tzeng, Jung-Ying},
  year = {2009},
  month = sep,
  pages = {520--526},
}

@article{tzeng_bondell_2009,
  title = {A comprehensive approach to haplotype-specific analysis by penalized likelihood},
  volume = {18},
  issn = {1018-4813 1476-5438},
  url = {http://dx.doi.org/10.1038/ejhg.2009.118},
  doi = {10.1038/ejhg.2009.118},
  abstractNote = {Haplotypes can hold key information to understand the role of candidate genes in disease etiology. However, standard haplotype analysis has yet been able to fully reveal the information retained by haplotypes. In most analysis, haplotype inference focuses on relative effects compared with an arbitrarily chosen baseline haplotype. It does not depict the effect structure unless an additional inference procedure is used in a secondary post hoc analysis, and such analysis tends to be lack of power. In this study, we propose a penalized regression approach to systematically evaluate the pattern and structure of the haplotype effects. By specifying an L1 penalty on the pairwise difference of the haplotype effects, we present a model-based haplotype analysis to detect and to characterize the haplotypic association signals. The proposed method avoids the need to choose a baseline haplotype; it simultaneously carries out the effect estimation and effect comparison of all haplotypes, and outputs the haplotype group structure based on their effect size. Finally, our penalty weights are theoretically designed to balance the likelihood and the penalty term in an appropriate manner. The proposed method can be used as a tool to comprehend candidate regions identified from a genome or chromosomal scan.
Simulation studies reveal the better abilities of the proposed method to identify the haplotype effect structure compared with the traditional haplotype association methods, demonstrating the informativeness and powerfulness of the proposed method.},
  number = {1},
  journal = {European Journal of Human Genetics},
  publisher = {Springer Science and Business Media LLC},
  author = {Tzeng, Jung-Ying and Bondell, Howard D},
  year = {2009},
  month = jul,
  pages = {95--103},
}

@article{sullivan_lin_tzeng_van den oord_perkins_stroup_wagner_lee_wright_zou_et al._2009,
  title = {Erratum: Genomewide association for schizophrenia in the {CATIE} study: results of stage 1},
  volume = {14},
  issn = {1359-4184 1476-5578},
  url = {http://dx.doi.org/10.1038/mp.2008.74},
  doi = {10.1038/mp.2008.74},
  abstractNote = {Correction to: Molecular Psychiatry (2008) 13, 570–584; doi: 10.1038/mp.2008.25 For technical reasons, Supplementary Tables 2, 3 and 4 were not published online. They now appear online at www.nature.com/mp.},
  number = {12},
  journal = {Molecular Psychiatry},
  publisher = {Springer Science and Business Media LLC},
  author = {Sullivan, P F and Lin, D and Tzeng, J-Y and van den Oord, E and Perkins, D and Stroup, T S and Wagner, M and Lee, S and Wright, F A and Zou, F and others},
  year = {2009},
  month = nov,
  pages = {1144},
}

@article{tzeng_zhang_chang_thomas_davidian_2009,
  title = {Gene-Trait Similarity Regression for Multimarker-Based Association Analysis},
  volume = {65},
  issn = {0006-341X},
  url = {http://dx.doi.org/10.1111/j.1541-0420.2008.01176.x},
  doi = {10.1111/j.1541-0420.2008.01176.x},
  abstractNote = {We propose a similarity‐based regression method to detect associations between traits and multimarker genotypes. The model regresses similarity in traits for pairs of “unrelated” individuals on their haplotype similarities, and detects the significance by a score test for which the limiting distribution is derived.
The proposed method allows for covariates, uses phase‐independent similarity measures to bypass the needs to impute phase information, and is applicable to traits of general types (e.g., quantitative and qualitative traits). We also show that the gene‐trait similarity regression is closely connected with random effects haplotype analysis, although commonly they are considered as separate modeling tools. This connection unites the classic haplotype sharing methods with the variance‐component approaches, which enables direct derivation of analytical properties of the sharing statistics even when the similarity regression model becomes analytically challenging.},
  number = {3},
  journal = {Biometrics},
  publisher = {Wiley},
  author = {Tzeng, Jung-Ying and Zhang, Daowen and Chang, Sheng-Mao and Thomas, Duncan C. and Davidian, Marie},
  year = {2009},
  month = feb,
  pages = {822--832},
}

% The next record describes a journal paper (it has journal, volume, number and
% pages), so it is typed as an article; under the exported "misc" type standard
% styles ignore the journal field.
% NOTE(review): it shares title, journal, volume, issue and pages with the
% record that follows it (keyed ..._2008, year 2008); presumably the same work
% exported twice with different years - confirm which one to cite.
@article{sullivan_geus_willemsen_james_smit_zandbelt_arolt_baune_blackwood_cichon_et al._2009,
  title = {Genome-wide association for major depressive disorder: A possible role for the presynaptic protein piccolo},
  volume = {14},
  number = {4},
  journal = {Molecular Psychiatry},
  author = {Sullivan, P. F. and Geus, E. J. C. and Willemsen, G. and James, M. R. and Smit, J. H. and Zandbelt, T. and Arolt, V. and Baune, B. T. and Blackwood, D. and Cichon, S. and others},
  year = {2009},
  pages = {359--375},
}

@article{sullivan_de geus_willemsen_james_smit_zandbelt_arolt_baune_blackwood_cichon_et al._2008,
  title = {Genome-wide association for major depressive disorder: a possible role for the presynaptic protein piccolo},
  volume = {14},
  issn = {1359-4184 1476-5578},
  url = {http://dx.doi.org/10.1038/mp.2008.125},
  doi = {10.1038/mp.2008.125},
  abstractNote = {Major depressive disorder (MDD) is a common complex trait with enormous public health significance.
As part of the Genetic Association Information Network initiative of the US Foundation for the National Institutes of Health, we conducted a genome-wide association study of 435 291 single nucleotide polymorphisms (SNPs) genotyped in 1738 MDD cases and 1802 controls selected to be at low liability for MDD. Of the top 200, 11 signals localized to a 167 kb region overlapping the gene piccolo (PCLO, whose protein product localizes to the cytomatrix of the presynaptic active zone and is important in monoaminergic neurotransmission in the brain) with P-values of 7.7 × 10−7 for rs2715148 and 1.2 × 10−6 for rs2522833. We undertook replication of SNPs in this region in five independent samples (6079 MDD independent cases and 5893 controls) but no SNP exceeded the replication significance threshold when all replication samples were analyzed together. However, there was heterogeneity in the replication samples, and secondary analysis of the original sample with the sample of greatest similarity yielded P=6.4 × 10−8 for the nonsynonymous SNP rs2522833 that gives rise to a serine to alanine substitution near a C2 calcium-binding domain of the PCLO protein. With the integrated replication effort, we present a specific hypothesis for further studies.},
  number = {4},
  journal = {Molecular Psychiatry},
  publisher = {Springer Science and Business Media LLC},
  author = {Sullivan, P F and de Geus, E J C and Willemsen, G and James, M R and Smit, J H and Zandbelt, T and Arolt, V and Baune, B T and Blackwood, D and Cichon, S and others},
  year = {2008},
  month = dec,
  pages = {359--375},
}

@article{sullivan_lin_tzeng_van den oord_perkins_stroup_wagner_lee_wright_zou_et al._2008,
  title = {Genomewide association for schizophrenia in the {CATIE} study: results of stage 1},
  volume = {13},
  issn = {1359-4184 1476-5578},
  url = {http://dx.doi.org/10.1038/mp.2008.25},
  doi = {10.1038/mp.2008.25},
  abstractNote = {Little is known for certain about the genetics of schizophrenia.
The advent of genomewide association has been widely anticipated as a promising means to identify reproducible DNA sequence variation associated with this important and debilitating disorder. A total of 738 cases with DSM-IV schizophrenia (all participants in the CATIE study) and 733 group-matched controls were genotyped for 492 900 single-nucleotide polymorphisms (SNPs) using the Affymetrix 500K two-chip genotyping platform plus a custom 164K fill-in chip. Following multiple quality control steps for both subjects and SNPs, logistic regression analyses were used to assess the evidence for association of all SNPs with schizophrenia. We identified a number of promising SNPs for follow-up studies, although no SNP or multimarker combination of SNPs achieved genomewide statistical significance. Although a few signals coincided with genomic regions previously implicated in schizophrenia, chance could not be excluded. These data do not provide evidence for the involvement of any genomic region with schizophrenia detectable with moderate sample size. However, a planned genomewide association study for response phenotypes and inclusion of individual phenotype and genotype data from this study in meta-analyses hold promise for eventual identification of susceptibility and protective variants.}, number={6}, journal={Molecular Psychiatry}, publisher={Springer Science and Business Media LLC}, author={Sullivan, P F and Lin, D and Tzeng, J-Y and van den Oord, E and Perkins, D and Stroup, T S and Wagner, M and Lee, S and Wright, F A and Zou, F and et al.}, year={2008}, month={Mar}, pages={570–584} } @article{tzeng_zhang_2007, title={Haplotype-based association analysis via variance-components score test}, volume={81}, ISSN={["0002-9297"]}, DOI={10.1086/521558}, abstractNote={Haplotypes provide a more informative format of polymorphisms for genetic association analysis than do individual single-nucleotide polymorphisms. 
However, the practical efficacy of haplotype-based association analysis is challenged by a trade-off between the benefits of modeling abundant variation and the cost of the extra degrees of freedom. To reduce the degrees of freedom, several strategies have been considered in the literature. They include (1) clustering evolutionarily close haplotypes, (2) modeling the level of haplotype sharing, and (3) smoothing haplotype effects by introducing a correlation structure for haplotype effects and studying the variance components (VC) for association. Although the first two strategies enjoy a fair extent of power gain, empirical evidence showed that VC methods may exhibit only similar or less power than the standard haplotype regression method, even in cases of many haplotypes. In this study, we report possible reasons that cause the underpowered phenomenon and show how the power of the VC strategy can be improved. We construct a score test based on the restricted maximum likelihood or the marginal likelihood function of the VC and identify its nontypical limiting distribution. Through simulation, we demonstrate the validity of the test and investigate the power performance of the VC approach and that of the standard haplotype regression approach. With suitable choices for the correlation structure, the proposed method can be directly applied to unphased genotypic data. Our method is applicable to a wide-ranging class of models and is computationally efficient and easy to implement. 
The broad coverage and the fast and easy implementation of this method make the VC strategy an effective tool for haplotype analysis, even in modern genomewide association studies.}, number={5}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Tzeng, Jung-Ying and Zhang, Daowen}, year={2007}, month={Nov}, pages={927–938} } @article{wen_tzeng_kao_hsiao_2006, title={A two-stage design for multiple testing in large-scale association studies}, volume={51}, ISSN={["1435-232X"]}, DOI={10.1007/s10038-006-0393-6}, abstractNote={Modern association studies often involve a large number of markers and hence may encounter the problem of testing multiple hypotheses. Traditional procedures are usually over-conservative and with low power to detect mild genetic effects. From the design perspective, we propose a two-stage selection procedure to address this concern. Our main principle is to reduce the total number of tests by removing clearly unassociated markers in the first-stage test. Next, conditional on the findings of the first stage, which uses a less stringent nominal level, a more conservative test is conducted in the second stage using the augmented data and the data from the first stage. Previous studies have suggested using independent samples to avoid inflated errors. However, we found that, after accounting for the dependence between these two samples, the true discovery rate increases substantially. In addition, the cost of genotyping can be greatly reduced via this approach. Results from a study of hypertriglyceridemia and simulations suggest the two-stage method has a higher overall true positive rate (TPR) with a controlled overall false positive rate (FPR) when compared with single-stage approaches. 
We also report the analytical form of its overall FPR, which may be useful in guiding study design to achieve a high TPR while retaining the desired FPR.}, number={6}, journal={JOURNAL OF HUMAN GENETICS}, author={Wen, Shu-Hui and Tzeng, Jung-Ying and Kao, Jau-Tsuen and Hsiao, Chuhsing Kate}, year={2006}, month={Jun}, pages={523–532} } @article{tzeng_roeder_2006, title={Likelihood-based inference on haplotype effects in genetic association studies - Comment}, volume={101}, number={473}, journal={Journal of the American Statistical Association}, author={Tzeng, J. Y. and Roeder, K.}, year={2006}, pages={111–114} } @article{tzeng_wang_kao_hsiao_2006, title={Regression-based association analysis with clustered haplotypes through use of genotypes}, volume={78}, ISSN={["1537-6605"]}, DOI={10.1086/500025}, abstractNote={Haplotype-based association analysis has been recognized as a tool with high resolution and potentially great power for identifying modest etiological effects of genes. However, in practice, its efficacy has not been as successfully reproduced as expected in theory. One primary cause is that such analysis tends to require a large number of parameters to capture the abundant haplotype varieties, and many of those are expended on rare haplotypes for which studies would have insufficient power to detect association even if it existed. To concentrate statistical power on more-relevant inferences, in this study, we developed a regression-based approach using clustered haplotypes to assess haplotype-phenotype association. Specifically, we generalized the probabilistic clustering methods of Tzeng to the generalized linear model (GLM) framework established by Schaid et al. The proposed method uses unphased genotypes and incorporates both phase uncertainty and clustering uncertainty. Its GLM framework allows adjustment of covariates and can model qualitative and quantitative traits. 
It can also evaluate the overall haplotype association or the individual haplotype effects. We applied the proposed approach to study the association between hypertriglyceridemia and the apolipoprotein A5 gene. Through simulation studies, we assessed the performance of the proposed approach and demonstrate its validity and power in testing for haplotype-trait association.}, number={2}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Tzeng, JY and Wang, CH and Kao, JT and Hsiao, CK}, year={2006}, month={Feb}, pages={231–242} } @article{tzeng_2005, title={Evolutionary-based grouping of haplotypes in association analysis}, volume={28}, ISSN={["0741-0395"]}, DOI={10.1002/gepi.20063}, abstractNote={Abstract}, number={3}, journal={GENETIC EPIDEMIOLOGY}, author={Tzeng, JY}, year={2005}, month={Apr}, pages={220–231} }