@article{ford_lin_zhou_wright_gombar_sedykh_shah_chiu_rusyn_2024,
  author       = {Ford, Lucie C. and Lin, Hsing-Chieh and Zhou, Yi-Hui and Wright, Fred A. and Gombar, Vijay K. and Sedykh, Alexander and Shah, Ruchir R. and Chiu, Weihsueh A. and Rusyn, Ivan},
  title        = {Characterizing {PFAS} hazards and risks: a human population-based in vitro cardiotoxicity assessment strategy},
  journal      = {HUMAN GENOMICS},
  year         = {2024},
  month        = sep,
  volume       = {18},
  number       = {1},
  issn         = {1479-7364},
  doi          = {10.1186/s40246-024-00665-x},
  abstractNote = {Per- and poly-fluoroalkyl substances (PFAS) are emerging contaminants of concern because of their wide use, persistence, and potential to be hazardous to both humans and the environment. Several PFAS have been designated as substances of concern; however, most PFAS in commerce lack toxicology and exposure data to evaluate their potential hazards and risks. Cardiotoxicity has been identified as a likely human health concern, and cell-based assays are the most sensible approach for screening and prioritization of PFAS. Human-induced pluripotent stem cell (iPSC)-derived cardiomyocytes are a widely used method to test for cardiotoxicity, and recent studies showed that many PFAS affect these cells. Because iPSC-derived cardiomyocytes are available from different donors, they also can be used to quantify human variability in responses to PFAS. The primary objective of this study was to characterize potential human cardiotoxic hazard, risk, and inter-individual variability in responses to PFAS. A total of 56 PFAS from different subclasses were tested in concentration-response using human iPSC-derived cardiomyocytes from 16 donors without known heart disease. Kinetic calcium flux and high-content imaging were used to evaluate biologically-relevant phenotypes such as beat frequency, repolarization, and cytotoxicity. Of the tested PFAS, 46 showed concentration-response effects in at least one phenotype and donor; however, a wide range of sensitivities were observed across donors. Inter-individual variability in the effects could be quantified for 19 PFAS, and risk characterization could be performed for 20 PFAS based on available exposure information. For most tested PFAS, toxicodynamic variability was within a factor of 10 and the margins of exposure were above 100. This study identified PFAS that may pose cardiotoxicity risk and have high inter-individual variability. It also demonstrated the feasibility of using a population-based human in vitro method to quantify population variability and identify cardiotoxicity risks of emerging contaminants.},
}

@article{rudra_zhou_nobel_wright_2024,
  author       = {Rudra, Pratyaydipta and Zhou, Yi-Hui and Nobel, Andrew and Wright, Fred A.},
  title        = {Control of false discoveries in grouped hypothesis testing for {eQTL} data},
  journal      = {BMC BIOINFORMATICS},
  year         = {2024},
  month        = apr,
  volume       = {25},
  number       = {1},
  issn         = {1471-2105},
  doi          = {10.1186/s12859-024-05736-3},
  abstractNote = {Background: Expression quantitative trait locus (eQTL) analysis aims to detect the genetic variants that influence the expression of one or more genes. Gene-level eQTL testing forms a natural grouped-hypothesis testing strategy with clear biological importance. Methods to control family-wise error rate or false discovery rate for group testing have been proposed earlier, but may not be powerful or easily apply to eQTL data, for which certain structured alternatives may be defensible and may enable the researcher to avoid overly conservative approaches. Results: In an empirical Bayesian setting, we propose a new method to control the false discovery rate (FDR) for grouped hypotheses. Here, each gene forms a group, with SNPs annotated to the gene corresponding to individual hypotheses. The heterogeneity of effect sizes in different groups is considered by the introduction of a random effects component. Our method, entitled Random Effects model and testing procedure for Group-level FDR control (REG-FDR), assumes a model for alternative hypotheses for the eQTL data and controls the FDR by adaptive thresholding. As a convenient alternate approach, we also propose Z-REG-FDR, an approximate version of REG-FDR, that uses only Z-statistics of association between genotype and expression for each gene-SNP pair. The performance of Z-REG-FDR is evaluated using both simulated and real data. Simulations demonstrate that Z-REG-FDR performs similarly to REG-FDR, but with much improved computational speed. Conclusion: Our results demonstrate that the Z-REG-FDR method performs favorably compared to other methods in terms of statistical power and control of FDR. It can be of great practical use for grouped hypothesis testing for eQTL analysis or similar problems in statistical genomics due to its fast computation and ability to be fit using only summary data.},
}

@article{stonebraker_pace_gallins_dang_aksit_faino_gordon_macparland_bamshad_gibson_etal_2024,
  author       = {Stonebraker, Jaclyn and Pace, Rhonda and Gallins, Paul and Dang, Hong and Aksit, Melis and Faino, Anna and Gordon, William and Macparland, Sonya and Bamshad, Michael and Gibson, Ronald and others},
  title        = {Genetic variation in severe cystic fibrosis liver disease is associated with novel mechanisms for disease pathogenesis},
  journal      = {HEPATOLOGY},
  year         = {2024},
  month        = mar,
  volume       = {3},
  issn         = {1527-3350},
  doi          = {10.1097/HEP.0000000000000863},
  abstractNote = {Background and Aims: It is not known why severe cystic fibrosis (CF) liver disease (CFLD) with portal hypertension occurs in only ~7% of people with CF (pwCF). We aimed to identify genetic modifiers for severe CFLD to improve understanding of disease mechanisms. Approach and Results: Whole genome sequencing was available in 4,082 pwCF with pancreatic insufficiency (n=516 with severe CFLD; n=3,566 without CFLD). We tested ~15.9 million SNPs for association with severe CFLD versus no-CFLD, using pre-modulator clinical phenotypes including: 1) genetic variant (SERPINA1; Z-allele) previously associated with severe CFLD; 2) candidate SNPs (n=205) associated with non-CF liver diseases; 3) genome-wide association study (GWAS) of common/rare SNPs; 4) transcriptome-wide association (TWAS); and 5) gene-level and pathway analyses. The Z-allele was significantly associated with severe CFLD (p=1.1×10⁻⁴). No significant candidate SNPs were identified. GWAS identified genome-wide significant SNPs in 2 loci and 2 suggestive loci. These 4 loci contained genes [significant, PKD1 (p=8.05×10⁻¹⁰) and FNBP1 (p=4.74×10⁻⁹); suggestive, DUSP6 (p=1.51×10⁻⁷) and ANKUB1 (p=4.69×10⁻⁷)] relevant to severe CFLD pathophysiology. TWAS identified 3 genes [CXCR1 (p=1.01×10⁻⁶), AAMP (p=1.07×10⁻⁶), and TRBV24 (p=1.23×10⁻⁵)] involved in hepatic inflammation and innate immunity. Gene-ranked analyses identified pathways enriched in genes linked to multiple liver pathologies. Conclusion: These results identify loci/genes associated with severe CFLD that point to disease mechanisms involving hepatic fibrosis, inflammation and innate immune function, vascular pathology, intracellular signaling, actin cytoskeleton and tight junction integrity, and mechanisms of hepatic steatosis and insulin resistance. These discoveries will facilitate mechanistic studies and the development of therapeutics for severe CFLD.},
}

@article{ford_lin_tsai_zhou_wright_sedykh_shah_chiu_rusyn_2024,
  author       = {Ford, Lucie C. and Lin, Hsing-Chieh and Tsai, Han-Hsuan D. and Zhou, Yi-Hui and Wright, Fred A. and Sedykh, Alexander and Shah, Ruchir R. and Chiu, Weihsueh A. and Rusyn, Ivan},
  title        = {Hazard and risk characterization of 56 structurally diverse {PFAS} using a targeted battery of broad coverage assays using six human cell types},
  journal      = {TOXICOLOGY},
  year         = {2024},
  month        = mar,
  volume       = {503},
  issn         = {1879-3185},
  doi          = {10.1016/j.tox.2024.153763},
  abstractNote = {Per- and poly-fluoroalkyl substances (PFAS) are extensively used in commerce leading to their prevalence in the environment. Due to their chemical stability, PFAS are considered to be persistent and bioaccumulative; they are frequently detected in both the environment and humans. Because of this, PFAS as a class (composed of hundreds to thousands of chemicals) are contaminants of very high concern. Little information is available for the vast majority of PFAS, and regulatory agencies lack safety data to determine whether exposure limits or restrictions are needed. Cell-based assays are a pragmatic approach to inform decision-makers on potential health hazards; therefore, we hypothesized that a targeted battery of human in vitro assays can be used to determine whether there are structure-bioactivity relationships for PFAS, and to characterize potential risks by comparing bioactivity (points of departure) to exposure estimates. We tested 56 PFAS from 8 structure-based subclasses in concentration response (0.1–100 μM) using six human cell types selected from target organs with suggested adverse effects of PFAS – human induced pluripotent stem cell (iPSC)-derived hepatocytes, neurons, and cardiomyocytes, primary human hepatocytes, endothelial and HepG2 cells. While many compounds were without effect; certain PFAS demonstrated cell-specific activity highlighting the necessity of using a compendium of in vitro models to identify potential hazards. No class-specific groupings were evident except for some chain length- and structure-related trends. In addition, margins of exposure (MOE) were derived using empirical and predicted exposure data. Conservative MOE calculations showed that most tested PFAS had a MOE in the 1–100 range; ∼20% of PFAS had MOE<1, providing tiered priorities for further studies. Overall, we show that a compendium of human cell-based models can be used to derive bioactivity estimates for a range of PFAS, enabling comparisons with human biomonitoring data. Furthermore, we emphasize that establishing structure-bioactivity relationships may be challenging for the tested PFAS.},
}

@article{tsai_ford_burnett_dickey_wright_chiu_rusyn_2024,
  author       = {Tsai, Han-Hsuan D. and Ford, Lucie C. and Burnett, Sarah D. and Dickey, Allison N. and Wright, Fred A. and Chiu, Weihsueh A. and Rusyn, Ivan},
  title        = {Informing Hazard Identification and Risk Characterization of Environmental Chemicals by Combining Transcriptomic and Functional Data from Human-Induced Pluripotent Stem-Cell-Derived Cardiomyocytes},
  journal      = {CHEMICAL RESEARCH IN TOXICOLOGY},
  year         = {2024},
  month        = jul,
  issn         = {1520-5010},
  doi          = {10.1021/acs.chemrestox.4c00193},
  abstractNote = {Environmental chemicals may contribute to the global burden of cardiovascular disease, but experimental data are lacking to determine which substances pose the greatest risk. Human-induced pluripotent stem cell (iPSC)-derived cardiomyocytes are a high-throughput cardiotoxicity model that is widely used to test drugs and chemicals; however, most studies focus on exploring electro-physiological readouts. Gene expression data may provide additional molecular insights to be used for both mechanistic interpretation and dose–response analyses. Therefore, we hypothesized that both transcriptomic and functional data in human iPSC-derived cardiomyocytes may be used as a comprehensive screening tool to identify potential cardiotoxicity hazards and risks of the chemicals. To test this hypothesis, we performed concentration–response analysis of 464 chemicals from 12 classes, including both pharmaceuticals and nonpharmaceutical substances. Functional effects (beat frequency, QT prolongation, and asystole), cytotoxicity, and whole transcriptome response were evaluated. Points of departure were derived from phenotypic and transcriptomic data, and risk characterization was performed. Overall, 244 (53%) substances were active in at least one phenotype; as expected, pharmaceuticals with known cardiac liabilities were the most active. Positive chronotropy was the functional phenotype activated by the largest number of tested chemicals. No chemical class was particularly prone to pose a potential hazard to cardiomyocytes; a varying proportion (10–44%) of substances in each class had effects on cardiomyocytes. Transcriptomic data showed that 69 (15%) substances elicited significant gene expression changes; most perturbed pathways were highly relevant to known key characteristics of human cardiotoxicants. The bioactivity-to-exposure ratios showed that phenotypic- and transcriptomic-based POD led to similar results for risk characterization. Overall, our findings demonstrate how the integrative use of in vitro transcriptomic and phenotypic data from iPSC-derived cardiomyocytes not only offers a complementary approach for hazard and risk prioritization, but also enables mechanistic interpretation of the in vitro test results to increase confidence in decision-making.},
}

@article{broadaway_brotman_rosen_currin_alkhawaja_etheridge_wright_gallins_jima_zhou_etal_2024,
  author       = {Broadaway, K. Alaine and Brotman, Sarah M. and Rosen, Jonathan D. and Currin, Kevin W. and Alkhawaja, Abdalla A. and Etheridge, Amy S. and Wright, Fred and Gallins, Paul and Jima, Dereje and Zhou, Yi-hui and others},
  title        = {Liver {eQTL} meta-analysis illuminates potential molecular mechanisms of cardiometabolic traits},
  journal      = {AMERICAN JOURNAL OF HUMAN GENETICS},
  year         = {2024},
  month        = sep,
  volume       = {111},
  number       = {9},
  issn         = {1537-6605},
  doi          = {10.1016/j.ajhg.2024.07.017},
  abstractNote = {Understanding the molecular mechanisms of complex traits is essential for developing targeted interventions. We analyzed liver expression quantitative-trait locus (eQTL) meta-analysis data on 1,183 participants to identify conditionally distinct signals. We found 9,013 eQTL signals for 6,564 genes; 23% of eGenes had two signals, and 6% had three or more signals. We then integrated the eQTL results with data from 29 cardiometabolic genome-wide association study (GWAS) traits and identified 1,582 GWAS-eQTL colocalizations for 747 eGenes. Non-primary eQTL signals accounted for 17% of all colocalizations. Isolating signals by conditional analysis prior to coloc resulted in 37% more colocalizations than using marginal eQTL and GWAS data, highlighting the importance of signal isolation. Isolating signals also led to stronger evidence of colocalization: among 343 eQTL-GWAS signal pairs in multi-signal regions, analyses that isolated the signals of interest resulted in higher posterior probability of colocalization for 41% of tests. Leveraging allelic heterogeneity, we predicted causal effects of gene expression on liver traits for four genes. To predict functional variants and regulatory elements, we colocalized eQTL with liver chromatin accessibility QTL (caQTL) and found 391 colocalizations, including 73 with non-primary eQTL signals and 60 eQTL signals that colocalized with both a caQTL and a GWAS signal. Finally, we used publicly available massively parallel reporter assays in HepG2 to highlight 14 eQTL signals that include at least one expression-modulating variant. This multi-faceted approach to unraveling the genetic underpinnings of liver-related traits could lead to therapeutic development.},
}

@article{tsai_ford_chen_dickey_wright_rusyn_2024,
  author       = {Tsai, Han-Hsuan Doris and Ford, Lucie C. and Chen, Zunwei and Dickey, Allison N. and Wright, Fred A. and Rusyn, Ivan},
  title        = {Risk-Based Prioritization of {PFAS} Using Phenotypic and Transcriptomic Data from Human Induced Pluripotent Stem Cell-Derived Hepatocytes and Cardiomyocytes},
  journal      = {ALTEX-ALTERNATIVES TO ANIMAL EXPERIMENTATION},
  year         = {2024},
  volume       = {41},
  number       = {3},
  pages        = {363--381},
  issn         = {1868-8551},
  doi          = {10.14573/altex.2311031},
  abstractNote = {Per- and polyfluoroalkyl substances (PFAS) are chemicals with important applications; they are persistent in the environment and may pose human health hazards. Regulatory agencies are considering restrictions and bans of PFAS; however, little data exists for informed decisions. Several prioritization strategies were proposed for evaluation of potential hazards of PFAS. Structure-based grouping could expedite the selection of PFAS for testing; still, the hypothesis that structure-effect relationships exist for PFAS requires confirmation. We tested 26 structurally diverse PFAS from 8 groups using human-induced pluripotent stem cell-derived hepatocytes and cardiomyocytes, and tested concentration-response effects on cell function and gene expression. Few phenotypic effects were observed in hepatocytes, but negative chronotropy was observed for 8 of the 26 PFAS. Substance- and cell type-dependent transcriptomic changes were more prominent but lacked substantial group-specific effects. In hepatocytes, we found up-regulation of stress-related and extracellular matrix organization pathways, and down-regulation of fat metabolism. In cardiomyocytes, contractility-related pathways were most affected. We derived phenotypic and transcriptomic points of departure and compared them to predicted PFAS exposures. The conservative estimates for bioactivity and exposure were used to derive bioactivity-to-exposure ratio (BER) for each PFAS, most (23 of 26) PFAS had BER>1. Overall, these data suggests that structure-based grouping of PFAS may not be sufficient to predict their biological effects. Testing of individual PFAS may be needed for scientific-based decision-making. Our proposed strategy of using two human cell types and considering phenotypic and transcriptomic effects, combined with dose-response analysis and calculation of BER, may be used for PFAS prioritization. Plain language summary: Per- and polyfluoroalkyl substances (PFAS) are man-made chemicals used in many products. However, most of these substances have not been tested for safety, and concerns exist that they may be harmful to human health and/or the environment. This study aimed to use human cell-based models to investigate if some of the PFAS may exhibit hazardous properties and if similarities among substances are observed. Few effects were observed in liver cells, but a decrease in beating frequency was observed in heart cells for some PFAS. Gene expression changes were substance- and cell type-dependent. We did not find convincing structure-based similarities among PFAS; this suggests that testing of individual PFAS may be necessary in the future to inform health decisions. Overall, this study showed that a test strategy of using two human cell types, from liver and heart, may inform PFAS prioritization without a need for testing in animals.},
}

@article{mathisen_bearth_jones_hoffmann_vist_ames_husoy_svendsen_tsaioun_ashikaga_etal_2024,
  author       = {Mathisen, Gro H. and Bearth, Angela and Jones, Lowenna B. and Hoffmann, Sebastian and Vist, Gunn E. and Ames, Heather M. and Husoy, Trine and Svendsen, Camilla and Tsaioun, Katya and Ashikaga, Takao and others},
  title        = {Time for {CHANGE}: system-level interventions for bringing forward the date of effective use of {NAMs} in regulatory toxicology},
  journal      = {ARCHIVES OF TOXICOLOGY},
  year         = {2024},
  month        = jun,
  issn         = {1432-0738},
  doi          = {10.1007/s00204-024-03802-6},
}

@article{roe_ball_wright_chiu_rusyn_2024,
  author       = {Roe, H. M. and Ball, N. and Wright, F. A. and Chiu, W. A. and Rusyn, I.},
  title        = {To Accept or Not To Accept a Read-Across Adaptation: A Systematic Analysis of 15 Years of Testing Proposal Decisions by the {European Chemicals Agency}},
  journal      = {TOXICOLOGY LETTERS},
  year         = {2024},
  month        = sep,
  volume       = {399},
  pages        = {S63},
  issn         = {1879-3169},
  doi          = {10.1016/j.toxlet.2024.07.175},
}

@article{kvasnicka_aurisano_borries_lu_fantke_jolliet_wright_chiu_2024,
  author       = {Kvasnicka, Jacob and Aurisano, Nicolo and Borries, Kerstin and Lu, En-Hsuan and Fantke, Peter and Jolliet, Olivier and Wright, Fred A. and Chiu, Weihsueh A.},
  title        = {Two-Stage Machine Learning-Based Approach to Predict Points of Departure for Human Noncancer and Developmental/Reproductive Effects},
  journal      = {ENVIRONMENTAL SCIENCE \& TECHNOLOGY},
  year         = {2024},
  month        = may,
  issn         = {1520-5851},
  doi          = {10.1021/acs.est.4c00172},
  abstractNote = {Chemical points of departure (PODs) for critical health effects are crucial for evaluating and managing human health risks and impacts from exposure. However, PODs are unavailable for most chemicals in commerce due to a lack of},
}

@article{tsai_house_wright_chiu_rusyn_2023,
  author       = {Tsai, Han-Hsuan Doris and House, John S. and Wright, Fred A. and Chiu, Weihsueh A. and Rusyn, Ivan},
  title        = {A tiered testing strategy based on in vitro phenotypic and transcriptomic data for selecting representative petroleum {UVCBs} for toxicity evaluation in vivo},
  journal      = {TOXICOLOGICAL SCIENCES},
  year         = {2023},
  month        = apr,
  issn         = {1096-0929},
  doi          = {10.1093/toxsci/kfad041},
  url          = {https://doi.org/10.1093/toxsci/kfad041},
  abstractNote = {Hazard evaluation of substances of “unknown or variable composition, complex reaction products and biological materials” (UVCBs) remains a major challenge in regulatory science because their chemical composition is difficult to ascertain. Petroleum substances are representative UVCBs and human cell-based data have been previously used to substantiate their groupings for regulatory submissions. We hypothesized that a combination of phenotypic and transcriptomic data could be integrated to make decisions as to selection of group-representative worst-case petroleum UVCBs for subsequent toxicity evaluation in vivo. We used data obtained from 141 substances from 16 manufacturing categories previously tested in 6 human cell types (induced pluripotent stem cell [iPSC]-derived hepatocytes, cardiomyocytes, neurons, and endothelial cells, and MCF7 and A375 cell lines). Benchmark doses for gene-substance combinations were calculated, and both transcriptomic and phenotype-derived points of departure (PODs) were obtained. Correlation analysis and machine learning were used to assess associations between phenotypic and transcriptional PODs and to determine the most informative cell types and assays, thus representing a cost-effective integrated testing strategy. We found that 2 cell types—iPSC-derived-hepatocytes and -cardiomyocytes—contributed the most informative and protective PODs and may be used to inform selection of representative petroleum UVCBs for further toxicity evaluation in vivo. Overall, although the use of new approach methodologies to prioritize UVCBs has not been widely adopted, our study proposes a tiered testing strategy based on iPSC-derived hepatocytes and cardiomyocytes to inform selection of representative worst-case petroleum UVCBs from each manufacturing category for further toxicity evaluation in vivo.},
}

@article{sakolish_moyer_tsai_ford_dickey_wright_han_bajaj_baltazar_carmichael_etal_2023,
  author       = {Sakolish, Courtney and Moyer, Haley L. and Tsai, Han-Hsuan D. and Ford, Lucie C. and Dickey, Allison N. and Wright, Fred A. and Han, Gang and Bajaj, Piyush and Baltazar, Maria T. and Carmichael, Paul L. and others},
  title        = {Analysis of reproducibility and robustness of a renal proximal tubule microphysiological system {OrganoPlate} 3-lane 40 for in vitro studies of drug transport and toxicity},
  journal      = {TOXICOLOGICAL SCIENCES},
  year         = {2023},
  month        = aug,
  issn         = {1096-0929},
  doi          = {10.1093/toxsci/kfad080},
  abstractNote = {Microphysiological systems are an emerging area of in vitro drug development, and their independent evaluation is important for wide adoption and use. The primary goal of this study was to test reproducibility and robustness of a renal proximal tubule microphysiological system, OrganoPlate 3-lane 40, as an in vitro model for drug transport and toxicity studies. This microfluidic model was compared with static multiwell cultures and tested using several human renal proximal tubule epithelial cell (RPTEC) types. The model was characterized in terms of the functional transport for various tubule-specific proteins, epithelial permeability of small molecules (cisplatin, tenofovir, and perfluorooctanoic acid) versus large molecules (fluorescent dextrans, 60–150 kDa), and gene expression response to a nephrotoxic xenobiotic. The advantages offered by OrganoPlate 3-lane 40 as compared with multiwell cultures are the presence of media flow, albeit intermittent, and increased throughput compared with other microfluidic models. However, OrganoPlate 3-lane 40 model appeared to offer only limited (eg, MRP-mediated transport) advantages in terms of either gene expression or functional transport when compared with the multiwell plate culture conditions. Although OrganoPlate 3-lane 40 can be used to study cellular uptake and direct toxic effects of small molecules, it may have limited utility for drug transport studies. Overall, this study offers refined experimental protocols and comprehensive comparative data on the function of RPETCs in traditional multiwell culture and microfluidic OrganoPlate 3-lane 40, information that will be invaluable for the prospective end-users of in vitro models of the human proximal tubule.},
}

@article{cordova_dodds_tsai_lloyd_roman-hubers_wright_chiu_mcdonald_zhu_newman_etal_2023,
  author       = {Cordova, Alexandra C. and Dodds, James N. and Tsai, Han-Hsuan D. and Lloyd, Dillon T. and Roman-Hubers, Alina T. and Wright, Fred A. and Chiu, Weihsueh A. and McDonald, Thomas J. and Zhu, Rui and Newman, Galen and others},
  title        = {Application of Ion Mobility Spectrometry-Mass Spectrometry for Compositional Characterization and Fingerprinting of a Library of Diverse Crude Oil Samples},
  journal      = {ENVIRONMENTAL TOXICOLOGY AND CHEMISTRY},
  year         = {2023},
  month        = aug,
  volume       = {42},
  pages        = {2336--2349},
  issn         = {1552-8618},
  doi          = {10.1002/etc.5727},
  abstractNote = {Exposure characterization of crude oils, especially in time‐sensitive circumstances such as spills and disasters, is a well‐known analytical chemistry challenge. Gas chromatography–mass spectrometry is commonly used for “fingerprinting” and origin tracing in oil spills; however, this method is both time‐consuming and lacks the resolving power to separate co‐eluting compounds. Recent advances in methodologies to analyze petroleum substances using high‐resolution analytical techniques have demonstrated both improved resolving power and higher throughput. One such method, ion mobility spectrometry–mass spectrometry (IMS–MS), is especially promising because it is both rapid and high‐throughput, with the ability to discern among highly homologous hydrocarbon molecules. Previous applications of IMS–MS to crude oil analyses included a limited number of samples and did not provide detailed characterization of chemical constituents. We analyzed a diverse library of 195 crude oil samples using IMS–MS and applied a computational workflow to assign molecular formulas to individual features. The oils were from 12 groups based on geographical and geological origins: non‐US (1 group), US onshore (3), and US Gulf of Mexico offshore (8). We hypothesized that information acquired through IMS–MS data would provide a more confident grouping and yield additional fingerprint information. Chemical composition data from IMS–MS was used for unsupervised hierarchical clustering, as well as machine learning–based supervised analysis to predict geographic and source rock categories for each sample; the latter also yielded several novel prospective biomarkers for fingerprinting of crude oils. We found that IMS–MS data have complementary advantages for fingerprinting and characterization of diverse crude oils and that proposed polycyclic aromatic hydrocarbon biomarkers can be used for rapid exposure characterization. Environ Toxicol Chem 2023;42:2336–2349. © 2023 The Authors. Environmental Toxicology and Chemistry published by Wiley Periodicals LLC on behalf of SETAC.},
}

@article{zhou_gallins_pace_dang_aksit_blue_buckingham_collaco_faino_gordon_etal_2023,
  author       = {Zhou, Yi-Hui and Gallins, Paul J. and Pace, Rhonda G. and Dang, Hong and Aksit, Melis A. and Blue, Elizabeth E. and Buckingham, Kati J. and Collaco, Joseph M. and Faino, Anna V. and Gordon, William W. and others},
  title        = {Genetic Modifiers of Cystic Fibrosis Lung Disease Severity},
  journal      = {AMERICAN JOURNAL OF RESPIRATORY AND CRITICAL CARE MEDICINE},
  year         = {2023},
  month        = may,
  volume       = {207},
  number       = {10},
  pages        = {1324--1333},
  issn         = {1535-4970},
  doi          = {10.1164/rccm.202209-1653OC},
  url          = {https://doi.org/10.1164/rccm.202209-1653OC},
  abstractNote = {RATIONALE Lung disease is the major cause of morbidity and mortality in persons with cystic fibrosis (pwCF). Variability in CF lung disease has substantial non-CFTR genetic influence. Identification of genetic modifiers has prognostic and therapeutic importance. OBJECTIVES Identify genetic modifier loci and genes/pathways associated with pulmonary disease severity. METHODS Whole genome sequencing (WGS) data on 4,248 unique pwCF with pancreatic insufficiency (PI) and lung function measures were combined with imputed genotypes from an additional 3,592 PI patients from the US, Canada, and France. This report describes association of ~15.9 million single nucleotide polymorphisms (SNPs), using the quantitative Kulich Normal Residual Mortality Adjusted (KNoRMA) lung disease phenotype in 7,840 pwCF using pre-modulator lung function data. MEASUREMENTS AND MAIN RESULTS Testing included common and rare SNPs, transcriptome-wide association, gene level, and pathway analyses. Pathway analyses identified novel associations with genes that have key roles in organ development, and we hypothesize these genes may relate to dysanapsis and/or variability in lung repair. Results confirmed and extended previous GWAS findings. These WGS data provide finely mapped genetic information to support mechanistic studies. No novel primary associations with common single variants or with rare variants were found. Multi-locus effects at chr5p13 (SLC9A3/CEP72) and chr11p13 (EHF/APIP) were identified. Variant effect size estimates at associated loci were consistently ordered across the cohorts, indicating possible age or birth cohort effects. CONCLUSIONS This pre-modulator genomic, transcriptomic, and pathway association study of 7,840 pwCF will facilitate mechanistic and post-modulator genetic studies and, development of novel therapeutics for CF lung disease.},
}

@article{cordova_klaren_ford_grimm_baker_zhou_wright_rusyn_2023,
  author       = {Cordova, Alexandra C. and Klaren, William D. and Ford, Lucie C. and Grimm, Fabian A. and Baker, Erin S. and Zhou, Yi-Hui and Wright, Fred A. and Rusyn, Ivan},
  title        = {Integrative Chemical-Biological Grouping of Complex High Production Volume Substances from Lower Olefin Manufacturing Streams},
  journal      = {TOXICS},
  year         = {2023},
  month        = jul,
  volume       = {11},
  number       = {7},
  issn         = {2305-6304},
  doi          = {10.3390/toxics11070586},
  url          = {https://doi.org/10.3390/toxics11070586},
  abstractNote = {Human cell-based test methods can be used to evaluate potential hazards of mixtures and products of petroleum refining (“unknown or variable composition, complex reaction products, or biological materials” substances, UVCBs). Analyses of bioactivity and detailed chemical characterization of petroleum UVCBs were used separately for grouping these substances; a combination of the approaches has not been undertaken. Therefore, we used a case example of representative high production volume categories of petroleum UVCBs, 25 lower olefin substances from low benzene naphtha and resin oils categories, to determine whether existing manufacturing-based category grouping can be supported. We collected two types of data: nontarget ion mobility spectrometry-mass spectrometry of both neat substances and their organic extracts and in vitro bioactivity of the organic extracts in five human cell types: umbilical vein endothelial cells and induced pluripotent stem cell-derived hepatocytes, endothelial cells, neurons, and cardiomyocytes. We found that while similarity in composition and bioactivity can be observed for some substances, existing categories are largely heterogeneous. Strong relationships between composition and bioactivity were observed, and individual constituents that determine these associations were identified. Overall, this study showed a promising approach that combines chemical composition and bioactivity data to better characterize the variability within manufacturing categories of petroleum UVCBs.},
}

@article{ting_wright_zhou_2023,
  author       = {Ting, Bryan W. W. and Wright, Fred A. and Zhou, Yi-Hui},
  title        = {Simultaneous modeling of multivariate heterogeneous responses and heteroskedasticity via a two-stage composite likelihood},
  journal      = {BIOMETRICAL JOURNAL},
  year         = {2023},
  month        = may,
  issn         = {1521-4036},
  doi          = {10.1002/bimj.202200029},
  url          = {https://doi.org/10.1002/bimj.202200029},
  abstractNote = {Multivariate heterogeneous responses and heteroskedasticity have attracted increasing attention in recent years. In genome‐wide association studies, effective simultaneous modeling of multiple phenotypes would improve statistical power and interpretability. However, a flexible common modeling system for heterogeneous data types can pose computational difficulties. Here we build upon a previous method for multivariate probit estimation using a two‐stage composite likelihood that exhibits favorable computational time while retaining attractive parameter estimation properties. We extend this approach to incorporate multivariate responses of heterogeneous data types (binary and continuous), and possible heteroskedasticity. Although the approach has wide applications, it would be particularly useful for genomics, precision medicine, or individual biomedical prediction. Using a genomics example, we explore statistical power and confirm that the approach performs well for hypothesis testing and coverage percentages under a wide variety of settings. The approach has the potential to better leverage genomics data and provide interpretable inference for pleiotropy, in which a locus is associated with multiple traits.},
}

@article{rusyn_wright_2023,
  author       = {Rusyn, Ivan and Wright, Fred A.},
  title        = {Ten years of using key characteristics of human carcinogens to organize and evaluate mechanistic evidence in {IARC Monographs} on the identification of carcinogenic hazards to humans: Patterns and associations},
  journal      = {TOXICOLOGICAL SCIENCES},
  year         = {2023},
  month        = dec,
  issn         = {1096-0929},
  doi          = {10.1093/toxsci/kfad134},
  abstractNote = {Systematic review and evaluation of mechanistic evidence using the Key Characteristics approach was proposed by the International Agency for Research on Cancer (IARC) in 2012 and used by the IARC Monographs Working Groups since 2015. Key Characteristics are 10 features of agents known to cause cancer in humans. From 2015 to 2022, a total of 19 Monographs (73 agents combined) used Key Characteristics for cancer hazard classification. We hypothesized that a retrospective analysis of applications of the Key Characteristics approach to cancer hazard classification using heterogenous mechanistic data on diverse agents would be informative for systematic reviews in decision-making. We extracted information on the conclusions, data types, and the role mechanistic data played in the cancer hazard classification from each Monograph. Statistical analyses identified patterns in the use of Key Characteristics, as well as trends and correlations among Key Characteristics, data types, and ultimate decisions. Despite gaps in data for many agents and Key Characteristics, several significant results emerged. Mechanistic data from in vivo animal, in vitro animal, and in vitro human studies were most impactful in concluding that an agent could cause cancer via a Key Characteristic. To exclude the involvement of a Key Characteristic, data from large-scale systematic in vitro testing programs such as ToxCast, were most informative. Overall, increased availability of systemized data streams, such as human in vitro data, would provide the basis for more confident and informed conclusions about both positive and negative associations and inform expert judgments on cancer hazard.},
}

@article{ford_jang_chen_zhou_gallins_wright_chiu_rusyn_2022, title={A Population-Based Human In Vitro Approach to Quantify Inter-Individual Variability in Responses to Chemical Mixtures}, volume={10}, ISSN={["2305-6304"]}, url={https://doi.org/10.3390/toxics10080441}, DOI={10.3390/toxics10080441}, abstractNote={Human cell-based population-wide in vitro models have been proposed as a strategy to derive chemical-specific estimates of inter-individual variability; however, the utility of this approach has not yet been tested for cumulative exposures in mixtures. This study aimed to test defined mixtures and their individual components and determine whether adverse effects of the mixtures were likely to be more variable in a population than those of the individual chemicals. The in vitro model comprised 146 human lymphoblastoid cell lines from four diverse subpopulations of European and African descent. Cells were exposed, in concentration–response, to 42 chemicals from diverse classes of environmental pollutants; in addition, eight defined mixtures were prepared from these chemicals using several exposure- or hazard-based scenarios. 
Points of departure for cytotoxicity were derived using Bayesian concentration–response modeling and population variability was quantified in the form of a toxicodynamic variability factor (TDVF). We found that 28 chemicals and all mixtures exhibited concentration–response cytotoxicity, enabling calculation of the TDVF. The median TDVF across test substances, for both individual chemicals or defined mixtures, ranged from a default assumption ($10^{1/2}$) of toxicodynamic variability in human population to >10. The data also provide a proof of principle for single-variant genome-wide association mapping for toxicity of the chemicals and mixtures, although replication would be necessary due to statistical power limitations with the current sample size. This study demonstrates the feasibility of using a set of human lymphoblastoid cell lines as an in vitro model to quantify the extent of inter-individual variability in hazardous properties of both individual chemicals and mixtures. The data show that population variability of the mixtures is unlikely to exceed that of the most variable component, and that similarity in genome-wide associations among components may be used to accrue additional evidence for grouping of constituents in a mixture for cumulative assessments.}, number={8}, journal={TOXICS}, author={Ford, Lucie C. and Jang, Suji and Chen, Zunwei and Zhou, Yi-Hui and Gallins, Paul J. and Wright, Fred A. and Chiu, Weihsueh A. 
and Rusyn, Ivan}, year={2022}, month={Aug} } @article{zhou_gallins_etheridge_jima_scholl_wright_innocenti_2022, title={A resource for integrated genomic analysis of the human liver}, volume={12}, ISSN={["2045-2322"]}, url={https://doi.org/10.1038/s41598-022-18506-z}, DOI={10.1038/s41598-022-18506-z}, abstractNote={Abstract In this study, we generated whole-transcriptome RNA-Seq from n = 192 genotyped liver samples and used these data with existing data from the GTEx Project (RNA-Seq) and previous liver eQTL (microarray) studies to create an enhanced transcriptomic sequence resource in the human liver. Analyses of genotype-expression associations show pronounced enrichment of associations with genes of drug response. The associations are primarily consistent across the two RNA-Seq datasets, with some modest variation, indicating the importance of obtaining multiple datasets to produce a robust resource. We further used an empirical Bayesian model to compare eQTL patterns in liver and an additional 20 GTEx tissues, finding that MHC genes, and especially class II genes, are enriched for liver-specific eQTL patterns. To illustrate the utility of the resource to augment GWAS analysis with small sample sizes, we developed a novel meta-analysis technique to combine several liver eQTL data sources. We also illustrate its application using a transcriptome-enhanced re-analysis of a study of neutropenia in pancreatic cancer patients. The associations of genotype with liver expression, including splice variation and its genetic associations, are made available in a searchable genome browser.}, number={1}, journal={SCIENTIFIC REPORTS}, author={Zhou, Yi-Hui and Gallins, Paul J. and Etheridge, Amy S. and Jima, Dereje and Scholl, Elizabeth and Wright, Fred A. 
and Innocenti, Federico}, year={2022}, month={Sep} } @article{roman-hubers_cordova_rohde_chiu_mcdonald_wright_dodds_baker_rusyn_2022, title={Characterization of compositional variability in petroleum substances}, volume={317}, ISSN={["1873-7153"]}, DOI={10.1016/j.fuel.2022.123547}, abstractNote={In the process of registration of substances of Unknown or Variable Composition, Complex Reaction Products or Biological Materials (UVCBs), information sufficient to enable substance identification must be provided. Substance identification for UVCBs formed through petroleum refining is particularly challenging due to their chemical complexity, as well as variability in refining process conditions and composition of the feedstocks. This study aimed to characterize compositional variability of petroleum UVCBs both within and across product categories. We utilized ion mobility spectrometry (IMS)-MS as a technique to evaluate detailed chemical composition of independent production cycle-derived samples of 6 petroleum products from 3 manufacturing categories (heavy aromatic, hydrotreated light paraffinic, and hydrotreated heavy paraffinic). Atmospheric pressure photoionization and drift tube IMS-MS were used to identify structurally related compounds and quantified between- and within-product variability. In addition, we determined both individual molecules and hydrocarbon blocks that were most variable in samples from different production cycles. We found that detailed chemical compositional data on petroleum UVCBs obtained from IMS-MS can provide the information necessary for hazard and risk characterization in terms of quantifying the variability of the products in a manufacturing category, as well as in subsequent production cycles of the same product.}, journal={FUEL}, author={Roman-Hubers, Alina T. and Cordova, Alexandra C. and Rohde, Arlean M. and Chiu, Weihsueh A. and McDonald, Thomas J. and Wright, Fred A. and Dodds, James N. and Baker, Erin S. 
and Rusyn, Ivan}, year={2022}, month={Jun} } @article{boysen_rusyn_chiu_wright_2022, title={Characterization of population variability of 1,3-butadiene derived protein adducts in humans and mice}, volume={132}, ISSN={["1096-0295"]}, DOI={10.1016/j.yrtph.2022.105171}, abstractNote={1,3-butadiene is a known human carcinogen and a chemical to which humans are exposed occupationally and through environmental pollution. Inhalation risk assessment of 1,3-butadiene was completed several decades ago before data on molecular biomarkers of exposure and effect have been reported from both human studies of workers and experimental studies in mice. To improve risk assessment of 1,3-butadiene, the quantitative characterization of uncertainty in estimations of inter-individual variability in cancer-related effects is needed. For this, we ought to take advantage of the availability of the data on 1,3-butadiene hemoglobin adducts, well established biomarkers of the internal dose of the reactive epoxides, from several large-scale human studies and from a study in a Collaborative Cross mouse population. We found that in humans, toxicokinetic uncertainty factor for 99th percentile of the population ranged from 3.27 to 7.9, depending on the hemoglobin adduct. For mice, these values ranged from less than 2 to 7.51, depending on the dose and the adduct. Quantitative estimates from this study can be used to reduce uncertainties in the parameter estimates used in the models to derive the inhalation unit risk, as well as to address possible differences in variability in 1,3-butadiene metabolism that may be dose-related.}, journal={REGULATORY TOXICOLOGY AND PHARMACOLOGY}, author={Boysen, Gunnar and Rusyn, Ivan and Chiu, Weihsueh A. 
and Wright, Fred A.}, year={2022}, month={Jul} } @article{mandal_levy_ives_hwang_zhou_motsinger-reif_pan_huggins_hamilton_wright_et_al_2022, title={Correlation Analysis of Variables From the Atherosclerosis Risk in Communities Study}, volume={13}, ISSN={["1663-9812"]}, DOI={10.3389/fphar.2022.883433}, abstractNote={The need to test chemicals in a timely and cost-effective manner has driven the development of new alternative methods (NAMs) that utilize in silico and in vitro approaches for toxicity prediction. There is a wealth of existing data from human studies that can aid in understanding the ability of NAMs to support chemical safety assessment. This study aims to streamline the integration of data from existing human cohorts by programmatically identifying related variables within each study. Study variables from the Atherosclerosis Risk in Communities (ARIC) study were clustered based on their correlation within the study. The quality of the clusters was evaluated via a combination of manual review and natural language processing (NLP). We identified 391 clusters including 3,285 variables. Manual review of the clusters containing more than one variable determined that human reviewers considered 95% of the clusters related to some degree. To evaluate potential bias in the human reviewers, clusters were also scored via NLP, which showed a high concordance with the human classification. Clusters were further consolidated into cluster groups using the Louvain community finding algorithm. Manual review of the cluster groups confirmed that clusters within a group were more related than clusters from different groups. Our data-driven approach can facilitate data harmonization and curation efforts by providing human annotators with groups of related variables reflecting the themes present in the data. 
Reviewing groups of related variables should increase efficiency of the human review, and the number of variables reviewed can be reduced by focusing curator attention on variable groups whose theme is relevant for the topic being studied.}, journal={FRONTIERS IN PHARMACOLOGY}, author={Mandal, Meisha and Levy, Josh and Ives, Cataia and Hwang, Stephen and Zhou, Yi-Hui and Motsinger-Reif, Alison and Pan, Huaqin and Huggins, Wayne and Hamilton, Carol and Wright, Fred and others}, year={2022}, month={Jul} } @article{ting_wright_zhou_2022, title={Fast Multivariate Probit Estimation via a Two-Stage Composite Likelihood}, volume={2}, ISSN={["1867-1772"]}, url={https://doi.org/10.1007/s12561-022-09338-6}, DOI={10.1007/s12561-022-09338-6}, abstractNote={Abstract The multivariate probit is popular for modeling correlated binary data, with an attractive balance of flexibility and simplicity. However, considerable challenges remain in computation and in devising a clear statistical framework. Interest in the multivariate probit has increased in recent years. Current applications include genomics and precision medicine, where simultaneous modeling of multiple traits may be of interest, and computational efficiency is an important consideration. We propose a fast method for multivariate probit estimation via a two-stage composite likelihood. 
We explore computational and statistical efficiency, and note that the approach sets the stage for extensions beyond the purely binary setting.}, journal={STATISTICS IN BIOSCIENCES}, author={Ting, Bryan and Wright, Fred and Zhou, Yi-Hui}, year={2022}, month={Feb} } @article{jima_skaar_planchart_motsinger-reif_cevik_park_cowley_wright_house_liu_et_al_2022, title={Genomic map of candidate human imprint control regions: the imprintome}, volume={6}, ISSN={["1559-2308"]}, url={https://doi.org/10.1080/15592294.2022.2091815}, DOI={10.1080/15592294.2022.2091815}, abstractNote={ABSTRACT Imprinted genes – critical for growth, metabolism, and neuronal function – are expressed from one parental allele. Parent-of-origin-dependent CpG methylation regulates this expression at imprint control regions (ICRs). Since ICRs are established before tissue specification, these methylation marks are similar across cell types. Thus, they are attractive for investigating the developmental origins of adult diseases using accessible tissues, but remain unknown. We determined genome-wide candidate ICRs in humans by performing whole-genome bisulphite sequencing (WGBS) of DNA derived from the three germ layers and from gametes. We identified 1,488 hemi-methylated candidate ICRs, including 19 of 25 previously characterized ICRs (https://humanicr.org/). Gamete methylation approached 0% or 100% in 332 ICRs (178 paternally and 154 maternally methylated), supporting parent-of-origin-specific methylation, and 65% were in well-described CTCF-binding or DNaseI hypersensitive regions. This draft of the human imprintome will allow for the systematic determination of the role of early-acquired imprinting dysregulation in the pathogenesis of human diseases and developmental and behavioural disorders.}, journal={EPIGENETICS}, author={Jima, Dereje D. and Skaar, David A. and Planchart, Antonio and Motsinger-Reif, Alison and Cevik, Sebnem E. and Park, Sarah S. 
and Cowley, Michael and Wright, Fred and House, John and Liu, Andy and others}, year={2022}, month={Jun} } @article{house_grimm_klaren_dalzell_kuchi_zhang_lenz_boogaard_ketelslegers_gant_et_al_2022, title={Grouping of UVCB Substances with Dose-Response Transcriptomics Data from Human Cell-Based Assays}, volume={39}, ISSN={["1868-8551"]}, DOI={10.14573/altex.2107051}, abstractNote={The application of in vitro biological assays as new approach methodologies (NAMs) to support grouping of UVCB (unknown or variable composition, complex reaction products, and biological materials) substances has recently been demonstrated. In addition to cell-based phenotyping as NAMs, in vitro transcriptomic profiling is used to gain deeper mechanistic understanding of biological responses to chemicals and to support grouping and read-across. However, the value of gene expression profiling for characterizing complex substances like UVCBs has not been explored. Using 141 petroleum substance extracts, we performed dose-response transcriptomic profiling in human induced pluripotent stem cell (iPSC)-derived hepatocytes, cardiomyocytes, neurons, and endothelial cells, as well as cell lines MCF7 and A375. The goal was to determine whether transcriptomic data can be used to group these UVCBs and to further characterize the molecular basis for in vitro biological responses. We found distinct transcriptional responses for petroleum substances by manufacturing class. Pathway enrichment informed interpretation of effects of substances and UVCB petroleum-class. Transcriptional activity was strongly correlated with concentration of polycyclic aromatic compounds (PAC), especially in iPSC-derived hepatocytes. Supervised analysis using transcriptomics, alone or in combination with bioactivity data collected on these same substances/cells, suggest that transcriptomics data provide useful mechanistic information, but only modest additional value for grouping. 
Overall, these results further demonstrate the value of NAMs for grouping of UVCBs, identify informative cell lines, and provide data that could be used for justifying selection of substances for further testing that may be required for registration.}, number={3}, journal={ALTEX-ALTERNATIVES TO ANIMAL EXPERIMENTATION}, author={House, John S. and Grimm, Fabian A. and Klaren, William D. and Dalzell, Abigail and Kuchi, Srikeerthana and Zhang, Shu-Dong and Lenz, Klaus and Boogaard, Peter J. and Ketelslegers, Hans B. and Gant, Timothy W. and others}, year={2022}, pages={388--404} } @article{harlow_gandawijaya_bamford_martin_wood_most_tanaka_leonard_etheridge_innocenti_et_al_2022, title={Identification and single-base gene-editing functional validation of a cis-EPO variant as a genetic predictor for EPO-increasing therapies}, volume={109}, ISSN={["1537-6605"]}, DOI={10.1016/j.ajhg.2022.08.004}, abstractNote={Hypoxia-inducible factor prolyl hydroxylase inhibitors (HIF-PHIs) are currently under clinical development for treating anemia in chronic kidney disease (CKD), but it is important to monitor their cardiovascular safety. Genetic variants can be used as predictors to help inform the potential risk of adverse effects associated with drug treatments. We therefore aimed to use human genetics to help assess the risk of adverse cardiovascular events associated with therapeutically altered EPO levels to help inform clinical trials studying the safety of HIF-PHIs. By performing a genome-wide association meta-analysis of EPO (n = 6,127), we identified a cis-EPO variant (rs1617640) lying in the EPO promoter region. We validated this variant as most likely causal in controlling EPO levels by using genetic and functional approaches, including single-base gene editing. 
Using this variant as a partial predictor for therapeutic modulation of EPO and large genome-wide association data in Mendelian randomization tests, we found no evidence (at p < 0.05) that genetically predicted long-term rises in endogenous EPO, equivalent to a 2.2-unit increase, increased risk of coronary artery disease (CAD, OR [95% CI] = 1.01 [0.93, 1.07]), myocardial infarction (MI, OR [95% CI] = 0.99 [0.87, 1.15]), or stroke (OR [95% CI] = 0.97 [0.87, 1.07]). We could exclude increased odds of 1.15 for cardiovascular disease for a 2.2-unit EPO increase. A combination of genetic and functional studies provides a powerful approach to investigate the potential therapeutic profile of EPO-increasing therapies for treating anemia in CKD.}, number={9}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Harlow, Charli E. and Gandawijaya, Josan and Bamford, Rosemary A. and Martin, Emily-Rose and Wood, Andrew R. and Most, Peter J. and Tanaka, Toshiko and Leonard, Hampton L. and Etheridge, Amy S. and Innocenti, Federico and others}, year={2022}, month={Sep}, pages={1638--1652} } @article{sun_liu_rosen_huang_pace_dang_gallins_blue_ling_corvol_et_al_2022, title={Leveraging TOPMed imputation server and constructing a cohort-specific imputation reference panel to enhance genotype imputation among cystic fibrosis patients}, volume={3}, ISSN={["2666-2477"]}, DOI={10.1016/j.xhgg.2022.100090}, abstractNote={Cystic fibrosis (CF) is a severe genetic disorder that can cause multiple comorbidities affecting the lungs, the pancreas, the luminal digestive system and beyond. In our previous genome-wide association studies (GWAS), we genotyped approximately 8,000 CF samples using a mixture of different genotyping platforms. More recently, the Cystic Fibrosis Genome Project (CFGP) performed deep (approximately 30×) whole genome sequencing (WGS) of 5,095 samples to better understand the genetic mechanisms underlying clinical heterogeneity among patients with CF. 
For mixtures of GWAS array and WGS data, genotype imputation has proven effective in increasing effective sample size. Therefore, we first performed imputation for the approximately 8,000 CF samples with GWAS array genotype using the Trans-Omics for Precision Medicine (TOPMed) freeze 8 reference panel. Our results demonstrate that TOPMed can provide high-quality imputation for patients with CF, boosting genomic coverage from approximately 0.3–4.2 million genotyped markers to approximately 11–43 million well-imputed markers, and significantly improving polygenic risk score (PRS) prediction accuracy. Furthermore, we built a CF-specific CFGP reference panel based on WGS data of patients with CF. We demonstrate that despite having approximately 3% the sample size of TOPMed, our CFGP reference panel can still outperform TOPMed when imputing some CF disease-causing variants, likely owing to allele and haplotype differences between patients with CF and general populations. We anticipate our imputed data for 4,656 samples without WGS data will benefit our subsequent genetic association studies, and the CFGP reference panel built from CF WGS samples will benefit other investigators studying CF.}, number={2}, journal={HUMAN GENETICS AND GENOMICS ADVANCES}, author={Sun, Quan and Liu, Weifang and Rosen, Jonathan D. and Huang, Le and Pace, Rhonda G. and Dang, Hong and Gallins, Paul J. and Blue, Elizabeth E. and Ling, Hua and Corvol, Harriet and others}, year={2022}, month={Apr} } @article{sun_yang_rosen_jiang_chen_liu_wen_raffield_pace_zhou_et_al_2022, title={MagicalRsq: Machine-learning-based genotype imputation quality calibration}, volume={109}, ISSN={["1537-6605"]}, DOI={10.1016/j.ajhg.2022.09.009}, abstractNote={Whole-genome sequencing (WGS) is the gold standard for fully characterizing genetic variation but is still prohibitively expensive for large samples. 
To reduce costs, many studies sequence only a subset of individuals or genomic regions, and genotype imputation is used to infer genotypes for the remaining individuals or regions without sequencing data. However, not all variants can be well imputed, and the current state-of-the-art imputation quality metric, denoted as standard Rsq, is poorly calibrated for lower-frequency variants. Here, we propose MagicalRsq, a machine-learning-based method that integrates variant-level imputation and population genetics statistics, to provide a better calibrated imputation quality metric. Leveraging WGS data from the Cystic Fibrosis Genome Project (CFGP), and whole-exome sequence data from UK BioBank (UKB), we performed comprehensive experiments to evaluate the performance of MagicalRsq compared to standard Rsq for partially sequenced studies. We found that MagicalRsq aligns better with true $R^2$ than standard Rsq in almost every situation evaluated, for both European and African ancestry samples. For example, when applying models trained from 1,992 CFGP sequenced samples to an independent 3,103 samples with no sequencing but TOPMed imputation from array genotypes, MagicalRsq, compared to standard Rsq, achieved net gains of 1.4 million rare, 117k low-frequency, and 18k common variants, where net gains were gained numbers of correctly distinguished variants by MagicalRsq over standard Rsq. MagicalRsq can serve as an improved post-imputation quality metric and will benefit downstream analysis by better distinguishing well-imputed variants from those poorly imputed. MagicalRsq is freely available on GitHub.}, number={11}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Sun, Quan and Yang, Yingxi and Rosen, Jonathan D. and Jiang, Min-Zhi and Chen, Jiawen and Liu, Weifang and Wen, Jia and Raffield, Laura M. and Pace, Rhonda G. 
and Zhou, Yi-Hui and others}, year={2022}, month={Nov}, pages={1986--1997} } @article{rusyn_chiu_wright_2022, title={Model systems and organisms for addressing inter- and intra-species variability in risk assessment}, volume={132}, ISSN={["1096-0295"]}, DOI={10.1016/j.yrtph.2022.105197}, abstractNote={Addressing inter- and intra-species differences in potential hazardous effects of chemicals remains a long-standing challenge in human health risk assessment that is typically addressed heuristically through use of 10-fold default "uncertainty" or "safety" factors. Although it has long been recognized that chemical-specific data would be preferable to replace the "defaults," only recently have there emerged experimental model systems and organisms with the potential to experimentally quantify the population variability in both toxicokinetics and toxicodynamics for specific chemicals. Progress is most evident in the use of population in vitro human cell-based models and population in vivo mouse models. Multiple case studies were published in the past 10–15 years that clearly demonstrate the utility of such models to derive data with direct application to quantifying variability at hazard identification, exposure-response assessment, and mechanistic understanding of toxicity steps of traditional risk assessments. Here, we review recent efforts to develop fit-for-purpose approaches utilizing these novel population-based in vitro and in vivo models in the context of risk assessment. We also describe key challenges and opportunities to broadening application of population-based experimental approaches. We conclude that population-based models are now beginning to realize their potential to address long-standing data gaps in inter- and intra-species variability.}, journal={REGULATORY TOXICOLOGY AND PHARMACOLOGY}, author={Rusyn, Ivan and Chiu, Weihsueh A. 
and Wright, Fred A.}, year={2022}, month={Jul} } @article{aksit_ling_pace_raraigh_onchiri_faino_pagel_pugh_stilp_sun_et_al_2022, title={Pleiotropic modifiers of age-related diabetes and neonatal intestinal obstruction in cystic fibrosis}, volume={109}, ISSN={["1537-6605"]}, DOI={10.1016/j.ajhg.2022.09.004}, abstractNote={Individuals with cystic fibrosis (CF) develop complications of the gastrointestinal tract influenced by genetic variants outside of CFTR. Cystic fibrosis-related diabetes (CFRD) is a distinct form of diabetes with a variable age of onset that occurs frequently in individuals with CF, while meconium ileus (MI) is a severe neonatal intestinal obstruction affecting ∼20% of newborns with CF. CFRD and MI are slightly correlated traits with previous evidence of overlap in their genetic architectures. To better understand the genetic commonality between CFRD and MI, we used whole-genome-sequencing data from the CF Genome Project to perform genome-wide association. These analyses revealed variants at 11 loci (6 not previously identified) that associated with MI and at 12 loci (5 not previously identified) that associated with CFRD. Of these, variants at SLC26A9, CEBPB, and PRSS1 associated with both traits; variants at SLC26A9 and CEBPB increased risk for both traits, while variants at PRSS1, the higher-risk alleles for CFRD, conferred lower risk for MI. Furthermore, common and rare variants within the SLC26A9 locus associated with MI only or CFRD only. As expected, different loci modify risk of CFRD and MI; however, a subset exhibit pleiotropic effects indicating etiologic and mechanistic overlap between these two otherwise distinct complications of CF.}, number={10}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Aksit, Melis A. and Ling, Hua and Pace, Rhonda G. and Raraigh, Karen S. and Onchiri, Frankline and Faino, Anna V. and Pagel, Kymberleigh and Pugh, Elizabeth and Stilp, Adrienne M. 
and Sun, Quan and others}, year={2022}, month={Oct}, pages={1894--1908} } @article{bhandari_casillas_aly_zhu_newman_wright_miller_adler_rusyn_chiu_2022, title={Spatial and Temporal Analysis of Impacts of Hurricane Florence on Criteria Air Pollutants and Air Toxics in Eastern North Carolina}, volume={19}, ISSN={["1660-4601"]}, DOI={10.3390/ijerph19031757}, abstractNote={Natural and anthropogenic disasters are associated with air quality concerns due to the potential redistribution of pollutants in the environment. Our objective was to conduct a spatiotemporal analysis of air concentrations of benzene, toluene, ethylbenzene, and xylene (BTEX) and criteria air pollutants in North Carolina during and after Hurricane Florence. Three sampling campaigns were carried out immediately after the storm (September 2018) and at four-month intervals. BTEX were measured along major roads. Concurrent criteria air pollutant concentrations were predicted from modeling. Correlation between air pollutants and possible point sources was conducted using spatial regression. Exceedances of ambient air criteria were observed for benzene (in all sampling periods) and PM2.5 (mostly immediately after Florence). For both, there was an association between higher concentrations and fueling stations, particularly immediately after Florence. For other pollutants, concentrations were generally below levels of regulatory concern. Through characterization of air quality under both disaster and “normal” conditions, this study demonstrates spatial and temporal variation in air pollutants. We found that only benzene and PM2.5 were present at levels of potential concern, and there were localized increases immediately after the hurricane. These substances warrant particular attention in future disaster response research (DR2) investigations.}, number={3}, journal={INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH}, author={Bhandari, Sharmila and Casillas, Gaston and Aly, Noor A. 
and Zhu, Rui and Newman, Galen and Wright, Fred A. and Miller, Anthony and Adler, Gabriela and Rusyn, Ivan and Chiu, Weihsueh A.}, year={2022}, month={Feb} } @article{xia_shabalin_yin_chung_sullivan_wright_styner_gilmore_santelli_zou_2022, title={TwinEQTL: ultrafast and powerful association analysis for eQTL and GWAS in twin studies}, ISSN={["1943-2631"]}, DOI={10.1093/genetics/iyac088}, abstractNote={Abstract We develop a computationally efficient alternative, TwinEQTL, to a linear mixed-effects model for twin genome-wide association study data. Instead of analyzing all twin samples together with linear mixed-effects model, TwinEQTL first splits twin samples into 2 independent groups on which multiple linear regression analysis can be validly performed separately, followed by an appropriate meta-analysis-like approach to combine the 2 nonindependent test results. Through mathematical derivations, we prove the validity of TwinEQTL algorithm and show that the correlation between 2 dependent test statistics at each single-nucleotide polymorphism is independent of its minor allele frequency. Thus, the correlation is constant across all single-nucleotide polymorphisms. Through simulations, we show empirically that TwinEQTL has well controlled type I error with negligible power loss compared with the gold-standard linear mixed-effects models. To accommodate expression quantitative loci analysis with twin subjects, we further implement TwinEQTL into an R package with much improved computational efficiency. Our approaches provide a significant leap in terms of computing speed for genome-wide association study and expression quantitative loci analysis with twin samples.}, journal={GENETICS}, author={Xia, Kai and Shabalin, Andrey A. and Yin, Zhaoyu and Chung, Wonil and Sullivan, Patrick F. and Wright, Fred A. and Styner, Martin and Gilmore, John H. and Santelli, Rebecca C. 
and Zou, Fei}, year={2022}, month=jun } @article{wang_rivenbark_gong_wright_phillips_2021, title={Application of Edible Montmorillonite Clays for the Adsorption and Detoxification of Microcystin}, volume={4}, ISSN={2576-6422}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85115268354&partnerID=MN8TOARS}, DOI={10.1021/acsabm.1c00779}, abstractNote={Exposure to microcystins (MCs) in humans and animals commonly occurs through the consumption of drinking water and food contaminated with cyanobacteria. Although studies have focused on developing water filtration treatments for MCs using activated carbon, dietary sorbents to reduce the bioavailability of MCs from the stomach and intestines have not been reported. To address this need, edible calcium and sodium montmorillonite clays were characterized for their ability to bind MC containing leucine and arginine (MC-LR) under conditions simulating the gastrointestinal tract and compared with a medical-grade activated carbon. Results of in vitro adsorption isotherms and thermodynamics showed that binding plots for MC-LR on montmorillonites fit the Langmuir model with high binding capacity, affinity, Gibbs free energy, and enthalpy. The in silico results from molecular modeling predicted that the major binding mechanisms involved electrostatics and hydrogen bonds, and that interlayers were important binding sites. The safety and detoxification efficacy of the sorbents against MC-LR were validated in a battery of living organisms, including Hydra vulgaris, Lemna minor, and Caenorhabditis elegans. The inclusion of 0.05% and 0.1% montmorillonite clays in hydra media significantly reduced MC-LR toxicity and protected hydra by 60-80%, whereas only slight protection was shown with the heat-collapsed clay.
In the Lemna minor assay, montmorillonites significantly enhanced the growth of lemna, as supported by the increase in frond number, surface area, chlorophyll content, and growth rate, as well as the decrease in inhibition rate. Similar results were shown in the C. elegans assay, where montmorillonite clays reduced MC-LR effects on body length and brood size. All 3 bioassays confirmed dose-dependent protection from MC-LR, validated the in vitro and in silico findings, and suggested that edible montmorillonites are safe and efficacious binders for MC-LR. Moreover, their inclusion in diets during algal blooming seasons could protect vulnerable populations of humans and animals.}, number={9}, journal={ACS APPLIED BIO MATERIALS}, author={Wang, Meichen and Rivenbark, Kelly and Gong, Joonho and Wright, Fred A. and Phillips, Timothy D.}, year={2021}, month=sep, pages={7254--7265} } @article{roman-hubers_cordova_aly_mcdonald_lloyd_wright_baker_chiu_rusyn_2021, title={Data Processing Workflow to Identify Structurally Related Compounds in Petroleum Substances Using Ion Mobility Spectrometry-Mass Spectrometry}, volume={35}, ISSN={1520-5029}, DOI={10.1021/acs.energyfuels.1c00892}, abstractNote={Ion mobility spectrometry coupled with mass spectrometry (IMS-MS) is a post-ionization separation technique that can be used for rapid multidimensional analyses of complex samples. IMS-MS offers untargeted analysis, including ion-specific conformational data derived as collisional cross section (CCS) values. Here, we combine nitrogen gas drift tube CCS (DTCCSN2) and Kendrick mass defect (KMD) analyses based on CH2 and H functional units to enable compositional analyses of petroleum substances. First, polycyclic aromatic compound standards were analyzed by IMS-MS to demonstrate how CCS assists the identification of isomeric species in homologous series.
Next, we used case studies of a gasoline standard previously characterized for paraffin, isoparaffin, aromatic, naphthene, and olefinic (PIANO) compounds, and a crude oil sample to demonstrate the application of the KMD analyses and CCS filtering. Finally, we propose a workflow that enables confident molecular formula assignment to the IMS-MS-derived features in petroleum samples. Collectively, this work demonstrates how rapid untargeted IMS-MS analysis and the proposed data processing workflow can be used to provide confident compositional characterization of hydrocarbon-containing substances.}, number={13}, journal={ENERGY \& FUELS}, author={Roman-Hubers, Alina T. and Cordova, Alexandra C. and Aly, Noor A. and McDonald, Thomas J. and Lloyd, Dillon T. and Wright, Fred A. and Baker, Erin S. and Chiu, Weihsueh A. and Rusyn, Ivan}, year={2021}, month=jul, pages={10529--10539} } @article{aly_casillas_luo_mcdonald_wade_zhu_newman_lloyd_wright_chiu_et al._2021, title={Environmental impacts of {Hurricane Florence} flooding in eastern {North Carolina}: temporal analysis of contaminant distribution and potential human health risks}, volume={31}, ISSN={1559-064X}, DOI={10.1038/s41370-021-00325-5}, abstractNote={Hurricane Florence made landfall in North Carolina in September 2018 causing extensive flooding. Several potential point sources of hazardous substances and Superfund sites sustained water damage and contaminants may have been released into the environment. This study conducted temporal analysis of contaminant distribution and potential human health risks from Hurricane Florence-associated flooding. Soil samples were collected from 12 sites across four counties in North Carolina in September 2018, January and May 2019. Chemical analyses were performed for organics by gas chromatography-mass spectrometry. Metals were analyzed using inductively coupled plasma mass spectrometry.
Hazard index and cancer risk were calculated using EPA Regional Screening Level Soil Screening Levels for residential soils. PAH and metals detected downstream from the coal ash storage pond that leaked were detected and were indicative of a pyrogenic source of contamination. PAH at these sites were of human health concern because cancer risk values exceeded 1 × 10^{-6} threshold. Other contaminants measured across sampling sites, or corresponding hazard index and cancer risk, did not exhibit spatial or temporal differences or were of concern. This work shows the importance of rapid exposure assessment following natural disasters. It also establishes baseline levels of contaminants for future comparisons.}, number={5}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Aly, Noor A. and Casillas, Gaston and Luo, Yu-Syuan and McDonald, Thomas J. and Wade, Terry L. and Zhu, Rui and Newman, Galen and Lloyd, Dillon and Wright, Fred A. and Chiu, Weihsueh A. and others}, year={2021}, month=sep, pages={810--822} } @article{erber_goodman_wright_chiu_tretyakova_rusyn_2021, title={Intra- and Inter-Species Variability in Urinary {N7-(1-Hydroxy-3-buten-2-yl)guanine} Adducts Following Inhalation Exposure to 1,3-Butadiene}, volume={34}, ISSN={1520-5010}, DOI={10.1021/acs.chemrestox.1c00291}, abstractNote={1,3-Butadiene is a known carcinogen primarily targeting lymphoid tissues, lung, and liver. Cytochrome P450 activates butadiene to epoxides which form covalent DNA adducts that are thought to be a key mechanistic event in cancer. Previous studies suggested that inter-species, -tissue, and -individual susceptibility to adverse health effects of butadiene exposure may be due to differences in metabolism and other mechanisms. In this study, we aimed to examine the extent of inter-individual and inter-species variability in the urinary N7-(1-hydroxy-3-buten-2-yl)guanine (EB-GII) DNA adduct, a well-known biomarker of exposure to butadiene.
For a population variability study in mice, we used the collaborative cross model. Female and male mice from five strains were exposed to filtered air or butadiene (590 ppm, 6 h/day, 5 days/week for 2 weeks) by inhalation. Urine samples were collected, and the metabolic activation of butadiene by DNA-reactive species was quantified as urinary EB-GII adducts. We quantified the degree of EB-GII variation across mouse strains and sexes; then, we compared this variation with the data from rats (exposed to 62.5 or 200 ppm butadiene) and humans (0.004-2.2 ppm butadiene). We show that sex and strain are significant contributors to the variability in urinary EB-GII levels in mice. In addition, we find that the degree of variability in urinary EB-GII in collaborative cross mice, when expressed as an uncertainty factor for the inter-individual variability (UFH), is relatively modest (≤threefold) possibly due to metabolic saturation. By contrast, the variability in urinary EB-GII (adjusted for exposure) observed in humans, while larger than the default value of 10-fold, is largely consistent with UFH estimates for other chemicals based on human data for non-cancer endpoints. Overall, these data demonstrate that urinary EB-GII levels, particularly from human studies, may be useful for quantitative characterization of human variability in cancer risks to butadiene.}, number={11}, journal={CHEMICAL RESEARCH IN TOXICOLOGY}, author={Erber, Luke and Goodman, Samantha and Wright, Fred A. and Chiu, Weihsueh A. and Tretyakova, Natalia Y.
and Rusyn, Ivan}, year={2021}, month=nov, pages={2375--2383} } @article{chen_jang_kaihatu_zhou_wright_chiu_rusyn_2021, title={Potential Human Health Hazard of {Post-Hurricane Harvey} Sediments in {Galveston Bay} and {Houston Ship Channel}: A Case Study of Using In Vitro Bioactivity Data to Inform Risk Management Decisions}, volume={18}, ISSN={1660-4601}, url={https://doi.org/10.3390/ijerph182413378}, DOI={10.3390/ijerph182413378}, abstractNote={Natural and anthropogenic disasters may be associated with redistribution of chemical contaminants in the environment; however, current methods for assessing hazards and risks of complex mixtures are not suitable for disaster response. This study investigated the suitability of in vitro toxicity testing methods as a rapid means of identifying areas of potential human health concern. We used sediment samples (n = 46) from Galveston Bay and the Houston Ship Channel (GB/HSC) areas after hurricane Harvey, a disaster event that led to broad redistribution of chemically-contaminated sediments, including deposition of the sediment on shore due to flooding. Samples were extracted with cyclohexane and dimethyl sulfoxide and screened in a compendium of human primary or induced pluripotent stem cell (iPSC)-derived cell lines from different tissues (hepatocytes, neuronal, cardiomyocytes, and endothelial) to test for concentration-dependent effects on various functional and cytotoxicity phenotypes (n = 34). Bioactivity data were used to map areas of potential concern and the results compared to the data on concentrations of polycyclic aromatic hydrocarbons (PAHs) in the same samples. We found that setting remediation goals based on reducing bioactivity is protective of both “known” risks associated with PAHs and “unknown” risks associated with bioactivity, but the converse was not true for remediation based on PAH risks alone.
Overall, we found that in vitro bioactivity can be used as a comprehensive indicator of potential hazards and is an example of a new approach method (NAM) to inform risk management decisions on site cleanup.}, number={24}, journal={INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH}, author={Chen, Zunwei and Jang, Suji and Kaihatu, James M. and Zhou, Yi-Hui and Wright, Fred A. and Chiu, Weihsueh A. and Rusyn, Ivan}, year={2021}, month=dec } @article{luo_chen_blanchette_zhou_wright_baker_chiu_rusyn_2021, title={Relationships between constituents of energy drinks and beating parameters in human induced pluripotent stem cell ({iPSC})-Derived cardiomyocytes}, volume={149}, ISSN={1873-6351}, url={https://doi.org/10.1016/j.fct.2021.111979}, DOI={10.1016/j.fct.2021.111979}, abstractNote={Consumption of energy drinks has been associated with adverse cardiovascular effects; however, little is known about the ingredients that may contribute to these effects. We therefore characterized the chemical profiles and in vitro effects of energy drinks and their ingredients on human induced pluripotent stem cell (iPSC)-derived cardiomyocytes, and identified the putative active ingredients using a multivariate prediction model. Energy drinks from 17 widely-available over-the-counter brands were evaluated in this study. The concentrations of six common ingredients (caffeine, taurine, riboflavin, pantothenic acid, adenine, and L-methionine) were quantified by coupling liquid chromatography with a triple quadrupole mass spectrometer for the acquisition of LC-MS/MS spectra. In addition, untargeted analyses for each beverage were performed with a platform combining LC, ion mobility spectrometry and mass spectrometry (LC-IMS-MS) measurements. Approximately 300 features were observed across samples in the untargeted studies, and of these ~100 were identified. In vitro effects of energy drinks and some of their ingredients were then tested in iPSC-derived cardiomyocytes.
Data on the beat rate (positive and negative chronotropy), ion channel function (QT prolongation), and cytotoxicity were collected in a dilution series. We found that some of the energy drinks elicited adverse effects on the cardiomyocytes with the most common being an increase in the beat rate, while QT prolongation was also observed at the lowest concentrations. Finally, concentration addition modeling using quantitative data from the 6 common ingredients and multivariate prediction modeling was used to determine potential ingredients responsible for the adverse effects on the cardiomyocytes. These analyses suggested theophylline, adenine, and azelate as possibly contributing to the in vitro effects of energy drinks on QT prolongation in cardiomyocytes.}, journal={FOOD AND CHEMICAL TOXICOLOGY}, author={Luo, Yu-Syuan and Chen, Zunwei and Blanchette, Alexander D. and Zhou, Yi-Hui and Wright, Fred A. and Baker, Erin S. and Chiu, Weihsueh A. and Rusyn, Ivan}, year={2021}, month=mar } @article{marvel_house_wheeler_song_zhou_wright_chiu_rusyn_motsinger-reif_reif_2021, title={The {COVID-19} {Pandemic Vulnerability Index} ({PVI}) Dashboard: Monitoring County-Level Vulnerability Using Visualization, Statistical Modeling, and Machine Learning}, volume={129}, ISSN={1552-9924}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85099420902&partnerID=MN8TOARS}, DOI={10.1289/EHP8690}, abstractNote={Introduction: Expert groups have coalesced around a roadmap to address the current COVID-19 pandemic centered on social distancing, monitoring case counts and health care capacity, and, eventually, moving to pharmaceutical interventions. However, responsibility for navigating the pandemic response falls largely on state and local officials. To make equitable decisions on allocating resources, caring for vulnerable subpopulations, and implementing local- and state-level interventions, access to current pandemic data and key vulnerabilities at the community level are essential (National Academies of Sciences, Engineering, and Medicine 2020). Although numerous predictive models and interactive monitoring applications have been developed using pandemic-related data sets (Wynants et al. 2020), their capacity to aid in dynamic, community-level decision-making is limited. We developed the interactive COVID-19 Pandemic Vulnerability Index (PVI) Dashboard (https://covid19pvi.niehs.nih.gov/) to address this need by presenting a visual synthesis of dynamic information at the county level to monitor disease trajectories, communicate local vulnerabilities, forecast key outcomes, and guide informed responses (Figure 1).
Methods: The current PVI model integrates multiple data streams into an overall score derived from 12 key indicators—including well-established, general vulnerability factors for public health, plus emerging factors relevant to the pandemic—distributed across four domains: current infection rates, baseline population concentration, current interventions, and health and environmental vulnerabilities. The PVI profiles translate numerical results into visual representations, with each vulnerability factor represented as a component slice of a radar chart (Figure 2). The PVI profile for each county is calculated using the Toxicological Prioritization Index (ToxPi) framework for data integration within a geospatial context (Marvel et al. 2018; Bhandari et al. 2020). Data sources in the current model (version 11.2.1) include the Social Vulnerability Index (SVI) of the Centers for Disease Control and Prevention (CDC) for emergency response and hazard mitigation planning (Horney et al. 2017), testing rates from the COVID Tracking Project (Atlantic Monthly Group 2020), social distancing metrics from mobile device data (https://www.unacast.com/covid19/social-distancing-scoreboard), and dynamic measures of disease spread and case numbers (https://usafacts.org/issues/coronavirus/). Methodological details concerning the integration of data streams—plus the complete, daily time series of all source data since February 2020 and resultant PVI scores—are maintained on the public Github project page (COVID19PVI 2020). Over this period, the PVI has been strongly associated with key vulnerability-related outcome metrics (by rank-correlation), with updates of its performance assessment posted with model updates alongside data at the Github project page (COVID19PVI 2020). In addition to the PVI itself—which is a summary, human-centric visualization of relative vulnerability drivers—the dashboard is supported by rigorous statistical modeling of the underlying data to enable quantitative analysis and provide short-term, local predictions of cases and deaths [complete methodological details are maintained at the Github project page (COVID19PVI 2020)]. Generalized linear models of cumulative outcome data indicated that, after population size, the most significant predictors were the proportion of Black residents, mean fine particulate matter [particulate matter ≤2.5 μm in diameter (PM2.5)], percentage of population with insurance coverage (which was positively associated), and proportion of Hispanic residents. The local predictions of cases and deaths (see the "Predictions" panel in Figure 1) are updated daily using a Bayesian spatiotemporal random-effects model to build forecasts up to 2 weeks out.
Discussion: The PVI Dashboard supports decision-making and dynamic monitoring in several ways. The display can be tailored to add or remove layers of information, filtered by region (e.g., all counties within a state) or clustered by profile shape similarity. The timelines for both PVI models and observed COVID-19 outcomes facilitate tracking the impact of interventions and directing local resource allocations. The "Predictions" panel (Figure 1) connects these historical numbers to local forecasts of cases and deaths. By communicating an integrated concept of vulnerability that considers both dynamic (infection rate and interventions) and static (community population and health care characteristics) drivers, the interactive dashboard can promote buy-in from diverse audiences, which is necessary for effective public health interventions. This messaging can assist in addressing known racial disparities in COVID-19 case and death rates (Tan et al. 2020) or populations, and the PVI Dashboard is part of the "Unique Populations" tab of the CDC's COVID-19 Data Tracker (https://covid.cdc.gov/covid-data-tracker). By filtering the display to highlight vulnerability drivers within an overall score context, the dashboard can inform targeted interventions for specific localities. Unfortunately, the pandemic endures across the United States, with broad disparities based on the local environment (Tan et al. 2020). We present the PVI Dashboard as a dynamic container for contextualizing these disparities. It is a modular tool that will evolve to incorporate new data sources and analytics as they emerge (e.g., concurrent flu infections, school and business reopening statistics, heterogeneous public health practices). This flexibility positions it well as a resource for integrated prioritization of eventual vaccine distribution and monitoring its local impact. The PVI Dashboard can empower local and state officials to take informed action to combat the pandemic by communicating interactive, visual profiles of vulnerability atop an underlying statistical framework that enables the comparison of counties and the evaluation of the PVI's component data.}, number={1}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, author={Marvel, Skylar W. and House, John S. and Wheeler, Matthew and Song, Kuncheng and Zhou, Yi-Hui and Wright, Fred A. and Chiu, Weihsueh A. and Rusyn, Ivan and Motsinger-Reif, Alison and Reif, David M.}, year={2021}, month=jan, pages={017701} } @article{mukherjee_beykal_szafran_onel_stossi_mancini_lloyd_wright_zhou_mancini_et al._2020, title={Classification of estrogenic compounds by coupling high content analysis and machine learning algorithms}, volume={16}, ISSN={1553-7358}, DOI={10.1371/journal.pcbi.1008191}, abstractNote={Environmental toxicants affect human health in various ways. Of the thousands of chemicals present in the environment, those with adverse effects on the endocrine system are referred to as endocrine-disrupting chemicals (EDCs). Here, we focused on a subclass of EDCs that impacts the estrogen receptor (ER), a pivotal transcriptional regulator in health and disease. Estrogenic activity of compounds can be measured by many in vitro or cell-based high throughput assays that record various endpoints from large pools of cells, and increasingly at the single-cell level. To simultaneously capture multiple mechanistic ER endpoints in individual cells that are affected by EDCs, we previously developed a sensitive high throughput/high content imaging assay that is based upon a stable cell line harboring a visible multicopy ER responsive transcription unit and expressing a green fluorescent protein (GFP) fusion of ER.
High content analysis generates voluminous multiplex data comprised of minable features that describe numerous mechanistic endpoints. In this study, we present a machine learning pipeline for rapid, accurate, and sensitive assessment of the endocrine-disrupting potential of benchmark chemicals based on data generated from high content analysis. The multidimensional imaging data was used to train a classification model to ultimately predict the impact of unknown compounds on the ER, either as agonists or antagonists. To this end, both linear logistic regression and nonlinear Random Forest classifiers were benchmarked and evaluated for predicting the estrogenic activity of unknown compounds. Furthermore, through feature selection, data visualization, and model discrimination, the most informative features were identified for the classification of ER agonists/antagonists. The results of this data-driven study showed that highly accurate and generalized classification models with a minimum number of features can be constructed without loss of generality, where these machine learning models serve as a means for rapid mechanistic/phenotypic evaluation of the estrogenic potential of many chemicals.}, number={9}, journal={PLOS COMPUTATIONAL BIOLOGY}, author={Mukherjee, Rajib and Beykal, Burcu and Szafran, Adam T. and Onel, Melis and Stossi, Fabio and Mancini, Maureen G. and Lloyd, Dillon and Wright, Fred A. and Zhou, Lan and Mancini, Michael A.
and others}, year={2020}, month=sep } @article{house_grimm_klaren_dalzell_kuchi_zhang_lenz_boogaard_ketelslegers_gant_et al._2021, title={Grouping of {UVCB} Substances with New Approach Methodologies ({NAMs}) Data}, volume={38}, ISSN={1868-8551}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85100279673&partnerID=MN8TOARS}, DOI={10.14573/altex.2006262}, abstractNote={One of the most challenging areas in regulatory science is assessment of the substances known as UVCB (unknown or variable composition, complex reaction products and biological materials). Because the inherent complexity and variability of UVCBs present considerable challenges for establishing sufficient substance similarity based on chemical characteristics or other data, we hypothesized that new approach methodologies (NAMs), including in vitro test-derived biological activity signatures to characterize substance similarity, could be used to support grouping of UVCBs. We tested 141 petroleum substances as representative UVCBs in a compendium of 15 human cell types representing a variety of tissues. Petroleum substances were assayed in dilution series to derive point of departure estimates for each cell type and phenotype. Extensive quality control measures were taken to ensure that only high-confidence in vitro data were used to determine whether current groupings of these petroleum substances, based largely on the manufacturing process and physico-chemical properties, are justifiable. We found that bioactivity data-based groupings of petroleum substances were generally consistent with the manufacturing class-based categories. We also showed that these data, especially bioactivity from human induced pluripotent stem cell (iPSC)-derived and primary cells, can be used to rank substances in a manner highly concordant with their expected in vivo hazard potential based on their chemical compositional profile.
Overall, this study demonstrates that NAMs can be used to inform groupings of UVCBs, to assist in identification of representative substances in each group for testing when needed, and to fill data gaps by read-across.}, number={1}, journal={ALTEX-ALTERNATIVES TO ANIMAL EXPERIMENTATION}, author={House, John S. and Grimm, Fabian A. and Klaren, William D. and Dalzell, Abigail and Kuchi, Srikeerthana and Zhang, Shu-Dong and Lenz, Klaus and Boogaard, Peter J. and Ketelslegers, Hans B. and Gant, Timothy W. and et al.}, year={2021}, pages={123–137} } @article{house_bouzos_fahy_francisco_lloyd_wright_motsinger-reif_asuri_wheeler_2020, title={Low-Dose Silver Nanoparticle Surface Chemistry and Temporal Effects on Gene Expression in Human Liver Cells}, volume={16}, ISSN={["1613-6829"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85082473420&partnerID=MN8TOARS}, DOI={10.1002/smll.202000299}, abstractNote={AbstractSilver nanoparticles (AgNPs) are widely incorporated into consumer and biomedical products for their antimicrobial and plasmonic properties with limited risk assessment of low‐dose cumulative exposure in humans. To evaluate cellular responses to low‐dose AgNP exposures across time, human liver cells (HepG2) are exposed to AgNPs with three different surface charges (1.2 µg mL−1) and complete gene expression is monitored across a 24 h period. Time and AgNP surface chemistry mediate gene expression. In addition, since cells are fed, time has marked effects on gene expression that should be considered. Surface chemistry of AgNPs alters gene transcription in a time‐dependent manner, with the most dramatic effects in cationic AgNPs. Universal to all surface coatings, AgNP‐treated cells responded by inactivating proliferation and enabling cell cycle checkpoints. 
Further analysis of these universal features of AgNP cellular response, as well as more detailed analysis of specific AgNP treatments, time points, or specific genes, is facilitated with an accompanying application. Taken together, these results provide a foundation for understanding hepatic response to low‐dose AgNPs for future risk assessment.},
  number={21},
  journal={SMALL},
  author={House, John S. and Bouzos, Evangelia and Fahy, Kira M. and Francisco, Victorino Miguel and Lloyd, Dillon T. and Wright, Fred A. and Motsinger-Reif, Alison A. and Asuri, Prashanth and Wheeler, Korin E.},
  year={2020},
  month=may
}

% Letter published in a journal: typed as an article so that styles print the journal, volume, issue and pages.
@article{rusyn_chiu_wright_2020,
  title={Questioning Existing Cancer Hazard Evaluation Standards in the Name of Statistics},
  volume={177},
  ISSN={1096-0929},
  DOI={10.1093/toxsci/kfaa077},
  abstractNote={Crump et al. (2020) argue that they present “a more comprehensive analysis of the animal carcinogenicity data” in their use of a multi-response permutation procedure, adjusting for a large number of tumors across multiple studies. In essence, they posit that because every rodent cancer bioassay examines dozens of organs, and it is not uncommon for a chemical to have multiple bioassays, multiple testing corrections should be applied across all studies and tumors ever examined, to determine whether a chemical is a carcinogen. This argument, wrapped in a seemingly innocuous statistical framework, sets a potentially far-reaching precedent to revisit decades of standard practice of cancer hazard evaluation performed by national and international agencies. They seek to erect new barriers to chemical evaluation, and propose a very high bar to classify a chemical as positive in a rodent bioassay. We strongly question whether the proposed approach will strengthen public health protections, noting that extrinsic factors may account for a majority of cancer risk (Wu et al., 2016).},
  number={2},
  journal={TOXICOLOGICAL SCIENCES},
  author={Rusyn, Ivan and Chiu, Weihsueh A. and Wright, Fred A.},
  year={2020},
  month=oct,
  pages={521--522}
}

@article{chen_liu_wright_chiu_rusyn_2020,
  title={Rapid Hazard Characterization of Environmental Chemicals Using a Compendium of Human Cell Lines from Different Organs},
  volume={37},
  ISSN={1868-8551},
  DOI={10.14573/altex.2002291},
  abstractNote={The lack of adequate toxicity data for the vast majority of chemicals in the environment has spurred the development of new approach methodologies (NAMs). This study aimed to develop a practical high-throughput in vitro model for rapidly evaluating potential hazards of chemicals using a small number of human cells. Forty-two compounds were tested using human induced pluripotent stem cell (iPSC)-derived cells (hepatocytes, neurons, cardiomyocytes and endothelial cells), and a primary endothelial cell line. Both functional and cytotoxicity endpoints were evaluated using high-content imaging. Concentration-response was used to derive points-of-departure (POD). PODs were integrated with ToxPi and used as surrogate NAM-based PODs for risk characterization. We found chemical class-specific similarity among the chemicals tested; metal salts exhibited the highest overall bioactivity. We also observed cell type-specific patterns among classes of chemicals, indicating the ability of the proposed in vitro model to recognize effects on different cell types. Compared to available NAM datasets, such as ToxCast/Tox21 and chemical structure-based descriptors, we found that the data from the five-cell-type model was as good or even better in assigning compounds to chemical classes. Additionally, the PODs from this model performed well as a conservative surrogate for regulatory in vivo PODs and were less likely to underestimate in vivo potency and potential risk compared to other NAM-based PODs.
In summary, we demonstrate the potential of this in vitro screening model to inform rapid risk-based decision-making through ranking, clustering, and assessment of both hazard and risks of diverse environmental chemicals.},
  number={4},
  journal={ALTEX-ALTERNATIVES TO ANIMAL EXPERIMENTATION},
  author={Chen, Zunwei and Liu, Yizhong and Wright, Fred A. and Chiu, Weihsueh A. and Rusyn, Ivan},
  year={2020},
  pages={623--638}
}

@article{chen_lloyd_zhou_chiu_wright_rusyn_2021,
  title={Risk Characterization of Environmental Samples Using In Vitro Bioactivity and Polycyclic Aromatic Hydrocarbon Concentrations Data},
  volume={179},
  ISSN={1096-0929},
  DOI={10.1093/toxsci/kfaa166},
  abstractNote={Methods to assess environmental exposure to hazardous chemicals have primarily focused on quantification of individual chemicals, although chemicals often occur in mixtures, presenting challenges to the traditional risk characterization framework. Sampling sites in a defined geographic region provide an opportunity to characterize chemical contaminants, with spatial interpolation as a tool to provide estimates for non-sampled sites. At the same time, the use of in vitro bioactivity measurements has been shown to be informative for rapid risk-based decisions. In this study, we measured in vitro bioactivity in 39 surface soil samples collected immediately after flooding associated with Hurricane Harvey in Texas in a residential area known to be inundated with polycyclic aromatic hydrocarbon (PAH) contaminants. Bioactivity data were from a number of functional and toxicity assays in 5 human cell types, such as induced pluripotent stem cell-derived hepatocytes, cardiomyocytes, neurons, and endothelial cells, as well as human umbilical vein endothelial cells. Data on concentrations of PAH in these samples were also available and the combination of data sources offered a unique opportunity to assess the joint spatial variation of PAH components and bioactivity. We found significant evidence of spatial correlation of a subset of PAH contaminants and of cell-based phenotypes. In addition, we show that the cell-based bioactivity data can be used to predict environmental concentrations for several PAH contaminants, as well as overall PAH summaries and cancer risk. This study’s impact lies in its demonstration that cell-based profiling can be used for rapid hazard screening of environmental samples by anchoring the bioassays to concentrations of PAH. This work sets the stage for identification of the areas of concern and direct quantitative risk characterization based on bioactivity data, thereby providing an important supplement to traditional individual chemical analyses by shedding light on constituents that may be missed from targeted chemical monitoring.},
  number={1},
  journal={TOXICOLOGICAL SCIENCES},
  author={Chen, Zunwei and Lloyd, Dillon and Zhou, Yi-Hui and Chiu, Weihsueh A. and Wright, Fred A. and Rusyn, Ivan},
  year={2021},
  month=jan,
  pages={108--120}
}

@article{song_wright_zhou_2020,
  title={Systematic Comparisons for Composition Profiles, Taxonomic Levels, and Machine Learning Methods for Microbiome-Based Disease Prediction},
  volume={7},
  ISSN={2296-889X},
  DOI={10.3389/fmolb.2020.610845},
  abstractNote={Microbiome composition profiles generated from 16S rRNA sequencing have been extensively studied for their usefulness in phenotype trait prediction, including for complex diseases such as diabetes and obesity. These microbiome compositions have typically been quantified in the form of Operational Taxonomic Unit (OTU) count matrices. However, alternate approaches such as Amplicon Sequence Variants (ASV) have been used, as well as the direct use of k-mer sequence counts. The overall effect of these different types of predictors when used in concert with various machine learning methods has been difficult to assess, due to varied combinations described in the literature.
Here we provide an in-depth investigation of more than 1,000 combinations of these three clustering/counting methods, in combination with varied choices for normalization and filtering, grouping at various taxonomic levels, and the use of more than ten commonly used machine learning methods for phenotype prediction. The use of short k-mers, which have computational advantages and conceptual simplicity, is shown to be effective as a source for microbiome-based prediction. Among machine-learning approaches, tree-based methods show consistent, though modest, advantages in prediction accuracy. We describe the various advantages and disadvantages of combinations in analysis approaches, and provide general observations to serve as a useful guide for future trait-prediction explorations using microbiome data.},
  journal={FRONTIERS IN MOLECULAR BIOSCIENCES},
  author={Song, Kuncheng and Wright, Fred A. and Zhou, Yi-Hui},
  year={2020},
  month=dec
}

% Citation key below: the exported key contained a space ("et al."), which BibTeX and Biber cannot parse; the space is replaced by an underscore.
@article{aguet_barbeira_bonazzola_brown_castel_jo_kasela_kim-hellmuth_liang_parsana_et_al._2020,
  title={The {GTEx} {Consortium} atlas of genetic regulatory effects across human tissues},
  volume={369},
  ISSN={1095-9203},
  DOI={10.1126/science.aaz1776},
  abstractNote={The Genotype-Tissue Expression (GTEx) project dissects how genetic variation affects gene expression and splicing.},
  number={6509},
  journal={SCIENCE},
  author={Aguet, Francois and Barbeira, Alvaro N. and Bonazzola, Rodrigo and Brown, Andrew and Castel, Stephane E. and Jo, Brian and Kasela, Silva and Kim-Hellmuth, Sarah and Liang, Yanyu and Parsana, Princy and others},
  year={2020},
  month=sep,
  pages={1318--1330}
}

% Citation key below: the exported key contained a space ("et al."), which BibTeX and Biber cannot parse; the space is replaced by an underscore.
@article{etheridge_gallins_jima_broadaway_ratain_schuetz_schadt_schroder_molony_zhou_et_al._2020,
  title={A New Liver Expression Quantitative Trait Locus Map From 1,183 Individuals Provides Evidence for Novel Expression Quantitative Trait Loci of Drug Response, Metabolic, and Sex-Biased Phenotypes},
  volume={107},
  ISSN={1532-6535},
  DOI={10.1002/cpt.1751},
  abstractNote={Expression quantitative trait locus (eQTL) studies in human liver are crucial for elucidating how genetic variation influences variability in disease risk and therapeutic outcomes and may help guide strategies to obtain maximal efficacy and safety of clinical interventions. Associations between expression microarray and genome‐wide genotype data from four human liver eQTL studies (n = 1,183) were analyzed. More than 2.3 million cis‐eQTLs for 15,668 genes were identified. When eQTLs were filtered against a list of 1,496 drug response genes, 187,829 cis‐eQTLs for 1,191 genes were identified. Additionally, 1,683 sex‐biased cis‐eQTLs were identified, as well as 49 and 73 cis‐eQTLs that colocalized with genome‐wide association study signals for blood metabolite or lipid levels, respectively. Translational relevance of these results is evidenced by linking DPYD eQTLs to differences in safety of chemotherapy, linking the sex‐biased regulation of PCSK9 expression to anti‐lipid therapy, and identifying the G‐protein coupled receptor GPR180 as a novel drug target for hypertriglyceridemia.},
  number={6},
  journal={CLINICAL PHARMACOLOGY \& THERAPEUTICS},
  author={Etheridge, Amy S. and Gallins, Paul J. and Jima, Dereje and Broadaway, K. Alaine and Ratain, Mark J.
and Schuetz, Erin and Schadt, Eric and Schroder, Adrian and Molony, Cliona and Zhou, Yihui and others},
  year={2020},
  month=jun,
  pages={1383--1393}
}

% Author surname below corrected to Mukherjee; the citation key keeps the exported spelling so existing citations still resolve.
@article{miikherjee_onel_beykal_szafran_stossi_mancini_zhou_wright_pistikopoulos_2019,
  title={Development of the {Texas} {A\&M} {Superfund} {Research} {Program} Computational Platform for Data Integration, Visualization, and Analysis},
  volume={46},
  ISBN={978-0-12-819939-8},
  ISSN={1570-7946},
  DOI={10.1016/B978-0-12-818634-3.50162-4},
  abstractNote={The National Institute of Environmental Health Sciences (NIEHS) Superfund Research Program (SRP) aims to support university-based multidisciplinary research on human health and environmental issues related to hazardous substances and pollutants. The Texas A&M Superfund Research Program comprehensively evaluates the complexities of hazardous chemical mixtures and their potential adverse health impacts due to exposure through a number of multi-disciplinary projects and cores. One of the essential components of the Texas A&M Superfund Research Center is the Data Science Core, which serves as the basis for translating the data produced by the multi-disciplinary research projects into useful knowledge for the community via data collection, quality control, analysis, and model generation. In this work, we demonstrate the Texas A&M Superfund Research Program computational platform, which houses and integrates large-scale, diverse datasets generated across the Center, provides basic visualization service to facilitate interpretation, monitors data quality, and finally implements a variety of state-of-the-art statistical analysis for model/tool development. The platform is aimed to facilitate effective integration and collaboration across the Center and acts as an enabler for the dissemination of comprehensive ad-hoc tools and models developed to address the environmental and health effects of chemical mixture exposure during environmental emergency-related contamination events.},
  journal={29TH EUROPEAN SYMPOSIUM ON COMPUTER AIDED PROCESS ENGINEERING, PT A},
  author={Mukherjee, Rajib and Onel, Melis and Beykal, Burcu and Szafran, Adam T. and Stossi, Fabio and Mancini, Michael A. and Zhou, Lan and Wright, Fred A. and Pistikopoulos, Efstratios N.},
  year={2019},
  pages={967--972}
}

% Citation key below: the exported key contained a space ("et al."), which BibTeX and Biber cannot parse; the space is replaced by an underscore.
@article{onel_beykal_ferguson_chiu_mcdonald_zhou_house_wright_sheen_rusyn_et_al._2019,
  title={Grouping of complex substances using analytical chemistry data: A framework for quantitative evaluation and visualization},
  volume={14},
  ISSN={1932-6203},
  url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85073118823&partnerID=MN8TOARS},
  DOI={10.1371/journal.pone.0223517},
  abstractNote={A detailed characterization of the chemical composition of complex substances, such as products of petroleum refining and environmental mixtures, is greatly needed in exposure assessment and manufacturing. The inherent complexity and variability in the composition of complex substances obfuscate the choices for their detailed analytical characterization. Yet, in lieu of exact chemical composition of complex substances, evaluation of the degree of similarity is a sensible path toward decision-making in environmental health regulations. Grouping of similar complex substances is a challenge that can be addressed via advanced analytical methods and streamlined data analysis and visualization techniques. Here, we propose a framework with unsupervised and supervised analyses to optimally group complex substances based on their analytical features. We test two data sets of complex oil-derived substances.
The first data set is from gas chromatography-mass spectrometry (GC-MS) analysis of 20 Standard Reference Materials representing crude oils and oil refining products. The second data set consists of 15 samples of various gas oils analyzed using three analytical techniques: GC-MS, GC×GC-flame ionization detection (FID), and ion mobility spectrometry-mass spectrometry (IM-MS). We use hierarchical clustering using Pearson correlation as a similarity metric for the unsupervised analysis and build classification models using the Random Forest algorithm for the supervised analysis. We present a quantitative comparative assessment of clustering results via Fowlkes–Mallows index, and classification results via model accuracies in predicting the group of an unknown complex substance. We demonstrate the effect of (i) different grouping methodologies, (ii) data set size, and (iii) dimensionality reduction on the grouping quality, and (iv) different analytical techniques on the characterization of the complex substances. While the complexity and variability in chemical composition are an inherent feature of complex substances, we demonstrate how the choices of the data analysis and visualization methods can impact the communication of their characteristics to delineate sufficient similarity.}, number={10}, journal={PLOS ONE}, author={Onel, Melis and Beykal, Burcu and Ferguson, Kyle and Chiu, Weihsueh A. and McDonald, Thomas J. and Zhou, Lan and House, John S. and Wright, Fred A. and Sheen, David A. 
and Rusyn, Ivan and others},
  year={2019},
  month=oct
}

% Citation key below: the exported key contained a space ("et al."), which BibTeX and Biber cannot parse; the space is replaced by an underscore.
@article{lewis_borowa-mazgaj_conti_chappell_luo_bodnar_konganti_wright_threadgill_chiu_et_al._2019,
  title={Population-Based Analysis of {DNA} Damage and Epigenetic Effects of 1,3-Butadiene in the Mouse},
  volume={32},
  ISSN={1520-5010},
  DOI={10.1021/acs.chemrestox.9b00035},
  abstractNote={Metabolism of 1,3-butadiene, a known human and rodent carcinogen, results in formation of reactive epoxides, a key event in its carcinogenicity. Although mice exposed to 1,3-butadiene present DNA adducts in all tested tissues, carcinogenicity is limited to liver, lung, and lymphoid tissues. Previous studies demonstrated that strain- and tissue-specific epigenetic effects in response to 1,3-butadiene exposure may influence susceptibly to DNA damage and serve as a potential mechanism of tissue-specific carcinogenicity. This study aimed to investigate interindividual variability in the effects of 1,3-butadiene using a population-based mouse model. Male mice from 20 Collaborative Cross strains were exposed to 0 or 635 ppm 1,3-butadiene by inhalation (6 h/day, 5 days/week) for 2 weeks. We evaluated DNA damage and epigenetic effects in target (lung and liver) and nontarget (kidney) tissues of 1,3-butadiene-induced carcinogenesis. DNA damage was assessed by measuring N-7-(2,3,4-trihydroxybut-1-yl)-guanine (THB-Gua) adducts. To investigate global histone modification alterations, we evaluated the trimethylation and acetylation of histones H3 and H4 across tissues. Changes in global cytosine DNA methylation were evaluated from the levels of methylation of LINE-1 and SINE B1 retrotransposons. We quantified the degree of variation across strains, deriving a chemical-specific human variability factor to address population variability in carcinogenic risk, which is largely ignored in current cancer risk assessment practice. Quantitative trait locus mapping identified four candidate genes related to chromatin remodeling whose variation was associated with interstrain susceptibility. Overall, this study uses 1,3-butadiene to demonstrate how the Collaborative Cross mouse population can be used to identify the mechanisms for and quantify the degree of interindividual variability in tissue-specific effects that are relevant to chemically induced carcinogenesis.},
  number={5},
  journal={CHEMICAL RESEARCH IN TOXICOLOGY},
  author={Lewis, Lauren and Borowa-Mazgaj, Barbara and Conti, Aline and Chappell, Grace A. and Luo, Yu-Syuan and Bodnar, Wanda and Konganti, Kranti and Wright, Fred A. and Threadgill, David W. and Chiu, Weihsueh A. and others},
  year={2019},
  month=may,
  pages={887--898}
}

@article{luo_cichocki_hsieh_lewis_wright_threadgill_chiu_rusyn_2019,
  title={Using {Collaborative} {Cross} Mouse Population to Fill Data Gaps in Risk Assessment: A Case Study of Population-Based Analysis of Toxicokinetics and Kidney Toxicodynamics of Tetrachloroethylene},
  volume={127},
  ISSN={1552-9924},
  DOI={10.1289/EHP5105},
  abstractNote={Background: Interindividual variability in susceptibility remains poorly characterized for environmental chemicals such as tetrachloroethylene (PERC). Development of population-based experimental models provide a potential approach to fill this critical need in human health risk assessment. Objectives: In this study, we aimed to better characterize the contribution of glutathione (GSH) conjugation to kidney toxicity of PERC and the degree of associated interindividual toxicokinetic (TK) and toxicodynamic (TD) variability by using the Collaborative Cross (CC) mouse population. Methods: Male mice from 45 strains were intragastrically dosed with PERC (1,000mg/kg) or vehicle (5% Alkamuls EL-620 in saline), and time-course samples were collected for up to 24 h.
Population variability in TK of S-(1,2,2-trichlorovinyl)GSH (TCVG), S-(1,2,2-trichlorovinyl)-L-cysteine (TCVC), and N-acetyl-S-(1,2,2-trichlorovinyl)-L-cysteine (NAcTCVC) was quantified in serum, liver, and kidney, and analyzed using a toxicokinetic model. Effects of PERC on kidney weight, fatty acid metabolism–associated genes [Acot1 (Acyl-CoA thioesterase 1), Fabp1 (fatty acid-binding protein 1), and Ehhadh (enoyl-coenzyme A, hydratase/3-hydroxyacyl coenzyme A dehydrogenase)], and a marker of proximal tubular injury [KIM-1 (kidney injury molecule-1)/Hepatitis A virus cellular receptor 1 (Havcr1)] were evaluated. Finally, quantitative data on interstrain variability in both formation of GSH conjugation metabolites of PERC and its kidney effects was used to calculate adjustment factors for the interindividual variability in both TK and TD. Results: Mice treated with PERC had significantly lower kidney weight, higher kidney-to-body weight (BW) ratio, and higher expression of fatty acid metabolism–associated genes (Acot1, Fabp1, and Ehhadh) and a marker of proximal tubular injury (KIM-1/Havcr1). Liver levels of TCVG were significantly correlated with KIM-1/Havcr1 in kidney, consistent with kidney injury being associated with GSH conjugation. We found that the default uncertainty factor for human variability may be marginally adequate to protect 95%, but not more, of the population for kidney toxicity mediated by PERC. Discussion: Overall, this study demonstrates the utility of the CC mouse population in characterizing metabolism–toxicity interactions and quantifying interindividual variability. Further refinement of the characterization of interindividual variability can be accomplished by incorporating these data into in silico population models both for TK (such as a physiologically based pharmacokinetic model), as well as for toxicodynamic responses. 
https://doi.org/10.1289/EHP5105},
  number={6},
  journal={ENVIRONMENTAL HEALTH PERSPECTIVES},
  author={Luo, Yu-Syuan and Cichocki, Joseph A. and Hsieh, Nan-Hung and Lewis, Lauren and Wright, Fred A. and Threadgill, David W. and Chiu, Weihsueh A. and Rusyn, Ivan},
  year={2019},
  month=jun
}

% Citation keys of the next three entries: the exported keys contained a space ("et al."), which BibTeX and Biber cannot parse; the space is replaced by an underscore.
@article{grimm_blanchette_house_ferguson_hsieh_dalaijamts_wright_anson_wright_chiu_et_al._2018,
  title={A Human Population-Based Organotypic In Vitro Model for Cardiotoxicity Screening},
  volume={35},
  ISSN={1868-8551},
  url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85054462389&partnerID=MN8TOARS},
  DOI={10.14573/altex.1805301},
  abstractNote={Assessing inter-individual variability in responses to xenobiotics remains a substantial challenge, both in drug development with respect to pharmaceuticals and in public health with respect to environmental chemicals. Although approaches exist to characterize pharmacokinetic variability, there are no methods to routinely address pharmacodynamic variability. In this study, we aimed to demonstrate the feasibility of characterizing inter-individual variability in a human in vitro model. Specifically, we hypothesized that genetic variability across a population of iPSC-derived cardiomyocytes translates into reproducible variability in both baseline phenotypes and drug responses. We measured baseline and drug-related effects in iPSC-derived cardiomyocytes from 27 healthy donors on kinetic Ca2+ flux and high-content live cell imaging. Cells were treated in concentration-response with cardiotoxic drugs: isoproterenol (β-adrenergic receptor agonist/positive inotrope), propranolol (β-adrenergic receptor antagonist/negative inotrope), and cisapride (hERG channel inhibitor/QT prolongation). Cells from four of the 27 donors were further evaluated in terms of baseline and treatment-related gene expression. Reproducibility of phenotypic responses was evaluated across batches and time. iPSC-derived cardiomyocytes exhibited reproducible donor-specific differences in baseline function and drug-induced effects. We demonstrate the feasibility of using a panel of population-based organotypic cells from healthy donors as an animal replacement experimental model. This model can be used to rapidly screen drugs and chemicals for inter-individual variability in cardiotoxicity. This approach demonstrates the feasibility of quantifying inter-individual variability in xenobiotic responses and can be expanded to other cell types for which in vitro populations can be derived from iPSCs.},
  number={4},
  journal={ALTEX-ALTERNATIVES TO ANIMAL EXPERIMENTATION},
  author={Grimm, Fabian A. and Blanchette, Alexander and House, John S. and Ferguson, Kyle and Hsieh, Nan-Hung and Dalaijamts, Chimeddulam and Wright, Alec A. and Anson, Blake and Wright, Fred A. and Chiu, Weihsueh A. and others},
  year={2018},
  pages={441--452}
}

@article{frayling_beaumont_jones_yaghootkar_tuke_ruth_casanova_west_locke_sharp_et_al._2018,
  title={A common allele in {FGF21} associated with sugar intake is associated with body shape, lower total body-fat percentage, and higher blood pressure},
  volume={23},
  number={2},
  journal={Cell Reports},
  author={Frayling, T. M. and Beaumont, R. N. and Jones, S. E. and Yaghootkar, H. and Tuke, M. A. and Ruth, K. S. and Casanova, F. and West, B. and Locke, J. and Sharp, S. and others},
  year={2018},
  pages={327--336}
}

@article{polineni_dang_gallins_jones_pace_stonebraker_commander_krenicky_zhou_corvol_et_al._2018,
  title={Airway mucosal host defense is key to genomic regulation of cystic fibrosis lung disease severity},
  volume={197},
  number={1},
  journal={American Journal of Respiratory and Critical Care Medicine},
  author={Polineni, D. and Dang, H. and Gallins, P. J. and Jones, L. C. and Pace, R. G. and Stonebraker, J. R. and Commander, L. A. and Krenicky, J. E. and Zhou, Y. H. and Corvol, H.
and others},
  year={2018},
  pages={79--93}
}

@article{li_jima_wright_nobel_2018,
  title={{HT-eQTL}: integrative expression quantitative trait loci analysis in a large number of human tissues},
  volume={19},
  journal={BMC Bioinformatics},
  author={Li, G. and Jima, D. and Wright, F. A. and Nobel, A. B.},
  year={2018}
}

@article{grimm_house_wilson_sirenko_iwata_wright_ball_rusyn_2019,
  title={Multi-dimensional in vitro bioactivity profiling for grouping of glycol ethers},
  volume={101},
  ISSN={1096-0295},
  url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85057214979&partnerID=MN8TOARS},
  DOI={10.1016/j.yrtph.2018.11.011},
  abstractNote={High-content screening data derived from physiologically-relevant in vitro models promise to improve confidence in data-integrative groupings for read-across in human health safety assessments. The biological data-based read-across concept is especially applicable to bioactive chemicals with defined mechanisms of toxicity; however, the challenge of data-derived groupings for chemicals that are associated with little or no bioactivity has not been explored. In this study, we apply a suite of organotypic and population-based in vitro models for comprehensive bioactivity profiling of twenty E-Series and P-Series glycol ethers, solvents with a broad variation in toxicity ranging from relatively non-toxic to reproductive and hematopoetic system toxicants. Both E-Series and P-Series glycol ethers elicited cytotoxicity only at high concentrations (mM range) in induced pluripotent stem cell-derived hepatocytes and cardiomyocytes. Population-variability assessment comprised a study of cytotoxicity in 94 human lymphoblast cell lines from 9 populations and revealed differences in inter-individual variability across glycol ethers, but did not indicate population-specific effects. Data derived from various phenotypic and transcriptomic assays revealed consistent bioactivity trends between both cardiomyocytes and hepatocytes, indicating a more universal, rather than cell-type specific mode-of-action for the tested glycol ethers in vitro. In vitro bioactivity-based similarity assessment using Toxicological Priority Index (ToxPi) showed that glycol ethers group according to their alcohol chain length, longer chains were associated with increased bioactivity. While overall in vitro bioactivity profiles did not correlate with in vivo toxicity data on glycol ethers, in vitro bioactivity of E-series glycol ethers were indicative of and correlated with in vivo irritation scores.},
  journal={REGULATORY TOXICOLOGY AND PHARMACOLOGY},
  publisher={Elsevier BV},
  author={Grimm, Fabian A. and House, John S. and Wilson, Melinda R. and Sirenko, Oksana and Iwata, Yasuhiro and Wright, Fred A. and Ball, Nicholas and Rusyn, Ivan},
  year={2019},
  month=feb,
  pages={91--102}
}

@article{house_mendez_maguire_gonzalez-nahm_huang_daniels_murphy_fuemmeler_wright_hoyo_2018,
  title={Periconceptional maternal {Mediterranean} diet is associated with favorable offspring behaviors and altered {CpG} methylation of imprinted genes},
  volume={6},
  url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85053720031&partnerID=MN8TOARS},
  DOI={10.3389/fcell.2018.00107},
  abstractNote={Background: Maternal diet during pregnancy has been shown to influence the child neuro-developmental outcomes. Studies examining effects of dietary patterns on offspring behavior are sparse. Objective: Determine if maternal adherence to a Mediterranean diet is associated with child behavioral outcomes assessed early in life, and to evaluate the role of differentially methylated regions (DMRs) regulating genomically imprinted genes in these associations.
Methods: Among 325 mother/infant pairs, we used regression models to evaluate the association between tertiles of maternal periconceptional Mediterranean diet adherence (MDA) scores derived from a Food Frequency Questionnaire, and social and emotional scores derived from the Infant Toddler Social and Emotional Assessment (ITSEA) questionnaire in the second year of life. Methylation of nine genomically imprinted genes was measured to determine if MDA was associated with CpG methylation. Results: Child depression was inversely associated with maternal MDA (Bonferroni-corrected p = 0.041). While controlling for false-discovery, compared to offspring of women with the lowest MDA tertile, those with MDA scores in middle and high MDA tertiles had decreased odds for atypical behaviors [OR (95% CI) = 0.40 (0.20, 0.78) for middle and 0.40 (0.17, 0.92) for highest tertile], for maladaptive behaviors [0.37 (0.18, 0.72) for middle tertile and 0.42 (0.18, 0.95) for highest tertile] and for an index of autism spectrum disorder behaviors [0.46 (0.23, 0.90) for middle and 0.35 (0.15, 0.80) for highest tertile]. Offspring of women with the highest MDA tertile were less likely to exhibit depressive [OR = 0.28 (0.12, 0.64)] and anxiety [0.42 (0.18, 0.97)] behaviors and increased odds of social relatedness [2.31 (1.04, 5.19)] behaviors when compared to low MDA mothers. Some associations varied by sex. Perinatal MDA score was associated with methylation differences for imprinted control regions of PEG10/SGCE [females: Beta (95% CI) = 1.66 (0.52, 2.80) – Bonferroni-corrected p = 0.048; males: -0.56 (-1.13, -0.00)], as well as both MEG3 and IGF2 in males [0.97 (0.00, 1.94)] and -0.92 (-1.65, -0.19) respectively. 
Conclusion: In this ethnically diverse cohort, maternal adherence to a Mediterranean diet in early pregnancy was associated with favorable neurobehavioral outcomes in early childhood and with sex-dependent methylation differences of MEG3, IGF2, and SGCE/PEG10 DMRs.}, number={SEP}, journal={Frontiers in Cell and Developmental Biology}, author={House, John S. and Mendez, M. and Maguire, R.L. and Gonzalez-Nahm, S. and Huang, Z. and Daniels, J. and Murphy, S.K. and Fuemmeler, B.F. and Wright, F.A. and Hoyo, C.}, year={2018}, pages={107} } @article{singleton_lee_dickey_stroud_scholl_wright_aitken_2018, title={Polyphasic characterization of four soil-derived phenanthrene-degrading Acidovorax strains and proposal of Acidovorax carolinensis sp nov.}, volume={41}, ISSN={["0723-2020"]}, DOI={10.1016/j.syapm.2018.06.001}, abstractNote={Four bacterial strains identified as members of the Acidovorax genus were isolated from two geographically distinct but similarly contaminated soils in North Carolina, USA, characterized, and their genomes sequenced. Their 16S rRNA genes were highly similar to those previously recovered during stable-isotope probing (SIP) of one of the soils with the polycyclic aromatic hydrocarbon (PAH) phenanthrene. Heterotrophic growth of all strains occurred with a number of organic acids, as well as phenanthrene, but no other tested PAHs. Optimal growth occurred aerobically under mesophilic temperature, neutral pH, and low salinity conditions. Predominant fatty acids were C16:1ω7c/C16:1ω6c, C16:0, and C18:1ω7c, and were consistent with the genus. Genomic G + C contents ranged from 63.6 to 64.2%. A combination of whole genome comparisons and physiological analyses indicated that these four strains likely represent a single species within the Acidovorax genus. 
Chromosomal genes for phenanthrene degradation to phthalate were nearly identical to highly conserved regions in phenanthrene-degrading Delftia, Burkholderia, Alcaligenes, and Massilia species in regions flanked by transposable or extrachromosomal elements. The lower degradation pathway for phenanthrene metabolism was inferred by comparisons to described genes and proteins. The novel species Acidovorax carolinensis sp. nov. is proposed, comprising the four strains described in this study with strain NA3T as the type strain (=LMG 30136, =DSM 105008).}, number={5}, journal={SYSTEMATIC AND APPLIED MICROBIOLOGY}, author={Singleton, David R. and Lee, Janice and Dickey, Allison N. and Stroud, Aaron and Scholl, Elizabeth H. and Wright, Fred A. and Aitken, Michael D.}, year={2018}, month={Sep}, pages={460–472} } @article{venkatratnam_house_konganti_mckenney_threadgill_chiu_aylor_wright_rusyn_2018, title={Population-based dose-response analysis of liver transcriptional response to trichloroethylene in mouse}, volume={29}, ISSN={["1432-1777"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85040693289&partnerID=MN8TOARS}, DOI={10.1007/s00335-018-9734-y}, abstractNote={Studies of gene expression are common in toxicology and provide important clues to mechanistic understanding of adverse effects of chemicals. Most prior studies have been performed in a single strain or cell line; however, gene expression is heavily influenced by the genetic background, and these genotype-expression differences may be key drivers of inter-individual variation in response to chemical toxicity. In this study, we hypothesized that the genetically diverse Collaborative Cross mouse population can be used to gain insight and suggest mechanistic hypotheses for the dose- and genetic background-dependent effects of chemical exposure. This hypothesis was tested using a model liver toxicant trichloroethylene (TCE). Liver transcriptional responses to TCE exposure were evaluated 24 h after dosing. 
Transcriptomic dose–responses were examined for both TCE and its major oxidative metabolite trichloroacetic acid (TCA). As expected, peroxisome- and fatty acid metabolism-related pathways were among the most dose–responsive enriched pathways in all strains. However, nearly half of the TCE-induced liver transcriptional perturbation was strain-dependent, with abundant evidence of strain/dose interaction, including in the peroxisomal signaling-associated pathways. These effects were highly concordant between the administered TCE dose and liver levels of TCA. Dose–response analysis of gene expression at the pathway level yielded points of departure similar to those derived from the traditional toxicology studies for both non-cancer and cancer effects. Mapping of expression–genotype–dose relationships revealed some significant associations; however, the effects of TCE on gene expression in liver appear to be highly polygenic traits that are challenging to positionally map. This study highlights the usefulness of mouse population-based studies in assessing inter-individual variation in toxicological responses, but cautions that genetic mapping may be challenging because of the complexity in gene exposure–dose relationships.}, number={1-2}, journal={MAMMALIAN GENOME}, author={Venkatratnam, Abhishek and House, John S. and Konganti, Kranti and McKenney, Connor and Threadgill, David W. and Chiu, Weihsueh A. and Aylor, David L. and Wright, Fred A. and Rusyn, Ivan}, year={2018}, month={Feb}, pages={168–181} } @article{marvel_to_grimm_wright_rusyn_reif_2018, title={ToxPi Graphical User Interface 2.0: Dynamic exploration, visualization, and sharing of integrated data models}, volume={19}, journal={BMC Bioinformatics}, author={Marvel, S. W. and To, K. and Grimm, F. A. and Wright, F. A. and Rusyn, I. and Reif, D. 
M.}, year={2018} } @article{house_grimm_jima_zhou_rusyn_wright_2017, title={A Pipeline for high-throughput concentration response modeling of gene expression for toxicogenomics}, volume={8}, journal={Frontiers in Genetics}, author={House, J. S. and Grimm, F. A. and Jima, D. D. and Zhou, Y. H. and Rusyn, I. and Wright, F. A.}, year={2017} } @article{rudra_zhou_wright_2017, title={A procedure to detect general association based on concentration of ranks}, volume={6}, number={1}, journal={Stat}, author={Rudra, P. and Zhou, Y. H. and Wright, F. A.}, year={2017}, pages={88–101} } @article{chiu_wright_rusyn_2017, title={A tiered, Bayesian approach to estimating population variability for regulatory decision-making}, volume={34}, number={3}, journal={Altex-alternatives to Animal Experimentation}, author={Chiu, W. A. and Wright, F. A. and Rusyn, I.}, year={2017}, pages={377–388} } @article{henriquez_house_miller_snow_fisher_ren_schladweiler_ledbetter_wright_kodavanti_2017, title={Adrenal-derived stress hormones modulate ozone-induced lung injury and inflammation}, volume={329}, ISSN={["1096-0333"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85020944922&partnerID=MN8TOARS}, DOI={10.1016/j.taap.2017.06.009}, abstractNote={Ozone-induced systemic effects are modulated through activation of the neuro-hormonal stress response pathway. Adrenal demedullation (DEMED) or bilateral total adrenalectomy (ADREX) inhibits systemic and pulmonary effects of acute ozone exposure. To understand the influence of adrenal-derived stress hormones in mediating ozone-induced lung injury/inflammation, we assessed global gene expression (mRNA sequencing) and selected proteins in lung tissues from male Wistar-Kyoto rats that underwent DEMED, ADREX, or sham surgery (SHAM) prior to their exposure to air or ozone (1 ppm), 4 h/day for 1 or 2 days. 
Ozone exposure significantly changed the expression of over 2300 genes in lungs of SHAM rats, and these changes were markedly reduced in DEMED and ADREX rats. SHAM surgery but not DEMED or ADREX resulted in activation of multiple ozone-responsive pathways, including glucocorticoid, acute phase response, NRF2, and PI3K-AKT. Predicted targets from sequencing data showed a similarity between transcriptional changes induced by ozone and adrenergic and steroidal modulation of effects in SHAM but not ADREX rats. Ozone-induced increases in lung Il6 in SHAM rats coincided with neutrophilic inflammation, but were diminished in DEMED and ADREX rats. Although ozone exposure in SHAM rats did not significantly alter mRNA expression of Ifnγ and Il-4, the IL-4 protein and ratio of IL-4 to IFNγ (IL-4/IFNγ) proteins increased suggesting a tendency for a Th2 response. This did not occur in ADREX and DEMED rats. We demonstrate that ozone-induced lung injury and neutrophilic inflammation require the presence of circulating epinephrine and corticosterone, which transcriptionally regulates signaling mechanisms involved in this response.}, journal={TOXICOLOGY AND APPLIED PHARMACOLOGY}, author={Henriquez, Andres and House, John and Miller, Desinia B. and Snow, Samantha J. and Fisher, Anna and Ren, Hongzu and Schladweiler, Mette C. and Ledbetter, Allen D. and Wright, Fred and Kodavanti, Urmila P.}, year={2017}, month={Aug}, pages={249–258} } @article{li_shabalin_rusyn_wright_nobel_2018, title={An empirical Bayes approach for multiple tissue eQTL analysis}, volume={19}, DOI={10.1093/biostatistics/kxx048}, abstractNote={SUMMARY Expression quantitative trait locus (eQTL) analyses identify genetic markers associated with the expression of a gene. Most up‐to‐date eQTL studies consider the connection between genetic variation and expression in a single tissue. 
Multi‐tissue analyses have the potential to improve findings in a single tissue, and elucidate the genotypic basis of differences between tissues. In this article, we develop a hierarchical Bayesian model (MT‐eQTL) for multi‐tissue eQTL analysis. MT‐eQTL explicitly captures patterns of variation in the presence or absence of eQTL, as well as the heterogeneity of effect sizes across tissues. We devise an efficient Expectation‐Maximization (EM) algorithm for model fitting. Inferences concerning eQTL detection and the configuration of eQTL across tissues are derived from the adaptive thresholding of local false discovery rates, and maximum a posteriori estimation, respectively. We also provide theoretical justification of the adaptive procedure. We investigate the MT‐eQTL model through an extensive analysis of a 9‐tissue data set from the GTEx initiative.}, number={3}, journal={Biostatistics (Oxford, England)}, author={Li, G. and Shabalin, A. A. and Rusyn, I. and Wright, F. A. and Nobel, A. B.}, year={2018}, pages={391–406} } @article{lee_sun_wright_zou_2017, title={An improved and explicit surrogate variable analysis procedure by coefficient adjustment}, volume={104}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asx018}, abstractNote={Unobserved environmental, demographic and technical factors can adversely affect the estimation and testing of the effects of primary variables. Surrogate variable analysis, proposed to tackle this problem, has been widely used in genomic studies. To estimate hidden factors that are correlated with the primary variables, surrogate variable analysis performs principal component analysis either on a subset of features or on all features, but weighting each differently. However, existing approaches may fail to identify hidden factors that are strongly correlated with the primary variables, and the extra step of feature selection and weight calculation makes the theoretical investigation of surrogate variable analysis challenging. 
In this paper, we propose an improved surrogate variable analysis, using all measured features, that has a natural connection with restricted least squares, which allows us to study its theoretical properties. Simulation studies and real‐data analysis show that the method is competitive with state‐of‐the‐art methods.}, number={2}, journal={BIOMETRIKA}, author={Lee, Seunggeun and Sun, Wei and Wright, Fred A. and Zou, Fei}, year={2017}, month={Jun}, pages={303–316} } @article{zhou_marron_wright_2018, title={Computation of ancestry scores with mixed families and unrelated individuals}, volume={74}, ISSN={["1541-0420"]}, url={https://doi.org/10.1111/biom.12708}, DOI={10.1111/biom.12708}, abstractNote={Summary The issue of robustness to family relationships in computing genotype ancestry scores such as eigenvector projections has received increased attention in genetic association, and is particularly challenging when sets of both unrelated individuals and closely related family members are included. The current standard is to compute loadings (left singular vectors) using unrelated individuals and to compute projected scores for remaining family members. However, projected ancestry scores from this approach suffer from shrinkage toward zero. We consider two main novel strategies: (i) matrix substitution based on decomposition of a target family-orthogonalized covariance matrix, and (ii) using family-averaged data to obtain loadings. We illustrate the performance via simulations, including resampling from 1000 Genomes Project data, and analysis of a cystic fibrosis dataset. The matrix substitution approach has similar performance to the current standard, but is simple and uses only a genotype covariance matrix, while the family-average method shows superior performance. 
Our approaches are accompanied by novel ancillary approaches that provide considerable insight, including individual-specific eigenvalue scree plots.}, number={1}, journal={BIOMETRICS}, publisher={Wiley}, author={Zhou, Yi-Hui and Marron, James S. and Wright, Fred A.}, year={2018}, month={Mar}, pages={155–164} } @article{jansen_hottenga_nivard_abdellaoui_laport_geus_wright_penninx_boomsma_2017, title={Conditional eQTL analysis reveals allelic heterogeneity of gene expression}, volume={26}, ISSN={["1460-2083"]}, DOI={10.1093/hmg/ddx043}, abstractNote={In recent years, multiple eQTL (expression quantitative trait loci) catalogs have become available that can help understand the functionality of complex trait-related single nucleotide polymorphisms (SNPs). In eQTL catalogs, gene expression is often strongly associated with multiple SNPs, which may reflect either one or multiple independent associations. Conditional eQTL analysis allows a distinction between dependent and independent eQTLs. We performed conditional eQTL analysis in 4,896 peripheral blood microarray gene expression samples. Our analysis showed that 35% of genes with a cis eQTL have at least two independent cis eQTLs; for several genes up to 13 independent cis eQTLs were identified. Also, 12% (671) of the independent cis eQTLs identified in conditional analyses were not significant in unconditional analyses. The number of GWAS catalog SNPs identified as eQTL in the conditional analyses increases with 24% as compared to unconditional analyses. We provide an online conditional cis eQTL mapping catalog for whole blood (https://eqtl.onderzoek.io/), which can be used to lookup eQTLs more accurately than in standard unconditional whole blood eQTL databases.}, number={8}, journal={HUMAN MOLECULAR GENETICS}, author={Jansen, Rick and Hottenga, Jouke-Jan and Nivard, Michel G. and Abdellaoui, Abdel and Laport, Bram and Geus, Eco J. and Wright, Fred A. and Penninx, Brenda W. J. H. 
and Boomsma, Dorret I.}, year={2017}, month={Apr}, pages={1444–1451} } @article{tan_li_shanmugam_piskol_kohler_young_liu_zhang_ramaswami_ariyoshi_et al._2017, title={Dynamic landscape and regulation of RNA editing in mammals}, volume={550}, ISSN={0028-0836 1476-4687}, url={http://dx.doi.org/10.1038/NATURE24041}, DOI={10.1038/NATURE24041}, abstractNote={Using the GTEx data and others, a comprehensive analysis of adenosine-to-inosine RNA editing in mammals is presented; targets of the various ADAR enzymes are identified, as are several potential regulators of editing, such as AIMP2. The GTEx (Genotype-Tissue Expression) Consortium has established a reference catalogue and associated tissue biobank for gene-expression levels across individuals for diverse tissues of the human body, with a broad sampling of normal, non-diseased human tissues from postmortem donors. The consortium now presents the deepest survey of gene expression across multiple tissues and individuals to date, encompassing 7,051 samples from 449 donors across 44 human tissues. Barbara Engelhardt and colleagues characterize the relationship between genetic variation and gene expression, and find that most genes are regulated by genetic variation near to the affected gene. In accompanying GTEx studies, Alexis Battle, Stephen Montgomery and colleagues examine the effect of rare genetic variation on gene expression across human tissues, Daniel MacArthur and colleagues systematically survey the landscape of X chromosome inactivation in human tissues, and Jin Billy Li and colleagues provide a comprehensive cross-species analysis of adenosine-to-inosine RNA editing in mammals. In an accompanying News & Views, Michelle Ward and Yoav Gilad put the latest results in context and discuss how these findings are helping to crack the regulatory code of the human genome. 
Adenosine-to-inosine (A-to-I) RNA editing is a conserved post-transcriptional mechanism mediated by ADAR enzymes that diversifies the transcriptome by altering selected nucleotides in RNA molecules1. Although many editing sites have recently been discovered2,3,4,5,6,7, the extent to which most sites are edited and how the editing is regulated in different biological contexts are not fully understood8,9,10. Here we report dynamic spatiotemporal patterns and new regulators of RNA editing, discovered through an extensive profiling of A-to-I RNA editing in 8,551 human samples (representing 53 body sites from 552 individuals) from the Genotype-Tissue Expression (GTEx) project and in hundreds of other primate and mouse samples. We show that editing levels in non-repetitive coding regions vary more between tissues than editing levels in repetitive regions. Globally, ADAR1 is the primary editor of repetitive sites and ADAR2 is the primary editor of non-repetitive coding sites, whereas the catalytically inactive ADAR3 predominantly acts as an inhibitor of editing. Cross-species analysis of RNA editing in several tissues revealed that species, rather than tissue type, is the primary determinant of editing levels, suggesting stronger cis-directed regulation of RNA editing for most sites, although the small set of conserved coding sites is under stronger trans-regulation. In addition, we curated an extensive set of ADAR1 and ADAR2 targets and showed that many editing sites display distinct tissue-specific regulation by the ADAR enzymes in vivo. Further analysis of the GTEx data revealed several potential regulators of editing, such as AIMP2, which reduces editing in muscles by enhancing the degradation of the ADAR proteins. 
Collectively, our work provides insights into the complex cis- and trans-regulation of A-to-I editing.}, number={7675}, journal={Nature}, publisher={Springer Science and Business Media LLC}, author={Tan, Meng How and Li, Qin and Shanmugam, Raghuvaran and Piskol, Robert and Kohler, Jennefer and Young, Amy N. and Liu, Kaiwen Ivy and Zhang, Rui and Ramaswami, Gokul and Ariyoshi, Kentaro and et al.}, year={2017}, month={Oct}, pages={249–254} } @article{venkatratnam_furuya_kosyk_gold_bodnar_konganti_threadgill_gillespie_aylor_wright_et al._2017, title={Editor's Highlight: Collaborative Cross Mouse Population Enables Refinements to Characterization of the Variability in Toxicokinetics of Trichloroethylene and Provides Genetic Evidence for the Role of PPAR Pathway in Its Oxidative Metabolism}, volume={158}, ISSN={["1096-0929"]}, DOI={10.1093/toxsci/kfx065}, abstractNote={Background Trichloroethylene (TCE) is a known carcinogen in humans and rodents. Previous studies of inter-strain variability in TCE metabolism were conducted in multi-strain panels of classical inbred mice with limited genetic diversity to identify gene-environment interactions associated with chemical exposure. Objectives To evaluate inter-strain variability in TCE metabolism and identify genetic determinants that are associated with TCE metabolism and effects using Collaborative Cross (CC), a large panel of genetically diverse strains of mice. Methods We administered a single oral dose of 0, 24, 80, 240, or 800 mg/kg of TCE to mice from 50 CC strains, and collected organs 24 h post-dosing. Levels of trichloroacetic acid (TCA), a major oxidative metabolite of TCE were measured in multiple tissues. Protein expression and activity levels of TCE-metabolizing enzymes were evaluated in the liver. Liver transcript levels of known genes perturbed by TCE exposure were also quantified. Genetic association mapping was performed on the acquired phenotypes. 
Results TCA levels varied in a dose- and strain-dependent manner in liver, kidney, and serum. The variability in TCA levels among strains did not correlate with expression or activity of a number of enzymes known to be involved in TCE oxidation. Peroxisome proliferator-activated receptor alpha (PPARα)-responsive genes were found to be associated with strain-specific differences in TCE metabolism. Conclusions This study shows that CC mouse population is a valuable tool to quantitatively evaluate inter-individual variability in chemical metabolism and to identify genes and pathways that may underpin population differences.}, number={1}, journal={TOXICOLOGICAL SCIENCES}, author={Venkatratnam, Abhishek and Furuya, Shinji and Kosyk, Oksana and Gold, Avram and Bodnar, Wanda and Konganti, Kranti and Threadgill, David W. and Gillespie, Kevin M. and Aylor, David L. and Wright, Fred A. and et al.}, year={2017}, month={Jul}, pages={48–62} } @article{palowitch_shabalin_zhou_nobel_wright_2018, title={Estimation of cis-eQTL effect sizes using a log of linear model}, volume={74}, ISSN={["1541-0420"]}, url={https://doi.org/10.1111/biom.12810}, DOI={10.1111/biom.12810}, abstractNote={Summary The study of expression Quantitative Trait Loci (eQTL) is an important problem in genomics and biomedicine. While detection (testing) of eQTL associations has been widely studied, less work has been devoted to the estimation of eQTL effect size. To reduce false positives, detection methods frequently rely on linear modeling of rank-based normalized or log-transformed gene expression data. Unfortunately, these approaches do not correspond to the simplest model of eQTL action, and thus yield estimates of eQTL association that can be uninterpretable and inaccurate. 
In this article, we propose a new, log-of-linear model for eQTL action, termed ACME, that captures allelic contributions to cis-acting eQTLs in an additive fashion, yielding effect size estimates that correspond to a biologically coherent model of cis-eQTLs. We describe a non-linear least-squares algorithm to fit the model by maximum likelihood, and obtain corresponding p-values. We perform careful investigation of the model using a combination of simulated data and data from the Genotype Tissue Expression (GTEx) project. Our results reveal little evidence for dominance effects, a parsimonious result that accords with a simple biological model for allele-specific expression and supports use of the ACME model. We show that Type-I error is well-controlled under our approach in a realistic setting, so that rank-based normalizations are unnecessary. Furthermore, we show that such normalizations can be detrimental to power and estimation accuracy under the proposed model. We then show, through effect size analyses of whole-genome cis-eQTLs in the GTEx data, that using standard normalizations instead of ACME noticeably affects the ranking and sign of estimates.}, number={2}, journal={BIOMETRICS}, publisher={Wiley}, author={Palowitch, John and Shabalin, Andrey and Zhou, Yi-Hui and Nobel, Andrew B. and Wright, Fred A.}, year={2018}, month={Jun}, pages={616–625} } @article{liu_finucane_gusev_bhatia_gazal_o'connor_bulik-sullivan_wright_sullivan_neale_et al._2017, title={Functional Architectures of Local and Distal Regulation of Gene Expression in Multiple Human Tissues}, volume={100}, ISSN={["1537-6605"]}, DOI={10.1016/j.ajhg.2017.03.002}, abstractNote={Genetic variants that modulate gene expression levels play an important role in the etiology of human diseases and complex traits. 
Although large-scale eQTL mapping studies routinely identify many local eQTLs, the molecular mechanisms by which genetic variants regulate expression remain unclear, particularly for distal eQTLs, which these studies are not well powered to detect. Here, we leveraged all variants (not just those that pass stringent significance thresholds) to analyze the functional architecture of local and distal regulation of gene expression in 15 human tissues by employing an extension of stratified LD-score regression that produces robust results in simulations. The top enriched functional categories in local regulation of peripheral-blood gene expression included coding regions (11.41×), conserved regions (4.67×), and four histone marks (p < 5 × 10-5 for all enrichments); local enrichments were similar across the 15 tissues. We also observed substantial enrichments for distal regulation of peripheral-blood gene expression: coding regions (4.47×), conserved regions (4.51×), and two histone marks (p < 3 × 10-7 for all enrichments). Analyses of the genetic correlation of gene expression across tissues confirmed that local regulation of gene expression is largely shared across tissues but that distal regulation is highly tissue specific. Our results elucidate the functional components of the genetic architecture of local and distal regulation of gene expression.}, number={4}, journal={AMERICAN JOURNAL OF HUMAN GENETICS}, author={Liu, Xuanyao and Finucane, Hilary K. and Gusev, Alexander and Bhatia, Gaurav and Gazal, Steven and O'Connor, Luke and Bulik-Sullivan, Brendan and Wright, Fred A. and Sullivan, Patrick F. and Neale, Benjamin M. 
and et al.}, year={2017}, month={Apr}, pages={605–616} } @article{aguet_brown_castel_davis_he_jo_mohammadi_park_parsana_segrè_et al._2017, title={Genetic effects on gene expression across human tissues}, volume={550}, ISSN={0028-0836 1476-4687}, url={http://dx.doi.org/10.1038/NATURE24277}, DOI={10.1038/NATURE24277}, abstractNote={Abstract Characterization of the molecular function of the human genome and its variation across individuals is essential for identifying the cellular mechanisms that underlie human genetic traits and diseases. The Genotype-Tissue Expression (GTEx) project aims to characterize variation in gene expression levels across individuals and diverse tissues of the human body, many of which are not easily accessible. Here we describe genetic effects on gene expression levels across 44 human tissues. We find that local genetic variation affects gene expression levels for the majority of genes, and we further identify inter-chromosomal genetic effects for 93 genes and 112 loci. On the basis of the identified genetic effects, we characterize patterns of tissue specificity, compare local and distal effects, and evaluate the functional properties of the genetic effects. We also demonstrate that multi-tissue, multi-individual data can be used to identify genes and pathways affected by human disease-associated variation, enabling a mechanistic interpretation of gene regulation and the genetic basis of disease.}, number={7675}, journal={Nature}, publisher={Springer Science and Business Media LLC}, author={Aguet, François and Brown, Andrew A. and Castel, Stephane E. and Davis, Joe R. and He, Yuan and Jo, Brian and Mohammadi, Pejman and Park, YoSon and Parsana, Princy and Segrè, Ayellet V. and et al.}, editor={Battle, Alexis and Brown, Christopher D. and Engelhardt, Barbara E. 
and Montgomery, Stephen B.}, year={2017}, month={Oct}, pages={204–213} } @article{aguet_brown_castel_davis_he_jo_mohammadi_park_parsana_segre_et al._2017, title={Genetic effects on gene expression across human tissues}, volume={550}, number={7675}, journal={Nature}, author={Aguet, F. and Brown, A. A. and Castel, S. E. and Davis, J. R. and He, Y. and Jo, B. and Mohammadi, P. and Park, Y. and Parsana, P. and Segre, A. V. and et al.}, year={2017}, pages={204–213} } @article{tukiainen_villani_yen_rivas_marshall_satija_aguirre_gauthier_fleharty_kirby_et al._2017, title={Landscape of X chromosome inactivation across human tissues}, volume={550}, ISSN={0028-0836 1476-4687}, url={http://dx.doi.org/10.1038/NATURE24265}, DOI={10.1038/NATURE24265}, abstractNote={Abstract X chromosome inactivation (XCI) silences transcription from one of the two X chromosomes in female mammalian cells to balance expression dosage between XX females and XY males. XCI is, however, incomplete in humans: up to one-third of X-chromosomal genes are expressed from both the active and inactive X chromosomes (Xa and Xi, respectively) in female cells, with the degree of ‘escape’ from inactivation varying between genes and individuals1,2. The extent to which XCI is shared between cells and tissues remains poorly characterized3,4, as does the degree to which incomplete XCI manifests as detectable sex differences in gene expression5 and phenotypic traits6. Here we describe a systematic survey of XCI, integrating over 5,500 transcriptomes from 449 individuals spanning 29 tissues from GTEx (v6p release) and 940 single-cell transcriptomes, combined with genomic sequence data. We show that XCI at 683 X-chromosomal genes is generally uniform across human tissues, but identify examples of heterogeneity between tissues, individuals and cells. 
We show that incomplete XCI affects at least 23% of X-chromosomal genes, identify seven genes that escape XCI with support from multiple lines of evidence and demonstrate that escape from XCI results in sex biases in gene expression, establishing incomplete XCI as a mechanism that is likely to introduce phenotypic diversity6,7. Overall, this updated catalogue of XCI across human tissues helps to increase our understanding of the extent and impact of the incompleteness in the maintenance of XCI.}, number={7675}, journal={Nature}, publisher={Springer Science and Business Media LLC}, author={Tukiainen, Taru and Villani, Alexandra-Chloé and Yen, Angela and Rivas, Manuel A. and Marshall, Jamie L. and Satija, Rahul and Aguirre, Matt and Gauthier, Laura and Fleharty, Mark and Kirby, Andrew and et al.}, year={2017}, month={Oct}, pages={244–248} } @article{li_kim_tsang_davis_damani_chiang_hess_zappala_strober_scott_et al._2017, title={The impact of rare variation on gene expression across tissues}, volume={550}, ISSN={0028-0836 1476-4687}, url={http://dx.doi.org/10.1038/NATURE24267}, DOI={10.1038/NATURE24267}, abstractNote={Abstract Rare genetic variants are abundant in humans and are expected to contribute to individual disease risk1,2,3,4. While genetic association studies have successfully identified common genetic variants associated with susceptibility, these studies are not practical for identifying rare variants1,5. Efforts to distinguish pathogenic variants from benign rare variants have leveraged the genetic code to identify deleterious protein-coding alleles1,6,7, but no analogous code exists for non-coding variants. Therefore, ascertaining which rare variants have phenotypic effects remains a major challenge. Rare non-coding variants have been associated with extreme gene expression in studies using single tissues8,9,10,11, but their effects across tissues are unknown. 
Here we identify gene expression outliers, or individuals showing extreme expression levels for a particular gene, across 44 human tissues by using combined analyses of whole genomes and multi-tissue RNA-sequencing data from the Genotype-Tissue Expression (GTEx) project v6p release12. We find that 58% of underexpression and 28% of overexpression outliers have nearby conserved rare variants compared to 8% of non-outliers. Additionally, we developed RIVER (RNA-informed variant effect on regulation), a Bayesian statistical model that incorporates expression data to predict a regulatory effect for rare variants with higher accuracy than models using genomic annotations alone. Overall, we demonstrate that rare variants contribute to large gene expression changes across tissues and provide an integrative method for interpretation of rare variants in individual genomes.}, number={7675}, journal={Nature}, publisher={Springer Science and Business Media LLC}, author={Li, Xin and Kim, Yungil and Tsang, Emily K. and Davis, Joe R. and Damani, Farhan N. and Chiang, Colby and Hess, Gaelen T. and Zappala, Zachary and Strober, Benjamin J. and Scott, Alexandra J. and et al.}, year={2017}, month={Oct}, pages={239–243} } @article{grimm_iwata_sirenko_chappell_wright_reif_braisted_gerhold_yeakley_shepard_et al._2016, title={A chemical-biological similarity-based grouping of complex substances as a prototype approach for evaluating chemical alternatives}, volume={18}, ISSN={["1463-9270"]}, url={http://europepmc.org/abstract/med/28035192}, DOI={10.1039/c6gc01147k}, abstractNote={An experimental and computational approach to categorizing UVCBs according to chemical and biological similarities.}, number={16}, journal={GREEN CHEMISTRY}, author={Grimm, Fabian A. and Iwata, Yasuhiro and Sirenko, Oksana and Chappell, Grace A. and Wright, Fred A. and Reif, David M. and Braisted, John and Gerhold, David L. and Yeakley, Joanne M. 
and Shepard, Peter and others}, year={2016}, pages={4407--4419} } @article{yang_huang_petralia_long_zhang_argmann_zhao_mobbs_schadt_zhu_et al._2016, title={Erratum: Corrigendum: Synchronized age-related gene expression changes across multiple tissues in human and the link to complex diseases}, volume={6}, ISSN={2045-2322}, url={http://dx.doi.org/10.1038/SREP19384}, DOI={10.1038/SREP19384}, abstractNote={Aging is one of the most important biological processes and is a known risk factor for many age-related diseases in human. Studying age-related transcriptomic changes in tissues across the whole body can provide valuable information for a holistic understanding of this fundamental process. In this work, we catalogue age-related gene expression changes in nine tissues from nearly two hundred individuals collected by the Genotype-Tissue Expression (GTEx) project. In general, we find the aging gene expression signatures are very tissue specific. However, enrichment for some well-known aging components such as mitochondria biology is observed in many tissues. Different levels of cross-tissue synchronization of age-related gene expression changes are observed, and some essential tissues (e.g., heart and lung) show much stronger “co-aging” than other tissues based on a principal component analysis. The aging gene signatures and complex disease genes show a complex overlapping pattern and only in some cases, we see that they are significantly overlapped in the tissues affected by the corresponding diseases. In summary, our analyses provide novel insights to the co-regulation of age-related gene expression in multiple tissues; it also presents a tissue-specific view of the link between aging and age-related diseases.}, number={1}, journal={Scientific Reports}, publisher={Springer Science and Business Media LLC}, author={Yang, Jialiang and Huang, Tao and Petralia, Francesca and Long, Quan and Zhang, Bin and Argmann, Carmen and Zhao, Yong and Mobbs, Charles V. 
and Schadt, Eric E. and Zhu, Jun and others}, year={2016}, month=jan, pages={19384} } @article{jansen_penninx_madar_xia_milaneschi_hottenga_hammerschlag_beekman_wee_smit_et al._2016, title={Gene expression in major depressive disorder}, volume={21}, number={3}, journal={Molecular Psychiatry}, author={Jansen, R. and Penninx, B. W. J. H. and Madar, V. and Xia, K. and Milaneschi, Y. and Hottenga, J. J. and Hammerschlag, A. R. and Beekman, A. and van der Wee, N. and Smit, J. H. and others}, year={2016}, pages={339--347} } @article{tian_patel_ridpath_chen_zhou_neo_clement_takata_takeda_sale_et al._2016, title={Homologous recombination and translesion {DNA} synthesis play critical roles on tolerating {DNA} damage caused by trace levels of hexavalent chromium}, volume={11}, number={12}, journal={PLoS One}, author={Tian, X. and Patel, K. and Ridpath, J. R. and Chen, Y. J. and Zhou, Y. H. and Neo, D. and Clement, J. and Takata, M. and Takeda, S. and Sale, J. and others}, year={2016} } @article{gusev_ko_shi_bhatia_chung_penninx_jansen_de geus_boomsma_wright_et al._2016, title={Integrative approaches for large-scale transcriptome-wide association studies}, volume={48}, ISSN={1061-4036 1546-1718}, url={http://dx.doi.org/10.1038/NG.3506}, DOI={10.1038/NG.3506}, abstractNote={Alexander Gusev, Bogdan Pasaniuc and colleagues present a strategy that integrates gene expression measurements with summary statistics from large-scale genome-wide association studies to identify genes whose cis-regulated expression is associated with complex traits. They identify 69 new genes significantly associated with obesity-related traits and illustrate how this approach can provide insights into the genetic basis of complex traits. Many genetic variants influence complex traits by modulating gene expression, thus altering the abundance of one or multiple proteins. 
Here we introduce a powerful strategy that integrates gene expression measurements with summary association statistics from large-scale genome-wide association studies (GWAS) to identify genes whose cis-regulated expression is associated with complex traits. We leverage expression imputation from genetic data to perform a transcriptome-wide association study (TWAS) to identify significant expression-trait associations. We applied our approaches to expression data from blood and adipose tissue measured in ∼3,000 individuals overall. We imputed gene expression into GWAS data from over 900,000 phenotype measurements to identify 69 new genes significantly associated with obesity-related traits (BMI, lipids and height). Many of these genes are associated with relevant phenotypes in the Hybrid Mouse Diversity Panel. Our results showcase the power of integrating genotype, gene expression and phenotype to gain insights into the genetic basis of complex traits.}, number={3}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Gusev, Alexander and Ko, Arthur and Shi, Huwenbo and Bhatia, Gaurav and Chung, Wonil and Penninx, Brenda W J H and Jansen, Rick and de Geus, Eco J C and Boomsma, Dorret I and Wright, Fred A and others}, year={2016}, month=feb, pages={245--252} } @article{gusev_ko_shi_bhatia_chung_penninx_jansen_geus_boomsma_wright_et al._2016, title={Integrative approaches for large-scale transcriptome-wide association studies}, volume={48}, number={3}, journal={Nature Genetics}, author={Gusev, A. and Ko, A. and Shi, H. and Bhatia, G. and Chung, W. and Penninx, B. W. J. H. and Jansen, R. and de Geus, E. J. C. and Boomsma, D. I. and Wright, F. A. and others}, year={2016}, pages={245--252} } @inproceedings{theisen_williams_2016, title={Poster: risk-based attack surface approximation}, booktitle={Symposium and Bootcamp on the Science of Security}, author={Theisen, C. 
and Williams, L.}, year={2016}, pages={121--123} } @article{zhou_wright_2016, title={The projack: a resampling approach to correct for ranking bias in high-throughput studies}, volume={17}, number={1}, journal={Biostatistics (Oxford, England)}, author={Zhou, Y. H. and Wright, F. A.}, year={2016}, pages={54--64} } @article{walter_wright_nobel_2015, title={Consistent testing for recurrent genomic aberrations}, volume={102}, ISSN={1464-3510}, DOI={10.1093/biomet/asv046}, abstractNote={We consider the detection and identification of recurrent departures from stationary behaviour in genomic or similarly arranged data containing measurements at an ordered set of variables. Our primary focus is on departures that occur only at a single variable, or within a small window of contiguous variables, but involve more than one sample. This encompasses the identification of aberrant markers in genome-wide measurements of DNA copy number and DNA methylation, as well as meta-analyses of genome-wide association studies. We propose and analyse a cyclic shift-based procedure for testing recurrent departures from stationarity. Our analysis establishes the consistency of cyclic shift $p$-values for datasets with a fixed set of samples as the number of observed variables tends to infinity, under the assumption that each sample is an independent realization of a stationary Markov chain. Our results apply to any test statistic satisfying a simple invariance condition.}, number={4}, journal={Biometrika}, author={Walter, V. and Wright, F. A. and Nobel, A. 
B.}, year={2015}, month=dec, pages={783--796} } @article{jansen_penninx_madar_xia_milaneschi_hottenga_hammerschlag_beekman_van der wee_smit_et al._2015, title={Erratum: Gene expression in major depressive disorder}, volume={21}, ISSN={1359-4184 1476-5578}, url={http://dx.doi.org/10.1038/MP.2015.94}, DOI={10.1038/MP.2015.94}, abstractNote={Correction to: Molecular Psychiatry advance online publication, 26 May 2015; doi:10.1038/mp.2015.57 Following publication of the above article, the authors noticed that the Supplementary Figure legends were not published with the paper. The legends accompany this erratum. In addition, the Supplementary Tables were originally presented as PDF files.}, number={3}, journal={Molecular Psychiatry}, publisher={Springer Nature}, author={Jansen, R and Penninx, B W J H and Madar, V and Xia, K and Milaneschi, Y and Hottenga, J J and Hammerschlag, A R and Beekman, A and van der Wee, N and Smit, J H and others}, year={2015}, month=jun, pages={444} } @article{eduati_mangravite_wang_tang_bare_huang_norman_kellen_menden_yang_et al._2015, title={Erratum: Prediction of human population responses to toxic compounds by a collaborative competition}, volume={33}, ISSN={1087-0156 1546-1696}, url={http://dx.doi.org/10.1038/NBT1015-1109A}, DOI={10.1038/NBT1015-1109A}, abstractNote={Nat. Biotechnol. 33, 933–940 (2015); published online 10 August 2015; corrected after print 1 October 2015 In the version of this article initially published, in the HTML only, all authors names were incorrectly included in the main author list, and several authors names were repeated. 
The authors have now added 12 additional authors to the list of “Other participants in the NIEHS-NCATS-UNC DREAM Toxicogenetics Collaboration,” including Alok Jaiswal, Antti Poso, Himanshu Chheda, Ismeet Kaur, Jing Tang, John-Patrick Mpindi, Krister Wennerberg, Natalio Krasnogor, Samuel Kaski, Tero Aittokallio, Petteri Hintsanen and Suleiman Ali Khan.}, number={10}, journal={Nature Biotechnology}, publisher={Springer Nature}, author={Eduati, Federica and Mangravite, Lara M and Wang, Tao and Tang, Hao and Bare, J Christopher and Huang, Ruili and Norman, Thea and Kellen, Mike and Menden, Michael P and Yang, Jichen and others}, year={2015}, month=oct, pages={1109} } @article{o’neal_gallins_pace_dang_wolf_jones_guo_zhou_madar_huang_et al._2015, title={Gene Expression in Transformed Lymphocytes Reveals Variation in Endomembrane and HLA Pathways Modifying Cystic Fibrosis Pulmonary Phenotypes}, volume={96}, ISSN={0002-9297}, url={http://dx.doi.org/10.1016/J.AJHG.2014.12.022}, DOI={10.1016/J.AJHG.2014.12.022}, abstractNote={Variation in cystic fibrosis (CF) phenotypes, including lung disease severity, age of onset of persistent Pseudomonas aeruginosa (P. aeruginosa) lung infection, and presence of meconium ileus (MI), has been partially explained by genome-wide association studies (GWASs). It is not expected that GWASs alone are sufficiently powered to uncover all heritable traits associated with CF phenotypic diversity. Therefore, we utilized gene expression association from lymphoblastoid cells lines from 754 p.Phe508del CF-affected homozygous individuals to identify genes and pathways. LPAR6, a G protein coupled receptor, associated with lung disease severity (false discovery rate q value = 0.0006). Additional pathway analyses, utilizing a stringent permutation-based approach, identified unique signals for all three phenotypes. 
Pathways associated with lung disease severity were annotated in three broad categories: (1) endomembrane function, containing p.Phe508del processing genes, providing evidence of the importance of p.Phe508del processing to explain lung phenotype variation; (2) HLA class I genes, extending previous GWAS findings in the HLA region; and (3) endoplasmic reticulum stress response genes. Expression pathways associated with lung disease were concordant for some endosome and HLA pathways, with pathways identified using GWAS associations from 1,978 CF-affected individuals. Pathways associated with age of onset of persistent P. aeruginosa infection were enriched for HLA class II genes, and those associated with MI were related to oxidative phosphorylation. Formal testing demonstrated that genes showing differential expression associated with lung disease severity were enriched for heritable genetic variation and expression quantitative traits. Gene expression provided a powerful tool to identify unrecognized heritable variation, complementing ongoing GWASs in this rare disease. Variation in cystic fibrosis (CF) phenotypes, including lung disease severity, age of onset of persistent Pseudomonas aeruginosa (P. aeruginosa) lung infection, and presence of meconium ileus (MI), has been partially explained by genome-wide association studies (GWASs). It is not expected that GWASs alone are sufficiently powered to uncover all heritable traits associated with CF phenotypic diversity. Therefore, we utilized gene expression association from lymphoblastoid cells lines from 754 p.Phe508del CF-affected homozygous individuals to identify genes and pathways. LPAR6, a G protein coupled receptor, associated with lung disease severity (false discovery rate q value = 0.0006). Additional pathway analyses, utilizing a stringent permutation-based approach, identified unique signals for all three phenotypes. 
Pathways associated with lung disease severity were annotated in three broad categories: (1) endomembrane function, containing p.Phe508del processing genes, providing evidence of the importance of p.Phe508del processing to explain lung phenotype variation; (2) HLA class I genes, extending previous GWAS findings in the HLA region; and (3) endoplasmic reticulum stress response genes. Expression pathways associated with lung disease were concordant for some endosome and HLA pathways, with pathways identified using GWAS associations from 1,978 CF-affected individuals. Pathways associated with age of onset of persistent P. aeruginosa infection were enriched for HLA class II genes, and those associated with MI were related to oxidative phosphorylation. Formal testing demonstrated that genes showing differential expression associated with lung disease severity were enriched for heritable genetic variation and expression quantitative traits. Gene expression provided a powerful tool to identify unrecognized heritable variation, complementing ongoing GWASs in this rare disease. The genetic architecture of phenotypic variability in cystic fibrosis (CF [MIM 219700]) is beginning to be defined,1Wright F.A. Strug L.J. Doshi V.K. Commander C.W. Blackman S.M. Sun L. Berthiaume Y. Cutler D. Cojocaru A. Collaco J.M. et al.Genome-wide association and linkage identify modifier loci of lung disease severity in cystic fibrosis at 11p13 and 20q13.2.Nat. Genet. 2011; 43: 539-546Crossref PubMed Scopus (190) Google Scholar, 2Green D.M. Collaco J.M. McDougal K.E. Naughton K.M. Blackman S.M. Cutting G.R. Heritability of respiratory infection with Pseudomonas aeruginosa in cystic fibrosis.J. Pediatr. 2012; 161: 290-295.e1Abstract Full Text Full Text PDF PubMed Scopus (29) Google Scholar, 3Emond M.J. Louie T. Emerson J. Zhao W. Mathias R.A. Knowles M.R. Wright F.A. Rieder M.J. Tabor H.K. Nickerson D.A. 
et al.National Heart, Lung, and Blood Institute (NHLBI) GO Exome Sequencing ProjectLung GOExome sequencing of extreme phenotypes identifies DCTN4 as a modifier of chronic Pseudomonas aeruginosa infection in cystic fibrosis.Nat. Genet. 2012; 44: 886-889Crossref PubMed Scopus (170) Google Scholar, 4Li W. Soave D. Miller M.R. Keenan K. Lin F. Gong J. Chiang T. Stephenson A.L. Durie P. Rommens J. et al.Unraveling the complex genetic model for cystic fibrosis: pleiotropic effects of modifier genes on early cystic fibrosis-related morbidities.Hum. Genet. 2014; 133: 151-161Crossref PubMed Scopus (82) Google Scholar, 5Sun L. Rommens J.M. Corvol H. Li W. Li X. Chiang T.A. Lin F. Dorfman R. Busson P.F. Parekh R.V. et al.Multiple apical plasma membrane constituents are associated with susceptibility to meconium ileus in individuals with cystic fibrosis.Nat. Genet. 2012; 44: 562-569Crossref PubMed Scopus (150) Google Scholar but GWASs for CF are limited by numbers of subjects compared to common diseases, where tens of thousands of subjects have been used to identify pathophysiologically relevant pathways.6Okada Y. Wu D. Trynka G. Raj T. Terao C. Ikari K. Kochi Y. Ohmura K. Suzuki A. Yoshida S. et al.RACI consortiumGARNET consortiumGenetics of rheumatoid arthritis contributes to biology and drug discovery.Nature. 2014; 506: 376-381Crossref PubMed Scopus (1418) Google Scholar, 7Bønnelykke K. Matheson M.C. Pers T.H. Granell R. Strachan D.P. Alves A.C. Linneberg A. Curtin J.A. Warrington N.M. Standl M. et al.Australian Asthma Genetics Consortium (AAGC)EArly Genetics and Lifecourse Epidemiology (EAGLE) ConsortiumMeta-analysis of genome-wide association studies identifies ten loci influencing allergic sensitization.Nat. Genet. 2013; 45: 902-906Crossref PubMed Scopus (190) Google Scholar, 8Jostins L. Ripke S. Weersma R.K. Duerr R.H. McGovern D.P. Hui K.Y. Lee J.C. Schumm L.P. Sharma Y. Anderson C.A. 
et al.International IBD Genetics Consortium (IIBDGC)Host-microbe interactions have shaped the genetic architecture of inflammatory bowel disease.Nature. 2012; 491: 119-124Crossref PubMed Scopus (3230) Google Scholar Studies of gene expression provide an alternative approach to identify gene modifiers.9Cookson W. Liang L. Abecasis G. Moffatt M. Lathrop M. Mapping complex disease traits with global gene expression.Nat. Rev. Genet. 2009; 10: 184-194Crossref PubMed Scopus (612) Google Scholar, 10Emilsson V. Thorleifsson G. Zhang B. Leonardson A.S. Zink F. Zhu J. Carlson S. Helgason A. Walters G.B. Gunnarsdottir S. et al.Genetics of gene expression and its effect on disease.Nature. 2008; 452: 423-428Crossref PubMed Scopus (1007) Google Scholar, 11Nica A.C. Montgomery S.B. Dimas A.S. Stranger B.E. Beazley C. Barroso I. Dermitzakis E.T. Candidate causal regulatory effects by integration of expression QTLs with complex trait genetic associations.PLoS Genet. 2010; 6: e1000895Crossref PubMed Scopus (313) Google Scholar Based upon the established utility of gene expression studies in lymphoblastoid cell lines (LCLs),12Nicolae D.L. Gamazon E. Zhang W. Duan S. Dolan M.E. Cox N.J. Trait-associated SNPs are more likely to be eQTLs: annotation to enhance discovery from GWAS.PLoS Genet. 2010; 6: e1000888Crossref PubMed Scopus (910) Google Scholar, 13Stranger B.E. Forrest M.S. Dunning M. Ingle C.E. Beazley C. Thorne N. Redon R. Bird C.P. de Grassi A. Lee C. et al.Relative impact of nucleotide and copy number variation on gene expression phenotypes.Science. 2007; 315: 848-853Crossref PubMed Scopus (1347) Google Scholar, 14Zhang W. Duan S. Kistner E.O. Bleibel W.K. Huang R.S. Clark T.A. Chen T.X. Schweitzer A.C. Blume J.E. Cox N.J. Dolan M.E. Evaluation of genetic variation contributing to differences in gene expression between populations.Am. J. Hum. Genet. 
2008; 82: 631-640Abstract Full Text Full Text PDF PubMed Scopus (157) Google Scholar global gene expression was measured from LCLs of a highly phenotyped CF cohort previously used for GWAS analysis1Wright F.A. Strug L.J. Doshi V.K. Commander C.W. Blackman S.M. Sun L. Berthiaume Y. Cutler D. Cojocaru A. Collaco J.M. et al.Genome-wide association and linkage identify modifier loci of lung disease severity in cystic fibrosis at 11p13 and 20q13.2.Nat. Genet. 2011; 43: 539-546Crossref PubMed Scopus (190) Google Scholar and analyzed for association with three distinct CF phenotypes: lung disease severity, age of onset of persistent Pseudomonas aeruginosa (P. aeruginosa) pulmonary infection, and meconium ileus (MI [MIM 614665]) at birth (Table 1; Figure S1).Table 1Characteristics of Subject Population for PhenotypesStudy GroupConsortium Lung Phenotype (Primary Analysis)aSubjects were classified as having either severe or mild lung disease, as defined by the quantitative Consortium lung phenotype (KNoRMA) value of <0.3 or >0.3, respectively.18Age of Onset of Persistent Pseudomonas aeruginosaMeconium Ileus (MI)Size of PopulationAge at Enrollment (year)No. Males (%)No. EuropeanbBased on self-identified ancestry and principal components analysis via SNP genotypes. (%)Persistent Culture PositivecData were obtained at the encounter level (each clinic visit) from the Cystic Fibrosis Foundation (CFF) Patient Registry. Persistent is defined as cultured P. aeruginosa in respiratory cultures 2 years in a row, or 2 out of 3 years, unless subjects had at least 5 consecutive years of negative cultures after meeting minimal criteria (2 out of 3 years of positive cultures). Subjects who were above age 7 needed to have a negative culture before the first positive culture to be included into the analysis.28 There were 14 severe and 30 mild subjects who were negative for P. aeruginosa at last culture. 
(%)Age of Onset (year)Presence of MIdSubjects were confirmed to have MI if a diagnosis at birth was supported by source documents, such as the original surgical or medical report, detailed clinical or admissions note, or verbal confirmation from the subject or the parent with documentation of an abdominal scar. Subjects were removed from the analysis if MI could not be confirmed or if the diagnosis was unclear or unknown. (%)Mean ± SDRangeMean ± SDRangeSevere31716.5 ± 4.68–25157 (49.5)317 (100)208 of 222 (93.7)5.2 ± 4.30.6–1952 of 301 (17.3)Mild43728.0 ± 9.915–58221 (50.5)437 (100)203 of 233 (87.1)16.8 ± 10.30.6–5754 of 405 (13.3)ePresence of MI was 17.6% (36 of 205) for subjects enrolled at 15–25 years of age.Total754455706a Subjects were classified as having either severe or mild lung disease, as defined by the quantitative Consortium lung phenotype (KNoRMA) value of <0.3 or >0.3, respectively.18Taylor C. Commander C.W. Collaco J.M. Strug L.J. Li W. Wright F.A. Webel A.D. Pace R.G. Stonebraker J.R. Naughton K. et al.A novel lung disease phenotype adjusted for mortality attrition for cystic fibrosis genetic modifier studies.Pediatr. Pulmonol. 2011; 46: 857-869Crossref PubMed Scopus (41) Google Scholarb Based on self-identified ancestry and principal components analysis via SNP genotypes.c Data were obtained at the encounter level (each clinic visit) from the Cystic Fibrosis Foundation (CFF) Patient Registry. Persistent is defined as cultured P. aeruginosa in respiratory cultures 2 years in a row, or 2 out of 3 years, unless subjects had at least 5 consecutive years of negative cultures after meeting minimal criteria (2 out of 3 years of positive cultures). Subjects who were above age 7 needed to have a negative culture before the first positive culture to be included into the analysis.28Pittman J.E. Calloway E.H. Kiser M. Yeatts J. Davis S.D. Drumm M.L. Schechter M.S. Leigh M.W. Emond M. Van Rie A. Knowles M.R. 
Age of Pseudomonas aeruginosa acquisition and subsequent severity of cystic fibrosis lung disease.Pediatr. Pulmonol. 2011; 46: 497-504PubMed Google Scholar There were 14 severe and 30 mild subjects who were negative for P. aeruginosa at last culture.d Subjects were confirmed to have MI if a diagnosis at birth was supported by source documents, such as the original surgical or medical report, detailed clinical or admissions note, or verbal confirmation from the subject or the parent with documentation of an abdominal scar. Subjects were removed from the analysis if MI could not be confirmed or if the diagnosis was unclear or unknown.e Presence of MI was 17.6% (36 of 205) for subjects enrolled at 15–25 years of age. Open table in a new tab Affymetrix Human Exon (1.0 ST) microarray data were collected from RNA isolated from 754 LCLs selected from a cohort of 1,137 samples from CFTR (MIM 602421) p.Phe508del European individuals homozygous for the mutation (chr7: 98,809–98,811 delCTT; RefSeq accession number NG_016465.3; c.1521_1523delCTT). These CF-affected individuals were originally obtained for the Genetic Modifiers in CF Lung Disease Study where a GWAS had been performed1Wright F.A. Strug L.J. Doshi V.K. Commander C.W. Blackman S.M. Sun L. Berthiaume Y. Cutler D. Cojocaru A. Collaco J.M. et al.Genome-wide association and linkage identify modifier loci of lung disease severity in cystic fibrosis at 11p13 and 20q13.2.Nat. Genet. 2011; 43: 539-546Crossref PubMed Scopus (190) Google Scholar (Figure S2). Considerable efforts were taken to ensure that high-quality microarray data were utilized and that interpretation would not be confused by known effect of SNPs on probe hybridization kinetics (Figure S2). For the highly polymorphic HLA region, probe set filtering removed 438 of the 797 probe sets. 
However, because of the concern that probe set filtering might not have been adequate in HLA genes, additional analysis was performed to identify HLA genes whose expression values were probably affected by probe set binding (Figure S3). As a result of this analysis, HLA-DRB1 (MIM 142857) expression values were removed from subsequent analysis. The study was approved by the biomedical institutional review board of the University of North Carolina and the institutional review board of each participating institution. CF-affected individuals and their parents (if they were a minor) provided written informed consent. Linear regression was utilized to establish association of gene expression with phenotypes. Gene expression values meeting a minimal threshold of expression above 6.03 (on the Affymetrix RMA standard log2 scale) were utilized, based on the 95th percentile of mean "expression" in females for genes on the Y chromosome, because this threshold was considered to reliably represent true signal above background. All genes meeting this criterion (12,033 out of 17,868 annotated genes; 67.3%) were included in the linear regression analysis, including genes whose probes overlaid SNPs with high minor allele frequency (MAF), but these genes were "flagged" so that potentially important interpretive issues could be considered later. The covariates used for all analyses are listed in Table S1. The genotype PCs used as covariates were calculated with Eigenstrat15Price A.L. Patterson N.J. Plenge R.M. Weinblatt M.E. Shadick N.A. Reich D. Principal components analysis corrects for stratification in genome-wide association studies.Nat. Genet. 2006; 38: 904-909Crossref PubMed Scopus (6831) Google Scholar and available genotype data from the previously conducted GWASs.1Wright F.A. Strug L.J. Doshi V.K. Commander C.W. Blackman S.M. Sun L. Berthiaume Y. Cutler D. Cojocaru A. Collaco J.M. 
et al.Genome-wide association and linkage identify modifier loci of lung disease severity in cystic fibrosis at 11p13 and 20q13.2.Nat. Genet. 2011; 43: 539-546Crossref PubMed Scopus (190) Google Scholar The surrogate variables of gene expression data were calculated with the "sva" package in Bioconductor in R.16Leek J.T. Johnson W.E. Parker H.S. Jaffe A.E. Storey J.D. The sva package for removing batch effects and other unwanted variation in high-throughput experiments.Bioinformatics. 2012; 28: 882-883Crossref PubMed Scopus (2291) Google Scholar The Q-Q plots for all three phenotypes suggested that the covariates included were appropriate to control for population stratification or technical factors that could potentially lead to false positives (Figure S4). The expression of lysophosphatidic acid receptor 6 (LPAR6 [MIM 278150]) achieved transcriptome-wide significance for association with lung disease (false discovery rate q value = 0.0006, p value = 5.35 × 10−8), using both standard and alternative probe annotation (ANNMAP, formerly known as X:MAP),17Yates T. Okoniewski M.J. Miller C.J. X:Map: annotation and visualization of genome structure for Affymetrix exon array analysis.Nucleic Acids Res. 2008; 36: D780-D786Crossref PubMed Scopus (53) Google Scholar with higher levels of LPAR6 being associated with worse lung function. Array-based LPAR6 expression was technically validated by TaqMan quantitative real-time PCR (p < 0.0001 between 36 low-expressing and 40 high-expressing LCL samples from CF-affected individuals). CHMP4C (p = 1.05 × 10−5 [MIM 610899]), SSBP2 (p = 2.60 × 10−5 [MIM 607389]), and P2RX4 (p = 8.03 × 10−5 [MIM 600846]) were suggestive for association (Table S2; Figure S5; see Table S5 for complete list). As explicitly accounted for by the Consortium lung phenotype,18Taylor C. Commander C.W. Collaco J.M. Strug L.J. Li W. Wright F.A. Webel A.D. Pace R.G. Stonebraker J.R. Naughton K. 
et al.A novel lung disease phenotype adjusted for mortality attrition for cystic fibrosis genetic modifier studies.Pediatr. Pulmonol. 2011; 46: 857-869Crossref PubMed Scopus (41) Google Scholar older surviving CF-affected individuals have milder lung disease, reflecting high mortality in CF (Table 1). To investigate a possible relationship between age and gene expression in the CF cohort, but unrelated to CF lung disease, we examined three large external studies of LCL gene expression. These included a childhood asthma (MIM 600807) cohort evaluated on the Affymetrix platform,19Liang L. Morar N. Dixon A.L. Lathrop G.M. Abecasis G.R. Moffatt M.F. Cookson W.O. A cross-platform analysis of 14,177 expression quantitative trait loci derived from lymphoblastoid cell lines.Genome Res. 2013; 23: 716-726Crossref PubMed Scopus (108) Google Scholar available data from the Cholesterol and Pharmacogenomics (CAP) trial (available on ArrayExpress),20Yu C.Y. Theusch E. Lo K. Mangravite L.M. Naidoo D. Kutilova M. Medina M.W. HNRNPA1 regulates HMGCR alternative splicing and modulates cellular cholesterol metabolism.Hum. Mol. Genet. 2014; 23: 319-332Crossref PubMed Scopus (46) Google Scholar and the Multiple Tissue Human Expression Resource (MuTHER) study.21Glass D. Viñuela A. Davies M.N. Ramasamy A. Parts L. Knowles D. Brown A.A. Hedman A.K. Small K.S. Buil A. et al.UK Brain Expression consortiumMuTHER consortiumGene expression changes with age in skin, adipose tissue, blood and brain.Genome Biol. 2013; 14: R75Crossref PubMed Scopus (189) Google Scholar No correspondence emerged between differentially expressed genes for the Consortium lung phenotype and those associated with age in these three non-CF populations (Figure S6), although LPAR6 was nominally associated with age (not corrected for multiple comparison) in older women (age ∼59 years) in the MuTHER study.21Glass D. Viñuela A. Davies M.N. Ramasamy A. Parts L. Knowles D. Brown A.A. Hedman A.K. Small K.S. Buil A. 
et al.UK Brain Expression consortiumMuTHER consortiumGene expression changes with age in skin, adipose tissue, blood and brain.Genome Biol. 2013; 14: R75Crossref PubMed Scopus (189) Google Scholar Consequently, we conclude that the associations seen in our study reflect CF lung disease severity and not aging. Rigorous "pathway" (gene set) analysis was conducted via a permutation-based approach (Significance Analysis of Function and Expression; SAFE), which accounts for gene expression correlation structures and allows testing of both standard and custom-derived pathways.22Barry W.T. Nobel A.B. Wright F.A. Significance analysis of functional categories in gene expression studies: a structured permutation approach.Bioinformatics. 2005; 21: 1943-1949Crossref PubMed Scopus (253) Google Scholar Pathway analysis was conducted by SAFE in R (v.3.0) and annotation databases (available at Bioconductor) hugene10stprobeset.db and GO.db (Gene Ontology annotation maps). Multiple pathways with q values < 0.15 were found to associate with lung disease severity (Table 2; Table S6, tab A). Of the 35 pathways listed (Table 2), 16 were related to the endomembrane system for synthesis and post-translational modification of membrane proteins (membranes, vesicle traffic, and Golgi/endoplasmic reticulum [ER]) and two pathways were related to ER stress response, which also could represent a subset of endomembrane processes. Of the 11 Gene Ontology (GO) Cellular Component pathways, 7 contained HLA class I genes, and custom-derived pathways consisting exclusively of HLA genes were also highly significant (Table 2). Importantly, although the HLA genes clearly contributed to the significance of the endomembrane pathways, these same pathways also contained TTC35 (Table 2 [MIM 607722]) and TMEM85 (Table S6, tab A; p value = 0.06), which are the human homologs of yeast genes EMC2 and EMC4, respectively, known to modulate yeast homolog of p.Phe508del processing.23Louie R.J. Guo J. Rodgers J.W. 
White R. Shah N. Pagant S. Kim P. Livstone M. Dolinski K. McKinney B.A. et al.A yeast phenomic model for the gene interaction network modulating CFTR-ΔF508 protein biogenesis.Genome Med. 2012; 4: 103Crossref PubMed Scopus (55) Google Scholar MetaMiner Cystic Fibrosis Specific Pathways not containing HLA genes also supported association with p.Phe508del processing (Table 2). We conclude that three important pathophysiological signals have emerged: HLA class I, p.Phe508del processing, and the ER stress response. The significance of the miR21 (miRNA-21 [MIM 611020]) pathway is also relevant given the expanding role of this microRNA (miRNA) in pulmonary biology.24Kumarswamy R. Volkmann I. Thum T. Regulation and function of miRNA-21 in health and disease.RNA Biol. 2011; 8: 706-713Crossref PubMed Scopus (444) Google Scholar Most pathways trended in the "up" direction (increased expression of genes in the pathways associated with milder lung disease), with two pathways (annotated to germ cell nuclei) trending "down."Table 2Gene Expression Pathways Significantly Associated with Consortium Lung PhenotypePathwayGenesStatisticsIDNameNumber↑aNumber of genes in pathway with increased expression.↓bNumber of genes in pathway with decreased expression.TrendcUp (increased) or down (decreased) differential expression of genes in the pathways associated with milder lung disease. 
Two-sided indicates pathways that contained both increased and decreased differentially expressed genes that contributed significantly to the signal.p ValuedDetermined by 10,000 permutations in the SAFE package.22q ValueeBenjamini-Hochberg false-discovery for pathways testing within each pathway set; q values < 0.15 were included.Genes with Gene-Level p Value < 0.05 (Ordered by p Value)fSee Table S6 (tab A) for the inclusive list of genes for these pathways; "none" indicates that no individual genes within the pathway had a p value less than 0.05; see Table S5 for gene MIM numbers.GO Cellular Component Pathways0001673male germ cell nucleus14014down0.00010.0164TNP1; REC8; TCFL50012507ER to Golgi transport vesicle membrane25232up0.00030.0481HLA-E; MCFD2; TMED7; HLA-F0043073germ cell nucleus17116down0.00040.0442TNP1; REC8; TCFL50042470;0048770melanosome; pigment granule785226up0.00070.0582SLC3A2; TPP1; CTSD; ANXA2; STOM; HSPA5; BSG0030134ER to Golgi transport vesicle29254up0.00110.0737HLA-E; MCFD2; TMED7; HLA-F0030176integral to endoplasmic reticulum membrane856421up0.00240.1181TTC35; HLA-E; EDEM1; TAP1; SELS; HLA-F; HSPA5; MMGT10031301integral to organelle membrane17111358up0.00260.1181TTC35; HLA-E; EDEM1; TAP1; SELS; ST6GALNAC6; HLA-F; A4GALT; ARMCX3; P2RX7; LARGE; HSPA5; MMGT10000421autophagic vacuole membrane13112up0.00280.1181WIPI1; ATG9A0031227intrinsic to endoplasmic reticulum membrane957025up0.00310.1181TTC35; HLA-E; EDEM1; TAP1; SELS; HLA-F; HSPA5; MMGT10031300intrinsic to organelle membrane18412163up0.00360.1231TTC35; HLA-E; EDEM1; TAP1; SELS; ST6GALNAC6; HLA-F; A4GALT; ARMCX3; P2RX7; LARGE; HSPA5; MMGT10030658transport vesicle membrane493316up0.00390.1231HLA-E; MCFD2; TMED7; HLA-F; NCALDGO Biological Process Pathways0006518peptide metabolic process644618up0.00010.0837GSTK1; DNPEP; PSEN2; TPP10072384organelle transport along microtubule24213up0.00010.0837PRKCZ; COPG0006925inflammatory cell apoptotic process10100up0.00030.1107none0006944cellular membrane 
fusion614219up0.00030.1107CD9; PLDN; ANXA2; BET10007030golgi organization382810up0.00030.1107GCC2; BHLHA15; GOLGB1; PLK3; COG1; TMED20043603cellular amide metabolic process1016536up0.00030.1107GSTK1; DNPEP; PSEN2; TPP1; PRKCD0034067protein localization to Golgi apparatus14131up0.00040.1166GOLGA4; GCC2; ATG9A0045684positive regulation of epidermis development11101up0.00040.1166noneGO Molecular Function Pathways0050839cell adhesion molecule binding331518two sided0.00040.1181P2RX4; MLLT4; CD1D;gFor the two-sided "Trend," these genes have a "down" trend. CTNNA1; PVRL1gFor the two-sided "Trend," these genes have a "down" trend.0042287MHC protein binding1596two sided0.00060.1191TAP1; LAG3; MARCH8MSigDB PathwaysATAAGCT.MIR.21814536two sided0.00010.0387BAHD1; BTBD3;gFor the two-sided "Trend," these genes have a "down" trend. C5orf41; STK40; UBR3; NF2;gFor the two-sided "Trend," these genes have a "down" trend. SSFA2; JAG1; PPARA; PELI1; RHOB; CREBL2V.HMGIY_Q61587088two sided0.00060.1499ZNF675;gFor the two-sided "Trend," these genes have a "down" trend. LMO4; TNFSF11;gFor the two-sided "Trend," these genes have a "down" trend. PLAGL2; POLD3;gFor the two-sided "Trend," these genes have a "down" trend. SLC7A1; UBE2E2;gFor the two-sided "Trend," these genes have a "down" trend. TAZ; UBR3; MRC2;gFor the two-sided "Trend," these genes have a "down" trend. 
TNFSF4; IKZF2gFor the two-sided "Trend," these genes have a "down" trend.MetaMiner Cystic Fibrosis Specific PathwayshMetaMiner CF Specific Pathways represent a version of Thomson Reuters' (formerly GeneGo) MetaDiscovery suite that is enriched with content specific for cystic fibrosis.cholesterol and sphingolipids transport/recycling to plasma membrane in lung (normal and CF)1495two sided0.00360.0597ABCG1gFor the two-sided "Trend," these genes have a "down" trend.normal wtCFTR traffic/sorting endosome formation14113up0.00520.0621noneF508-CFTR traffic/ER-to-Golgi in CF; Normal wtCFTR traffic/ER-to-Golgi22202up0.00750.0621COPG; COPZ2mucin expression in CF via TLRs, EGFR signaling pathways483414up0.01160.0770JUN; PRKCDPFAM Pathways00035double-stranded RNA binding motif17215down0.00010.0135STRBP; STAU207716basic region leucine zipper1174two sided0.00020.0276DDIT3; CREBL2; CEBPB03953tubulin C-terminal domain15213down0.00090.0804TUBB2BCF Relevant Custom PathwaysER stress response16912742up0.00050.0106DNAJB9; EDEM1; CISD2; TANK; DDIT3; SERP1; FDPS; LONP1; NANS; SSR4; JUN; GADD45A; LY9; PGM3; HSPA5; ARF4; IER3IP1; BTG2; CEBPB; CNIH; MANF; PDIA6XBP1 target genes13103two sided0.00790.1165DNAJB9; EDEM1; SERP1; PDIA6HLA-Specific Pathwaysclass I330up0.02210.0261HLA-E; HLA-Fclass II871up0.08680.0580noneclass I and class II11101up0.02990.0261HLA-E; HLA-FPathways limited to those with ≥10 but ≤200 genes. SAFE analysis utilized 10,000 permutations to establish significance thresholds. CF Relevant Custom Pathways developed primarily as described for mice46Saini Y. Dang H. Livraghi-Butrico A. Kelly E.J. Jones L.C. O'Neal W.K. Boucher R.C. Gene expression in whole lung and pulmonary macrophages reflects the dynamic pathology associated with airway surface dehydration.BMC Genomics. 
2014; 15: 726Crossref PubMed Scopus (34) Google Scholar using human gene counterparts (Table S8).a Number of genes in pathway with increased expression.b Number of genes in pathway with decreased expression.c Up (increased) or down (decreased) differential expression of genes in the pathways associated with milder lung disease. Two-sided indicates pathways that contained both increased and decreased differentially expressed genes that contributed significantly to the signal.d Determined by 10,000 permutations in the SAFE package.22Barry W.T. Nobel A.B. Wright F.A. Significance analysis of functional categories in gene expression studies: a structured permutation approach.Bioinformatics. 2005; 21: 1943-1949Crossref PubMed Scopus (253) Google Scholare Benjamini-Hochberg false-discovery for pathways testing within each pathway set; q values < 0.15 were included.f See Table S6 (tab A) for the inclusive list of genes for these pathways; "none" indicates that no individual genes within the pathway had a p value less than 0.05; see Table S5 for gene MIM numbers.g For the two-sided "Trend," these genes have a "down" trend.h MetaMiner CF Specific Pathways represent a version of Thomson Reuters' (formerly GeneGo) MetaDiscovery suite that is enriched with content specific for cystic fibrosis. Open table in a new tab Pathways limited to those with ≥10 but ≤200 genes. SAFE analysis utilized 10,000 permutations to establish s}, number={2}, journal={The American Journal of Human Genetics}, publisher={Elsevier BV}, author={O’Neal, Wanda K. and Gallins, Paul and Pace, Rhonda G. and Dang, Hong and Wolf, Whitney E. and Jones, Lisa C. 
and Guo, XueLiang and Zhou, Yi-Hui and Madar, Vered and Huang, Jinyan and et al.}, year={2015}, month={Feb}, pages={318–328} } @article{jansen_penninx_madar_xia_milaneschi_hottenga_hammerschlag_beekman_van der wee_smit_et al._2015, title={Gene expression in major depressive disorder}, volume={21}, ISSN={1359-4184 1476-5578}, url={http://dx.doi.org/10.1038/MP.2015.57}, DOI={10.1038/MP.2015.57}, abstractNote={The search for genetic variants underlying major depressive disorder (MDD) has not yet provided firm leads to its underlying molecular biology. A complementary approach is to study gene expression in relation to MDD. We measured gene expression in peripheral blood from 1848 subjects from The Netherlands Study of Depression and Anxiety. Subjects were divided into current MDD (N=882), remitted MDD (N=635) and control (N=331) groups. MDD status and gene expression were measured again 2 years later in 414 subjects. The strongest gene expression differences were between the current MDD and control groups (129 genes at false-discovery rate, FDR<0.1). Gene expression differences across MDD status were largely unrelated to antidepressant use, inflammatory status and blood cell counts. Genes associated with MDD were enriched for interleukin-6 (IL-6)-signaling and natural killer (NK) cell pathways. We identified 13 gene expression clusters with specific clusters enriched for genes involved in NK cell activation (downregulated in current MDD, FDR=5.8 × 10⁻⁵) and IL-6 pathways (upregulated in current MDD, FDR=3.2 × 10⁻³). Longitudinal analyses largely confirmed results observed in the cross-sectional data. Comparisons of gene expression results to the Psychiatric Genomics Consortium (PGC) MDD genome-wide association study results revealed overlap with DVL3. In conclusion, multiple gene expression associations with MDD were identified and suggest a measurable impact of current MDD state on gene expression. 
Identified genes and gene clusters are enriched with immune pathways previously associated with the etiology of MDD, in line with the immune suppression and immune activation hypothesis of MDD.}, number={3}, journal={Molecular Psychiatry}, publisher={Springer Nature}, author={Jansen, R and Penninx, B W J H and Madar, V and Xia, K and Milaneschi, Y and Hottenga, J J and Hammerschlag, A R and Beekman, A and van der Wee, N and Smit, J H and others}, year={2015}, month={May}, pages={339–347} } @article{corvol_blackman_boëlle_gallins_pace_stonebraker_accurso_clement_collaco_dang_et al._2015, title={Genome-wide association meta-analysis identifies five modifier loci of lung disease severity in cystic fibrosis}, volume={6}, ISSN={2041-1723}, url={http://dx.doi.org/10.1038/NCOMMS9382}, DOI={10.1038/NCOMMS9382}, abstractNote={The identification of small molecules that target specific CFTR variants has ushered in a new era of treatment for cystic fibrosis (CF), yet optimal, individualized treatment of CF will require identification and targeting of disease modifiers. Here we use genome-wide association analysis to identify genetic modifiers of CF lung disease, the primary cause of mortality. Meta-analysis of 6,365 CF patients identifies five loci that display significant association with variation in lung disease. Regions on chr3q29 (MUC4/MUC20; P=3.3 × 10⁻¹¹), chr5p15.3 (SLC9A3; P=6.8 × 10⁻¹²), chr6p21.3 (HLA Class II; P=1.2 × 10⁻⁸) and chrXq22-q23 (AGTR2/SLC6A14; P=1.8 × 10⁻⁹) contain genes of high biological relevance to CF pathophysiology. The fifth locus, on chr11p12-p13 (EHF/APIP; P=1.9 × 10⁻¹⁰), was previously shown to be associated with lung disease. These results provide new insights into potential targets for modulating lung disease severity in CF.}, number={1}, journal={Nature Communications}, publisher={Springer Science and Business Media LLC}, author={Corvol, Harriet and Blackman, Scott M. and Boëlle, Pierre-Yves and Gallins, Paul J. and Pace, Rhonda G. 
and Stonebraker, Jaclyn R. and Accurso, Frank J. and Clement, Annick and Collaco, Joseph M. and Dang, Hong and others}, year={2015}, month={Sep} } @article{corvol_blackman_boelle_gallins_pace_stonebraker_accurso_clement_collaco_dang_et al._2015, title={Genome-wide association meta-analysis identifies five modifier loci of lung disease severity in cystic fibrosis}, volume={6}, journal={Nature Communications}, author={Corvol, H. and Blackman, S. M. and Boelle, P. Y. and Gallins, P. J. and Pace, R. G. and Stonebraker, J. R. and Accurso, F. J. and Clement, A. and Collaco, J. M. and Dang, H. and others}, year={2015} } @article{zhou_wright_2015, title={Hypothesis testing at the extremes: fast and robust association for high-throughput data}, volume={16}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxv007}, abstractNote={A number of biomedical problems require performing many hypothesis tests, with an attendant need to apply stringent thresholds. Often the data take the form of a series of predictor vectors, each of which must be compared with a single response vector, perhaps with nuisance covariates. Parametric tests of association are often used, but can result in inaccurate type I error at the extreme thresholds, even for large sample sizes. Furthermore, standard two-sided testing can reduce power compared with the doubled p-value, due to asymmetry in the null distribution. Exact (permutation) testing is attractive, but can be computationally intensive and cumbersome. We present an approximation to exact association tests of trend that is accurate and fast enough for standard use in high-throughput settings, and can easily provide standard two-sided or doubled p-values. The approach is shown to be equivalent under permutation to likelihood ratio tests for the most commonly used generalized linear models (GLMs). For linear regression, covariates are handled by working with covariate-residualized responses and predictors. 
For GLMs, stratified covariates can be handled in a manner similar to exact conditional testing. Simulations and examples illustrate the wide applicability of the approach. The accompanying mcc package is available on CRAN http://cran.r-project.org/web/packages/mcc/index.html.}, number={3}, journal={BIOSTATISTICS}, author={Zhou, Yi-Hui and Wright, Fred A.}, year={2015}, month={Jul}, pages={611–625} } @article{rager_tilley_tulenko_smeester_ray_yosim_currier_ishida_gonzález-horta_sánchez-ramírez_et al._2015, title={Identification of Novel Gene Targets and Putative Regulators of Arsenic-Associated DNA Methylation in Human Urothelial Cells and Bladder Cancer}, volume={28}, ISSN={0893-228X 1520-5010}, url={http://dx.doi.org/10.1021/TX500393Y}, DOI={10.1021/TX500393Y}, abstractNote={There is strong epidemiologic evidence linking chronic exposure to inorganic arsenic (iAs) to myriad adverse health effects, including cancer of the bladder. We set out to identify DNA methylation patterns associated with arsenic and its metabolites in exfoliated urothelial cells (EUCs) that originate primarily from the urinary bladder, one of the targets of arsenic-induced carcinogenesis. Genome-wide, gene-specific promoter DNA methylation levels were assessed in EUCs from 46 residents of Chihuahua, Mexico, and the relationship was examined between promoter methylation profiles and the intracellular concentrations of total arsenic and arsenic species. A set of 49 differentially methylated genes was identified with increased promoter methylation associated with EUC tAs, iAs, and/or monomethylated As (MMAs) enriched for their roles in metabolic disease and cancer. Notably, no genes had differential methylation associated with EUC dimethylated As (DMAs), suggesting that DMAs may influence DNA methylation-mediated urothelial cell responses to a lesser extent than iAs or MMAs. 
Further analysis showed that 22 of the 49 arsenic-associated genes (45%) are also differentially methylated in bladder cancer tissue identified using The Cancer Genome Atlas repository. Both the arsenic- and cancer-associated genes are enriched for the binding sites of common transcription factors known to play roles in carcinogenesis, demonstrating a novel potential mechanistic link between iAs exposure and bladder cancer.}, number={6}, journal={Chemical Research in Toxicology}, publisher={American Chemical Society (ACS)}, author={Rager, Julia E. and Tilley, Sloane K. and Tulenko, Samantha E. and Smeester, Lisa and Ray, Paul D. and Yosim, Andrew and Currier, Jenna M. and Ishida, María C. and González-Horta, Maria del Carmen and Sánchez-Ramírez, Blanca and others}, year={2015}, month={Jun}, pages={1144–1155} } @article{rager_tilley_tulenko_smeester_ray_yosim_currier_ishida_gonzalez-horta_sanchez-ramirez_et al._2015, title={Identification of novel gene targets and putative regulators of arsenic-associated DNA methylation in human urothelial cells and bladder cancer}, volume={28}, number={6}, journal={Chemical Research in Toxicology}, author={Rager, J. E. and Tilley, S. K. and Tulenko, S. E. and Smeester, L. and Ray, P. D. and Yosim, A. and Currier, J. M. and Ishida, M. C. and Gonzalez-Horta, M. D. and Sanchez-Ramirez, B. and others}, year={2015}, pages={1144–1155} } @article{abdo_wetmore_chappell_shea_wright_rusyn_2015, title={In vitro screening for population variability in toxicity of pesticide-containing mixtures}, volume={85}, ISSN={["1873-6750"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84941887442&partnerID=MN8TOARS}, DOI={10.1016/j.envint.2015.09.012}, abstractNote={Population-based human in vitro models offer exceptional opportunities for evaluating the potential hazard and mode of action of chemicals, as well as variability in responses to toxic insults among individuals. 
This study was designed to test the hypothesis that comparative population genomics with efficient in vitro experimental design can be used for evaluation of the potential for hazard, mode of action, and the extent of population variability in responses to chemical mixtures. We selected 146 lymphoblast cell lines from 4 ancestrally and geographically diverse human populations based on the availability of genome sequence and basal RNA-seq data. Cells were exposed to two pesticide mixtures – an environmental surface water sample comprised primarily of organochlorine pesticides and a laboratory-prepared mixture of 36 currently used pesticides – in concentration response and evaluated for cytotoxicity. On average, the two mixtures exhibited a similar range of in vitro cytotoxicity and showed considerable inter-individual variability across screened cell lines. However, when in vitro-to-in vivo extrapolation (IVIVE) coupled with reverse dosimetry was employed to convert the in vitro cytotoxic concentrations to oral equivalent doses and compared to the upper bound of predicted human exposure, we found that a nominally more cytotoxic chlorinated pesticide mixture is expected to have greater margin of safety (more than 5 orders of magnitude) as compared to the current use pesticide mixture (less than 2 orders of magnitude) due primarily to differences in exposure predictions. Multivariate genome-wide association mapping revealed an association between the toxicity of current use pesticide mixture and a polymorphism in rs1947825 in C17orf54. We conclude that a combination of in vitro human population-based cytotoxicity screening followed by dosimetric adjustment and comparative population genomics analyses enables quantitative evaluation of human health hazard from complex environmental mixtures. 
Additionally, such an approach yields testable hypotheses regarding potential toxicity mechanisms.}, journal={ENVIRONMENT INTERNATIONAL}, author={Abdo, Nour and Wetmore, Barbara A. 
and Chappell, Grace A. and Shea, Damian and Wright, Fred A. and Rusyn, Ivan}, year={2015}, month={Dec}, pages={147–155} } @article{abdo_xia_brown_kosyk_huang_sakamuru_zhou_jack_gallins_xia_et al._2015, title={Population-Based in Vitro Hazard and Concentration-Response Assessment of Chemicals: The 1000 Genomes High-Throughput Screening Study}, volume={123}, ISSN={["1552-9924"]}, DOI={10.1289/ehp.1408775}, abstractNote={Background: Understanding of human variation in toxicity to environmental chemicals remains limited, so human health risk assessments still largely rely on a generic 10-fold factor (10^(1/2) each for toxicokinetics and toxicodynamics) to account for sensitive individuals or subpopulations. Objectives: We tested a hypothesis that population-wide in vitro cytotoxicity screening can rapidly inform both the magnitude of and molecular causes for interindividual toxicodynamic variability. Methods: We used 1,086 lymphoblastoid cell lines from the 1000 Genomes Project, representing nine populations from five continents, to assess variation in cytotoxic response to 179 chemicals. Analysis included assessments of population variation and heritability, and genome-wide association mapping, with attention to phenotypic relevance to human exposures. Results: For about half the tested compounds, cytotoxic response in the 1% most “sensitive” individual occurred at concentrations within a factor of 10^(1/2) (i.e., approximately 3) of that in the median individual; however, for some compounds, this factor was > 10. Genetic mapping suggested important roles for variation in membrane and transmembrane genes, with a number of chemicals showing association with SNP rs13120371 in the solute carrier SLC7A11, previously implicated in chemoresistance. 
Conclusions: This experimental approach fills critical gaps unaddressed by recent large-scale toxicity testing programs, providing quantitative, experimentally based estimates of human toxicodynamic variability, and also testable hypotheses about mechanisms contributing to interindividual variation. Citation: Abdo N, Xia M, Brown CC, Kosyk O, Huang R, Sakamuru S, Zhou YH, Jack JR, Gallins P, Xia K, Li Y, Chiu WA, Motsinger-Reif AA, Austin CP, Tice RR, Rusyn I, Wright FA. 2015. Population-based in vitro hazard and concentration–response assessment of chemicals: the 1000 Genomes high-throughput screening study. Environ Health Perspect 123:458–466; http://dx.doi.org/10.1289/ehp.1408775}, number={5}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, author={Abdo, Nour and Xia, Menghang and Brown, Chad C. and Kosyk, Oksana and Huang, Ruili and Sakamuru, Srilatha and Zhou, Yi-Hui and Jack, John R. and Gallins, Paul and Xia, Kai and others}, year={2015}, month={May}, pages={458–466} } @article{eduati_mangravite_wang_tang_bare_huang_norman_kellen_menden_yang_et al._2015, title={Prediction of human population responses to toxic compounds by a collaborative competition}, volume={33}, ISSN={1087-0156 1546-1696}, url={http://dx.doi.org/10.1038/NBT.3299}, DOI={10.1038/NBT.3299}, abstractNote={The ability to computationally predict the effects of toxic compounds on humans could help address the deficiencies of current chemical safety testing. Here, we report the results from a community-based DREAM challenge to predict toxicities of environmental compounds with potential adverse health effects for human populations. We measured the cytotoxicity of 156 compounds in 884 lymphoblastoid cell lines for which genotype and transcriptional data are available as part of the Tox21 1000 Genomes Project. 
The challenge participants developed algorithms to predict interindividual variability of toxic response from genomic profiles and population-level cytotoxicity data from structural attributes of the compounds. 179 submitted predictions were evaluated against an experimental data set to which participants were blinded. Individual cytotoxicity predictions were better than random, with modest correlations (Pearson's r < 0.28), consistent with complex trait genomic prediction. In contrast, predictions of population-level response to different compounds were higher (r < 0.66). The results highlight the possibility of predicting health risks associated with unknown compounds, although risk estimation accuracy remains suboptimal.}, number={9}, journal={Nature Biotechnology}, publisher={Springer Science and Business Media LLC}, author={Eduati, Federica and Mangravite, Lara M and Wang, Tao and Tang, Hao and Bare, J Christopher and Huang, Ruili and Norman, Thea and Kellen, Mike and Menden, Michael P and Yang, Jichen and others}, year={2015}, month={Aug}, pages={933–940} } @article{eduati_mangravite_wang_tang_bare_huang_norman_kellen_menden_yang_et al._2015, title={Prediction of human population responses to toxic compounds by a collaborative competition}, volume={33}, number={9}, journal={Nature Biotechnology}, author={Eduati, F. and Mangravite, L. M. and Wang, T. and Tang, H. and Bare, J. C. and Huang, R. L. and Norman, T. and Kellen, M. and Menden, M. P. and Yang, J. C. 
and others}, year={2015}, pages={933–940} } @article{yang_huang_petralia_long_zhang_argmann_zhao_mobbs_schadt_zhu_et al._2015, title={Synchronized age-related gene expression changes across multiple tissues in human and the link to complex diseases}, volume={5}, ISSN={2045-2322}, url={http://dx.doi.org/10.1038/SREP15145}, DOI={10.1038/SREP15145}, abstractNote={Aging is one of the most important biological processes and is a known risk factor for many age-related diseases in human. 
Studying age-related transcriptomic changes in tissues across the whole body can provide valuable information for a holistic understanding of this fundamental process. In this work, we catalogue age-related gene expression changes in nine tissues from nearly two hundred individuals collected by the Genotype-Tissue Expression (GTEx) project. In general, we find the aging gene expression signatures are very tissue specific. However, enrichment for some well-known aging components such as mitochondria biology is observed in many tissues. Different levels of cross-tissue synchronization of age-related gene expression changes are observed and some essential tissues (e.g., heart and lung) show much stronger “co-aging” than other tissues based on a principal component analysis. The aging gene signatures and complex disease genes show a complex overlapping pattern and only in some cases, we see that they are significantly overlapped in the tissues affected by the corresponding diseases. In summary, our analyses provide novel insights to the co-regulation of age-related gene expression in multiple tissues; it also presents a tissue-specific view of the link between aging and age-related diseases.}, number={1}, journal={Scientific Reports}, publisher={Springer Science and Business Media LLC}, author={Yang, Jialiang and Huang, Tao and Petralia, Francesca and Long, Quan and Zhang, Bin and Argmann, Carmen and Zhao, Yong and Mobbs, Charles V. and Schadt, Eric E. and Zhu, Jun and others}, year={2015}, month={Oct}, pages={15145} } @article{mele_ferreira_reverter_deluca_monlong_sammeth_young_goldmann_pervouchine_sullivan_et al._2015, title={The human transcriptome across tissues and individuals}, volume={348}, number={6235}, journal={Science}, author={Mele, M. and Ferreira, P. G. and Reverter, F. and DeLuca, D. S. and Monlong, J. and Sammeth, M. and Young, T. R. and Goldmann, J. M. and Pervouchine, D. D. and Sullivan, T. J. 
and others}, year={2015}, pages={660–665} } @article{lee_zou_wright_2014, title={Convergence of sample eigenvalues, eigenvectors, and principal component scores for ultra-high dimensional data}, volume={101}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/ast064}, abstractNote={The development of high-throughput biomedical technologies has led to increased interest in the analysis of high-dimensional data where the number of features is much larger than the sample size. In this paper, we investigate principal component analysis under the ultra-high dimensional regime, where both the number of features and the sample size increase as the ratio of the two quantities also increases. We bridge the existing results from the finite and the high-dimension low sample size regimes, embedding the two regimes in a more general framework. We also numerically demonstrate the universal application of the results from the finite regime.}, number={2}, journal={BIOMETRIKA}, author={Lee, Seunggeun and Zou, Fei and Wright, Fred A.}, year={2014}, month={Jun}, pages={484–490} } @article{wright_sullivan_brooks_zou_sun_xia_madar_jansen_chung_zhou_et al._2014, title={Heritability and genomics of gene expression in peripheral blood}, volume={46}, ISSN={1061-4036 1546-1718}, url={http://dx.doi.org/10.1038/NG.2951}, DOI={10.1038/NG.2951}, abstractNote={Fred Wright, Patrick Sullivan and colleagues present the results of a large expression QTL study of peripheral blood using a classic twin design with follow-up replication in independent samples. Their results enable a more precise estimate of the heritability of gene expression and provide a useful resource for exploring the genetic control of transcription. We assessed gene expression profiles in 2,752 twins, using a classic twin design to quantify expression heritability and quantitative trait loci (eQTLs) in peripheral blood. 
The most highly heritable genes (∼777) were grouped into distinct expression clusters, enriched in gene-poor regions, associated with specific gene function or ontology classes, and strongly associated with disease designation. The design enabled a comparison of twin-based heritability to estimates based on dizygotic identity-by-descent sharing and distant genetic relatedness. Consideration of sampling variation suggests that previous heritability estimates have been upwardly biased. Genotyping of 2,494 twins enabled powerful identification of eQTLs, which we further examined in a replication set of 1,895 unrelated subjects. A large number of non-redundant local eQTLs (6,756) met replication criteria, whereas a relatively small number of distant eQTLs (165) met quality control and replication standards. Our results provide a new resource toward understanding the genetic control of transcription.}, number={5}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Wright, Fred A and Sullivan, Patrick F and Brooks, Andrew I and Zou, Fei and Sun, Wei and Xia, Kai and Madar, Vered and Jansen, Rick and Chung, Wonil and Zhou, Yi-Hui and others}, year={2014}, month={Apr}, pages={430–437} } @article{wright_sullivan_brooks_zou_sun_xia_madar_jansen_chung_zhou_et al._2014, title={Heritability and genomics of gene expression in peripheral blood}, volume={46}, number={5}, journal={Nature Genetics}, author={Wright, F. A. and Sullivan, P. F. and Brooks, A. I. and Zou, F. and Sun, W. and Xia, K. and Madar, V. and Jansen, R. and Chung, W. I. and Zhou, Y. H. 
and others}, year={2014}, pages={430–437} } @article{chiu_campbell_clewell_zhou_wright_guyton_rusyn_2014, title={Physiologically Based Pharmacokinetic (PBPK) Modeling of Interstrain Variability in Trichloroethylene Metabolism in the Mouse}, volume={122}, ISSN={["1552-9924"]}, DOI={10.1289/ehp.1307623}, abstractNote={Background: Quantitative estimation of toxicokinetic variability in the human population is a persistent challenge in risk assessment of environmental chemicals. Traditionally, interindividual differences in the population are accounted for by default assumptions or, in rare cases, are based on human toxicokinetic data. Objectives: We evaluated the utility of genetically diverse mouse strains for estimating toxicokinetic population variability for risk assessment, using trichloroethylene (TCE) metabolism as a case study. Methods: We used data on oxidative and glutathione conjugation metabolism of TCE in 16 inbred and 1 hybrid mouse strains to calibrate and extend existing physiologically based pharmacokinetic (PBPK) models. We added one-compartment models for glutathione metabolites and a two-compartment model for dichloroacetic acid (DCA). We used a Bayesian population analysis of interstrain variability to quantify variability in TCE metabolism. Results: Concentration–time profiles for TCE metabolism to oxidative and glutathione conjugation metabolites varied across strains. Median predictions for the metabolic flux through oxidation were less variable (5-fold range) than that through glutathione conjugation (10-fold range). For oxidative metabolites, median predictions of trichloroacetic acid production were less variable (2-fold range) than DCA production (5-fold range), although the uncertainty bounds for DCA exceeded the predicted variability. Conclusions: Population PBPK modeling of genetically diverse mouse strains can provide useful quantitative estimates of toxicokinetic population variability. 
When extrapolated to lower doses more relevant to environmental exposures, mouse population-derived variability estimates for TCE metabolism closely matched population variability estimates previously derived from human toxicokinetic studies with TCE, highlighting the utility of mouse interstrain metabolism studies for addressing toxicokinetic variability. Citation: Chiu WA, Campbell JL Jr, Clewell HJ III, Zhou YH, Wright FA, Guyton KZ, Rusyn I. 2014. Physiologically based pharmacokinetic (PBPK) modeling of interstrain variability in trichloroethylene metabolism in the mouse. Environ Health Perspect 122:456–463; http://dx.doi.org/10.1289/ehp.1307623}, number={5}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, author={Chiu, Weihsueh A. and Campbell, Jr., Jerry L. and Clewell, III, Harvey J. and Zhou, Yi-Hui and Wright, Fred A. and Guyton, Kathryn Z. and Rusyn, Ivan}, year={2014}, month={May}, pages={456–463} } @article{lange_hu_zhang_xue_schmidt_tang_bizon_lange_smith_turner_et al._2014, title={Whole-Exome Sequencing Identifies Rare and Low-Frequency Coding Variants Associated with LDL Cholesterol}, volume={94}, ISSN={0002-9297}, url={http://dx.doi.org/10.1016/J.AJHG.2014.01.010}, DOI={10.1016/J.AJHG.2014.01.010}, abstractNote={Elevated low-density lipoprotein cholesterol (LDL-C) is a treatable, heritable risk factor for cardiovascular disease. Genome-wide association studies (GWASs) have identified 157 variants associated with lipid levels but are not well suited to assess the impact of rare and low-frequency variants. To determine whether rare or low-frequency coding variants are associated with LDL-C, we exome sequenced 2,005 individuals, including 554 individuals selected for extreme LDL-C (>98th or <2nd percentile). Follow-up analyses included sequencing of 1,302 additional individuals and genotype-based analysis of 52,221 individuals. 
We observed significant evidence of association between LDL-C and the burden of rare or low-frequency variants in PNPLA5, encoding a phospholipase-domain-containing protein, and both known and previously unidentified variants in PCSK9, LDLR and APOB, three known lipid-related genes. The effect sizes for the burden of rare variants for each associated gene were substantially higher than those observed for individual SNPs identified from GWASs. We replicated the PNPLA5 signal in an independent large-scale sequencing study of 2,084 individuals. 
In conclusion, this large whole-exome-sequencing study for LDL-C identified a gene not known to be implicated in LDL-C and provides unique insight into the design and analysis of similar experiments.}, number={2}, journal={The American Journal of Human Genetics}, publisher={Elsevier BV}, author={Lange, Leslie A. and Hu, Youna and Zhang, He and Xue, Chenyi and Schmidt, Ellen M. and Tang, Zheng-Zheng and Bizon, Chris and Lange, Ethan M. and Smith, Joshua D. and Turner, Emily H. and others}, year={2014}, month={Feb}, pages={233–245} } @article{sirenko_cromwell_crittenden_wignall_wright_rusyn_2013, title={Assessment of beating parameters in human induced pluripotent stem cells enables quantitative in vitro screening for cardiotoxicity}, volume={273}, ISSN={0041-008X}, url={http://dx.doi.org/10.1016/J.TAAP.2013.09.017}, DOI={10.1016/J.TAAP.2013.09.017}, abstractNote={Human induced pluripotent stem cell (iPSC)-derived cardiomyocytes show promise for screening during early drug development. Here, we tested a hypothesis that in vitro assessment of multiple cardiomyocyte physiological parameters enables predictive and mechanistically-interpretable evaluation of cardiotoxicity in a high-throughput format. Human iPSC-derived cardiomyocytes were exposed for 30 min or 24 h to 131 drugs, positive (107) and negative (24) for in vivo cardiotoxicity, in up to 6 concentrations (3 nM to 30 uM) in 384-well plates. Fast kinetic imaging was used to monitor changes in cardiomyocyte function using intracellular Ca2+ flux readouts synchronous with beating, and cell viability. A number of physiological parameters of cardiomyocyte beating, such as beat rate, peak shape (amplitude, width, raise, decay, etc.) and regularity were collected using automated data analysis. 
Concentration–response profiles were evaluated using logistic modeling to derive a benchmark concentration (BMC) point-of-departure value, based on one standard deviation departure from the estimated baseline in vehicle (0.3% dimethyl sulfoxide)-treated cells. BMC values were used for cardiotoxicity classification and ranking of compounds. Beat rate and several peak shape parameters were found to be good predictors, while cell viability had poor classification accuracy. In addition, we applied the Toxicological Prioritization Index (ToxPi) approach to integrate and display data across many collected parameters, to derive "cardiosafety" ranking of tested compounds. Multi-parameter screening of beating profiles allows for cardiotoxicity risk assessment and identification of specific patterns defining mechanism-specific effects. These data and analysis methods may be used widely for compound screening and early safety evaluation in drug development.}, number={3}, journal={Toxicology and Applied Pharmacology}, publisher={Elsevier BV}, author={Sirenko, Oksana and Cromwell, Evan F. and Crittenden, Carole and Wignall, Jessica A. and Wright, Fred A. and Rusyn, Ivan}, year={2013}, month={Dec}, pages={500–507} } @article{zhou_mayhew_sun_xu_zou_wright_2013, title={Space-time clustering and the permutation moments of quadratic forms}, volume={2}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/STA4.37}, DOI={10.1002/STA4.37}, abstractNote={The Mantel and Knox space–time clustering statistics are popular tools to establish transmissibility of a disease and detect outbreaks. The most commonly used null distributional approximations may provide poor fits, and researchers often resort to direct sampling from the permutation distribution. However, the exact first four moments for these statistics are available, and Pearson distributional approximations are often effective. Thus, our first goals are to clarify the literature and make these tools more widely available. 
In addition, by rewriting terms in the statistics, we obtain the exact first four permutation moments for the most commonly used quadratic form statistics, which need not be positive definite. The extension of this work to quadratic forms greatly expands the utility of density approximations for these problems, including for high‐dimensional applications, where the statistics must be extreme in order to exceed stringent testing thresholds. We demonstrate the methods using examples from the investigation of disease transmission in cattle, the association of a gene expression pathway with breast cancer survival, regional genetic association with cystic fibrosis lung disease and hypothesis testing for smoothed local linear regression. © The Authors. Stat published by John Wiley & Sons Ltd.}, number={1}, journal={Stat}, publisher={Wiley}, author={Zhou, Yi-Hui and Mayhew, Gregory and Sun, Zhibin and Xu, Xiaolin and Zou, Fei and Wright, Fred A.}, year={2013}, month={Nov}, pages={292–302} } @article{lonsdale_thomas_salvatore_phillips_lo_shad_hasz_walters_garcia_young_et al._2013, title={The Genotype-Tissue Expression (GTEx) project}, volume={45}, ISSN={1061-4036 1546-1718}, url={http://dx.doi.org/10.1038/NG.2653}, DOI={10.1038/NG.2653}, abstractNote={Genome-wide association studies have identified thousands of loci for common diseases, but, for the majority of these, the mechanisms underlying disease susceptibility remain unknown. Most associated variants are not correlated with protein-coding changes, suggesting that polymorphisms in regulatory regions probably contribute to many disease phenotypes. 
Here we describe the Genotype-Tissue Expression (GTEx) project, which will establish a resource database and associated tissue bank for the scientific community to study the relationship between genetic variation and gene expression in human tissues.}, number={6}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Lonsdale, John and Thomas, Jeffrey and Salvatore, Mike and Phillips, Rebecca and Lo, Edmund and Shad, Saboor and Hasz, Richard and Walters, Gary and Garcia, Fernando and Young, Nancy and others}, year={2013}, month={May}, pages={580–585} } @article{byrnes_wu_wright_li_li_2013, title={The Value of Statistical or Bioinformatics Annotation for Rare Variant Association With Quantitative Trait}, volume={37}, ISSN={0741-0395}, url={http://dx.doi.org/10.1002/GEPI.21747}, DOI={10.1002/GEPI.21747}, abstractNote={In the past few years, a plethora of methods for rare variant association with phenotype have been proposed. These methods aggregate information from multiple rare variants across genomic region(s), but there is little consensus as to which method is most effective. The weighting scheme adopted when aggregating information across variants is one of the primary determinants of effectiveness. Here we present a systematic evaluation of multiple weighting schemes through a series of simulations intended to mimic large sequencing studies of a quantitative trait. We evaluate existing phenotype‐independent and phenotype‐dependent methods, as well as weights estimated by penalized regression approaches including Lasso, Elastic Net, and SCAD. We find that the difference in power between phenotype‐dependent schemes is negligible when high‐quality functional annotations are available. When functional annotations are unavailable or incomplete, all methods suffer from power loss; however, the variable selection methods outperform the others at the cost of increased computational time. 
Therefore, in the absence of good annotation, we recommend variable selection methods (which can be viewed as “statistical annotation”) on top of regions implicated by a phenotype‐independent weighting scheme. Further, once a region is implicated, variable selection can help to identify potential causal single nucleotide polymorphisms for biological validation. These findings are supported by an analysis of a high coverage targeted sequencing study of 1,898 individuals.}, number={7}, journal={Genetic Epidemiology}, publisher={Wiley}, author={Byrnes, Andrea E. and Wu, Michael C. and Wright, Fred A. and Li, Mingyao and Li, Yun}, year={2013}, month={Jul}, pages={666–674} } @article{ghosh_wright_zou_2013, title={Unified Analysis of Secondary Traits in Case–Control Association Studies}, volume={108}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2013.793121}, DOI={10.1080/01621459.2013.793121}, abstractNote={It has been repeatedly shown that in case–control association studies, analysis of a secondary trait that ignores the original sampling scheme can produce highly biased risk estimates. Although a number of approaches have been proposed to properly analyze secondary traits, most approaches fail to reproduce the marginal logistic model assumed for the original case–control trait and/or do not allow for interaction between secondary trait and genotype marker on primary disease risk. In addition, the flexible handling of covariates remains challenging. We present a general retrospective likelihood framework to perform association testing for both binary and continuous secondary traits, which respects marginal models and incorporates the interaction term. We provide a computational algorithm, based on a reparameterized approximate profile likelihood, for obtaining the maximum likelihood (ML) estimate and its standard error for the genetic effect on secondary traits, in the presence of covariates. 
For completeness, we also present an alternative pseudo-likelihood method for handling covariates. We describe extensive simulations to evaluate the performance of the ML estimator in comparison with the pseudo-likelihood and other competing methods. Supplementary materials for this article are available online.}, number={502}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Ghosh, Arpita and Wright, Fred A. and Zou, Fei}, year={2013}, month={Jun}, pages={566–576} } @article{emond_louie_emerson_zhao_mathias_knowles_wright_rieder_tabor_nickerson_et al._2012, title={Exome sequencing of extreme phenotypes identifies DCTN4 as a modifier of chronic Pseudomonas aeruginosa infection in cystic fibrosis}, volume={44}, ISSN={1061-4036 1546-1718}, url={http://dx.doi.org/10.1038/ng.2344}, DOI={10.1038/ng.2344}, abstractNote={Michael Bamshad and colleagues report an exome sequencing study of extreme phenotypes to identify genetic variants that modify the risk for chronic Pseudomonas aeruginosa airway infection in individuals with cystic fibrosis. Exome sequencing has become a powerful and effective strategy for the discovery of genes underlying Mendelian disorders1. However, use of exome sequencing to identify variants associated with complex traits has been more challenging, partly because the sample sizes needed for adequate power may be very large2. One strategy to increase efficiency is to sequence individuals who are at both ends of a phenotype distribution (those with extreme phenotypes). Because the frequency of alleles that contribute to the trait are enriched in one or both phenotype extremes, a modest sample size can potentially be used to identify novel candidate genes and/or alleles3. 
As part of the National Heart, Lung, and Blood Institute (NHLBI) Exome Sequencing Project (ESP), we used an extreme phenotype study design to discover that variants in DCTN4, encoding a dynactin protein, are associated with time to first P. aeruginosa airway infection, chronic P. aeruginosa infection and mucoid P. aeruginosa in individuals with cystic fibrosis.}, number={8}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Emond, Mary J and Louie, Tin and Emerson, Julia and Zhao, Wei and Mathias, Rasika A and Knowles, Michael R and Wright, Fred A and Rieder, Mark J and Tabor, Holly K and Nickerson, Deborah A and others}, year={2012}, month={Jul}, pages={886–889} } @article{sun_rommens_corvol_li_li_chiang_lin_dorfman_busson_parekh_et al._2012, title={Multiple apical plasma membrane constituents are associated with susceptibility to meconium ileus in individuals with cystic fibrosis}, volume={44}, ISSN={1061-4036 1546-1718}, url={http://dx.doi.org/10.1038/ng.2221}, DOI={10.1038/ng.2221}, abstractNote={Lisa Strug and colleagues report a genome-wide association study for meconium ileus in individuals with cystic fibrosis. Conventional genome-wide approaches identified variants in SLC26A9 and SLC6A14 associated with meconium ileus. The authors also performed a hypothesis-driven genome-wide association study (HD-GWAS) that upweighted 3,814 SNPs within 10 kb of 155 genes expressed in the apical plasma membrane. The HD-GWAS identified variants near SLC9A3 associated with meconium ileus. Variants associated with meconium ileus in cystic fibrosis were identified in 3,763 affected individuals by genome-wide association study (GWAS). Five SNPs at two loci near SLC6A14 at Xq23-24 (minimum P = 1.28 × 10−12 at rs3788766) and SLC26A9 at 1q32.1 (minimum P = 9.88 × 10−9 at rs4077468) accounted for ∼5% of phenotypic variability and were replicated in an independent sample of affected individuals (n = 2,372; P = 0.001 and 0.0001, respectively). 
By incorporating the knowledge that disease-causing mutations in CFTR alter electrolyte and fluid flux across surface epithelium into a hypothesis-driven GWAS (GWAS-HD), we identified associations with the same SNPs in SLC6A14 and SLC26A9 and established evidence for the involvement of SNPs in a third solute carrier gene, SLC9A3. In addition, GWAS-HD provided evidence of association between meconium ileus and multiple genes encoding constituents of the apical plasma membrane where CFTR resides (P = 0.0002; testing of 155 apical membrane genes jointly and in replication, P = 0.022). These findings suggest that modulating activities of apical membrane constituents could complement current therapeutic paradigms for cystic fibrosis.}, number={5}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Sun, Lei and Rommens, Johanna M and Corvol, Harriet and Li, Weili and Li, Xin and Chiang, Theodore A and Lin, Fan and Dorfman, Ruslan and Busson, Pierre-François and Parekh, Rashmi V and others}, year={2012}, month={Apr}, pages={562–569} } @article{taylor_commander_collaco_strug_li_wright_webel_pace_stonebraker_naughton_et al._2011, title={A novel lung disease phenotype adjusted for mortality attrition for cystic fibrosis Genetic modifier studies}, volume={46}, ISSN={8755-6863}, url={http://dx.doi.org/10.1002/ppul.21456}, DOI={10.1002/ppul.21456}, abstractNote={Genetic studies of lung disease in cystic fibrosis (CF) are hampered by the lack of a severity measure that accounts for chronic disease progression and mortality attrition. 
Further, combining analyses across studies requires common phenotypes that are robust to study design and patient ascertainment. Using data from the North American Cystic Fibrosis Modifier Consortium (Canadian Consortium for CF Genetic Studies, Johns Hopkins University CF Twin and Sibling Study, and University of North Carolina/Case Western Reserve University Gene Modifier Study), the authors calculated age‐specific CF percentile values of FEV1 which were adjusted for CF age‐specific mortality data. The phenotype was computed for 2,061 patients representing the Canadian CF population, 1,137 extreme phenotype patients in the UNC/Case Western study, and 1,323 patients from multiple CF sib families in the CF Twin and Sibling Study. Despite differences in ascertainment and median age, our phenotype score was distributed in all three samples in a manner consistent with ascertainment differences, reflecting the lung disease severity of each individual in the underlying population. The new phenotype score was highly correlated with the previously recommended complex phenotype, but the new phenotype is more robust for shorter follow‐up and for extreme ages. A disease progression and mortality‐adjusted phenotype reduces the need for stratification or additional covariates, increasing statistical power, and avoiding possible distortions. This approach will facilitate large‐scale genetic and environmental epidemiological studies which will provide targeted therapeutic pathways for the clinical benefit of patients with CF. Pediatr. Pulmonol. 2011; 46:857–869. © 2011 Wiley‐Liss, Inc.}, number={9}, journal={Pediatric Pulmonology}, publisher={Wiley}, author={Taylor, Chelsea and Commander, Clayton W. and Collaco, Joseph M. and Strug, Lisa J. and Li, Weili and Wright, Fred A. and Webel, Aaron D. and Pace, Rhonda G. and Stonebraker, Jaclyn R. 
and Naughton, Kathleen and others}, year={2011}, month={Apr}, pages={857–869} } @article{wright_strug_doshi_commander_blackman_sun_berthiaume_cutler_cojocaru_collaco_et al._2011, title={Genome-wide association and linkage identify modifier loci of lung disease severity in cystic fibrosis at 11p13 and 20q13.2}, volume={43}, ISSN={1061-4036 1546-1718}, url={http://dx.doi.org/10.1038/ng.838}, DOI={10.1038/ng.838}, abstractNote={Garry Cutting and colleagues report a genome-wide association and linkage study for loci that affect lung disease severity in cystic fibrosis. They identify two loci that influence lung function in individuals with cystic fibrosis. A combined genome-wide association and linkage study was used to identify loci causing variation in cystic fibrosis lung disease severity. We identified a significant association (P = 3.34 × 10−8) near EHF and APIP (chr11p13) in p.Phe508del homozygotes (n = 1,978). The association replicated in p.Phe508del homozygotes (P = 0.006) from a separate family based study (n = 557), with P = 1.49 × 10−9 for the three-study joint meta-analysis. Linkage analysis of 486 sibling pairs from the family based study identified a significant quantitative trait locus on chromosome 20q13.2 (log10 odds = 5.03). 
Our findings provide insight into the causes of variation in lung disease severity in cystic fibrosis and suggest new therapeutic targets for this life-limiting disorder.}, number={6}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Wright, Fred A and Strug, Lisa J and Doshi, Vishal K and Commander, Clayton W and Blackman, Scott M and Sun, Lei and Berthiaume, Yves and Cutler, David and Cojocaru, Andreea and Collaco, J Michael and others}, year={2011}, month={May}, pages={539–546} } @article{gatti_lu_williams_sun_wright_threadgill_rusyn_2011, title={MicroRNA expression in the livers of inbred mice}, volume={714}, ISSN={0027-5107}, url={http://dx.doi.org/10.1016/j.mrfmmm.2011.05.007}, DOI={10.1016/j.mrfmmm.2011.05.007}, abstractNote={MicroRNAs are short, non-coding RNA sequences that regulate genes at the post-transcriptional level and have been shown to be important in development, tissue differentiation, and disease. Limited attention has been given to the natural variation in miRNA expression across genetically diverse populations even though it is well established that genetic polymorphisms can have a profound effect on mRNA levels. Expression level of 577 miRNAs in the livers of 70 strains of inbred mice was assessed, and we found that miRNA expression is highly stable across different strains. Globally, the expression of miRNA target transcripts does not correlate with miRNA expression, primarily due to the low variance of miRNA but high variance of mRNA expression across strains. Our results show that there is little genetic effect on the baseline miRNA levels in murine liver. 
The stability of mouse liver miRNA expression in a genetically diverse population suggests that treatment-induced disruptions in liver miRNA expression, a phenomenon established for a large number of toxicants, may indicate an important mechanism for the disturbance of normal liver function, and may prove to be a useful genetic background-independent biomarker of toxicant effect.}, number={1-2}, journal={Mutation Research/Fundamental and Molecular Mechanisms of Mutagenesis}, publisher={Elsevier BV}, author={Gatti, Daniel M. and Lu, Lu and Williams, Robert W. and Sun, Wei and Wright, Fred A. and Threadgill, David W. and Rusyn, Ivan}, year={2011}, month={Sep}, pages={126–133} } @article{shea_virtaneva_kupko_porcella_barry_wright_kobayashi_carmody_ireland_sturdevant_et al._2010, title={Interactome analysis of longitudinal pharyngeal infection of cynomolgus macaques by group A Streptococcus}, volume={107}, ISSN={0027-8424 1091-6490}, url={http://dx.doi.org/10.1073/pnas.0906384107}, DOI={10.1073/pnas.0906384107}, abstractNote={ Relatively little is understood about the dynamics of global host–pathogen transcriptome changes that occur during bacterial infection of mucosal surfaces. To test the hypothesis that group A Streptococcus (GAS) infection of the oropharynx provokes a distinct host transcriptome response, we performed genome-wide transcriptome analysis using a nonhuman primate model of experimental pharyngitis. We also identified host and pathogen biological processes and individual host and pathogen gene pairs with correlated patterns of expression, suggesting interaction. For this study, 509 host genes and seven biological pathways were differentially expressed throughout the entire 32-day infection cycle. GAS infection produced an initial widespread significant decrease in expression of many host genes, including those involved in cytokine production, vesicle formation, metabolism, and signal transduction. 
This repression lasted until day 4, at which time a large increase in expression of host genes was observed, including those involved in protein translation, antigen presentation, and GTP-mediated signaling. The interactome analysis identified 73 host and pathogen gene pairs with correlated expression levels. We discovered significant correlations between transcripts of GAS genes involved in hyaluronic capsule production and host endocytic vesicle formation, GAS GTPases and host fibrinolytic genes, and GAS response to interaction with neutrophils. We also identified a strong signal, suggesting interaction between host γδ T cells and genes in the GAS mevalonic acid synthesis pathway responsible for production of isopentenyl-pyrophosphate, a short-chain phospholipid that stimulates these T cells. Taken together, our results are unique in providing a comprehensive understanding of the host–pathogen interactome during mucosal infection by a bacterial pathogen. }, number={10}, journal={Proceedings of the National Academy of Sciences}, publisher={Proceedings of the National Academy of Sciences}, author={Shea, P. R. and Virtaneva, K. and Kupko, J. J. and Porcella, S. F. and Barry, W. T. and Wright, F. A. and Kobayashi, S. D. and Carmody, A. and Ireland, R. M. and Sturdevant, D. E. and others}, year={2010}, month={Feb}, pages={4693–4698} } @article{o'neil_funkhouser_calvo_meyers_kim_goldberg_bernard_caskey_deal_wright_et al._2011, title={Nuclear Factor κ-Light Chain-Enhancer of Activated B Cells is Activated by Radiotherapy and is Prognostic for Overall Survival in Patients With Rectal Cancer Treated With Preoperative Fluorouracil-Based Chemoradiotheraphy}, volume={80}, ISSN={0360-3016}, url={http://dx.doi.org/10.1016/j.ijrobp.2010.02.063}, DOI={10.1016/j.ijrobp.2010.02.063}, abstractNote={Purpose Rectal cancer is often clinically resistant to radiotherapy (RT) and identifying molecular markers to define the biologic basis for this phenomenon would be valuable. 
The nuclear factor κ-light chain-enhancer of activated B cells (NF-κB) is a potential anti-apoptotic transcription factor that has been associated with resistance to RT in model systems. The present study was designed to evaluate NF-κB activation in patients with rectal cancer undergoing chemoradiotherapy to determine whether NF-κB activity correlates with the outcome in rectal cancer patients. Methods and Materials A total of 22 patients underwent biopsy at multiple points in a prospective study and the data from another 50 were analyzed retrospectively. The pretreatment tumor tissue was analyzed for multiple NF-κB subunits by immunohistochemistry. Serial tumor biopsy cores were analyzed for NF-κB–regulated gene expression using reverse transcriptase polymerase chain reaction and for NF-κB subunit nuclear localization using immunohistochemistry. 
Results Several NF-κB target genes (Bcl-2, cellular inhibitor of apoptosis protein [cIAP]2, interleukin-8, and tumor necrosis factor receptor-associated-1) were significantly upregulated by a single fraction of RT at 24 h, demonstrating for the first time that NF-κB is activated by RT in human rectal tumors. The baseline NF-κB p50 nuclear expression did not correlate with the pathologic response to RT. However, an increasing baseline p50 level was prognostic for overall survival (hazard ratio, 2.15; p = .040). Conclusion NF-κB nuclear expression at baseline in rectal cancer was prognostic for overall survival but not predictive of the response to RT. Larger patient numbers are needed to assess the effect of NF-κB target gene upregulation on the response to RT. Our results suggest that NF-κB might play an important role in tumor metastasis but not to the resistance to chemoradiotherapy.}, number={3}, journal={International Journal of Radiation Oncology*Biology*Physics}, publisher={Elsevier BV}, author={O'Neil, Bert H. and Funkhouser, William K. and Calvo, Benjamin F. and Meyers, Michael O. and Kim, Hong Jin and Goldberg, Richard M. and Bernard, Stephen A. 
and Caskey, Laura and Deal, Allison M. and Wright, Fred and others}, year={2011}, month={Jul}, pages={705–711} } @article{winnike_li_wright_macdonald_o'connell_watkins_2010, title={Use of Pharmaco-Metabonomics for Early Prediction of Acetaminophen-Induced Hepatotoxicity in Humans}, volume={88}, ISSN={0009-9236 1532-6535}, url={http://dx.doi.org/10.1038/clpt.2009.240}, DOI={10.1038/clpt.2009.240}, abstractNote={Achieving the ability to identify individuals who are susceptible to drug-induced liver injury (DILI) would represent a major advance in personalized medicine. Clayton et al. demonstrated that the pattern of endogenous metabolites in urine could predict susceptibility to acetaminophen-induced liver injury in rats. We designed a clinical study to test this approach in healthy adults who received 4 g of acetaminophen per day for 7 days. Urine metabolite profiles obtained before the start of treatment were not sufficient to distinguish which of the subjects would develop mild liver injury, as indicated by a rise in alanine aminotransferase (ALT) to a level more than twice the baseline value (responders). However, profiles obtained shortly after the start of treatment, but prior to ALT elevation, could distinguish responders from nonresponders. Statistical analyses revealed that predictive metabolites included those derived from the toxic metabolite N-acetyl paraquinone imine (NAPQI), but that the inclusion of endogenous metabolites was required for significant prediction. This "early-intervention pharmaco-metabonomics" approach should now be tested in clinical trials of other potentially hepatotoxic drugs. Clinical Pharmacology & Therapeutics (2010) 88 1, 45–51. 
doi: 10.1038/clpt.2009.240}, number={1}, journal={Clinical Pharmacology \& Therapeutics}, publisher={Springer Science and Business Media LLC}, author={Winnike, J H and Li, Z and Wright, F A and Macdonald, J M and O'Connell, T M and Watkins, P B}, year={2010}, month={Feb}, pages={45–51} } @article{blackman_hsu_ritter_naughton_wright_drumm_knowles_cutting_2009, title={A susceptibility gene for type 2 diabetes confers substantial risk for diabetes complicating cystic fibrosis}, volume={52}, ISSN={0012-186X 1432-0428}, url={http://dx.doi.org/10.1007/s00125-009-1436-2}, DOI={10.1007/s00125-009-1436-2}, abstractNote={Insulin-requiring diabetes affects 25–50% of young adults with cystic fibrosis (CF). Although the cause of diabetes in CF is unknown, recent heritability studies in CF twins and siblings indicate that genetic modifiers play a substantial role. We sought to assess whether genes conferring risk for diabetes in the general population may play a risk modifying role in CF. We tested whether a family history of type 2 diabetes affected diabetes risk in CF patients in 539 families in the CF Twin and Sibling family-based study. A type 2 diabetes susceptibility gene (transcription factor 7-like 2, or TCF7L2) was evaluated for association with diabetes in CF using 998 patients from the family-based study and 802 unrelated CF patients in an independent case–control study. Family history of type 2 diabetes increased the risk of diabetes in CF (OR 3.1; p = 0.0009). A variant in TCF7L2 associated with type 2 diabetes (the T allele at rs7903146) was associated with diabetes in CF in the family study (p = 0.004) and in the case–control study (p = 0.02; combined p = 0.0002). In the family-based study, variation in TCF7L2 increased the risk of diabetes about three-fold (HR 1.75 per allele, 95% CI 1.3–2.4; p = 0.0006), and decreased the mean age at diabetes diagnosis by 7 years. 
In CF patients not treated with systemic glucocorticoids, the effect of TCF7L2 was even greater (HR 2.9 per allele, 95% CI 1.7–4.9, p = 0.00011). A genetic variant conferring risk for type 2 diabetes in the general population is a modifier of risk for diabetes in CF.}, number={9}, journal={Diabetologia}, publisher={Springer Science and Business Media LLC}, author={Blackman, S. M. and Hsu, S. and Ritter, S. E. and Naughton, K. M. and Wright, F. A. and Drumm, M. L. and Knowles, M. R. and Cutting, G. R.}, year={2009}, month={Jul}, pages={1858–1865} } @article{chang_zou_wright_2010, title={An approximate Bayesian approach for quantitative trait loci estimation}, volume={54}, ISSN={0167-9473}, url={http://dx.doi.org/10.1016/j.csda.2009.09.029}, DOI={10.1016/j.csda.2009.09.029}, abstractNote={Bayesian approaches have been widely used in quantitative trait locus (QTL) linkage analysis in experimental crosses, and have advantages in interpretability and in constructing parameter probability intervals. Most existing Bayesian linkage methods involve Monte Carlo sampling, which is computationally prohibitive for high-throughput applications such as eQTL analysis. In this paper, we present a Bayesian linkage model that offers directly interpretable posterior densities or Bayes factors for linkage. For our model, we employ the Laplace approximation for integration over nuisance parameters in backcross (BC) and F2 intercross designs. Our approach is highly accurate, and very fast compared with alternatives, including grid search integration, importance sampling, and Markov Chain Monte Carlo (MCMC). Our approach is thus suitable for high-throughput applications. 
Simulated and real datasets are used to demonstrate our proposed approach.}, number={2}, journal={Computational Statistics \& Data Analysis}, publisher={Elsevier BV}, author={Chang, Yu-Ling and Zou, Fei and Wright, Fred A.}, year={2010}, month={Feb}, pages={565–574} } @article{sullivan_lin_tzeng_van den oord_perkins_stroup_wagner_lee_wright_zou_et al._2009, title={Erratum: Genomewide association for schizophrenia in the CATIE study: results of stage 1}, volume={14}, ISSN={1359-4184 1476-5578}, url={http://dx.doi.org/10.1038/mp.2008.74}, DOI={10.1038/mp.2008.74}, abstractNote={Correction to: Molecular Psychiatry (2008) 13, 570–584; doi: 10.1038/mp.2008.25 For technical reasons, Supplementary Tables 2, 3 and 4 were not published online. They now appear online at www.nature.com/mp.}, number={12}, journal={Molecular Psychiatry}, publisher={Springer Science and Business Media LLC}, author={Sullivan, P F and Lin, D and Tzeng, J-Y and van den Oord, E and Perkins, D and Stroup, T S and Wagner, M and Lee, S and Wright, F A and Zou, F and others}, year={2009}, month={Nov}, pages={1144–1144} } @article{bartlett_friedman_ling_pace_bell_bourke_castaldo_castellani_cipolli_colombo_et al._2009, title={Genetic Modifiers of Liver Disease in Cystic Fibrosis}, volume={302}, ISSN={0098-7484}, url={http://dx.doi.org/10.1001/jama.2009.1295}, DOI={10.1001/jama.2009.1295}, abstractNote={CONTEXT A subset (approximately 3%-5%) of patients with cystic fibrosis (CF) develops severe liver disease with portal hypertension. OBJECTIVE To assess whether any of 9 polymorphisms in 5 candidate genes (alpha(1)-antitrypsin or alpha(1)-antiprotease [SERPINA1], angiotensin-converting enzyme [ACE], glutathione S-transferase [GSTP1], mannose-binding lectin 2 [MBL2], and transforming growth factor beta1 [TGFB1]) are associated with severe liver disease in patients with CF.
DESIGN, SETTING, AND PARTICIPANTS Two-stage case-control study enrolling patients with CF and severe liver disease with portal hypertension (CFLD) from 63 CF centers in the United States as well as 32 in Canada and 18 outside of North America, with the University of North Carolina at Chapel Hill as the coordinating site. In the initial study, 124 patients with CFLD (enrolled January 1999-December 2004) and 843 control patients without CFLD were studied by genotyping 9 polymorphisms in 5 genes previously studied as modifiers of liver disease in CF. In the second stage, the SERPINA1 Z allele and TGFB1 codon 10 genotype were tested in an additional 136 patients with CFLD (enrolled January 2005-February 2007) and 1088 with no CFLD. MAIN OUTCOME MEASURES Differences in distribution of genotypes in patients with CFLD vs patients without CFLD. RESULTS The initial study showed CFLD to be associated with the SERPINA1 Z allele (odds ratio [OR], 4.72; 95% confidence interval [CI], 2.31-9.61; P = 3.3 x 10(-6)) and with TGFB1 codon 10 CC genotype (OR, 1.53; 95% CI, 1.16-2.03; P = 2.8 x 10(-3)). In the replication study, CFLD was associated with the SERPINA1 Z allele (OR, 3.42; 95% CI, 1.54-7.59; P = 1.4 x 10(-3)) but not with TGFB1 codon 10. A combined analysis of the initial and replication studies by logistic regression showed CFLD to be associated with SERPINA1 Z allele (OR, 5.04; 95% CI, 2.88-8.83; P = 1.5 x 10(-8)). CONCLUSIONS The SERPINA1 Z allele is a risk factor for liver disease in CF. Patients who carry the Z allele are at greater risk (OR, approximately 5) of developing severe liver disease with portal hypertension.}, number={10}, journal={JAMA}, publisher={American Medical Association (AMA)}, author={Bartlett, Jaclyn R. and Friedman, Kenneth J. and Ling, Simon C. and Pace, Rhonda G. and Bell, Scott C. 
and Bourke, Billy and Castaldo, Giuseppe and Castellani, Carlo and Cipolli, Marco and Colombo, Carla and others}, year={2009}, month={Sep}, pages={1076–1083} } @article{gatti_harrill_wright_threadgill_rusyn_2009, title={Replication and narrowing of gene expression quantitative trait loci using inbred mice}, volume={20}, ISSN={0938-8990 1432-1777}, url={http://dx.doi.org/10.1007/s00335-009-9199-0}, DOI={10.1007/s00335-009-9199-0}, abstractNote={Gene expression quantitative trait locus (eQTL) mapping has become a powerful tool in systems biology. While many authors have made important discoveries using this approach, one persistent challenge in eQTL studies is the selection of loci and genes that should receive further biological investigation. In this study we compared eQTL generated from gene expression profiling in the livers of two panels of mouse strains: 41 BXD recombinant inbred and 36 Mouse Diversity Panel (MDP) strains. Cis-eQTL, loci in which the transcript and its maximum QTL are colocated, have been shown to be more reproducible than trans-eQTL, which are not colocated with the transcript. We observed that between 9.9 and 12.1% of cis-eQTL and between 2.0 and 12.6% of trans-eQTL replicated between the two panels depending on the degree of statistical stringency. Notably, a significant eQTL hotspot on distal chromosome 12 observed in the BXD panel was reproduced in the MDP. Furthermore, the shorter linkage disequilibrium in the MDP strains allowed us to considerably narrow the locus and limit the number of candidate genes to a cluster of Serpin genes, which code for extracellular proteases.
We conclude that this strategy has some utility in increasing confidence and resolution in eQTL mapping studies; however, due to the high false-positive rate in the MDP, eQTL mapping in inbred strains is best carried out in combination with an eQTL linkage study.}, number={7}, journal={Mammalian Genome}, publisher={Springer Science and Business Media LLC}, author={Gatti, Daniel M. and Harrill, Alison H. and Wright, Fred A. and Threadgill, David W. and Rusyn, Ivan}, year={2009}, month={Jul}, pages={437–446} } @article{zou_nie_wright_sen_2009, title={A robust QTL mapping procedure}, volume={139}, ISSN={0378-3758}, url={http://dx.doi.org/10.1016/j.jspi.2008.06.009}, DOI={10.1016/j.jspi.2008.06.009}, abstractNote={In quantitative-trait linkage studies using experimental crosses, the conventional normal location-shift model or other parameterizations may be unnecessarily restrictive. We generalize the mapping problem to a genuine nonparametric setup and provide a robust estimation procedure for the situation where the underlying phenotype distributions are completely unspecified. Classical Wilcoxon-Mann-Whitney statistics are employed for point and interval estimation of QTL positions and effects.}, number={3}, journal={Journal of Statistical Planning and Inference}, publisher={Elsevier BV}, author={Zou, Fei and Nie, Lei and Wright, Fred A. and Sen, Pranab K.}, year={2009}, month={Mar}, pages={978–989} } @article{lee_sullivan_zou_wright_2008, title={Comment on a Simple and Improved Correction for Population Stratification}, volume={82}, ISSN={0002-9297}, url={http://dx.doi.org/10.1016/j.ajhg.2007.10.014}, DOI={10.1016/j.ajhg.2007.10.014}, abstractNote={To the Editor: In the May 2007 issue of the American Journal of Human Genetics, Epstein, Allen, and Satten1Epstein M.P. Allen A.S. Satten G.A. A simple and improved correction for population stratification in case-control studies.Am. J. Hum. Genet.
2007; 80: 921-930Abstract Full Text Full Text PDF PubMed Scopus (121) Google Scholar (hereafter referred to as EAS) introduced a new method for controlling population stratification in case-control association studies. The method computes a stratification score by performing partial least-squares regression (PLS) of phenotypes (case-control status) on a matrix of genotypes at markers used to correct for ancestry. The quantitative stratification score is then used to divide subjects into a number of strata, so that a stratified test of case-control association may be performed at any test locus not in linkage disequilibrium with the ancestry-informative markers. The stratification and testing procedure are implemented in the program StratScore, available as SAS code from the authors. EAS described a retrospective case-control model involving the latent true stratification variable and provided practical recommendations for dividing the estimated stratification score into a number of strata. The PLS procedure, however, was presented in less detail, although it is key to the performance of the overall approach. A primary motivation was the claim that stratified analysis based on principal components2Price A.L. Patterson N.J. Plenge R.M. Weinblatt M.E. Shadick N.A. Reich D. Principal components analysis corrects for stratification in genome-wide association studies.Nat. Genet. 2006; 38: 904-909Crossref PubMed Scopus (6190) Google Scholar or genomic control3Devlin B. Roeder K. Genomic control for association studies.Biometrics. 1999; 55: 997-1004Crossref PubMed Scopus (2159) Google Scholar cannot fully control for population ancestry. The authors cited an example and provided simulations in which stratification resulted in inflated type I errors when using these methods for 100 ancestry-informative markers. 
An immediate concern is whether these results reflect current practice—in a modern whole-genome scan, hundreds of thousands of markers are available for ancestry control. The results of Price et al.2Price A.L. Patterson N.J. Plenge R.M. Weinblatt M.E. Shadick N.A. Reich D. Principal components analysis corrects for stratification in genome-wide association studies.Nat. Genet. 2006; 38: 904-909Crossref PubMed Scopus (6190) Google Scholar suggest that, with the availability of thousands of markers, principal components do provide effective ancestry control, and indeed a large number of markers may be necessary for correcting stratification within continental-level populations.5Freedman M.L. Reich D. Penney K.L. McDonald G.J. Mignault A.A. Patterson N. Gabriel S.B. Topol E.J. Smoller J.W. Pato C.N. et al.Assessing the impact of population stratification on genetic association studies.Nat. Genet. 2004; 36: 388-393Crossref PubMed Scopus (600) Google Scholar Moreover, the use of principal components does not require predefined ancestry-informative markers and thus may potentially control for unanticipated strata, including technical phenomena unrelated to ancestry.2Price A.L. Patterson N.J. Plenge R.M. Weinblatt M.E. Shadick N.A. Reich D. Principal components analysis corrects for stratification in genome-wide association studies.Nat. Genet. 2006; 38: 904-909Crossref PubMed Scopus (6190) Google Scholar In terms of statistical power, the principal-components-based approach appeared to fare quite well in EAS.1Epstein M.P. Allen A.S. Satten G.A. A simple and improved correction for population stratification in case-control studies.Am. J. Hum. Genet. 2007; 80: 921-930Abstract Full Text Full Text PDF PubMed Scopus (121) Google Scholar To better understand the issues and how the EAS approach might be best applied, we examined the PLS procedure more closely. 
Here, PLS finds linear combinations T of the matrix of ancestry-informative markers X such that the covariance between phenotypes Y and T is maximized (see 4Abdi H. Partial least squares regression (PLS-regression).in: Lewis-Beck M. Bryman A. Futing T. Encyclopedia for Research Methods for the Social Sciences. Sage, Thousand Oaks, CA2003: 792-795Google Scholar for details on partial least-squares regression). Predictions of case status from a logistic-regression model (Y on T) are then used as the stratification score. A risk of PLS is the potential for finding spurious relationships, although EAS employed a variable selection technique to control the number of T variables used. If spurious apparent stratification arises from PLS, it has the potential to greatly reduce statistical power because the stratification variable could account for phenotype variation caused by a true disease gene. Moreover, although the inclusion of a large number of ancestry-informative markers should be desirable for ancestry prediction, the resulting increased flexibility in the PLS factors might produce even stronger spurious stratification, thereby resulting in decreased power as the number of such markers increases. To further investigate the utility of StratScore and to test our predictions about the method, we performed simulations under no stratification, for random unlinked markers with minor allele frequencies (MAF) ranging uniformly from 0.1 to 0.5. Table 1 shows the results from representative simulations analyzed by StratScore, with m = 100, 200, 500, and 800 markers used to infer ancestry. Note that the stratification score has a very high correlation with case-control status, although no true correlation exists between the markers and phenotype because no stratification exists. As the number of markers increases, the spurious correlation increases, and the case-control numbers for many of the strata become highly imbalanced. 
Such strata cannot meaningfully contribute to detection of case-control association.Table 1Illustrative Simulations of Case-Control Status versus StratScore Inferred StrataNumber of Markers and Case StatusStratum 1Stratum 2Stratum 3Stratum 4Stratum 5Totalm = 100 Case6580110106139500 Control135120909461500 Total2002002002002001000m = 200 Case4670105124155500 Control154130957645500 Total2002002002002001000m = 500 Case541103157194500 Control19515997436500 Total2002002002002001000m = 800 Case01100199200500 Control20019910010500 Total2002002002002001000This table shows case-control status versus Stratscore inferred strata, based on 500 cases and 500 controls. m is the number of markers used for computation of stratification score. Open table in a new tab This table shows case-control status versus Stratscore inferred strata, based on 500 cases and 500 controls. m is the number of markers used for computation of stratification score. We further performed simulations of case-control association data, by following the conditions and terminology described in EAS. For each setup, 5000 simulations were performed for 500 cases and 500 controls, with three underlying populations of equal size. We simulated substantial stratification by sampling cases in the proportions 0.45, 0.33, and 0.22 from subpopulations 1, 2, and 3. Moderate stratification was achieved by sampling in the proportions 0.40, 0.33, and 0.27. The alternative hypothesis was simulated with odds of disease increasing by a factor 1.4 for each copy of the risk allele for the test locus, which had Fst values of 0.03 and 0.15 in various simulation setups. EAS simulated ancestry markers on the basis of Fst selection criteria applied to SNPs from a real data set. To reproduce their results and to better control the simulation conditions, we simulated marker SNPs following the method in Price et al.1Epstein M.P. Allen A.S. Satten G.A. 
A simple and improved correction for population stratification in case-control studies.Am. J. Hum. Genet. 2007; 80: 921-930Abstract Full Text Full Text PDF PubMed Scopus (121) Google Scholar For each of MAF values 0.1, 0.25, and 0.4, sets of random marker SNPs were simulated with Fst = 0.03, and highly ancestry-informative markers with Fst values were drawn uniformly from 0.5 to 0.8. Although EAS reported results for sets of m = 100 ancestry markers,1Epstein M.P. Allen A.S. Satten G.A. A simple and improved correction for population stratification in case-control studies.Am. J. Hum. Genet. 2007; 80: 921-930Abstract Full Text Full Text PDF PubMed Scopus (121) Google Scholar we also performed simulations for sets of m = 200, 500, and 800 markers. With a significance threshold of α = 0.05 and a test locus with Fst = 0.03, we found approximately correct type I error control by using the StratScore approach for all choices of m markers (Table 2, effectively an expanded version of Table 2, Table 3 in EAS). However, when the test locus had Fst = 0.15, we found type I errors ranging from 0.02 to 0.098 (Table 3), depending on the ancestry marker setup and degree of stratification. EAS had reported correct StratScore error control for some of these same setups (see Table 4 in EAS). We are unsure of the reason for the discrepancy, although minor variation in generalized PLS1Epstein M.P. Allen A.S. Satten G.A. A simple and improved correction for population stratification in case-control studies.Am. J. Hum. Genet. 2007; 80: 921-930Abstract Full Text Full Text PDF PubMed Scopus (121) Google Scholar versus the standard PLS implemented in StratScore is a possibility. To investigate whether the results might be specific to our use of the simulation approach of Price et al.1Epstein M.P. Allen A.S. Satten G.A. A simple and improved correction for population stratification in case-control studies.Am. J. Hum. Genet. 
2007; 80: 921-930Abstract Full Text Full Text PDF PubMed Scopus (121) Google Scholar (beta sampling of minor allele frequencies, followed by rejection sampling of Fst values), we also employed a deterministic approach. We set allele frequencies for the three populations (order determined randomly) as p/a, p, and pa, where a and p were determined to achieve specified Fst and MAF values. Our conclusions under this scheme were unchanged. Although our main focus is on the power of StratScore, these results suggest a lack of robustness that may be problematic in StratScore error control and deserves further inquiry.Table 2Type I Error under Substantial and Moderate StratificationMarker Type and Test Locus MAFNo AdjustmentKnown StrataStratScore with 100 SNPsStratScore with 200 SNPsStratScore with 500 SNPsStratScore with 800 SNPsHighly Ancestry Informative0.10.155 (0.079)0.051 (0.047)0.049 (0.046)0.049 (0.047)0.042 (0.046)0.057 (0.058)0.250.220 (0.097)0.051 (0.047)0.051 (0.041)0.048 (0.039)0.046 (0.041)0.056 (0.057)0.40.178 (0.090)0.053 (0.053)0.046 (0.049)0.049 (0.048)0.048 (0.045)0.054 (0.057)Random0.10.160 (0.085)0.049 (0.055)0.050 (0.057)0.048 (0.055)0.041 (0.046)0.054 (0.052)0.250.223 (0.097)0.047 (0.045)0.059 (0.048)0.049 (0.043)0.049 (0.040)0.059 (0.049)0.40.166 (0.089)0.054 (0.047)0.059 (0.053)0.047 (0.045)0.044 (0.044)0.047 (0.049)Type I error results at nominal α = 0.05 for 500 cases and 500 controls, when a test locus with Fst = 0.03 is used. Each entry shows the type I error under substantial stratification, followed by the type I error under moderate stratification in parentheses. Simulation conditions are described in the text. 
Open table in a new tab Table 3Type I Error, Test Locus Fst = 0.15Marker Type and Test Locus MAFNo AdjustmentKnown StrataStratScore with 100 SNPsHighly Ancestry Informative0.10.433 (0.150)0.051 (0.053)0.040 (0.028)0.250.751 (0.264)0.046 (0.050)0.020 (0.020)0.40.757 (0.270)0.051 (0.049)0.028 (0.024)Random0.10.446 (0.155)0.050 (0.048)0.078 (0.049)0.250.759 (0.271)0.053 (0.049)0.096 (0.054)0.40.757 (0.267)0.048 (0.050)0.098 (0.051)Type I error results at nominal α = 0.05 for 500 cases and 500 controls, when a test locus with Fst = 0.15 is used. Each entry shows the type I error under substantial stratification, followed by the type I error under moderate stratification in parentheses. Open table in a new tab Table 4Power under Substantial and Moderate StratificationMarker Type and Test Locus MAFKnown StrataStratScore with 100 SNPsStratScore with 200 SNPsStratScore with 500 SNPsStratScore with 800 SNPsHighly Ancestry Informative0.10.691 (0.670)0.67 (0.643)0.619 (0.580)0.403 (0.382)0.243 (0.226)0.250.914 (0.914)0.902 (0.888)0.871 (0.848)0.648 (0.609)0.412 (0.360)0.40.953 (0.958)0.940 (0.941)0.911 (0.915)0.702 (0.708)0.437 (0.430)Random0.10.678 (0.688)0.739 (0.700)0.650 (0.617)0.404 (0.383)0.230 (0.200)0.250.914 (0.910)0.932 (0.914)0.883 (0.863)0.634 (0.620)0.376 (0.345)0.40.959 (0.952)0.967 (0.949)0.937 (0.915)0.719 (0.709)0.430 (0.395)Power results at nominal α = 0.05 for 500 cases and 500 controls. The test locus has Fst = 0.03 and confers an odds ratio of 1.4 for each risk allele. Each entry shows the power under substantial stratification, followed by the power under moderate stratification in parentheses. Open table in a new tab Type I error results at nominal α = 0.05 for 500 cases and 500 controls, when a test locus with Fst = 0.03 is used. Each entry shows the type I error under substantial stratification, followed by the type I error under moderate stratification in parentheses. Simulation conditions are described in the text. 
Type I error results at nominal α = 0.05 for 500 cases and 500 controls, when a test locus with Fst = 0.15 is used. Each entry shows the type I error under substantial stratification, followed by the type I error under moderate stratification in parentheses. Power results at nominal α = 0.05 for 500 cases and 500 controls. The test locus has Fst = 0.03 and confers an odds ratio of 1.4 for each risk allele. Each entry shows the power under substantial stratification, followed by the power under moderate stratification in parentheses. We next investigated power for StratScore as the number of markers increases. Table 4 presents the power under the alternative hypothesis for Cochran Mantel Haenszel (CMH) tests under moderate and substantial true stratification. Here, the best-case scenario of known strata is compared to the StratScore approach for various numbers of ancestry markers. As predicted, the power drops dramatically as the number of ancestry markers increases, thereby restricting the number of markers that can be used. Note that this restriction depends in an essential way on the case-control sample size. Studies in which the true stratification is subtle may require a larger number of markers for ancestry control and therefore limit the utility of StratScore. Another aspect of EAS that was unclear was the degree of correspondence between the stratification score and the true subpopulations. For the alternative-hypothesis simulation setups, we computed average ANOVA R2 values for the stratification score versus the three true subpopulations. For m = 100 markers and substantial stratification, R2 was ∼0.19 when highly ancestry-informative markers were used, regardless of MAF, and 0.12 for random markers with Fst = 0.03. Under moderate stratification, the R2 values were 0.07 for highly ancestry-informative markers, and 0.04 for random markers. As m increased, the R2 values dropped even further. 
These relatively low values were apparently enough to provide error-control correction for the simulations reported in EAS, and other measures of correspondence than R2 might be preferred. Nonetheless, these results further call into question the robustness of the PLS procedure, in which the stratification score does not strongly reflect the true stratification. In summary, we conclude that aspects of the EAS method may be worthy of further exploration and development. However, in its present form, we have concerns about the routine use of StratScore, especially in the context of genome-wide scans. At the very least, the genomics community should be aware of the potential for power loss and sensitivity to the number of ancestry-informative markers employed. Additional, larger simulations in the context of whole-genome scans are necessary to provide convincing comparisons of the major approaches for controlling spurious association in case-control association studies. The authors are supported in part by NIH grant R01 GM074175 and EPA RD-83272001. We thank the editors and reviewers for their comments. Response to Lee et al.Epstein et al.The American Journal of Human GeneticsFebruary 08, 2008In BriefTo the Editor: We thank Drs. Lee, Sullivan, Zou, and Wright (LSZW) for their letter, and for this opportunity to further discuss the use of stratification scores to control for confounding. We also take this opportunity to discuss the general question of model selection for stratification scores. Full-Text PDF Open ArchiveA Simple and Improved Correction for Population Stratification in Case-Control StudiesEpstein et al.The American Journal of Human GeneticsMay, 2007In BriefPopulation stratification remains an important issue in case-control studies of disease-marker association, even within populations considered to be genetically homogeneous. Campbell et al. 
(Nature Genetics 2005;37:868–872) illustrated this by showing that stratification induced a spurious association between the lactase gene (LCT) and tall/short status in a European American sample. Furthermore, existing approaches for controlling stratification by use of substructure-informative loci (e.g., genomic control, structured association, and principal components) could not resolve this confounding. Full-Text PDF Open Archive}, number={2}, journal={The American Journal of Human Genetics}, publisher={Elsevier BV}, author={Lee, Seunggeun and Sullivan, Patrick F. and Zou, Fei and Wright, Fred A.}, year={2008}, month={Feb}, pages={524–526} } @article{ghosh_zou_wright_2008, title={Estimating Odds Ratios in Genome Scans: An Approximate Conditional Likelihood Approach}, volume={82}, ISSN={0002-9297}, url={http://dx.doi.org/10.1016/j.ajhg.2008.03.002}, DOI={10.1016/j.ajhg.2008.03.002}, abstractNote={In modern whole-genome scans, the use of stringent thresholds to control the genome-wide testing error distorts the estimation process, producing estimated effect sizes that may be on average far greater in magnitude than the true effect sizes. We introduce a method, based on the estimate of genetic effect and its standard error as reported by standard statistical software, to correct for this bias in case-control association studies. Our approach is widely applicable, is far easier to implement than competing approaches, and may often be applied to published studies without access to the original data. We evaluate the performance of our approach via extensive simulations for a range of genetic models, minor allele frequencies, and genetic effect sizes. Compared to the naive estimation procedure, our approach reduces the bias and the mean squared error, especially for modest effect sizes. We also develop a principled method to construct confidence intervals for the genetic effect that acknowledges the conditioning on statistical significance. 
Our approach is described in the specific context of odds ratios and logistic modeling but is more widely applicable.
Application to recently published data sets demonstrates the relevance of our approach to modern genome scans.}, number={5}, journal={The American Journal of Human Genetics}, publisher={Elsevier BV}, author={Ghosh, Arpita and Zou, Fei and Wright, Fred A.}, year={2008}, month={May}, pages={1064–1074} } @comment{Erratum to ghosh_zou_wright_2008 (DOI 10.1016/j.ajhg.2008.03.002). The key carries an _erratum suffix because both records were exported under the same key, which made this one a repeated entry.} @article{ghosh_zou_wright_2008_erratum, title={Estimating Odds Ratios in Genome Scans: An Approximate Conditional Likelihood Approach}, volume={82}, ISSN={0002-9297}, url={http://dx.doi.org/10.1016/j.ajhg.2008.04.004}, DOI={10.1016/j.ajhg.2008.04.004}, abstractNote={(American Journal of Human Genetics 82, 1064–1074; May 2008) In the version of this article published online on April 17, 2008, the ratio LLR=2log(L(μˆ)/L(0)) incorrectly appeared as LLR=−2log(L(μˆ)/L(0)) on page 3. In the formula for the specification of x under the three genetic models on page 5, under “additive,” x was incorrectly coded as 0 instead of 1 for the Aa genotype. In the Note Added in Proof on page 11, the first author's first initial incorrectly appeared as “E.” instead of “H.,” and the title of the referenced paper appeared as “Bias-reduced estimators and confidence intervals for logs ratios in genome-wide association studies” instead of “Bias-reduced estimators for odds ratios in genome-wide association studies.” The corrected version of this article now appears online and in print. Estimating Odds Ratios in Genome Scans: An Approximate Conditional Likelihood ApproachGhosh et al.The American Journal of Human GeneticsApril 17, 2008In BriefIn modern whole-genome scans, the use of stringent thresholds to control the genome-wide testing error distorts the estimation process, producing estimated effect sizes that may be on average far greater in magnitude than the true effect sizes. We introduce a method, based on the estimate of genetic effect and its standard error as reported by standard statistical software, to correct for this bias in case-control association studies.
Our approach is widely applicable, is far easier to implement than competing approaches, and may often be applied to published studies without access to the original data. Full-Text PDF Open Archive}, number={5}, journal={The American Journal of Human Genetics}, publisher={Elsevier BV}, author={Ghosh, Arpita and Zou, Fei and Wright, Fred A.}, year={2008}, month={May}, pages={1224} } @article{sullivan_lin_tzeng_van den oord_perkins_stroup_wagner_lee_wright_zou_et al._2008, title={Genomewide association for schizophrenia in the CATIE study: results of stage 1}, volume={13}, ISSN={1359-4184 1476-5578}, url={http://dx.doi.org/10.1038/mp.2008.25}, DOI={10.1038/mp.2008.25}, abstractNote={Little is known for certain about the genetics of schizophrenia. The advent of genomewide association has been widely anticipated as a promising means to identify reproducible DNA sequence variation associated with this important and debilitating disorder. A total of 738 cases with DSM-IV schizophrenia (all participants in the CATIE study) and 733 group-matched controls were genotyped for 492 900 single-nucleotide polymorphisms (SNPs) using the Affymetrix 500K two-chip genotyping platform plus a custom 164K fill-in chip. Following multiple quality control steps for both subjects and SNPs, logistic regression analyses were used to assess the evidence for association of all SNPs with schizophrenia. We identified a number of promising SNPs for follow-up studies, although no SNP or multimarker combination of SNPs achieved genomewide statistical significance. Although a few signals coincided with genomic regions previously implicated in schizophrenia, chance could not be excluded. These data do not provide evidence for the involvement of any genomic region with schizophrenia detectable with moderate sample size. 
However, a planned genomewide association study for response phenotypes and inclusion of individual phenotype and genotype data from this study in meta-analyses hold promise for eventual identification of susceptibility and protective variants.}, number={6}, journal={Molecular Psychiatry}, publisher={Springer Science and Business Media LLC}, author={Sullivan, P F and Lin, D and Tzeng, J-Y and van den Oord, E and Perkins, D and Stroup, T S and Wagner, M and Lee, S and Wright, F A and Zou, F and others}, year={2008}, month=mar, pages={570--584} } @article{drumm_konstan_schluchter_handler_pace_zou_zariwala_fargo_xu_dunn_et_al._2005, title={Genetic Modifiers of Lung Disease in Cystic Fibrosis}, volume={353}, ISSN={0028-4793 1533-4406}, url={http://dx.doi.org/10.1056/NEJMoa051469}, DOI={10.1056/NEJMoa051469}, abstractNote={BACKGROUND Polymorphisms in genes other than the cystic fibrosis transmembrane conductance regulator (CFTR) gene may modify the severity of pulmonary disease in patients with cystic fibrosis. METHODS We performed two studies with different patient samples. We first tested 808 patients who were homozygous for the DeltaF508 mutation and were classified as having either severe or mild lung disease, as defined by the lowest or highest quartile of forced expiratory volume in one second (FEV1), respectively, for age. We genotyped 16 polymorphisms in 10 genes reported by others as modifiers of disease severity in cystic fibrosis and tested for an association in patients with severe disease (263 patients) or mild disease (545). In the replication (second) study, we tested 498 patients, with various CFTR genotypes and a range of FEV1 values, for an association of the TGFbeta1 codon 10 CC genotype with low FEV1. 
RESULTS In the initial study, significant allelic and genotypic associations with phenotype were seen only for TGFbeta1 (the gene encoding transforming growth factor beta1), particularly the -509 and codon 10 polymorphisms (with P values obtained with the use of Fisher's exact test and logistic regression ranging from 0.006 to 0.0002). The odds ratio was about 2.2 for the highest-risk TGFbeta1 genotype (codon 10 CC) in association with the phenotype for severe lung disease. The replication study confirmed the association of the TGFbeta1 codon 10 CC genotype with more severe lung disease in comparisons with the use of dichotomized FEV1 for severity status (P=0.0002) and FEV1 values directly (P=0.02). CONCLUSIONS Genetic variation in the 5' end of TGFbeta1 or a nearby upstream region modifies disease severity in cystic fibrosis.}, number={14}, journal={New England Journal of Medicine}, publisher={Massachusetts Medical Society}, author={Drumm, Mitchell L. and Konstan, Michael W. and Schluchter, Mark D. and Handler, Allison and Pace, Rhonda and Zou, Fei and Zariwala, Maimoona and Fargo, David and Xu, Airong and Dunn, John M. and others}, year={2005}, month=oct, pages={1443--1453} } @article{graham_virtaneva_porcella_barry_gowen_johnson_wright_musser_2005, title={{Group A Streptococcus} Transcriptome Dynamics during Growth in Human Blood Reveals Bacterial Adaptive and Survival Strategies}, volume={166}, ISSN={0002-9440}, url={http://dx.doi.org/10.1016/S0002-9440(10)62268-7}, DOI={10.1016/S0002-9440(10)62268-7}, abstractNote={The molecular basis for bacterial responses to host signals during natural infections is poorly understood. The gram-positive bacterial pathogen group A Streptococcus (GAS) causes human mucosal, skin, and life-threatening systemic infections. During the transition from a throat or skin infection to an invasive infection, GAS must adapt to changing environments and host factors. 
To better understand how GAS adapts, we used transcript profiling and functional analysis to investigate the transcriptome of a wild-type serotype M1 GAS strain in human blood. Global changes in GAS gene expression occur rapidly in response to human blood exposure. Increased transcription was observed for many genes that likely enhance bacterial survival, including those encoding superantigens and host-evasion proteins regulated by a multiple gene activator called Mga. GAS also coordinately expressed genes involved in proteolysis, transport, and catabolism of oligopeptides to obtain amino acids in this protein-rich host environment. Comparison of the transcriptome of the wild-type strain to that of an isogenic deletion mutant (ΔcovR) mutated in the two-component regulatory system designated CovR-CovS reinforced the hypothesis that CovR-CovS has an important role linking key biosynthetic, catabolic, and virulence functions during transcriptome restructuring. Taken together, the data provide crucial insights into strategies used by pathogenic bacteria for thwarting host defenses and surviving in human blood.}, number={2}, journal={The American Journal of Pathology}, publisher={Elsevier BV}, author={Graham, Morag R. and Virtaneva, Kimmo and Porcella, Stephen F. and Barry, William T. and Gowen, Brian B. and Johnson, Claire R. and Wright, Fred A. and Musser, James M.}, year={2005}, month=feb, pages={455--465} }