@article{davis_wiegers_sciaky_barkalow_strong_wyatt_wiegers_mcmorran_abrar_mattingly_2024, title={Comparative toxicogenomics database's 20th anniversary: update 2025}, volume={10}, ISSN={["1362-4962"]}, DOI={10.1093/nar/gkae883}, abstractNote={For 20 years, the Comparative Toxicogenomics Database (CTD; https://ctdbase.org) has provided high-quality, literature-based curated content describing how environmental chemicals affect human health. Today, CTD includes over 94 million toxicogenomic connections relating chemicals, genes/proteins, phenotypes, anatomical terms, diseases, comparative species, pathways and exposures. In this 20th year anniversary update, we reflect on CTD's remarkable growth and provide an overview of the increased data content and new features, including enhancements to the curation workflow (e.g. new exposure curation tool and expanded use of natural language processing), added functionality (e.g. improvements to CTD Tetramers and Pathway View tools) and significant upgrades to software and infrastructure. Linking lab-based core curation with real-world human exposure curation via the use of controlled vocabularies facilitates analysis of content across the entire environmental health continuum, from molecular toxicological mechanisms to the population level, and vice versa. The 'prototype database' originally described in 2004 has evolved into a premier, sophisticated, highly cited and well-engineered knowledgebase and discoverybase that is utilized by scientists worldwide to design testable hypotheses about environmental health.}, journal={NUCLEIC ACIDS RESEARCH}, author={Davis, Allan Peter and Wiegers, Thomas C. 
and Sciaky, Daniela and Barkalow, Fern and Strong, Melissa and Wyatt, Brent and Wiegers, Jolene and McMorran, Roy and Abrar, Sakib and Mattingly, Carolyn J.}, year={2024}, month={Oct} } @article{wyatt_davis_wiegers_wiegers_abrar_sciaky_barkalow_strong_mattingly_2024, title={Transforming environmental health datasets from the comparative toxicogenomics database into chord diagrams to visualize molecular mechanisms}, volume={6}, ISSN={["2673-3080"]}, DOI={10.3389/ftox.2024.1437884}, abstractNote={In environmental health, the specific molecular mechanisms connecting a chemical exposure to an adverse endpoint are often unknown, reflecting knowledge gaps. At the public Comparative Toxicogenomics Database (CTD; https://ctdbase.org/), we integrate manually curated, literature-based interactions from CTD to compute four-unit blocks of information organized as a potential step-wise molecular mechanism, known as "CGPD-tetramers," wherein a chemical interacts with a gene product to trigger a phenotype which can be linked to a disease. These computationally derived datasets can be used to fill the gaps and offer testable mechanistic information. Users can generate CGPD-tetramers for any combination of chemical, gene, phenotype, and/or disease of interest at CTD; however, such queries typically result in the generation of thousands of CGPD-tetramers. Here, we describe a novel approach to transform these large datasets into user-friendly chord diagrams using R. This visualization process is straightforward, simple to implement, and accessible to inexperienced users that have never used R before. Combining CGPD-tetramers into a single chord diagram helps identify potential key chemicals, genes, phenotypes, and diseases. This visualization allows users to more readily analyze computational datasets that can fill the exposure knowledge gaps in the environmental health continuum.}, journal={FRONTIERS IN TOXICOLOGY}, author={Wyatt, Brent and Davis, Allan Peter and Wiegers, Thomas C. 
and Wiegers, Jolene and Abrar, Sakib and Sciaky, Daniela and Barkalow, Fern and Strong, Melissa and Mattingly, Carolyn J.}, year={2024}, month={Jul} } @article{davis_wiegers_wiegers_wyatt_johnson_sciaky_barkalow_strong_planchart_mattingly_2023, title={CTD tetramers: a new online tool that computationally links curated chemicals, genes, phenotypes, and diseases to inform molecular mechanisms for environmental health}, volume={195}, ISSN={["1096-0929"]}, DOI={10.1093/toxsci/kfad069}, abstractNote={Abstract The molecular mechanisms connecting environmental exposures to adverse endpoints are often unknown, reflecting knowledge gaps. At the Comparative Toxicogenomics Database (CTD), we developed a bioinformatics approach that integrates manually curated, literature-based interactions from CTD to generate a “CGPD-tetramer”: a 4-unit block of information organized as a step-wise molecular mechanism linking an initiating Chemical, an interacting Gene, a Phenotype, and a Disease outcome. Here, we describe a novel, user-friendly tool called CTD Tetramers that generates these evidence-based CGPD-tetramers for any curated chemical, gene, phenotype, or disease of interest. Tetramers offer potential solutions for the unknown underlying mechanisms and intermediary phenotypes connecting a chemical exposure to a disease. Additionally, multiple tetramers can be assembled to construct detailed modes-of-action for chemical-induced disease pathways. As well, tetramers can help inform environmental influences on adverse outcome pathways (AOPs). We demonstrate the tool’s utility with relevant use cases for a variety of environmental chemicals (eg, perfluoroalkyl substances, bisphenol A), phenotypes (eg, apoptosis, spermatogenesis, inflammatory response), and diseases (eg, asthma, obesity, male infertility). 
Finally, we map AOP adverse outcome terms to corresponding CTD terms, allowing users to query for tetramers that can help augment AOP pathways with additional stressors, genes, and phenotypes, as well as formulate potential AOP disease networks (eg, liver cirrhosis and prostate cancer). This novel tool, as part of the complete suite of tools offered at CTD, provides users with computational datasets and their supporting evidence to potentially fill exposure knowledge gaps and develop testable hypotheses about environmental health.}, number={2}, journal={TOXICOLOGICAL SCIENCES}, author={Davis, Allan Peter and Wiegers, Thomas C. and Wiegers, Jolene and Wyatt, Brent and Johnson, Robin J. and Sciaky, Daniela and Barkalow, Fern and Strong, Melissa and Planchart, Antonio and Mattingly, Carolyn J.}, year={2023}, month={Sep}, pages={155–168} } @article{green_wall_weeks_mattingly_marsden_planchart_2023, title={Developmental cadmium exposure disrupts zebrafish vestibular calcium channels interfering with otolith formation and inner ear function}, volume={96}, ISSN={["1872-9711"]}, DOI={10.1016/j.neuro.2023.04.006}, abstractNote={Dizziness or balance problems are estimated to affect approximately 3.3 million children aged three to 17 years. These disorders develop from a breakdown in the balance control system and can be caused by anything that affects the inner ear or the brain, including exposure to environmental toxicants. One potential environmental toxicant linked to balance disorders is cadmium, an extremely toxic metal that occurs naturally in the earth's crust and is released as a byproduct of industrial processes. Cadmium is associated with balance and vestibular dysfunction in adults exposed occupationally, but little is known about the developmental effects of low-concentration cadmium exposure. 
Our findings indicate that zebrafish exposed to 10–60 parts per billion (ppb) cadmium from four hours post-fertilization (hpf) to seven days post-fertilization (dpf) exhibit abnormal behaviors, including pronounced increases in auditory sensitivity and circling behavior, both of which are linked to reductions in otolith growth and are rescued by the addition of calcium to the media. Pharmacological intervention shows that agonist-induced activation of the P2X calcium ion channel in the presence of cadmium restores otolith size. In conclusion, cadmium-induced ototoxicity is linked to vestibular-based behavioral abnormalities and auditory sensitivity following developmental exposure, and calcium ion channel function is associated with these defects.}, journal={NEUROTOXICOLOGY}, author={Green, Adrian J. and Wall, Alex R. and Weeks, Ryan D. and Mattingly, Carolyn J. and Marsden, Kurt C. and Planchart, Antonio}, year={2023}, month={May}, pages={129–139} } @article{davis_wiegers_johnson_sciaky_wiegers_mattingly_2022, title={Comparative Toxicogenomics Database (CTD): update 2023}, volume={9}, ISSN={["1362-4962"]}, DOI={10.1093/nar/gkac833}, abstractNote={Abstract The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) harmonizes cross-species heterogeneous data for chemical exposures and their biological repercussions by manually curating and interrelating chemical, gene, phenotype, anatomy, disease, taxa, and exposure content from the published literature. This curated information is integrated to generate inferences, providing potential molecular mediators to develop testable hypotheses and fill in knowledge gaps for environmental health. This dual nature, acting as both a knowledgebase and a discoverybase, makes CTD a unique resource for the scientific community. Here, we report a 20% increase in overall CTD content for 17 100 chemicals, 54 300 genes, 6100 phenotypes, 7270 diseases and 202 000 exposure statements. 
We also present CTD Tetramers, a novel tool that computationally generates four-unit information blocks connecting a chemical, gene, phenotype, and disease to construct potential molecular mechanistic pathways. Finally, we integrate terms for human biological media used in the CTD Exposure module to corresponding CTD Anatomy pages, allowing users to survey the chemical profiles for any tissue-of-interest and see how these environmental biomarkers are related to phenotypes for any anatomical site. These, and other webpage visual enhancements, continue to promote CTD as a practical, user-friendly, and innovative resource for finding information and generating testable hypotheses about environmental health.}, journal={NUCLEIC ACIDS RESEARCH}, author={Davis, Allan Peter and Wiegers, Thomas C. and Johnson, Robin J. and Sciaky, Daniela and Wiegers, Jolene and Mattingly, Carolyn J.}, year={2022}, month={Sep} } @article{davis_wiegers_wiegers_grondin_johnson_sciaky_mattingly_2021, title={CTD anatomy: Analyzing chemical-induced phenotypes and exposures from an anatomical perspective, with implications for environmental health studies}, volume={2}, ISSN={["2666-027X"]}, DOI={10.1016/j.crtox.2021.03.001}, abstractNote={The Comparative Toxicogenomics Database (CTD) is a freely available public resource that curates and interrelates chemical, gene/protein, phenotype, disease, organism, and exposure data. CTD can be used to address toxicological mechanisms for environmental chemicals and facilitate the generation of testable hypotheses about how exposures affect human health. At CTD, manually curated interactions for chemical-induced phenotypes are enhanced with anatomy terms (tissues, fluids, and cell types) to describe the physiological system of the reported event. These same anatomy terms are used to annotate the human media (e.g., urine, hair, nail, blood, etc.) in which an environmental chemical was assayed for exposure. 
Currently, CTD uses more than 880 unique anatomy terms to contextualize over 255,000 chemical-phenotype interactions and 167,000 exposure statements. These annotations allow chemical-phenotype interactions and exposure data to be explored from a novel, anatomical perspective. Here, we describe CTD's anatomy curation process (including the construction of a controlled, interoperable vocabulary) and new anatomy webpages (that coalesce and organize the curated chemical-phenotype and exposure data sets). We also provide examples that demonstrate how this feature can be used to identify system- and cell-specific chemical-induced toxicities, help inform exposure data, prioritize phenotypes for environmental diseases, survey tissue and pregnancy exposomes, and facilitate data connections with external resources. Anatomy annotations advance understanding of environmental health by providing new ways to explore and survey chemical-induced events and exposure studies in the CTD framework.}, journal={CURRENT RESEARCH IN TOXICOLOGY}, author={Davis, Allan Peter and Wiegers, Thomas C. and Wiegers, Jolene and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and Mattingly, Carolyn J.}, year={2021}, pages={128–139} } @article{grondin_davis_wiegers_wiegers_sciaky_johnson_mattingly_2021, title={Predicting molecular mechanisms, pathways, and health outcomes induced by Juul e-cigarette aerosol chemicals using the Comparative Toxicogenomics Database}, volume={2}, ISSN={["2666-027X"]}, DOI={10.1016/j.crtox.2021.08.001}, abstractNote={There is a critical need to understand the health risks associated with vaping e-cigarettes, which has reached epidemic levels among teens. Juul is currently the most popular type of e-cigarette on the market. 
Using the Comparative Toxicogenomics Database (CTD; http://ctdbase.org), a public resource that integrates chemical, gene, phenotype and disease data, we aimed to analyze the potential molecular mechanisms of eight chemicals detected in the aerosols generated by heating Juul e-cigarette pods: nicotine, acetaldehyde, formaldehyde, free radicals, crotonaldehyde, acetone, pyruvaldehyde, and particulate matter. Curated content in CTD, including chemical-gene, chemical-phenotype, and chemical-disease interactions, as well as associated phenotypes and pathway enrichment, were analyzed to help identify potential molecular mechanisms and diseases associated with vaping. Nicotine shows the most direct disease associations of these chemicals, followed by particulate matter and formaldehyde. Together, these chemicals show a direct marker or mechanistic relationship with 400 unique diseases in CTD, particularly in the categories of cardiovascular diseases, nervous system diseases, respiratory tract diseases, cancers, and mental disorders. We chose three respiratory tract diseases to investigate further, and found that in addition to cellular processes of apoptosis and cell proliferation, prioritized phenotypes underlying Juul-associated respiratory tract disease outcomes include response to oxidative stress, inflammatory response, and several cell signaling pathways (p38MAPK, NIK/NFkappaB, calcium-mediated).}, journal={CURRENT RESEARCH IN TOXICOLOGY}, author={Grondin, Cynthia J. and Davis, Allan Peter and Wiegers, Jolene A. and Wiegers, Thomas C. and Sciaky, Daniela and Johnson, Robin J. 
and Mattingly, Carolyn J.}, year={2021}, pages={272–281} } @article{pinkhasova_jameson_conrow_simeone_davis_wiegers_mattingly_leung_2021, title={Regulatory status of pesticide residues in cannabis: Implications to medical use in neurological diseases}, volume={2}, ISSN={["2666-027X"]}, DOI={10.1016/j.crtox.2021.02.007}, abstractNote={Medical cannabis represents a potential route of pesticide exposure to susceptible populations. We compared the qualifying conditions for medical use and pesticide testing requirements of cannabis in 33 states and Washington, D.C. Movement disorders were the most common neurological category of qualifying conditions, including epilepsy, certain symptoms of multiple sclerosis, Parkinson's Disease, and any cause of symptoms leading to seizures or spasticity. Different approaches of pesticide regulation were implemented in cannabis and cannabis-derived products. Six states imposed the strictest U.S. EPA tolerances (i.e. maximum residue levels) for food commodities on up to 400 pesticidal active ingredients in cannabis, while pesticide testing was optional in three states. Dimethomorph showed the largest variation in action levels, ranging from 0.1 to 60 ppm in 5 states. We evaluated the potential connections between insecticides, cannabinoids, and seizure using the Comparative Toxicogenomics Database. Twenty-two insecticides, two cannabinoids, and 63 genes were associated with 674 computationally generated chemical-gene-phenotype-disease (CGPD) tetramer constructs. Notable functional clusters included oxidation-reduction process (183 CGPD-tetramers), synaptic signaling pathways (151), and neuropeptide hormone activity (46). Cholinergic, dopaminergic, and retrograde endocannabinoid signaling pathways were linked to 10 genetic variants of epilepsy patients. 
Further research is needed to assess human health risk of cannabinoids and pesticides in support of a national standard for cannabis pesticide regulations.}, journal={CURRENT RESEARCH IN TOXICOLOGY}, author={Pinkhasova, Dorina V. and Jameson, Laura E. and Conrow, Kendra D. and Simeone, Michael P. and Davis, Allan Peter and Wiegers, Thomas C. and Mattingly, Carolyn J. and Leung, Maxwell C. K.}, year={2021}, pages={140–148} } @misc{hollander_cory-slechta_jacka_szabo_guilarte_bilbo_mattingly_moy_haroon_hornig_et al._2020, title={Beyond the looking glass: recent advances in understanding the impact of environmental exposures on neuropsychiatric disease}, volume={45}, ISSN={["1740-634X"]}, DOI={10.1038/s41386-020-0648-5}, abstractNote={AbstractThe etiologic pathways leading to neuropsychiatric diseases remain poorly defined. As genomic technologies have advanced over the past several decades, considerable progress has been made linking neuropsychiatric disorders to genetic underpinnings. Interest and consideration of nongenetic risk factors (e.g., lead exposure and schizophrenia) have, in contrast, lagged behind heritable frameworks of explanation. Thus, the association of neuropsychiatric illness to environmental chemical exposure, and their potential interactions with genetic susceptibility, are largely unexplored. In this review, we describe emerging approaches for considering the impact of chemical risk factors acting alone and in concert with genetic risk, and point to the potential role of epigenetics in mediating exposure effects on transcription of genes implicated in mental disorders. We highlight recent examples of research in nongenetic risk factors in psychiatric disorders that point to potential shared biological mechanisms—synaptic dysfunction, immune alterations, and gut–brain interactions. We outline new tools and resources that can be harnessed for the study of environmental factors in psychiatric disorders. 
These tools, combined with emerging experimental evidence, suggest that there is a need to broadly incorporate environmental exposures in psychiatric research, with the ultimate goal of identifying modifiable risk factors and informing new treatment strategies for neuropsychiatric disease.}, number={7}, journal={NEUROPSYCHOPHARMACOLOGY}, author={Hollander, Jonathan A. and Cory-Slechta, Deborah A. and Jacka, Felice N. and Szabo, Steven T. and Guilarte, Tomas R. and Bilbo, Staci D. and Mattingly, Carolyn J. and Moy, Sheryl S. and Haroon, Ebrahim and Hornig, Mady and et al.}, year={2020}, month={Jun}, pages={1086–1096} } @article{davis_grondin_johnson_sciaky_wiegers_wiegers_mattingly_2021, title={Comparative Toxicogenomics Database (CTD): update 2021}, volume={49}, ISSN={["1362-4962"]}, DOI={10.1093/nar/gkaa891}, abstractNote={Abstract The public Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is an innovative digital ecosystem that relates toxicological information for chemicals, genes, phenotypes, diseases, and exposures to advance understanding about human health. Literature-based, manually curated interactions are integrated to create a knowledgebase that harmonizes cross-species heterogeneous data for chemical exposures and their biological repercussions. In this biennial update, we report a 20% increase in CTD curated content and now provide 45 million toxicogenomic relationships for over 16 300 chemicals, 51 300 genes, 5500 phenotypes, 7200 diseases and 163 000 exposure events, from 600 comparative species. Furthermore, we increase the functionality of chemical–phenotype content with new data-tabs on CTD Disease pages (to help fill in knowledge gaps for environmental health) and new phenotype search parameters (for Batch Query and Venn analysis tools). As well, we introduce new CTD Anatomy pages that allow users to uniquely explore and analyze chemical–phenotype interactions from an anatomical perspective. 
Finally, we have enhanced CTD Chemical pages with new literature-based chemical synonyms (to improve querying) and added 1600 amino acid-based compounds (to increase chemical landscape). Together, these updates continue to augment CTD as a powerful resource for generating testable hypotheses about the etiologies and molecular mechanisms underlying environmentally influenced diseases.}, number={D1}, journal={NUCLEIC ACIDS RESEARCH}, author={Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and Wiegers, Jolene and Wiegers, Thomas C. and Mattingly, Carolyn J.}, year={2021}, month={Jan}, pages={D1138–D1143} } @article{davis_wiegers_grondin_johnson_sciaky_wiegers_mattingly_2020, title={Leveraging the Comparative Toxicogenomics Database to Fill in Knowledge Gaps for Environmental Health: A Test Case for Air Pollution-induced Cardiovascular Disease}, volume={177}, ISSN={["1096-0929"]}, DOI={10.1093/toxsci/kfaa113}, abstractNote={Abstract Environmental health studies relate how exposures (eg, chemicals) affect human health and disease; however, in most cases, the molecular and biological mechanisms connecting an exposure with a disease remain unknown. To help fill in these knowledge gaps, we sought to leverage content from the public Comparative Toxicogenomics Database (CTD) to identify potential intermediary steps. In a proof-of-concept study, we systematically compute the genes, molecular mechanisms, and biological events for the environmental health association linking air pollution toxicants with 2 cardiovascular diseases (myocardial infarction and hypertension) as a test case. Our approach integrates 5 types of curated interactions in CTD to build sets of “CGPD-tetramers,” computationally constructed information blocks relating a Chemical- Gene interaction with a Phenotype and Disease. 
This bioinformatics strategy generates 653 CGPD-tetramers for air pollution-associated myocardial infarction (involving 5 pollutants, 58 genes, and 117 phenotypes) and 701 CGPD-tetramers for air pollution-associated hypertension (involving 3 pollutants, 96 genes, and 142 phenotypes). Collectively, we identify 19 genes and 96 phenotypes shared between these 2 air pollutant-induced outcomes, and suggest important roles for oxidative stress, inflammation, immune responses, cell death, and circulatory system processes. Moreover, CGPD-tetramers can be assembled into extensive chemical-induced disease pathways involving multiple gene products and sequential biological events, and many of these computed intermediary steps are validated in the literature. Our method does not require a priori knowledge of the toxicant, interacting gene, or biological system, and can be used to analyze any environmental chemical-induced disease curated within the public CTD framework. This bioinformatics strategy links and interrelates chemicals, genes, phenotypes, and diseases to fill in knowledge gaps for environmental health studies, as demonstrated for air pollution-associated cardiovascular disease, but can be adapted by researchers for any environmentally influenced disease-of-interest.}, number={2}, journal={TOXICOLOGICAL SCIENCES}, author={Davis, Allan Peter and Wiegers, Thomas C. and Grondin, Cynthia J. and Johnson, Robin J. 
and Sciaky, Daniela and Wiegers, Jolene and Mattingly, Carolyn J.}, year={2020}, month={Oct}, pages={392–404} } @article{grondin_davis_wiegers_wiegers_mattingly_2018, title={Accessing an Expanded Exposure Science Module at the Comparative Toxicogenomics Database}, volume={126}, ISSN={["1552-9924"]}, DOI={10.1289/ehp2873}, abstractNote={Summary: The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health. Four core content areas are independently curated: chemical–gene interactions, chemical–disease and gene–disease associations, chemical–phenotype interactions, and environmental exposure data (e.g., effects of chemical stressors on humans). Since releasing exposure data in 2015, we have vastly increased our coverage of chemicals and disease/phenotype outcomes; greatly expanded access to exposure content; added search capability by stressors, cohorts, population demographics, and measured outcomes; and created user-specified displays of content. These enhancements aim to facilitate human studies by allowing comparisons among experimental parameters and across studies involving specified chemicals, populations, or outcomes. Integration of data among CTD’s four content areas and external data sets, such as Gene Ontology annotations and pathway information, links exposure data with over 1.8 million chemical–gene, chemical–disease and gene–disease interactions. Our analysis tools reveal direct and inferred relationships among the data and provide opportunities to generate predictive connections between environmental exposures and population-level health outcomes. https://doi.org/10.1289/EHP2873}, number={1}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, author={Grondin, Cynthia J. and Davis, Allan Peter and Wiegers, Thomas C. and Wiegers, Jolene A. 
and Mattingly, Carolyn J.}, year={2018}, month={Jan} } @article{green_hoyo_mattingly_luo_tzeng_murphy_buchwalter_planchart_2018, title={Cadmium exposure increases the risk of juvenile obesity: a human and zebrafish comparative study}, volume={42}, ISSN={0307-0565 1476-5497}, url={http://dx.doi.org/10.1038/s41366-018-0036-y}, DOI={10.1038/s41366-018-0036-y}, abstractNote={Human obesity is a complex metabolic disorder disproportionately affecting people of lower socioeconomic strata, and ethnic minorities, especially African Americans and Hispanics. Although genetic predisposition and a positive energy balance are implicated in obesity, these factors alone do not account for the excess prevalence of obesity in lower socioeconomic populations. Therefore, environmental factors, including exposure to pesticides, heavy metals, and other contaminants, are agents widely suspected to have obesogenic activity, and they also are spatially correlated with lower socioeconomic status. Our study investigates the causal relationship between exposure to the heavy metal, cadmium (Cd), and obesity in a cohort of children and in a zebrafish model of adipogenesis. An extensive collection of first trimester maternal blood samples obtained as part of the Newborn Epigenetics Study (NEST) was analyzed for the presence of Cd, and these results were cross analyzed with the weight-gain trajectory of the children through age 5 years. Next, the role of Cd as a potential obesogen was analyzed in an in vivo zebrafish model. Our analysis indicates that the presence of Cd in maternal blood during pregnancy is associated with increased risk of juvenile obesity in the offspring, independent of other variables, including lead (Pb) and smoking status. Our results are recapitulated in a zebrafish model, in which exposure to Cd at levels approximating those observed in the NEST study is associated with increased adiposity. Our findings identify Cd as a potential human obesogen. 
Moreover, these observations are recapitulated in a zebrafish model, suggesting that the underlying mechanisms may be evolutionarily conserved, and that zebrafish may be a valuable model for uncovering pathways leading to Cd-mediated obesity in human populations.}, number={7}, journal={International Journal of Obesity}, publisher={Springer Nature}, author={Green, Adrian J. and Hoyo, Cathrine and Mattingly, Carolyn J. and Luo, Yiwen and Tzeng, Jung-Ying and Murphy, Susan K. and Buchwalter, David B. and Planchart, Antonio}, year={2018}, month={Feb}, pages={1285–1295} } @article{davis_wiegers_wiegers_johnson_sciaky_grondin_mattingly_2018, title={Chemical-Induced Phenotypes at CTD Help Inform the Predisease State and Construct Adverse Outcome Pathways}, volume={165}, ISSN={["1096-0929"]}, DOI={10.1093/toxsci/kfy131}, abstractNote={The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a public resource that manually curates the scientific literature to provide content that illuminates the molecular mechanisms by which environmental exposures affect human health. We introduce our new chemical-phenotype module that describes how chemicals can affect molecular, cellular, and physiological phenotypes. At CTD, we operationally distinguish between phenotypes and diseases, wherein a phenotype refers to a nondisease biological event: eg, decreased cell cycle arrest (phenotype) versus liver cancer (disease), increased fat cell proliferation (phenotype) versus morbid obesity (disease), etc. Chemical-phenotype interactions are expressed in a formal structured notation using controlled terms for chemicals, phenotypes, taxon, and anatomical descriptors. Combining this information with CTD's chemical-disease module allows inferences to be made between phenotypes and diseases, yielding potential insight into the predisease state. 
Integration of all 4 CTD modules furnishes unique opportunities for toxicologists to generate computationally predictive adverse outcome pathways, linking chemical-gene molecular initiating events with phenotypic key events, adverse diseases, and population-level health outcomes. As examples, we present 3 diverse case studies discerning the effect of vehicle emissions on altered leukocyte migration, the role of cadmium in influencing phenotypes preceding Alzheimer disease, and the connection of arsenic-induced glucose metabolic phenotypes with diabetes. To date, CTD contains over 165 000 interactions that connect more than 6400 chemicals to 3900 phenotypes for 760 anatomical terms in 215 species, from over 19 000 scientific articles. To our knowledge, this is the first comprehensive set of manually curated, literature-based, contextualized, chemical-induced, nondisease phenotype data provided to the public.}, number={1}, journal={TOXICOLOGICAL SCIENCES}, author={Davis, Allan Peter and Wiegers, Thomas C. and Wiegers, Jolene and Johnson, Robin J. and Sciaky, Daniela and Grondin, Cynthia J. and Mattingly, Carolyn J.}, year={2018}, month={Sep}, pages={145–156} }

@article{planchart_green_hoyo_mattingly_2018,
  title = {Heavy Metal Exposure and Metabolic Syndrome: Evidence from Human and Model System Studies},
  volume = {5},
  ISSN = {2196-5412},
  url = {http://dx.doi.org/10.1007/s40572-018-0182-3},
  DOI = {10.1007/s40572-018-0182-3},
  abstractNote = {Metabolic syndrome (MS) describes the co-occurrence of conditions that increase one’s risk for heart disease and other disorders such as diabetes and stroke. The worldwide increase in the prevalence of MS cannot be fully explained by lifestyle factors such as sedentary behavior and caloric intake alone. Environmental exposures, such as heavy metals, have been implicated, but results are conflicting and possible mechanisms remain unclear.
To assess recent progress in determining a possible role between heavy metal exposure and MS, we reviewed epidemiological and model system data for cadmium (Cd), lead (Pb), and mercury (Hg) from the last decade. Data from 36 epidemiological studies involving 17 unique countries/regions and 13 studies leveraging model systems are included in this review. Epidemiological and model system studies support a possible association between heavy metal exposure and MS or comorbid conditions; however, results remain conflicting. Epidemiological studies were predominantly cross-sectional and collectively, they highlight a global interest in this question and reveal evidence of differential susceptibility by sex and age to heavy metal exposures. In vivo studies in rats and mice and in vitro cell-based assays provide insights into potential mechanisms of action relevant to MS including altered regulation of lipid and glucose homeostasis, adipogenesis, and oxidative stress. Heavy metal exposure may contribute to MS or comorbid conditions; however, available data are conflicting. Causal inference remains challenging as epidemiological data are largely cross-sectional; and variation in study design, including samples used for heavy metal measurements, age of subjects at which MS outcomes are measured; the scope and treatment of confounding factors; and the population demographics vary widely.
Prospective studies, standardization or increased consistency across study designs and reporting, and consideration of molecular mechanisms informed by model system studies are needed to better assess potential causal links between heavy metal exposure and MS.},
  number = {1},
  journal = {Current Environmental Health Reports},
  publisher = {Springer Nature},
  author = {Planchart, Antonio and Green, Adrian and Hoyo, Cathrine and Mattingly, Carolyn J.},
  year = {2018},
  month = feb,
  pages = {110--124}
}

@article{davis_grondin_johnson_sciaky_mcmorran_wiegers_wiegers_mattingly_2019,
  title = {The {Comparative Toxicogenomics Database}: update 2019},
  volume = {47},
  ISSN = {1362-4962},
  DOI = {10.1093/nar/gky868},
  abstractNote = {The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is a premier public resource for literature-based, manually curated associations between chemicals, gene products, phenotypes, diseases, and environmental exposures. In this biennial update, we present our new chemical–phenotype module that codes chemical-induced effects on phenotypes, curated using controlled vocabularies for chemicals, phenotypes, taxa, and anatomical descriptors; this module provides unique opportunities to explore cellular and system-level phenotypes of the pre-disease state and allows users to construct predictive adverse outcome pathways (linking chemical–gene molecular initiating events with phenotypic key events, diseases, and population-level health outcomes). We also report a 46% increase in CTD manually curated content, which when integrated with other datasets yields more than 38 million toxicogenomic relationships. We describe new querying and display features for our enhanced chemical–exposure science module, providing greater scope of content and utility. As well, we discuss an updated MEDIC disease vocabulary with over 1700 new terms and accession identifiers.
To accommodate these increases in data content and functionality, CTD has upgraded its computational infrastructure. These updates continue to improve CTD and help inform new testable hypotheses about the etiology and mechanisms underlying environmentally influenced diseases.},
  number = {D1},
  journal = {Nucleic Acids Research},
  author = {Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and McMorran, Roy and Wiegers, Jolene and Wiegers, Thomas C. and Mattingly, Carolyn J.},
  year = {2019},
  month = jan,
  pages = {D948--D954}
}

@article{leung_procter_goldstone_foox_desalle_mattingly_siddall_timme-laragy_2017,
  title = {Applying evolutionary genetics to developmental toxicology and risk assessment},
  volume = {69},
  ISSN = {0890-6238},
  DOI = {10.1016/j.reprotox.2017.03.003},
  abstractNote = {Evolutionary thinking continues to challenge our views on health and disease. Yet, there is a communication gap between evolutionary biologists and toxicologists in recognizing the connections among developmental pathways, high-throughput screening, and birth defects in humans. To increase our capability in identifying potential developmental toxicants in humans, we propose to apply evolutionary genetics to improve the experimental design and data interpretation with various in vitro and whole-organism models. We review five molecular systems of stress response and update 18 consensual cell-cell signaling pathways that are the hallmark for early development, organogenesis, and differentiation; and revisit the principles of teratology in light of recent advances in high-throughput screening, big data techniques, and systems toxicology. Multiscale systems modeling plays an integral role in the evolutionary approach to cross-species extrapolation. Phylogenetic analysis and comparative bioinformatics are both valuable tools in identifying and validating the molecular initiating events that account for adverse developmental outcomes in humans.
The discordance of susceptibility between test species and humans (ontogeny) reflects their differences in evolutionary history (phylogeny). This synthesis not only can lead to novel applications in developmental toxicity and risk assessment, but also can pave the way for applying an evo-devo perspective to the study of developmental origins of health and disease.},
  journal = {Reproductive Toxicology},
  author = {Leung, Maxwell C. K. and Procter, Andrew C. and Goldstone, Jared V. and Foox, Jonathan and DeSalle, Robert and Mattingly, Carolyn J. and Siddall, Mark E. and Timme-Laragy, Alicia R.},
  year = {2017},
  month = apr,
  pages = {174--186}
}

% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX stops reading a key at whitespace, so this entry is likely rejected there. Key left unchanged so existing citations keep resolving.
@article{manrai_cui_bushel_hall_karakitsios_mattingly_ritchie_schmitt_sarigiannis_thomas_et al._2017,
  title = {Informatics and Data Analytics to Support Exposome-Based Discovery for Public Health},
  volume = {38},
  ISSN = {0163-7525},
  DOI = {10.1146/annurev-publhealth-082516-012737},
  abstractNote = {The complexity of the human exposome—the totality of environmental exposures encountered from birth to death—motivates systematic, high-throughput approaches to discover new environmental determinants of disease. In this review, we describe the state of science in analyzing the human exposome and provide recommendations for the public health community to consider in dealing with analytic challenges of exposome-based biomedical research. We describe extant and novel analytic methods needed to associate the exposome with critical health outcomes and contextualize the data-centered challenges by drawing parallels to other research endeavors such as human genomics research. We discuss efforts for training scientists who can bridge public health, genomics, and biomedicine in informatics and statistics. If an exposome data ecosystem is brought to fruition, it will likely play a role as central as genomic science has had in molding the current and new generations of biomedical researchers, computational scientists, and public health research programs.
},
  journal = {Annual Review of Public Health},
  author = {Manrai, Arjun K. and Cui, Yuxia and Bushel, Pierre R. and Hall, Molly and Karakitsios, Spyros and Mattingly, Carolyn J. and Ritchie, Marylyn and Schmitt, Charles and Sarigiannis, Denis A. and Thomas, Duncan C. and others},
  year = {2017},
  pages = {279--294}
}

@article{grondin_davis_wiegers_king_wiegers_reif_hoppin_mattingly_2016,
  title = {Advancing Exposure Science through Chemical Data Curation and Integration in the {Comparative Toxicogenomics Database}},
  volume = {124},
  ISSN = {0091-6765 1552-9924},
  url = {http://dx.doi.org/10.1289/EHP174},
  DOI = {10.1289/ehp174},
  abstractNote = {Background: Exposure science studies the interactions and outcomes between environmental stressors and human or ecological receptors. To augment its role in understanding human health and the exposome, we aimed to centralize and integrate exposure science data into the broader biological framework of the Comparative Toxicogenomics Database (CTD), a public resource that promotes understanding of environmental chemicals and their effects on human health. Objectives: We integrated exposure data within the CTD to provide a centralized, freely available resource that facilitates identification of connections between real-world exposures, chemicals, genes/proteins, diseases, biological processes, and molecular pathways. Methods: We developed a manual curation paradigm that captures exposure data from the scientific literature using controlled vocabularies and free text within the context of four primary exposure concepts: stressor, receptor, exposure event, and exposure outcome. Using data from the Agricultural Health Study, we have illustrated the benefits of both centralization and integration of exposure information with CTD core data.
Results: We have described our curation process, demonstrated how exposure data can be accessed and analyzed in the CTD, and shown how this integration provides a broad biological context for exposure data to promote mechanistic understanding of environmental influences on human health. Conclusions: Curation and integration of exposure data within the CTD provides researchers with new opportunities to correlate exposures with human health outcomes, to identify underlying potential molecular mechanisms, and to improve understanding about the exposome. Citation: Grondin CJ, Davis AP, Wiegers TC, King BL, Wiegers JA, Reif DM, Hoppin JA, Mattingly CJ. 2016. Advancing exposure science through chemical data curation and integration in the Comparative Toxicogenomics Database. Environ Health Perspect 124:1592–1599; http://dx.doi.org/10.1289/EHP174},
  number = {10},
  journal = {Environmental Health Perspectives},
  publisher = {Environmental Health Perspectives},
  author = {Grondin, Cynthia J. and Davis, Allan Peter and Wiegers, Thomas C. and King, Benjamin L. and Wiegers, Jolene A. and Reif, David M. and Hoppin, Jane A. and Mattingly, Carolyn J.},
  year = {2016},
  month = oct,
  pages = {1592--1599}
}

% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX stops reading a key at whitespace, so this entry is likely rejected there. Key left unchanged so existing citations keep resolving.
@article{planchart_mattingly_allen_ceger_casey_hinton_kanungo_kullman_tal_bondesson_et al._2016,
  title = {Advancing toxicology research using in vivo high throughput toxicology with small fish models},
  volume = {33},
  number = {4},
  journal = {Altex-alternatives to Animal Experimentation},
  author = {Planchart, A. and Mattingly, C. J. and Allen, D. and Ceger, P. and Casey, W. and Hinton, D. and Kanungo, J. and Kullman, S. W. and Tal, T. and Bondesson, M.
and others},
  year = {2016},
  pages = {435--452}
}

@article{wei_peng_leaman_davis_mattingly_li_wiegers_lu_2016,
  title = {Assessing the state of the art in biomedical relation extraction: overview of the {BioCreative V} chemical-disease relation ({CDR}) task},
  ISSN = {1758-0463},
  DOI = {10.1093/database/baw032},
  abstractNote = {Manually curating chemicals, diseases and their relationships is significantly important to biomedical research, but it is plagued by its high cost and the rapid growth of the biomedical literature. In recent years, there has been a growing interest in developing computational approaches for automatic chemical-disease relation (CDR) extraction. Despite these attempts, the lack of a comprehensive benchmarking dataset has limited the comparison of different techniques in order to assess and advance the current state-of-the-art. To this end, we organized a challenge task through BioCreative V to automatically extract CDRs from the literature. We designed two challenge tasks: disease named entity recognition (DNER) and chemical-induced disease (CID) relation extraction. To assist system development and assessment, we created a large annotated text corpus that consisted of human annotations of chemicals, diseases and their interactions from 1500 PubMed articles. 34 teams worldwide participated in the CDR task: 16 (DNER) and 18 (CID). The best systems achieved an F-score of 86.46% for the DNER task—a result that approaches the human inter-annotator agreement (0.8875)—and an F-score of 57.03% for the CID task, the highest results ever reported for such tasks. When combining team results via machine learning, the ensemble system was able to further improve over the best team results by achieving 88.89% and 62.80% in F-score for the DNER and CID task, respectively.
Additionally, another novel aspect of our evaluation is to test each participating system’s ability to return real-time results: the average response time for each team’s DNER and CID web service systems were 5.6 and 9.3 s, respectively. Most teams used hybrid systems for their submissions based on machining learning. Given the level of participation and results, we found our task to be successful in engaging the text-mining research community, producing a large annotated corpus and improving the results of automatic disease recognition and CDR extraction. Database URL: http://www.biocreative.org/tasks/biocreative-v/track-3-cdr/},
  journal = {Database: The Journal of Biological Databases and Curation},
  author = {Wei, Chih-Hsuan and Peng, Yifan and Leaman, Robert and Davis, Allan Peter and Mattingly, Carolyn J. and Li, Jiao and Wiegers, Thomas C. and Lu, Zhiyong},
  year = {2016},
  month = mar
}

% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX stops reading a key at whitespace, so this entry is likely rejected there. Key left unchanged so existing citations keep resolving.
@article{li_sun_johnson_sciaky_wei_leaman_davis_mattingly_wiegers_lu_et al._2016,
  title = {{BioCreative V} {CDR} task corpus: a resource for chemical disease relation extraction},
  ISSN = {1758-0463},
  DOI = {10.1093/database/baw068},
  abstractNote = {Community-run, formal evaluations and manually annotated text corpora are critically important for advancing biomedical text-mining research. Recently in BioCreative V, a new challenge was organized for the tasks of disease named entity recognition (DNER) and chemical-induced disease (CID) relation extraction. Given the nature of both tasks, a test collection is required to contain both disease/chemical annotations and relation annotations in the same set of articles. Despite previous efforts in biomedical corpus construction, none was found to be sufficient for the task. Thus, we developed our own corpus called BC5CDR during the challenge by inviting a team of Medical Subject Headings (MeSH) indexers for disease/chemical entity annotation and Comparative Toxicogenomics Database (CTD) curators for CID relation annotation.
To ensure high annotation quality and productivity, detailed annotation guidelines and automatic annotation tools were provided. The resulting BC5CDR corpus consists of 1500 PubMed articles with 4409 annotated chemicals, 5818 diseases and 3116 chemical-disease interactions. Each entity annotation includes both the mention text spans and normalized concept identifiers, using MeSH as the controlled vocabulary. To ensure accuracy, the entities were first captured independently by two annotators followed by a consensus annotation: The average inter-annotator agreement (IAA) scores were 87.49% and 96.05% for the disease and chemicals, respectively, in the test set according to the Jaccard similarity coefficient. Our corpus was successfully used for the BioCreative V challenge tasks and should serve as a valuable resource for the text-mining research community. Database URL: http://www.biocreative.org/tasks/biocreative-v/track-3-cdr/},
  journal = {Database: The Journal of Biological Databases and Curation},
  author = {Li, J. and Sun, Y. P. and Johnson, R. J. and Sciaky, D. and Wei, C. H. and Leaman, R. and Davis, A. P. and Mattingly, Carolyn and Wiegers, T. C. and Lu, Z. Y. and others},
  year = {2016},
  month = may
}

@article{watson_planchart_mattingly_winkler_reif_kullman_2016,
  title = {From the Cover: Embryonic Exposure to {TCDD} Impacts Osteogenesis of the Axial Skeleton in {Japanese} medaka, {Oryzias} latipes},
  volume = {155},
  ISSN = {1096-6080 1096-0929},
  url = {http://dx.doi.org/10.1093/toxsci/kfw229},
  DOI = {10.1093/toxsci/kfw229},
  abstractNote = {Recent studies from mammalian, fish, and in vitro models have identified bone and cartilage development as sensitive targets for dioxins and other aryl hydrocarbon receptor ligands. In this study, we assess how embryonic 2,3,7,8-tetrachlorochlorodibenzo-p-dioxin (TCDD) exposure impacts axial osteogenesis in Japanese medaka (Oryzias latipes), a vertebrate model of human bone development.
Embryos from inbred wild-type Orange-red Hd-dR and 3 transgenic medaka lines (twist:EGFP, osx/sp7:mCherry, col10a1:nlGFP) were exposed to 0.15 nM and 0.3 nM TCDD and reared until 20 dpf. Individuals were stained for mineralized bone and imaged using confocal microscopy to assess skeletal alterations in medial vertebrae in combination with a qualitative spatial analysis of osteoblast and osteoblast progenitor cell populations. Exposure to TCDD resulted in an overall attenuation of vertebral ossification characterized by truncated centra, and reduced neural and hemal arch lengths. Effects on mineralization were consistent with modifications in cell number and cell localization of transgene-labeled osteoblast and osteoblast progenitor cells. Endogenous expression of osteogenic regulators runt-related transcription factor 2 (runx2) and osterix (osx/sp7), and extracellular matrix genes osteopontin (spp1), collagen type I alpha I (col1), collagen type X alpha I (col10a1), and osteocalcin (bglap/osc) was significantly diminished at 20 dpf following TCDD exposure as compared with controls. Through global transcriptomic analysis more than 590 differentially expressed genes were identified and mapped to select pathological states including inflammatory disease, connective tissue disorders, and skeletal and muscular disorders. Taken together, results from this study suggest that TCDD exposure inhibits axial bone formation through dysregulation of osteoblast differentiation. This approach highlights the advantages and sensitivity of using small fish models to investigate how xenobiotic exposure may impact skeletal development.},
  number = {2},
  journal = {Toxicological Sciences},
  publisher = {Oxford University Press (OUP)},
  author = {Watson, AtLee T. D. and Planchart, Antonio and Mattingly, Carolyn J. and Winkler, Christoph and Reif, David M. and Kullman, Seth W.},
  year = {2016},
  month = nov,
  pages = {485--496}
}

@article{davis_wiegers_king_wiegers_grondin_sciaky_johnson_mattingly_2016,
  title = {Generating {Gene Ontology}-Disease Inferences to Explore Mechanisms of Human Disease at the {Comparative Toxicogenomics Database}},
  volume = {11},
  ISSN = {1932-6203},
  DOI = {10.1371/journal.pone.0155530},
  abstractNote = {Strategies for discovering common molecular events among disparate diseases hold promise for improving understanding of disease etiology and expanding treatment options. One technique is to leverage curated datasets found in the public domain. The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) manually curates chemical-gene, chemical-disease, and gene-disease interactions from the scientific literature. The use of official gene symbols in CTD interactions enables this information to be combined with the Gene Ontology (GO) file from NCBI Gene. By integrating these GO-gene annotations with CTD’s gene-disease dataset, we produce 753,000 inferences between 15,700 GO terms and 4,200 diseases, providing opportunities to explore presumptive molecular underpinnings of diseases and identify biological similarities. Through a variety of applications, we demonstrate the utility of this novel resource. As a proof-of-concept, we first analyze known repositioned drugs (e.g., raloxifene and sildenafil) and see that their target diseases have a greater degree of similarity when comparing GO terms vs. genes. Next, a computational analysis predicts seemingly non-intuitive diseases (e.g., stomach ulcers and atherosclerosis) as being similar to bipolar disorder, and these are validated in the literature as reported co-diseases. Additionally, we leverage other CTD content to develop testable hypotheses about thalidomide-gene networks to treat seemingly disparate diseases.
Finally, we illustrate how CTD tools can rank a series of drugs as potential candidates for repositioning against B-cell chronic lymphocytic leukemia and predict cisplatin and the small molecule inhibitor JQ1 as lead compounds. The CTD dataset is freely available for users to navigate pathologies within the context of extensive biological processes, molecular functions, and cellular components conferred by GO. This inference set should aid researchers, bioinformaticists, and pharmaceutical drug makers in finding commonalities in disease mechanisms, which in turn could help identify new therapeutics, new indications for existing pharmaceuticals, potential disease comorbidities, and alerts for side effects.},
  number = {5},
  journal = {PLOS ONE},
  author = {Davis, Allan Peter and Wiegers, Thomas C. and King, Benjamin L. and Wiegers, Jolene and Grondin, Cynthia J. and Sciaky, Daniela and Johnson, Robin J. and Mattingly, Carolyn J.},
  year = {2016},
  month = may
}

@article{mattingly_boyles_lawler_haugen_dearry_haendel_2016,
  title = {Laying a Community-Based Foundation for Data-Driven Semantic Standards in Environmental Health Sciences},
  volume = {124},
  ISSN = {1552-9924},
  DOI = {10.1289/ehp.1510438},
  abstractNote = {Background: Despite increasing availability of environmental health science (EHS) data, development, and implementation of relevant semantic standards, such as ontologies or hierarchical vocabularies, has lagged. Consequently, integration and analysis of information needed to better model environmental influences on human health remains a significant challenge. Objectives: We aimed to identify a committed community and mechanisms needed to develop EHS semantic standards that will advance understanding about the impacts of environmental exposures on human disease.
Methods: The National Institute of Environmental Health Sciences sponsored the “Workshop for the Development of a Framework for Environmental Health Science Language” hosted at North Carolina State University on 15–16 September 2014. Through the assembly of data generators, users, publishers, and funders, we aimed to develop a foundation for enabling the development of community-based and data-driven standards that will ultimately improve standardization, sharing, and interoperability of EHS information. Discussion: Creating and maintaining an EHS common language is a continuous and iterative process, requiring community building around research interests and needs, enabling integration and reuse of existing data, and providing a low barrier of access for researchers needing to use or extend such a resource. Conclusions: Recommendations included developing a community-supported web-based toolkit that would enable a) collaborative development of EHS research questions and use cases, b) construction of user-friendly tools for searching and extending existing semantic resources, c) education and guidance about standards and their implementation, and d) creation of a plan for governance and sustainability. Citation: Mattingly CJ, Boyles R, Lawler CP, Haugen AC, Dearry A, Haendel M. 2016. Laying a community-based foundation for data-driven semantic standards in environmental health sciences. Environ Health Perspect 124:1136–1140; http://dx.doi.org/10.1289/ehp.1510438},
  number = {8},
  journal = {Environmental Health Perspectives},
  author = {Mattingly, Carolyn J. and Boyles, Rebecca and Lawler, Cindy P. and Haugen, Astrid C. and Dearry, Allen and Haendel, Melissa},
  year = {2016},
  month = aug,
  pages = {1136--1140}
}

@article{davis_grondin_johnson_sciaky_king_mcmorran_wiegers_wiegers_mattingly_2017,
  title = {The {Comparative Toxicogenomics Database}: update 2017},
  volume = {45},
  ISSN = {1362-4962},
  DOI = {10.1093/nar/gkw838},
  abstractNote = {The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between chemicals and gene products, and their relationships to diseases. Core CTD content (chemical-gene, chemical-disease and gene-disease interactions manually curated from the literature) are integrated with each other as well as with select external datasets to generate expanded networks and predict novel associations. Today, core CTD includes more than 30.5 million toxicogenomic connections relating chemicals/drugs, genes/proteins, diseases, taxa, Gene Ontology (GO) annotations, pathways, and gene interaction modules. In this update, we report a 33% increase in our core data content since 2015, describe our new exposure module (that harmonizes exposure science information with core toxicogenomic data) and introduce a novel dataset of GO-disease inferences (that identify common molecular underpinnings for seemingly unrelated pathologies). These advancements centralize and contextualize real-world chemical exposures with molecular pathways to help scientists generate testable hypotheses in an effort to understand the etiology and mechanisms underlying environmentally influenced diseases.},
  number = {D1},
  journal = {Nucleic Acids Research},
  author = {Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and King, Benjamin L. and McMorran, Roy and Wiegers, Jolene and Wiegers, Thomas C.
and Mattingly, Carolyn J.},
  year = {2017},
  month = jan,
  pages = {D972--D978}
}

@article{pelletier_wiegers_enayetallah_kibbey_gosink_koza-taylor_mattingly_lawton_2016,
  title = {{ToxEvaluator}: an integrated computational platform to aid the interpretation of toxicology study-related findings},
  ISSN = {1758-0463},
  DOI = {10.1093/database/baw062},
  abstractNote = {Attempts are frequently made to investigate adverse findings from preclinical toxicology studies in order to better understand underlying toxicity mechanisms. These efforts often begin with limited information, including a description of the adverse finding, knowledge of the structure of the chemical associated with its cause and the intended pharmacological target. ToxEvaluator was developed jointly by Pfizer and the Comparative Toxicogenomics Database (http://ctdbase.org) team at North Carolina State University as an in silico platform to facilitate interpretation of toxicity findings in light of prior knowledge. Through the integration of a diverse set of in silico tools that leverage a number of public and proprietary databases, ToxEvaluator streamlines the process of aggregating and interrogating diverse sources of information. The user enters compound and target identifiers, and selects adverse event descriptors from a safety lexicon and mapped MeSH disease terms. ToxEvaluator provides a summary report with multiple distinct areas organized according to what target or structural aspects have been linked to the adverse finding, including primary pharmacology, structurally similar proprietary compounds, structurally similar public domain compounds, predicted secondary (i.e. off-target) pharmacology and known secondary pharmacology. Similar proprietary compounds and their associated in vivo toxicity findings are reported, along with a link to relevant supporting documents.
For similar public domain compounds and interacting targets, ToxEvaluator integrates relationships curated in Comparative Toxicogenomics Database, returning all direct and inferred linkages between them. As an example of its utility, we demonstrate how ToxEvaluator rapidly identified direct (primary pharmacology) and indirect (secondary pharmacology) linkages between cerivastatin and myopathy.},
  journal = {Database: The Journal of Biological Databases and Curation},
  author = {Pelletier, D. and Wiegers, T. C. and Enayetallah, A. and Kibbey, C. and Gosink, M. and Koza-Taylor, P. and Mattingly, C. J. and Lawton, M.},
  year = {2016},
  month = may
}

% NOTE(review): inproceedings entries require a booktitle; none is recorded for this talk, so it is left absent rather than invented.
@inproceedings{mattingly_2015,
  title = {Integrating literature-based curated data to predict mechanisms of toxicity},
  author = {Mattingly, C. J.},
  year = {2015}
}

% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX stops reading a key at whitespace, so this entry is likely rejected there. Key left unchanged so existing citations keep resolving.
@article{comeau_batista-navarro_dai_dogan_yepes_khare_lu_marques_mattingly_neves_et al._2014,
  title = {{BioC} interoperability track overview},
  ISSN = {1758-0463},
  DOI = {10.1093/database/bau053},
  abstractNote = {BioC is a new simple XML format for sharing biomedical text and annotations and libraries to read and write that format. This promotes the development of interoperable tools for natural language processing (NLP) of biomedical text. The interoperability track at the BioCreative IV workshop featured contributions using or highlighting the BioC format. These contributions included additional implementations of BioC, many new corpora in the format, biomedical NLP tools consuming and producing the format and online services using the format. The ease of use, broad support and rapidly growing number of tools demonstrate the need for and value of the BioC format. Database URL: http://bioc.sourceforge.net/},
  journal = {Database: The Journal of Biological Databases and Curation},
  author = {Comeau, Donald C. and Batista-Navarro, Riza Theresa and Dai, Hong-Jie and Dogan, Rezarta Islamaj and Yepes, Antonio Jimeno and Khare, Ritu and Lu, Zhiyong and Marques, Hernani and Mattingly, Carolyn J.
and Neves, Mariana and others},
  year = {2014},
  month = jun
}

@article{davis_grondin_lennon-hopkins_saraceni-richards_sciaky_king_wiegers_mattingly_2015,
  title = {The {Comparative Toxicogenomics Database}'s 10th year anniversary: update 2015},
  volume = {43},
  ISSN = {1362-4962},
  DOI = {10.1093/nar/gku935},
  abstractNote = {Ten years ago, the Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) was developed out of a need to formalize, harmonize and centralize the information on numerous genes and proteins responding to environmental toxic agents across diverse species. CTD's initial approach was to facilitate comparisons of nucleotide and protein sequences of toxicologically significant genes by curating these sequences and electronically annotating them with chemical terms from their associated references. Since then, however, CTD has vastly expanded its scope to robustly represent a triad of chemical–gene, chemical–disease and gene–disease interactions that are manually curated from the scientific literature by professional biocurators using controlled vocabularies, ontologies and structured notation. Today, CTD includes 24 million toxicogenomic connections relating chemicals/drugs, genes/proteins, diseases, taxa, phenotypes, Gene Ontology annotations, pathways and interaction modules. In this 10th year anniversary update, we outline the evolution of CTD, including our increased data content, new ‘Pathway View’ visualization tool, enhanced curation practices, pilot chemical–phenotype results and impending exposure data set. The prototype database originally described in our first report has transformed into a sophisticated resource used actively today to help scientists develop and test hypotheses about the etiologies of environmentally influenced diseases.},
  number = {D1},
  journal = {Nucleic Acids Research},
  author = {Davis, Allan Peter and Grondin, Cynthia J. and Lennon-Hopkins, Kelley and Saraceni-Richards, Cynthia and Sciaky, Daniela and King, Benjamin L. and Wiegers, Thomas C. and Mattingly, Carolyn J.},
  year = {2015},
  month = jan,
  pages = {D914--D920}
}

@article{wiegers_davis_mattingly_2014,
  title = {Web services-based text-mining demonstrates broad impacts for interoperability and process simplification},
  ISSN = {1758-0463},
  DOI = {10.1093/database/bau050},
  abstractNote = {The Critical Assessment of Information Extraction systems in Biology (BioCreAtIvE) challenge evaluation tasks collectively represent a community-wide effort to evaluate a variety of text-mining and information extraction systems applied to the biological domain. The BioCreative IV Workshop included five independent subject areas, including Track 3, which focused on named-entity recognition (NER) for the Comparative Toxicogenomics Database (CTD; http://ctdbase.org). Previously, CTD had organized document ranking and NER-related tasks for the BioCreative Workshop 2012; a key finding of that effort was that interoperability and integration complexity were major impediments to the direct application of the systems to CTD's text-mining pipeline. This underscored a prevailing problem with software integration efforts. Major interoperability-related issues included lack of process modularity, operating system incompatibility, tool configuration complexity and lack of standardization of high-level inter-process communications. One approach to potentially mitigate interoperability and general integration issues is the use of Web services to abstract implementation details; rather than integrating NER tools directly, HTTP-based calls from CTD's asynchronous, batch-oriented text-mining pipeline could be made to remote NER Web services for recognition of specific biological terms using BioC (an emerging family of XML formats) for inter-process communications. To test this concept, participating groups developed Representational State Transfer /BioC-compliant Web services tailored to CTD's NER requirements.
Participants were provided with a comprehensive set of training materials. CTD evaluated results obtained from the remote Web service-based URLs against a test data set of 510 manually curated scientific articles. Twelve groups participated in the challenge. Recall, precision, balanced F-scores and response times were calculated. Top balanced F-scores for gene, chemical and disease NER were 61, 74 and 51%, respectively. Response times ranged from fractions-of-a-second to over a minute per article. We present a description of the challenge and summary of results, demonstrating how curation groups can effectively use interoperable NER technologies to simplify text-mining pipeline implementation. Database URL: http://ctdbase.org/}, journal={DATABASE-THE JOURNAL OF BIOLOGICAL DATABASES AND CURATION}, author={Wiegers, Thomas C. and Davis, Allan Peter and Mattingly, Carolyn J.}, year={2014}, month={Jun} } @article{davis_wiegers_roberts_king_lay_lennon-hopkins_sciaky_johnson_keating_greene_et al._2013, title={A CTD-Pfizer collaboration: manual curation of 88 000 scientific articles text mined for drug-disease and drug-phenotype interactions}, ISSN={["1758-0463"]}, DOI={10.1093/database/bat080}, abstractNote={Improving the prediction of chemical toxicity is a goal common to both environmental health research and pharmaceutical drug development. To improve safety detection assays, it is critical to have a reference set of molecules with well-defined toxicity annotations for training and validation purposes. Here, we describe a collaboration between safety researchers at Pfizer and the research team at the Comparative Toxicogenomics Database (CTD) to text mine and manually review a collection of 88 629 articles relating over 1 200 pharmaceutical drugs to their potential involvement in cardiovascular, neurological, renal and hepatic toxicity. 
In 1 year, CTD biocurators curated 254 173 toxicogenomic interactions (152 173 chemical–disease, 58 572 chemical–gene, 5 345 gene–disease and 38 083 phenotype interactions). All chemical–gene–disease interactions are fully integrated with public CTD, and phenotype interactions can be downloaded. We describe Pfizer’s text-mining process to collate the articles, and CTD’s curation strategy, performance metrics, enhanced data content and new module to curate phenotype information. As well, we show how data integration can connect phenotypes to diseases. This curation can be leveraged for information about toxic endpoints important to drug safety and help develop testable hypotheses for drug–disease events. The availability of these detailed, contextualized, high-quality annotations curated from seven decades’ worth of the scientific literature should help facilitate new mechanistic screening assays for pharmaceutical compound survival. This unique partnership demonstrates the importance of resource sharing and collaboration between public and private entities and underscores the complementary needs of the environmental health science and pharmaceutical communities. Database URL: http://ctdbase.org/},
  journal = {Database: The Journal of Biological Databases and Curation},
  author = {Davis, Allan Peter and Wiegers, Thomas C. and Roberts, Phoebe M. and King, Benjamin L. and Lay, Jean M.
and Lennon-Hopkins, Kelley and Sciaky, Daniela and Johnson, Robin and Keating, Heather and Greene, Nigel and et al.}, year={2013}, month={Nov} } @article{davis_wiegers_johnson_lay_lennon-hopkins_saraceni-richards_sciaky_murphy_mattingly_2013, title={Text Mining Effectively Scores and Ranks the Literature for Improving Chemical-Gene-Disease Curation at the Comparative Toxicogenomics Database}, volume={8}, ISSN={["1932-6203"]}, DOI={10.1371/journal.pone.0058201}, abstractNote={The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is a public resource that curates interactions between environmental chemicals and gene products, and their relationships to diseases, as a means of understanding the effects of environmental chemicals on human health. CTD provides a triad of core information in the form of chemical-gene, chemical-disease, and gene-disease interactions that are manually curated from scientific articles. To increase the efficiency, productivity, and data coverage of manual curation, we have leveraged text mining to help rank and prioritize the triaged literature. Here, we describe our text-mining process that computes and assigns each article a document relevancy score (DRS), wherein a high DRS suggests that an article is more likely to be relevant for curation at CTD. We evaluated our process by first text mining a corpus of 14,904 articles triaged for seven heavy metals (cadmium, cobalt, copper, lead, manganese, mercury, and nickel). Based upon initial analysis, a representative subset corpus of 3,583 articles was then selected from the 14,094 articles and sent to five CTD biocurators for review. The resulting curation of these 3,583 articles was analyzed for a variety of parameters, including article relevancy, novel data content, interaction yield rate, mean average precision, and biological and toxicological interpretability. 
We show that for all measured parameters, the DRS is an effective indicator for scoring and improving the ranking of literature for the curation of chemical-gene-disease information at CTD. Here, we demonstrate how fully incorporating text mining-based DRS scoring into our curation pipeline enhances manual curation by prioritizing more relevant articles, thereby increasing data content, productivity, and efficiency.},
  number = {4},
  journal = {PLOS ONE},
  author = {Davis, Allan Peter and Wiegers, Thomas C. and Johnson, Robin J. and Lay, Jean M. and Lennon-Hopkins, Kelley and Saraceni-Richards, Cynthia and Sciaky, Daniela and Murphy, Cynthia Grondin and Mattingly, Carolyn J.},
  year = {2013},
  month = apr
}

@comment{NOTE(review): the citation key of the next entry contains a space ("et al."). BibTeX-style parsers typically end the key at whitespace and then reject the entry. The key is left unchanged because renaming would change the entry's public identifier; confirm with whoever consumes this file before renaming.}
@article{wu_arighi_cohen_hirschman_krallinger_lu_mattingly_valencia_wiegers_wilbur_et al._2012,
  title = {{BioCreative-2012} Virtual Issue},
  ISSN = {1758-0463},
  DOI = {10.1093/database/bas049},
  abstractNote = {BioCreative: Critical Assessment of Information Extraction in Biology is an international community-wide effort for evaluating text mining and information extraction systems applied to the biological domain (http://www.biocreative.org/). The Challenge Evaluations and the accompanying BioCreative Workshops bring together the text mining and biology communities to drive the development of text mining systems that can be integrated into the biocuration workflow and the knowledge discovery process. To address the current barriers in using text mining in biology, BioCreative has further been conducting user requirement analysis, user-based evaluations and fostering standard development for text mining tool re-use and integration. This DATABASE virtual issue captures the major results from the BioCreative-2012 Workshop on Interactive Text Mining in the Biocuration Workflow and is the fifth special issue devoted to BioCreative.},
  journal = {Database: The Journal of Biological Databases and Curation},
  author = {Wu, C. H. and Arighi, C. N. and Cohen, K. B. and Hirschman, L.
and Krallinger, M. and Lu, Z. Y. and Mattingly, Carolyn and Valencia, A. and Wiegers, T. C. and Wilbur, W. J. and et al.}, year={2012}, month={Dec} } @article{wiegers_davis_mattingly_2012, title={Collaborative biocuration--text-mining development task for document prioritization for curation}, volume={2012}, ISSN={1758-0463}, url={http://dx.doi.org/10.1093/database/bas037}, DOI={10.1093/database/bas037}, abstractNote={The Critical Assessment of Information Extraction systems in Biology (BioCreAtIvE) challenge evaluation is a community-wide effort for evaluating text mining and information extraction systems for the biological domain. The ‘BioCreative Workshop 2012’ subcommittee identified three areas, or tracks, that comprised independent, but complementary aspects of data curation in which they sought community input: literature triage (Track I); curation workflow (Track II) and text mining/natural language processing (NLP) systems (Track III). Track I participants were invited to develop tools or systems that would effectively triage and prioritize articles for curation and present results in a prototype web interface. Training and test datasets were derived from the Comparative Toxicogenomics Database (CTD; http://ctdbase.org) and consisted of manuscripts from which chemical–gene–disease data were manually curated. A total of seven groups participated in Track I. For the triage component, the effectiveness of participant systems was measured by aggregate gene, disease and chemical ‘named-entity recognition’ (NER) across articles; the effectiveness of ‘information retrieval’ (IR) was also measured based on ‘mean average precision’ (MAP). Top recall scores for gene, disease and chemical NER were 49, 65 and 82%, respectively; the top MAP score was 80%. Each participating group also developed a prototype web interface; these interfaces were evaluated based on functionality and ease-of-use by CTD’s biocuration project manager. 
In this article, we present a detailed description of the challenge and a summary of the results.},
  number = {0},
  journal = {Database: The Journal of Biological Databases and Curation},
  publisher = {Oxford University Press (OUP)},
  author = {Wiegers, T. C. and Davis, A. P. and Mattingly, C. J.},
  year = {2012},
  month = nov,
  pages = {bas037}
}

@article{mattingly_2012,
  title = {Computation of Neutron Multiplicity Statistics Using Deterministic Transport},
  volume = {59},
  ISSN = {1558-1578},
  DOI = {10.1109/tns.2012.2185060},
  abstractNote = {Nuclear nonproliferation efforts are supported by measurements that are capable of rapidly characterizing special nuclear materials (SNM). Neutron multiplicity counting is frequently used to estimate properties of SNM, including neutron source strength, multiplication, and generation time. Different classes of model have been used to estimate these and other properties from the measured neutron counting distribution and its statistics. This paper describes a technique to compute statistics of the neutron counting distribution using deterministic neutron transport models. This approach can be applied to rapidly and accurately analyze neutron multiplicity counting measurements.},
  number = {2},
  journal = {IEEE Transactions on Nuclear Science},
  author = {Mattingly, John},
  year = {2012},
  month = apr,
  pages = {314--322}
}

@article{bello_richardson_davis_wiegers_mattingly_dolan_smith_blake_eppig_2012,
  title = {Disease model curation improvements at {Mouse Genome Informatics}},
  volume = {2012},
  ISSN = {1758-0463},
  url = {http://dx.doi.org/10.1093/database/bar063},
  DOI = {10.1093/database/bar063},
  abstractNote = {Optimal curation of human diseases requires an ontology or structured vocabulary that contains terms familiar to end users, is robust enough to support multiple levels of annotation granularity, is limited to disease terms and is stable enough to avoid extensive reannotation following updates.
At Mouse Genome Informatics (MGI), we currently use disease terms from Online Mendelian Inheritance in Man (OMIM) to curate mouse models of human disease. While OMIM provides highly detailed disease records that are familiar to many in the medical community, it lacks structure to support multilevel annotation. To improve disease annotation at MGI, we evaluated the merged Medical Subject Headings (MeSH) and OMIM disease vocabulary created by the Comparative Toxicogenomics Database (CTD) project. Overlaying MeSH onto OMIM provides hierarchical access to broad disease terms, a feature missing from the OMIM. We created an extended version of the vocabulary to meet the genetic disease-specific curation needs at MGI. Here we describe our evaluation of the CTD application, the extensions made by MGI and discuss the strengths and weaknesses of this approach. Database URL: http://www.informatics.jax.org/},
  number = {0},
  journal = {Database: The Journal of Biological Databases and Curation},
  publisher = {Oxford University Press (OUP)},
  author = {Bello, S. M. and Richardson, J. E. and Davis, A. P. and Wiegers, T. C. and Mattingly, C. J. and Dolan, M. E. and Smith, C. L. and Blake, J. A. and Eppig, J. T.},
  year = {2012},
  month = mar,
  pages = {bar063}
}

@article{davis_wiegers_rosenstein_mattingly_2012,
  title = {{MEDIC}: a practical disease vocabulary used at the {Comparative Toxicogenomics Database}},
  volume = {2012},
  ISSN = {1758-0463},
  url = {http://dx.doi.org/10.1093/database/bar065},
  DOI = {10.1093/database/bar065},
  abstractNote = {The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the effects of environmental chemicals on human health. CTD biocurators manually curate a triad of chemical–gene, chemical–disease and gene–disease relationships from the scientific literature. The CTD curation paradigm uses controlled vocabularies for chemicals, genes and diseases. To curate disease information, CTD first had to identify a source of controlled terms.
Two resources seemed to be good candidates: the Online Mendelian Inheritance in Man (OMIM) and the ‘Diseases’ branch of the National Library of Medicine's Medical Subject Headers (MeSH). To maximize the advantages of both, CTD biocurators undertook a novel initiative to map the flat list of OMIM disease terms into the hierarchical nature of the MeSH vocabulary. The result is CTD’s ‘merged disease vocabulary’ (MEDIC), a unique resource that integrates OMIM terms, synonyms and identifiers with MeSH terms, synonyms, definitions, identifiers and hierarchical relationships. MEDIC is both a deep and broad vocabulary, composed of 9700 unique diseases described by more than 67 000 terms (including synonyms). It is freely available to download in various formats from CTD. While neither a true ontology nor a perfect solution, this vocabulary has nonetheless proved to be extremely successful and practical for our biocurators in generating over 2.5 million disease-associated toxicogenomic relationships in CTD. Other external databases have also begun to adopt MEDIC for their disease vocabulary. Here, we describe the construction, implementation, maintenance and use of MEDIC to raise awareness of this resource and to offer it as a putative scaffold in the formal construction of an official disease ontology. Database URL: http://ctd.mdibl.org/voc.go?type=disease},
  number = {0},
  journal = {Database: The Journal of Biological Databases and Curation},
  publisher = {Oxford University Press (OUP)},
  author = {Davis, A. P. and Wiegers, T. C. and Rosenstein, M. C. and Mattingly, C. J.},
  year = {2012},
  month = mar,
  pages = {bar065}
}

@article{mattingly_mckone_callahan_blake_hubal_2012,
  title = {Providing the Missing Link: the Exposure Science Ontology {ExO}},
  volume = {46},
  ISSN = {0013-936X},
  DOI = {10.1021/es2033857},
  abstractNote = {Environmental health information resources lack exposure data required to translate molecular insights, elucidate environmental contributions to diseases, and assess human health and ecological risks.
We report development of an Exposure Ontology, ExO, designed to address this information gap by facilitating centralization and integration of exposure data. Major concepts were defined and the ontology drafted and evaluated by a working group of exposure scientists and other ontology and database experts. The resulting major concepts forming the basis for the ontology are “exposure stressor”, “exposure receptor”, “exposure event”, and “exposure outcome”. Although design of the first version of ExO focused on human exposure to chemicals, we anticipate expansion by the scientific community to address exposures of human and ecological receptors to the full suite of environmental stressors. Like other widely used ontologies, ExO is intended to link exposure science and diverse environmental health disciplines including toxicology, epidemiology, disease surveillance, and epigenetics.},
  number = {6},
  journal = {Environmental Science \& Technology},
  author = {Mattingly, Carolyn J. and McKone, Thomas E. and Callahan, Michael A. and Blake, Judith A. and Hubal, Elaine A. Cohen},
  year = {2012},
  month = mar,
  pages = {3046--3053}
}

@article{king_davis_rosenstein_wiegers_mattingly_2012,
  title = {Ranking Transitive Chemical-Disease Inferences Using Local Network Topology in the {Comparative Toxicogenomics Database}},
  volume = {7},
  ISSN = {1932-6203},
  DOI = {10.1371/journal.pone.0046524},
  abstractNote = {Exposure to chemicals in the environment is believed to play a critical role in the etiology of many human diseases. To enhance understanding about environmental effects on human health, the Comparative Toxicogenomics Database (CTD; http://ctdbase.org) provides unique curated data that enable development of novel hypotheses about the relationships between chemicals and diseases. CTD biocurators read the literature and curate direct relationships between chemicals-genes, genes-diseases, and chemicals-diseases.
These direct relationships are then computationally integrated to create additional inferred relationships; for example, a direct chemical-gene statement can be combined with a direct gene-disease statement to generate a chemical-disease inference (inferred via the shared gene). In CTD, the number of inferences has increased exponentially as the number of direct chemical, gene and disease interactions has grown. To help users navigate and prioritize these inferences for hypothesis development, we implemented a statistic to score and rank them based on the topology of the local network consisting of the chemical, disease and each of the genes used to make an inference. In this network, chemicals, diseases and genes are nodes connected by edges representing the curated interactions. Like other biological networks, node connectivity is an important consideration when evaluating the CTD network, as the connectivity of nodes follows the power-law distribution. Topological methods reduce the influence of highly connected nodes that are present in biological networks. We evaluated published methods that used local network topology to determine the reliability of protein–protein interactions derived from high-throughput assays. We developed a new metric that combines and weights two of these methods and uniquely takes into account the number of common neighbors and the connectivity of each entity involved. We present several CTD inferences as case studies to demonstrate the value of this metric and the biological relevance of the inferences.},
  number = {11},
  journal = {PLOS ONE},
  author = {King, Benjamin L. and Davis, Allan Peter and Rosenstein, Michael C. and Wiegers, Thomas C.
and Mattingly, Carolyn J.}, year={2012}, month={Nov} } @article{davis_johnson_lennon-hopkins_sciaky_rosenstein_wiegers_mattingly_2012, title={Targeted journal curation as a method to improve data currency at the Comparative Toxicogenomics Database}, journal={Database-The Journal of Biological Databases and Curation}, author={Davis, A. P. and Johnson, R. J. and Lennon-Hopkins, K. and Sciaky, D. and Rosenstein, M. C. and Wiegers, T. C. and Mattingly, C. J.}, year={2012} } @article{davis_murphy_johnson_lay_lennon-hopkins_saraceni-richards_sciaky_king_rosenstein_wiegers_et al._2012, title={The Comparative Toxicogenomics Database: update 2013}, volume={41}, ISSN={0305-1048 1362-4962}, url={http://dx.doi.org/10.1093/nar/gks994}, DOI={10.1093/nar/gks994}, abstractNote={The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between environmental chemicals and gene products and their relationships to diseases. Chemical–gene, chemical–disease and gene–disease interactions manually curated from the literature are integrated to generate expanded networks and predict many novel associations between different data types. CTD now contains over 15 million toxicogenomic relationships. To navigate this sea of data, we added several new features, including DiseaseComps (which finds comparable diseases that share toxicogenomic profiles), statistical scoring for inferred gene–disease and pathway–chemical relationships, filtering options for several tools to refine user analysis and our new Gene Set Enricher (which provides biological annotations that are enriched for gene sets). To improve data visualization, we added a Cytoscape Web view to our ChemComps feature, included color-coded interactions and created a ‘slim list’ for our MEDIC disease vocabulary (allowing diseases to be grouped for meta-analysis, visualization and better data management). 
CTD continues to promote interoperability with external databases by providing content and cross-links to their sites. Together, this wealth of expanded chemical–gene–disease data, combined with novel ways to analyze and view content, continues to help users generate testable hypotheses about the molecular mechanisms of environmental diseases.},
  number = {D1},
  journal = {Nucleic Acids Research},
  publisher = {Oxford University Press (OUP)},
  author = {Davis, Allan Peter and Murphy, Cynthia Grondin and Johnson, Robin and Lay, Jean M. and Lennon-Hopkins, Kelley and Saraceni-Richards, Cynthia and Sciaky, Daniela and King, Benjamin L. and Rosenstein, Michael C. and Wiegers, Thomas C. and others},
  year = {2012},
  month = oct,
  pages = {D1104--D1114}
}

@article{cheng_hinton_mattingly_planchart_2012,
  title = {Aquatic models, genomics and chemical risk management},
  volume = {155},
  ISSN = {1532-0456},
  url = {http://dx.doi.org/10.1016/j.cbpc.2011.06.009},
  DOI = {10.1016/j.cbpc.2011.06.009},
  abstractNote = {The 5th Aquatic Animal Models for Human Disease meeting follows four previous meetings (Nairn et al., 2001, Schmale, 2004, Schmale et al., 2007; Hinton et al., 2009) in which advances in aquatic animal models for human disease research were reported, and community discussion of future direction was pursued. At this meeting, discussion at a workshop entitled Bioinformatics and Computational Biology with Web-based Resources (20 September 2010) led to an important conclusion: Aquatic model research using feral and experimental fish, in combination with web-based access to annotated anatomical atlases and toxicological databases, yields data that advance our understanding of human gene function, and can be used to facilitate environmental management and drug development. We propose here that the effects of genes and environment are best appreciated within an anatomical context — the specifically affected cells and organs in the whole animal.
We envision the use of automated, whole-animal imaging at cellular resolution and computational morphometry facilitated by high-performance computing and automated entry into toxicological databases, as anchors for genetic and toxicological data, and as connectors between human and model system data. These principles should be applied to both laboratory and feral fish populations, which have been virtually irreplaceable sentinels for environmental contamination that results in human morbidity and mortality. We conclude that automation, database generation, and web-based accessibility, facilitated by genomic/transcriptomic data and high-performance and cloud computing, will potentiate the unique and potentially key roles that aquatic models play in advancing systems biology, drug development, and environmental risk management.},
  number = {1},
  journal = {Comparative Biochemistry and Physiology Part C: Toxicology \& Pharmacology},
  publisher = {Elsevier BV},
  author = {Cheng, Keith C. and Hinton, David E. and Mattingly, Carolyn J. and Planchart, Antonio},
  year = {2012},
  month = jan,
  pages = {169--173}
}

@article{davis_rosenstein_wiegers_mattingly_2011,
  title = {{DiseaseComps}: a metric that discovers similar diseases based upon common toxicogenomic profiles at {CTD}},
  volume = {7},
  ISSN = {0973-8894 0973-2063},
  url = {http://dx.doi.org/10.6026/97320630007154},
  DOI = {10.6026/97320630007154},
  abstractNote = {The Comparative Toxicogenomics Database (CTD) is a free resource that describes chemical-gene-disease networks to help understand the effects of environmental exposures on human health. The database contains more than 13,500 chemical-disease and 14,200 gene-disease interactions. In CTD, chemicals and genes are associated with a disease via two types of relationships: as a biomarker or molecular mechanism for the disease (M-type) or as a real or putative therapy for the disease (T-type).
We leveraged these curated datasets to compute similarity indices that can be used to produce lists of comparable diseases (“DiseaseComps”) based upon shared toxicogenomic profiles. This new metric now classifies diseases with common molecular characteristics, instead of the traditional approach of using histology or tissue of origin to define the disorder. In the dawning era of “personalized medicine”, this feature provides a new way to view and describe diseases and will help develop testable hypotheses about chemical-gene-disease networks. Availability: The database is available for free at http://ctd.mdibl.org/},
  number = {4},
  journal = {Bioinformation},
  publisher = {Biomedical Informatics},
  author = {Davis, Allan Peter and Rosenstein, Michael C. and Wiegers, Thomas Conrad and Mattingly, Carolyn J.},
  year = {2011},
  month = oct,
  pages = {154--156}
}

@article{davis_wiegers_murphy_mattingly_2011,
  title = {The curation paradigm and application tool used for manual curation of the scientific literature at the {Comparative Toxicogenomics Database}},
  volume = {2011},
  ISSN = {1758-0463},
  url = {http://dx.doi.org/10.1093/database/bar034},
  DOI = {10.1093/database/bar034},
  abstractNote = {The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the effects of environmental chemicals on human health. CTD biocurators read the scientific literature and convert free-text information into a structured format using official nomenclature, integrating third party controlled vocabularies for chemicals, genes, diseases and organisms, and a novel controlled vocabulary for molecular interactions. Manual curation produces a robust, richly annotated dataset of highly accurate and detailed information. Currently, CTD describes over 349 000 molecular interactions between 6800 chemicals, 20 900 genes (for 330 organisms) and 4300 diseases that have been manually curated from over 25 400 peer-reviewed articles.
This manually curated data are further integrated with other third party data (e.g. Gene Ontology, KEGG and Reactome annotations) to generate a wealth of toxicogenomic relationships. Here, we describe our approach to manual curation that uses a powerful and efficient paradigm involving mnemonic codes. This strategy allows biocurators to quickly capture detailed information from articles by generating simple statements using codes to represent the relationships between data types. The paradigm is versatile, expandable, and able to accommodate new data challenges that arise. We have incorporated this strategy into a web-based curation tool to further increase efficiency and productivity, implement quality control in real-time and accommodate biocurators working remotely. Database URL: http://ctd.mdibl.org},
  number = {0},
  journal = {Database: The Journal of Biological Databases and Curation},
  publisher = {Oxford University Press (OUP)},
  author = {Davis, A. P. and Wiegers, T. C. and Murphy, C. G. and Mattingly, C. J.},
  year = {2011},
  month = sep,
  pages = {bar034}
}

@article{planchart_mattingly_2010,
  title = {2,3,7,8-Tetrachlorodibenzo-p-dioxin Upregulates {FoxQ1b} in Zebrafish Jaw Primordium},
  volume = {23},
  ISSN = {0893-228X 1520-5010},
  url = {http://dx.doi.org/10.1021/tx9003165},
  DOI = {10.1021/tx9003165},
  abstractNote = {Vertebrate jaw development can be disrupted by exposure to 2,3,7,8-tetrachlorodibenzo-p-dioxin (TCDD)-a potent activator of the aryl hydrocarbon receptor (AHR) transcription factor required for transducing the toxic effects of TCDD. We used zebrafish (Danio rerio) embryos to investigate transcriptional responses to TCDD with the goal of discovering novel, jaw-specific genes affected by TCDD exposure. Our results uncovered a novel target of TCDD-activated Ahr belonging to the evolutionarily conserved family of forkhead box transcription factors. Quantitative real-time polymerase chain reaction analysis demonstrated that FoxQ1b was upregulated by TCDD 7- and 10-fold at 24 and 48 h postfertilization (hpf), respectively.
The rate of TCDD-induced FoxQ1b expression was more rapid than that of Cyp1a, a known direct target of TCDD-activated Ahr. TCDD-mediated induction of FoxQ1b was suppressed in the presence of an Ahr antagonist, alpha-naphthoflavone, as well as following knockdown of Ahr2 expression using an Ahr2-specific morpholino antisense oligonucleotide. In situ hybridization analysis of FoxQ1b expression at 48 hpf demonstrated that FoxQ1b is specifically expressed in the jaw primordium where it discretely outlines a developing jaw structure known as Meckel's cartilage--a conserved structure in all jawed vertebrates that develops abnormally in the presence of TCDD. These results identify a novel target of TCDD-activated Ahr and suggest that FoxQ1b may play a role in craniofacial abnormalities induced by developmental exposure to TCDD.},
  number = {3},
  journal = {Chemical Research in Toxicology},
  publisher = {American Chemical Society (ACS)},
  author = {Planchart, Antonio and Mattingly, Carolyn J.},
  year = {2010},
  month = mar,
  pages = {480--487}
}

@article{davis_king_mockus_murphy_saraceni-richards_rosenstein_wiegers_mattingly_2010,
  title = {The {Comparative Toxicogenomics Database}: update 2011},
  volume = {39},
  ISSN = {0305-1048 1362-4962},
  url = {http://dx.doi.org/10.1093/nar/gkq813},
  DOI = {10.1093/nar/gkq813},
  abstractNote = {The Comparative Toxicogenomics Database (CTD) is a public resource that promotes understanding about the interaction of environmental chemicals with gene products, and their effects on human health. Biocurators at CTD manually curate a triad of chemical–gene, chemical–disease and gene–disease relationships from the literature. These core data are then integrated to construct chemical–gene–disease networks and to predict many novel relationships using different types of associated data.
Since 2009, we dramatically increased the content of CTD to 1.4 million chemical–gene–disease data points and added many features, statistical analyses and analytical tools, including GeneComps and ChemComps (to find comparable genes and chemicals that share toxicogenomic profiles), enriched Gene Ontology terms associated with chemicals, statistically ranked chemical–disease inferences, Venn diagram tools to discover overlapping and unique attributes of any set of chemicals, genes or disease, and enhanced gene pathway data content, among other features. Together, this wealth of expanded chemical–gene–disease data continues to help users generate testable hypotheses about the molecular mechanisms of environmental diseases. CTD is freely available at http://ctd.mdibl.org.},
  number = {Database},
  journal = {Nucleic Acids Research},
  publisher = {Oxford University Press (OUP)},
  author = {Davis, A. P. and King, B. L. and Mockus, S. and Murphy, C. G. and Saraceni-Richards, C. and Rosenstein, M. and Wiegers, T. and Mattingly, C. J.},
  year = {2010},
  month = sep,
  pages = {D1067--D1072}
}

@inbook{mattingly_2010,
  title = {Understanding environment-disease connections: An introduction to the {Comparative Toxicogenomics Database} ({CTD})},
  booktitle = {Nature Pathway Interaction Database},
  author = {Mattingly, C. J.},
  year = {2010}
}

@article{davis_murphy_saraceni-richards_rosenstein_wiegers_hampton_mattingly_2009,
  title = {{GeneComps} and {ChemComps}: a new {CTD} metric to identify genes and chemicals with shared toxicogenomic profiles},
  volume = {4},
  ISSN = {0973-8894 0973-2063},
  url = {http://dx.doi.org/10.6026/97320630004173},
  DOI = {10.6026/97320630004173},
  abstractNote = {The Comparative Toxicogenomics Database is a public resource that promotes understanding about the effects of environmental chemicals on human health. Currently, CTD describes over 184,000 molecular interactions for more than 5,100 chemicals and 16,300 genes/proteins.
We have leveraged this dataset of chemical-gene relationships to compute similarity indices following the statistical method of the Jaccard index. These scores are used to produce lists of comparable genes (“GeneComps”) or chemicals (“ChemComps”) based on shared toxicogenomic profiles. GeneComps and ChemComps are now provided for every curated gene and chemical in CTD. ChemComps are particularly significant because they provide a way to group chemicals based upon their biological effects, instead of their physical or structural properties. These metrics provide a novel way to view and classify genes and chemicals and will help advance testable hypotheses about environmental chemical-gene-disease networks. Availability: CTD is freely available at http://ctd.mdibl.org/},
  number = {4},
  journal = {Bioinformation},
  publisher = {Biomedical Informatics},
  author = {Davis, Allan Peter and Murphy, Cynthia G. and Saraceni-Richards, Cynthia A. and Rosenstein, Michael C. and Wiegers, Thomas C. and Hampton, Thomas H. and Mattingly, Carolyn J.},
  year = {2009},
  month = oct,
  pages = {173--174}
}

@article{gohlke_thomas_zhang_rosenstein_davis_murphy_becker_mattingly_portier_2009,
  title = {Genetic and environmental pathways to complex diseases},
  volume = {3},
  ISSN = {1752-0509},
  url = {http://dx.doi.org/10.1186/1752-0509-3-46},
  DOI = {10.1186/1752-0509-3-46},
  abstractNote = {Background: Pathogenesis of complex diseases involves the integration of genetic and environmental factors over time, making it particularly difficult to tease apart relationships between phenotype, genotype, and environmental factors using traditional experimental approaches. Results: Using gene-centered databases, we have developed a network of complex diseases and environmental factors through the identification of key molecular pathways associated with both genetic and environmental contributions.
Comparison with known chemical disease relationships and analysis of transcriptional regulation from gene expression datasets for several environmental factors and phenotypes clustered in a metabolic syndrome and neuropsychiatric subnetwork supports our network hypotheses. This analysis identifies natural and synthetic retinoids, antipsychotic medications, Omega 3 fatty acids, and pyrethroid pesticides as potential environmental modulators of metabolic syndrome phenotypes through PPAR and adipocytokine signaling and organophosphate pesticides as potential environmental modulators of neuropsychiatric phenotypes. Conclusion: Identification of key regulatory pathways that integrate genetic and environmental modulators define disease associated targets that will allow for efficient screening of large numbers of environmental factors, screening that could set priorities for further research and guide public health decisions.},
  number = {1},
  journal = {BMC Systems Biology},
  publisher = {Springer Nature},
  author = {Gohlke, Julia M and Thomas, Reuben and Zhang, Yonqing and Rosenstein, Michael C and Davis, Allan P and Murphy, Cynthia and Becker, Kevin G and Mattingly, Carolyn J and Portier, Christopher J},
  year = {2009},
  pages = {46}
}

@article{mattingly_hampton_brothers_griffin_planchart_2009,
  title = {Perturbation of Defense Pathways by Low-Dose Arsenic Exposure in Zebrafish Embryos},
  volume = {117},
  ISSN = {0091-6765 1552-9924},
  url = {http://dx.doi.org/10.1289/ehp.0900555},
  DOI = {10.1289/ehp.0900555},
  abstractNote = {Background: Exposure to arsenic is a critical risk factor in the complex interplay among genetics, the environment, and human disease. Despite the potential for in utero exposure, the mechanism of arsenic action on vertebrate development and disease is unknown. Objectives: The objective of this study was to identify genes and gene networks perturbed by arsenic during development in order to enhance understanding of the molecular mechanisms of arsenic action.
Methods We exposed zebrafish embryos at 0.25–1.25 hr postfertilization to 10 or 100 ppb arsenic for 24 or 48 hr. We then used total RNA to interrogate genome microarrays and to test levels of gene expression changes by quantitative real-time polymerase chain reaction (QPCR). Computational analysis was used to identify gene expression networks perturbed by arsenic during vertebrate development. Results We identified a set of 99 genes that responded to low levels of arsenic. Nineteen of these genes were predicted to function in a common regulatory network that was significantly associated with immune response and cancer ($p < 10^{-41}$). Arsenic-mediated expression changes were validated by QPCR. Conclusions In this study we demonstrated that arsenic significantly down-regulates expression levels of multiple genes potentially critical for regulating the establishment of an immune response. The data also provide molecular evidence consistent with phenotypic observations reported in other model systems. Additional mechanistic studies will help explain molecular events regulating early stages of the immune system and long-term consequences of arsenic-mediated perturbation of this system during development.}, number={6}, journal={Environmental Health Perspectives}, publisher={Environmental Health Perspectives}, author={Mattingly, Carolyn J. and Hampton, Thomas H. and Brothers, Kimberly M. and Griffin, Nina E. and Planchart, Antonio}, year={2009}, month={Jun}, pages={981–987} } @article{wiegers_davis_cohen_hirschman_mattingly_2009, title={Text mining and manual curation of chemical-gene-disease networks for the Comparative Toxicogenomics Database (CTD)}, volume={10}, ISSN={1471-2105}, url={http://dx.doi.org/10.1186/1471-2105-10-326}, DOI={10.1186/1471-2105-10-326}, abstractNote={The Comparative Toxicogenomics Database (CTD) is a publicly available resource that promotes understanding about the etiology of environmental diseases. 
It provides manually curated chemical-gene/protein interactions and chemical- and gene-disease relationships from the peer-reviewed, published literature. The goals of the research reported here were to establish a baseline analysis of current CTD curation, develop a text-mining prototype from readily available open source components, and evaluate its potential value in augmenting curation efficiency and increasing data coverage. Prototype text-mining applications were developed and evaluated using a CTD data set consisting of manually curated molecular interactions and relationships from 1,600 documents. Preliminary results indicated that the prototype found 80% of the gene, chemical, and disease terms appearing in curated interactions. These terms were used to re-rank documents for curation, resulting in increases in mean average precision (63% for the baseline vs. 73% for a rule-based re-ranking), and in the correlation coefficient of rank vs. number of curatable interactions per document (baseline 0.14 vs. 0.38 for the rule-based re-ranking). This text-mining project is unique in its integration of existing tools into a single workflow with direct application to CTD. We performed a baseline assessment of the inter-curator consistency and coverage in CTD, which allowed us to measure the potential of these integrated tools to improve prioritization of journal articles for manual curation. 
Our study presents a feasible and cost-effective approach for developing a text mining solution to enhance manual curation throughput and efficiency.}, number={1}, journal={BMC Bioinformatics}, publisher={Springer Science and Business Media LLC}, author={Wiegers, Thomas C and Davis, Allan Peter and Cohen, K Bretonnel and Hirschman, Lynette and Mattingly, Carolyn J}, year={2009}, month={Oct} } @article{bates congdon_aman_nava_gaskins_mattingly_2008, title={An Evaluation of Information Content as a Metric for the Inference of Putative Conserved Noncoding Regions in DNA Sequences Using a Genetic Algorithms Approach}, volume={5}, ISSN={1545-5963}, url={http://dx.doi.org/10.1109/tcbb.2007.1059}, DOI={10.1109/tcbb.2007.1059}, abstractNote={In previous work, we presented GAMI [1], an approach to motif inference that uses a genetic algorithms search. GAMI is designed specifically to find putative conserved regulatory motifs in noncoding regions of divergent species and is designed to allow for analysis of long nucleotide sequences. In this work, we compare GAMI's performance when run with its original fitness function (a simple count of the number of matches) and when run with information content (IC), as well as several variations on these metrics. Results indicate that IC does not identify highly conserved regions and, thus, is not the appropriate metric for this task, whereas variations on IC, as well as the original metric, succeed in identifying putative conserved regions.}, number={1}, journal={IEEE/ACM Transactions on Computational Biology and Bioinformatics}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Bates Congdon, C. and Aman, J.C. and Nava, G.M. and Gaskins, H.R. 
and Mattingly, C.J.}, year={2008}, month={Jan}, pages={1–14} } @article{mattingly_2009, title={Chemical databases for environmental health and clinical research}, volume={186}, ISSN={0378-4274}, url={http://dx.doi.org/10.1016/j.toxlet.2008.10.003}, DOI={10.1016/j.toxlet.2008.10.003}, abstractNote={The increasing number of publicly available biological databases reflects the evolving need for managing and evaluating abundant and complex data in biological, clinical and computational research. Currently there are over 1000 biologically relevant databases in the public domain with varied content and diverse approaches to capturing and presenting data. This review summarizes the comparatively small niche of sophisticated databases and other resources that aim to enhance understanding of chemicals and their biological actions. The databases reviewed include 1 that emphasizes environmental chemicals and 9 that emphasize drugs and small molecules. These databases and their associated resources are incrementally strengthening the expanding field of toxicogenomics-based research by providing centralized sources of manually and computationally curated datasets and highly sophisticated tools for the meta-analysis of continually increasing environmental chemical, drug and small-molecule datasets.}, number={1}, journal={Toxicology Letters}, publisher={Elsevier BV}, author={Mattingly, Carolyn J.}, year={2009}, month={Apr}, pages={62–65} } @article{davis_murphy_saraceni-richards_rosenstein_wiegers_mattingly_2009, title={Comparative Toxicogenomics Database: a knowledgebase and discovery tool for chemical-gene-disease networks}, volume={37}, ISSN={0305-1048 1362-4962}, url={http://dx.doi.org/10.1093/nar/gkn580}, DOI={10.1093/nar/gkn580}, abstractNote={The Comparative Toxicogenomics Database (CTD) is a curated database that promotes understanding about the effects of environmental chemicals on human health. 
Biocurators at CTD manually curate chemical–gene interactions, chemical–disease relationships and gene–disease relationships from the literature. This strategy allows data to be integrated to construct chemical–gene–disease networks. CTD is unique in numerous respects: curation focuses on environmental chemicals; interactions are manually curated; interactions are constructed using controlled vocabularies and hierarchies; additional gene attributes (such as Gene Ontology, taxonomy and KEGG pathways) are integrated; data can be viewed from the perspective of a chemical, gene or disease; results and batch queries can be downloaded and saved; and most importantly, CTD acts as both a knowledgebase (by reporting data) and a discovery tool (by generating novel inferences). Over 116 000 interactions between 3900 chemicals and 13 300 genes have been curated from 270 species, and 5900 gene–disease and 2500 chemical–disease direct relationships have been captured. By integrating these data, 350 000 gene–disease relationships and 77 000 chemical–disease relationships can be inferred. This wealth of chemical–gene–disease information yields testable hypotheses for understanding the effects of environmental chemicals on human health. CTD is freely available at http://ctd.mdibl.org.}, number={Database}, journal={Nucleic Acids Research}, publisher={Oxford University Press (OUP)}, author={Davis, A. P. and Murphy, C. G. and Saraceni-Richards, C. A. and Rosenstein, M. C. and Wiegers, T. C. and Mattingly, C. 
J.}, year={2009}, month={Jan}, pages={D786–D792} } @article{davis_murphy_rosenstein_wiegers_mattingly_2008, title={The Comparative Toxicogenomics Database facilitates identification and understanding of chemical-gene-disease associations: arsenic as a case study}, volume={1}, ISSN={1755-8794}, url={http://dx.doi.org/10.1186/1755-8794-1-48}, DOI={10.1186/1755-8794-1-48}, abstractNote={Abstract Background The etiology of many chronic diseases involves interactions between environmental factors and genes that modulate physiological processes. Understanding interactions between environmental chemicals and genes/proteins may provide insights into the mechanisms of chemical actions, disease susceptibility, toxicity, and therapeutic drug interactions. The Comparative Toxicogenomics Database (CTD; http://ctd.mdibl.org) provides these insights by curating and integrating data describing relationships between chemicals, genes/proteins, and human diseases. To illustrate the scope and application of CTD, we present an analysis of curated data for the chemical arsenic. Arsenic represents a major global environmental health threat and is associated with many diseases. The mechanisms by which arsenic modulates these diseases are not well understood. Methods Curated interactions between arsenic compounds and genes were downloaded using export and batch query tools at CTD. The list of genes was analyzed for molecular interactions, Gene Ontology (GO) terms, KEGG pathway annotations, and inferred disease relationships. Results CTD contains curated data from the published literature describing 2,738 molecular interactions between 21 different arsenic compounds and 1,456 genes and proteins. Analysis of these genes and proteins provide insight into the biological functions and molecular networks that are affected by exposure to arsenic, including stress response, apoptosis, cell cycle, and specific protein signaling pathways. 
Integrating arsenic-gene data with gene-disease data yields a list of diseases that may be associated with arsenic exposure and genes that may explain this association. Conclusion CTD data integration and curation strategies yield insight into the actions of environmental chemicals and provide a basis for developing hypotheses about the molecular mechanisms underlying the etiology of environmental diseases. While many reports describe the molecular response to arsenic, CTD integrates these data with additional curated data sets that facilitate construction of chemical-gene-disease networks and provide the groundwork for investigating the molecular basis of arsenic-associated diseases or toxicity. The analysis reported here is extensible to any environmental chemical or therapeutic drug. }, number={1}, journal={BMC Medical Genomics}, publisher={Springer Science and Business Media LLC}, author={Davis, Allan P and Murphy, Cynthia G and Rosenstein, Michael C and Wiegers, Thomas C and Mattingly, Carolyn J}, year={2008}, month={Oct} } @inproceedings{congdon_gaskins_nava_mattingly_2007, title={Towards Interactive Visualization for Exploring Conserved Motifs in Noncoding DNA Sequence}, ISBN={9780769529998}, url={http://dx.doi.org/10.1109/fbit.2007.149}, DOI={10.1109/fbit.2007.149}, abstractNote={Computational inference of putative functional elements in noncoding DNA sequence can significantly hasten the search for verified functional elements, such as transcription factor binding sites. However, while computational methods are able to identify promising putative elements, the number of regions of interest may remain unrealistic for experimental validation. Visualization of putative functional elements can assist tremendously in identifying the regions of greatest interest; interactive visualization allows researchers additional means of asking and answering questions about putative elements. 
For example, interactive visualizations allow one to look at motifs in the context of verified functional elements or other putative elements, interactively altering the amount of information displayed and the level of resolution at which it is displayed. The work presented here describes initial efforts toward interactive visualization of putative functional elements specifically for our work with conserved elements.}, booktitle={2007 Frontiers in the Convergence of Bioscience and Information Technologies}, publisher={IEEE}, author={Congdon, Clare Bates and Gaskins, H. Rex and Nava, Gerardo M. and Mattingly, Carolyn}, year={2007} } @article{mattingly_rosenstein_colby_forrest jr_boyer_2006, title={The Comparative Toxicogenomics Database (CTD): a resource for comparative toxicological studies}, volume={305A}, ISSN={1548-8969 1552-499X}, url={http://dx.doi.org/10.1002/jez.a.307}, DOI={10.1002/jez.a.307}, abstractNote={The etiology of most chronic diseases involves interactions between environmental factors and genes that modulate important biological processes (Olden and Wilson, 2000. Nat Rev Genet 1(2):149–153). We are developing the publicly available Comparative Toxicogenomics Database (CTD) to promote understanding about the effects of environmental chemicals on human health. CTD identifies interactions between chemicals and genes and facilitates cross‐species comparative studies of these genes. The use of diverse animal models and cross‐species comparative sequence studies has been critical for understanding basic physiological mechanisms and gene and protein functions. Similarly, these approaches will be valuable for exploring the molecular mechanisms of action of environmental chemicals and the genetic basis of differential susceptibility. J. Exp. Zool. 305A:689–692, 2006. © 2006 Wiley‐Liss, Inc.}, number={9}, journal={Journal of Experimental Zoology Part A: Comparative Experimental Biology}, publisher={Wiley}, author={Mattingly, C.J. 
and Rosenstein, M.C. and Colby, G.T. and Forrest Jr, J.N. and Boyer, J.L.}, year={2006}, pages={689–692} } @article{mattingly_rosenstein_davis_colby_forrest_boyer_2006, title={The Comparative Toxicogenomics Database: A Cross-Species Resource for Building Chemical-Gene Interaction Networks}, volume={92}, ISSN={1096-6080 1096-0929}, url={http://dx.doi.org/10.1093/toxsci/kfl008}, DOI={10.1093/toxsci/kfl008}, abstractNote={Chemicals in the environment play a critical role in the etiology of many human diseases. Despite their prevalence, the molecular mechanisms of action and the effects of chemicals on susceptibility to disease are not well understood. To promote understanding of these mechanisms, the Comparative Toxicogenomics Database (CTD; http://ctd.mdibl.org/) presents scientifically reviewed and curated information on chemicals, relevant genes and proteins, and their interactions in vertebrates and invertebrates. CTD integrates sequence, reference, species, microarray, and general toxicology information to provide a unique centralized resource for toxicogenomic research. The database also provides visualization capabilities that enable cross-species comparisons of gene and protein sequences. These comparisons will facilitate understanding of structure-function correlations and the genetic basis of susceptibility. Manual curation and integration of cross-species chemical-gene and chemical-protein interactions from the literature are now underway. These data will provide information for building complex interaction networks. 
New CTD features include (1) cross-species gene, rather than sequence, query and visualization capabilities; (2) integrated cross-links to microarray data from chemicals, genes, and sequences in CTD; (3) a reference set related to chemical-gene and protein interactions identified by an information retrieval system; and (4) a "Chemicals in the News" initiative that provides links from CTD chemicals to environmental health articles from the popular press. Here we describe these new features and our novel cross-species curation of chemical-gene and chemical-protein interactions.}, number={2}, journal={Toxicological Sciences}, publisher={Oxford University Press (OUP)}, author={Mattingly, Carolyn J. and Rosenstein, Michael C. and Davis, Allan Peter and Colby, Glenn T. and Forrest, John N., Jr and Boyer, James L.}, year={2006}, month={May}, pages={587–595} } @inproceedings{congdon_fizer_smith_gaskins_aman_nava_mattingly_2005, title={Preliminary Results for GAMI: A Genetic Algorithms Approach to Motif Inference}, ISBN={0780393872}, url={http://dx.doi.org/10.1109/cibcb.2005.1594904}, DOI={10.1109/cibcb.2005.1594904}, abstractNote={We have developed GAMI, an approach to motif inference that uses a genetic algorithms search and is designed specifically to work with divergent species and possibly long nucleotide sequences. The system design reduces the size of the search space as compared to typical window-location approaches for motif inference. This paper describes the motivation and system design for GAMI, discusses how we have designed the search space and compares this to the search space of other approaches, and presents initial results with data from the literature and from novel tasks. 
GAMI is able to find a host of putative conserved patterns; possible approaches for validating the utility of the conserved regions are discussed.}, booktitle={2005 IEEE Symposium on Computational Intelligence in Bioinformatics and Computational Biology}, publisher={IEEE}, author={Congdon, C.B. and Fizer, C.W. and Smith, N.W. and Gaskins, H.R. and Aman, J. and Nava, G.M. and Mattingly, C.}, year={2005} } @article{mattingly_parton_dowell_rafferty_barnes_2004, title={Cell and Molecular Biology of Marine Elasmobranchs: Squalus acanthias and Raja erinacea}, volume={1}, ISSN={1545-8547 1557-8542}, url={http://dx.doi.org/10.1089/zeb.2004.1.111}, DOI={10.1089/zeb.2004.1.111}, abstractNote={Elasmobranchs are among the most primitive existing species exhibiting fundamental vertebrate characteristics, such as neural crest, jaws, teeth, and an adaptive immune system. They are also among the earliest-evolved vertebrates with a closed, pressurized circulatory system and related signaling molecules. Although many species are used experimentally, the spiny dogfish shark (Squalus acanthias) and little skate (Raja erinacea) have particular advantages and are the most commonly used elasmobranch biomedical models. These animals display powerful molecular systems for dealing with salt and water homeostasis, cell volume regulation, and environmental and internal osmotic sensing. They have become important unique models in studies of transport-related diseases such as cystic fibrosis and anion or xenobiotic transport. Much of this work has relied on physiological experiments combined with molecular approaches and the advantages of comparative genomic analyses to identify conserved regions representing functional protein domains. Recent work has seen the development of cell cultures and the beginning of expressed sequence tags (EST) and genomic libraries. Other areas in which elasmobranches have played critical roles include immunology and neurobiology. 
It also appears that sharks have tissue regenerative capability beyond what is commonly seen in mammals. For example, sharks and skates possess a region of renal regeneration, with new tubules being formed continually through adulthood. As comparative functional genomics comes of age, these comparative vertebrate models may play an increasing role in the larger picture of human biomedical research. There is plenty of ocean to share.}, number={2}, journal={Zebrafish}, publisher={Mary Ann Liebert Inc}, author={Mattingly, Carolyn and Parton, Angela and Dowell, Lori and Rafferty, Jason and Barnes, David}, year={2004}, month={May}, pages={111–120} } @article{barnes_mattingly_parton_dowell_bayne_forrest_2004, title={Marine Organism Cell Biology and Regulatory Sequence Discovery in Comparative Functional Genomics}, volume={46}, ISSN={0920-9069 1573-0778}, url={http://dx.doi.org/10.1007/s10616-005-1719-5}, DOI={10.1007/s10616-005-1719-5}, abstractNote={The use of bioinformatics to integrate phenotypic and genomic data from mammalian models is well established as a means of understanding human biology and disease. Beyond direct biomedical applications of these approaches in predicting structure-function relationships between coding sequences and protein activities, comparative studies also promote understanding of molecular evolution and the relationship between genomic sequence and morphological and physiological specialization. Recently recognized is the potential of comparative studies to identify functionally significant regulatory regions and to generate experimentally testable hypotheses that contribute to understanding mechanisms that regulate gene expression, including transcriptional activity, alternative splicing and transcript stability. Functional tests of hypotheses generated by computational approaches require experimentally tractable in vitro systems, including cell cultures. 
Comparative sequence analysis strategies that use genomic sequences from a variety of evolutionarily diverse organisms are critical for identifying conserved regulatory motifs in the 5'-upstream, 3'-downstream and introns of genes. Genomic sequences and gene orthologues in the first aquatic vertebrate and protovertebrate organisms to be fully sequenced (Fugu rubripes, Ciona intestinalis, Tetraodon nigroviridis, Danio rerio) as well as in the elasmobranchs, spiny dogfish shark (Squalus acanthias) and little skate (Raja erinacea), and marine invertebrate models such as the sea urchin (Strongylocentrotus purpuratus) are valuable in the prediction of putative genomic regulatory regions. Cell cultures have been derived for these and other model species. Data and tools resulting from these kinds of studies will contribute to understanding transcriptional regulation of biomedically important genes and provide new avenues for medical therapeutics and disease prevention.}, number={2-3}, journal={Cytotechnology}, publisher={Springer Science and Business Media LLC}, author={Barnes, David W. and Mattingly, Carolyn J. and Parton, Angela and Dowell, Lori M. and Bayne, Christopher J. 
and Forrest, John N., Jr.}, year={2004}, month={Oct}, pages={123–137} } @article{mattingly_colby_rosenstein_forrest_boyer_2004, title={Promoting comparative molecular studies in environmental health research: an overview of the comparative toxicogenomics database (CTD)}, volume={4}, ISSN={1470-269X 1473-1150}, url={http://dx.doi.org/10.1038/sj.tpj.6500225}, DOI={10.1038/sj.tpj.6500225}, abstractNote={Promoting comparative molecular studies in environmental health research: an overview of the comparative toxicogenomics database (CTD)}, number={1}, journal={The Pharmacogenomics Journal}, publisher={Springer Nature}, author={Mattingly, C J and Colby, G T and Rosenstein, M C and Forrest, J N and Boyer, J L}, year={2004}, month={Jan}, pages={5–8} } @article{mattingly_colby_forrest_boyer_2003, title={The Comparative Toxicogenomics Database (CTD).}, volume={111}, ISSN={0091-6765 1552-9924}, url={http://dx.doi.org/10.1289/ehp.6028}, DOI={10.1289/ehp.6028}, abstractNote={The Mount Desert Island Biological Laboratory in Salsbury Cove, Maine, USA, is developing the Comparative Toxicogenomics Database (CTD), a community-supported genomic resource devoted to genes and proteins of human toxicologic significance. CTD will be the first publicly available database to a) provide annotated associations among genes, proteins, references, and toxic agents, with a focus on annotating data from aquatic and mammalian organisms; b) include nucleotide and protein sequences from diverse species; c) offer a range of analysis tools for customized comparative studies; and d) provide information to investigators on available molecular reagents. This combination of features will facilitate cross-species comparisons of toxicologically significant genes and proteins. 
These comparisons will promote understanding of molecular evolution, the significance of conserved sequences, the genetic basis of variable sensitivity to environmental agents, and the complex interactions between the environment and human health. CTD is currently under development, and the planned scope and functions of the database are described herein. The intent of this report is to invite community participation in the development of CTD to ensure that it will be a valuable resource for environmental health, molecular biology, and toxicology research.}, number={6}, journal={Environmental Health Perspectives}, publisher={Environmental Health Perspectives}, author={Mattingly, Carolyn J and Colby, Glenn T and Forrest, John N and Boyer, James L}, year={2003}, month={May}, pages={793–795} } @article{mattingly_mclachlan_toscano_2001, title={Green fluorescent protein (GFP) as a marker of aryl hydrocarbon receptor (AhR) function in developing zebrafish (Danio rerio).}, volume={109}, ISSN={0091-6765 1552-9924}, url={http://dx.doi.org/10.1289/ehp.01109845}, DOI={10.1289/ehp.01109845}, abstractNote={We developed an inducible in vivo reporter system to examine expression of the aryl hydrocarbon receptor (AhR) during development in zebrafish (Danio rerio). AhR is a ligand-activated transcription factor that mediates the toxic actions of environmental contaminants such as 2,3,7,8-tetrachlorodibenzo-p-dioxin (TCDD). Induction of cytochrome P4501A1 (CYP1A1) is an early biomarker of AhR activation. A 1905 base pair region of the human CYP1A1 promoter/enhancer region was regulated by AhR in zebrafish liver cells after exposure to TCDD (10 nM) in a transient transfection assay. This regulatory region was fused to the cDNA sequence encoding green fluorescent protein (GFP) of jellyfish (Aequorea victoria). Transgenic zebrafish were generated to express this AhR-regulated GFP construct. 
Injected fish exposed to TCDD exhibited induction of GFP in the eye, nose, and vertebrae of zebrafish embryos (48 and 72 hr after fertilization) compared to vehicle controls (DMSO), which did not express GFP. To investigate whether AhR-regulated GFP expression correlated with sites of TCDD toxicity, we exposed wild-type zebrafish to DMSO or TCDD and examined them for morphologic abnormalities. By 5 days after fertilization, TCDD-exposed fish exhibited gross dysmorphogenesis in cranio-facial and vertebral development.}, number={8}, journal={Environmental Health Perspectives}, publisher={Environmental Health Perspectives}, author={Mattingly, C J and McLachlan, J A and Toscano, W A, Jr}, year={2001}, month={Aug}, pages={845–849} } @article{mattingly_toscano_2001, title={Posttranscriptional silencing of cytochrome P4501A1 (CYP1A1) during zebrafish (Danio rerio) development}, volume={222}, ISSN={1058-8388 1097-0177}, url={http://dx.doi.org/10.1002/dvdy.1215}, DOI={10.1002/dvdy.1215}, abstractNote={Induction patterns of cytochrome P4501A1 (CYP1A1), an early biochemical marker of exposure to the environmental toxicant 2,3,7,8‐tetrachlorodibenzo‐p‐dioxin (TCDD, or dioxin) were investigated during zebrafish (Danio rerio) development. A zebrafish CYP1A1 cDNA fragment was cloned and used to detect CYP1A1 mRNA in embryos exposed to TCDD (1 or 10 nM). Induction of CYP1A1 activity was dependent on age and state of hatch. CYP1A1 mRNA was observed by 15 hr postfertilization. CYP1A1 protein and monooxygenase activity were not detected until 3 days postfertilization and after hatch, as determined by Western immunoblot analysis and ethoxyresorufin O‐deethylase (EROD) activity, respectively. In contrast to embryos, concomitant induction of mRNA and activity was detected in juvenile zebrafish (3 days posthatch) after 6 hr of TCDD exposure. 
Asynchronous induction of CYP1A1 mRNA and activity during development may be a general regulatory mechanism, as similar ontogenetic expression of this gene was demonstrated in mouse embryos. To our knowledge, this is the first report of CYP1A1 posttranscriptional silencing during embryogenesis. Our data suggest that TCDD‐mediated induction of CYP1A1 activity is regulated differentially in developing and mature systems. © 2001 Wiley‐Liss, Inc.}, number={4}, journal={Developmental Dynamics}, publisher={Wiley}, author={Mattingly, Carolyn J. and Toscano, William A.}, year={2001}, pages={645–654} } @article{ricci_toscano_mattingly_toscano_1999, title={Estrogen Receptor Reduces CYP1A1 Induction in Cultured Human Endometrial Cells}, volume={274}, ISSN={0021-9258 1083-351X}, url={http://dx.doi.org/10.1074/jbc.274.6.3430}, DOI={10.1074/jbc.274.6.3430}, abstractNote={2,3,7,8-Tetrachlorodibenzo-p-dioxin (TCDD) exerts its toxic action via the aryl hydrocarbon (Ah) receptor, which induces a battery of xenobiotic-metabolizing enzymes, including the cytochrome P450 isozyme, CYP1A1. TCDD-induced 7-ethoxycoumarin-O-deethylase activity was reduced 75% in cultured human endometrial ECC-1 cells exposed to various concentrations of 17β-estradiol for up to 72 h, with a half-maximal effective concentration (EC50) of 0.9 nM. Reduced enzyme activity was correlated with decreased CYP1A1 mRNA levels, and transcription. Exposure to TCDD plus 17β-estradiol also reduced CYP1A1 activity in MCF-7 breast cancer cells but not in Hep-3B human liver cells or HuE primary human keratinocytes, suggesting that the effect was specific to estrogen-regulated cells. Estrogen receptor antagonists 4-hydroxytamoxifen and 7α-[9-(4,4,5,5,5-pentafluoro-pentylsulfinyl)nonyl]estra-1,3,5(10)-triene-3,17β-diol restored TCDD-induced CYP1A1 transcription, steady-state mRNA levels, and enzymatic activity in ECC-1 cells. 
Gel mobility shift assay showed that 17β-estradiol had little effect on Ah receptor binding to its DNA-responsive element. 17β-Estradiol did not alter the induction of another Ah receptor-regulated gene, CYP1B1, suggesting that altered Ah receptor binding to DNA does not mediate reduced CYP1A1 transcription. Transfecting ECC-1 cells with a general transcription factor involved in CYP1A1 induction, nuclear factor-1, reversed 17β-estradiol antagonism of dioxin-induced CYP1A1. The data suggest that 17β-estradiol reduced CYP1A1 expression at the transcriptional level by squelching available nuclear factor-1, a transcription factor that interacts with both Ah and estrogen receptors.}, number={6}, journal={Journal of Biological Chemistry}, publisher={American Society for Biochemistry \& Molecular Biology (ASBMB)}, author={Ricci, M. Stacey and Toscano, Diane G. and Mattingly, Carolyn J. and Toscano, William A., Jr.}, year={1999}, month={Feb}, pages={3430–3438} }