@article{chan_thessen_duncan_matentzoglu_schmitt_grondin_vasilevsky_mcmurry_robinson_mungall_et al._2023,
  author       = {Chan, Lauren E. and Thessen, Anne E. and Duncan, William D. and Matentzoglu, Nicolas and Schmitt, Charles and Grondin, Cynthia J. and Vasilevsky, Nicole and McMurry, Julie A. and Robinson, Peter N. and Mungall, Christopher J. and others},
  title        = {The {Environmental Conditions, Treatments, and Exposures Ontology} ({ECTO}): connecting toxicology and exposure to human health and beyond},
  journal      = {Journal of Biomedical Semantics},
  volume       = {14},
  number       = {1},
  year         = {2023},
  month        = feb,
  issn         = {2041-1480},
  doi          = {10.1186/s13326-023-00283-x},
  abstractNote = {Abstract
    Background
    Evaluating the impact of environmental exposures on organism health is a key goal of modern biomedicine and is critically important in an age of greater pollution and chemicals in our environment. Environmental health utilizes many different research methods and generates a variety of data types. However, to date, no comprehensive database represents the full spectrum of environmental health data. Due to a lack of interoperability between databases, tools for integrating these resources are needed. In this manuscript we present the Environmental Conditions, Treatments, and Exposures Ontology (ECTO), a species-agnostic ontology focused on exposure events that occur as a result of natural and experimental processes, such as diet, work, or research activities. ECTO is intended for use in harmonizing environmental health data resources to support cross-study integration and inference for mechanism discovery.

    Methods and findings
    ECTO is an ontology designed for describing organismal exposures such as toxicological research, environmental variables, dietary features, and patient-reported data from surveys. ECTO utilizes the base model established within the Exposure Ontology (ExO). ECTO is developed using a combination of manual curation and Dead Simple OWL Design Patterns (DOSDP), and contains over 2700 environmental exposure terms, and incorporates chemical and environmental ontologies. ECTO is an Open Biological and Biomedical Ontology (OBO) Foundry ontology that is designed for interoperability, reuse, and axiomatization with other ontologies. ECTO terms have been utilized in axioms within the Mondo Disease Ontology to represent diseases caused or influenced by environmental factors, as well as for survey encoding for the Personalized Environment and Genes Study (PEGS).

    Conclusions
    We constructed ECTO to meet Open Biological and Biomedical Ontology (OBO) Foundry principles to increase translation opportunities between environmental health and other areas of biology. ECTO has a growing community of contributors consisting of toxicologists, public health epidemiologists, and health care providers to provide the necessary expertise for areas that have been identified previously as gaps.}
}
 @article{davis_wiegers_wiegers_grondin_johnson_sciaky_mattingly_2021,
   author       = {Davis, Allan Peter and Wiegers, Thomas C. and Wiegers, Jolene and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and Mattingly, Carolyn J.},
   title        = {{CTD} anatomy: Analyzing chemical-induced phenotypes and exposures from an anatomical perspective, with implications for environmental health studies},
   journal      = {Current Research in Toxicology},
   volume       = {2},
   pages        = {128--139},
   year         = {2021},
   issn         = {2666-027X},
   doi          = {10.1016/j.crtox.2021.03.001},
   abstractNote = {The Comparative Toxicogenomics Database (CTD) is a freely available public resource that curates and interrelates chemical, gene/protein, phenotype, disease, organism, and exposure data. CTD can be used to address toxicological mechanisms for environmental chemicals and facilitate the generation of testable hypotheses about how exposures affect human health. At CTD, manually curated interactions for chemical-induced phenotypes are enhanced with anatomy terms (tissues, fluids, and cell types) to describe the physiological system of the reported event. These same anatomy terms are used to annotate the human media (e.g., urine, hair, nail, blood, etc.) in which an environmental chemical was assayed for exposure. Currently, CTD uses more than 880 unique anatomy terms to contextualize over 255,000 chemical-phenotype interactions and 167,000 exposure statements. These annotations allow chemical-phenotype interactions and exposure data to be explored from a novel, anatomical perspective. Here, we describe CTD's anatomy curation process (including the construction of a controlled, interoperable vocabulary) and new anatomy webpages (that coalesce and organize the curated chemical-phenotype and exposure data sets). We also provide examples that demonstrate how this feature can be used to identify system- and cell-specific chemical-induced toxicities, help inform exposure data, prioritize phenotypes for environmental diseases, survey tissue and pregnancy exposomes, and facilitate data connections with external resources. Anatomy annotations advance understanding of environmental health by providing new ways to explore and survey chemical-induced events and exposure studies in the CTD framework.}
 }
 @article{grondin_davis_wiegers_wiegers_sciaky_johnson_mattingly_2021,
   author       = {Grondin, Cynthia J. and Davis, Allan Peter and Wiegers, Jolene A. and Wiegers, Thomas C. and Sciaky, Daniela and Johnson, Robin J. and Mattingly, Carolyn J.},
   title        = {Predicting molecular mechanisms, pathways, and health outcomes induced by {Juul} e-cigarette aerosol chemicals using the {Comparative Toxicogenomics Database}},
   journal      = {Current Research in Toxicology},
   volume       = {2},
   pages        = {272--281},
   year         = {2021},
   issn         = {2666-027X},
   doi          = {10.1016/j.crtox.2021.08.001},
   abstractNote = {There is a critical need to understand the health risks associated with vaping e-cigarettes, which has reached epidemic levels among teens. Juul is currently the most popular type of e-cigarette on the market. Using the Comparative Toxicogenomics Database (CTD; http://ctdbase.org), a public resource that integrates chemical, gene, phenotype and disease data, we aimed to analyze the potential molecular mechanisms of eight chemicals detected in the aerosols generated by heating Juul e-cigarette pods: nicotine, acetaldehyde, formaldehyde, free radicals, crotonaldehyde, acetone, pyruvaldehyde, and particulate matter. Curated content in CTD, including chemical-gene, chemical-phenotype, and chemical-disease interactions, as well as associated phenotypes and pathway enrichment, were analyzed to help identify potential molecular mechanisms and diseases associated with vaping. Nicotine shows the most direct disease associations of these chemicals, followed by particulate matter and formaldehyde. Together, these chemicals show a direct marker or mechanistic relationship with 400 unique diseases in CTD, particularly in the categories of cardiovascular diseases, nervous system diseases, respiratory tract diseases, cancers, and mental disorders. We chose three respiratory tract diseases to investigate further, and found that in addition to cellular processes of apoptosis and cell proliferation, prioritized phenotypes underlying Juul-associated respiratory tract disease outcomes include response to oxidative stress, inflammatory response, and several cell signaling pathways (p38MAPK, NIK/NFkappaB, calcium-mediated).}
 }
 @article{thessen_grondin_kulkarni_brander_truong_vasilevsky_callahan_chan_westra_willis_et al._2020,
   author       = {Thessen, Anne E. and Grondin, Cynthia J. and Kulkarni, Resham D. and Brander, Susanne and Truong, Lisa and Vasilevsky, Nicole A. and Callahan, Tiffany J. and Chan, Lauren E. and Westra, Brian and Willis, Mary and others},
   title        = {Community Approaches for Integrating Environmental Exposures into Human Models of Disease},
   journal      = {Environmental Health Perspectives},
   volume       = {128},
   number       = {12},
   year         = {2020},
   month        = dec,
   issn         = {1552-9924},
   doi          = {10.1289/EHP7215},
   abstractNote = {Background: A critical challenge in genomic medicine is identifying the genetic and environmental risk factors for disease. Currently, the available data links a majority of known coding human genes to phenotypes, but the environmental component of human disease is extremely underrepresented in these linked data sets. Without environmental exposure information, our ability to realize precision health is limited, even with the promise of modern genomics. Achieving integration of gene, phenotype, and environment will require extensive translation of data into a standard, computable form and the extension of the existing gene/phenotype data model. The data standards and models needed to achieve this integration do not currently exist. Objectives: Our objective is to foster development of community-driven data-reporting standards and a computational model that will facilitate the inclusion of exposure data in computational analysis of human disease. To this end, we present a preliminary semantic data model and use cases and competency questions for further community-driven model development and refinement. Discussion: There is a real desire by the exposure science, epidemiology, and toxicology communities to use informatics approaches to improve their research workflow, gain new insights, and increase data reuse. Critical to success is the development of a community-driven data model for describing environmental exposures and linking them to existing models of human disease. https://doi.org/10.1289/EHP7215}
 }
 @article{davis_grondin_johnson_sciaky_wiegers_wiegers_mattingly_2021,
   author       = {Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and Wiegers, Jolene and Wiegers, Thomas C. and Mattingly, Carolyn J.},
   title        = {{Comparative Toxicogenomics Database} ({CTD}): update 2021},
   journal      = {Nucleic Acids Research},
   volume       = {49},
   number       = {D1},
   pages        = {D1138--D1143},
   year         = {2021},
   month        = jan,
   issn         = {1362-4962},
   doi          = {10.1093/nar/gkaa891},
   abstractNote = {Abstract The public Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is an innovative digital ecosystem that relates toxicological information for chemicals, genes, phenotypes, diseases, and exposures to advance understanding about human health. Literature-based, manually curated interactions are integrated to create a knowledgebase that harmonizes cross-species heterogeneous data for chemical exposures and their biological repercussions. In this biennial update, we report a 20% increase in CTD curated content and now provide 45 million toxicogenomic relationships for over 16 300 chemicals, 51 300 genes, 5500 phenotypes, 7200 diseases and 163 000 exposure events, from 600 comparative species. Furthermore, we increase the functionality of chemical–phenotype content with new data-tabs on CTD Disease pages (to help fill in knowledge gaps for environmental health) and new phenotype search parameters (for Batch Query and Venn analysis tools). As well, we introduce new CTD Anatomy pages that allow users to uniquely explore and analyze chemical–phenotype interactions from an anatomical perspective. Finally, we have enhanced CTD Chemical pages with new literature-based chemical synonyms (to improve querying) and added 1600 amino acid-based compounds (to increase chemical landscape). Together, these updates continue to augment CTD as a powerful resource for generating testable hypotheses about the etiologies and molecular mechanisms underlying environmentally influenced diseases.}
 }
 @article{davis_wiegers_grondin_johnson_sciaky_wiegers_mattingly_2020,
   author       = {Davis, Allan Peter and Wiegers, Thomas C. and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and Wiegers, Jolene and Mattingly, Carolyn J.},
   title        = {Leveraging the {Comparative Toxicogenomics Database} to Fill in Knowledge Gaps for Environmental Health: A Test Case for Air Pollution-induced Cardiovascular Disease},
   journal      = {Toxicological Sciences},
   volume       = {177},
   number       = {2},
   pages        = {392--404},
   year         = {2020},
   month        = oct,
   issn         = {1096-0929},
   doi          = {10.1093/toxsci/kfaa113},
   abstractNote = {Abstract Environmental health studies relate how exposures (eg, chemicals) affect human health and disease; however, in most cases, the molecular and biological mechanisms connecting an exposure with a disease remain unknown. To help fill in these knowledge gaps, we sought to leverage content from the public Comparative Toxicogenomics Database (CTD) to identify potential intermediary steps. In a proof-of-concept study, we systematically compute the genes, molecular mechanisms, and biological events for the environmental health association linking air pollution toxicants with 2 cardiovascular diseases (myocardial infarction and hypertension) as a test case. Our approach integrates 5 types of curated interactions in CTD to build sets of “CGPD-tetramers,” computationally constructed information blocks relating a Chemical- Gene interaction with a Phenotype and Disease. This bioinformatics strategy generates 653 CGPD-tetramers for air pollution-associated myocardial infarction (involving 5 pollutants, 58 genes, and 117 phenotypes) and 701 CGPD-tetramers for air pollution-associated hypertension (involving 3 pollutants, 96 genes, and 142 phenotypes). Collectively, we identify 19 genes and 96 phenotypes shared between these 2 air pollutant-induced outcomes, and suggest important roles for oxidative stress, inflammation, immune responses, cell death, and circulatory system processes. Moreover, CGPD-tetramers can be assembled into extensive chemical-induced disease pathways involving multiple gene products and sequential biological events, and many of these computed intermediary steps are validated in the literature. Our method does not require a priori knowledge of the toxicant, interacting gene, or biological system, and can be used to analyze any environmental chemical-induced disease curated within the public CTD framework. This bioinformatics strategy links and interrelates chemicals, genes, phenotypes, and diseases to fill in knowledge gaps for environmental health studies, as demonstrated for air pollution-associated cardiovascular disease, but can be adapted by researchers for any environmentally influenced disease-of-interest.}
 }
 @article{grondin_davis_wiegers_wiegers_mattingly_2018,
   author       = {Grondin, Cynthia J. and Davis, Allan Peter and Wiegers, Thomas C. and Wiegers, Jolene A. and Mattingly, Carolyn J.},
   title        = {Accessing an Expanded Exposure Science Module at the {Comparative Toxicogenomics Database}},
   journal      = {Environmental Health Perspectives},
   volume       = {126},
   number       = {1},
   year         = {2018},
   month        = jan,
   issn         = {1552-9924},
   doi          = {10.1289/ehp2873},
   abstractNote = {Summary: The Comparative Toxicogenomics Database (CTD; http://ctdbase.org) is a free resource that provides manually curated information on chemical, gene, phenotype, and disease relationships to advance understanding of the effect of environmental exposures on human health. Four core content areas are independently curated: chemical–gene interactions, chemical–disease and gene–disease associations, chemical–phenotype interactions, and environmental exposure data (e.g., effects of chemical stressors on humans). Since releasing exposure data in 2015, we have vastly increased our coverage of chemicals and disease/phenotype outcomes; greatly expanded access to exposure content; added search capability by stressors, cohorts, population demographics, and measured outcomes; and created user-specified displays of content. These enhancements aim to facilitate human studies by allowing comparisons among experimental parameters and across studies involving specified chemicals, populations, or outcomes. Integration of data among CTD’s four content areas and external data sets, such as Gene Ontology annotations and pathway information, links exposure data with over 1.8 million chemical–gene, chemical–disease and gene–disease interactions. Our analysis tools reveal direct and inferred relationships among the data and provide opportunities to generate predictive connections between environmental exposures and population-level health outcomes. https://doi.org/10.1289/EHP2873}
 }
 @article{davis_grondin_johnson_sciaky_mcmorran_wiegers_wiegers_mattingly_2019,
   author       = {Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and McMorran, Roy and Wiegers, Jolene and Wiegers, Thomas C. and Mattingly, Carolyn J.},
   title        = {The {Comparative Toxicogenomics Database}: update 2019},
   journal      = {Nucleic Acids Research},
   volume       = {47},
   number       = {D1},
   pages        = {D948--D954},
   year         = {2019},
   month        = jan,
   issn         = {1362-4962},
   doi          = {10.1093/nar/gky868},
   abstractNote = {Abstract The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is a premier public resource for literature-based, manually curated associations between chemicals, gene products, phenotypes, diseases, and environmental exposures. In this biennial update, we present our new chemical–phenotype module that codes chemical-induced effects on phenotypes, curated using controlled vocabularies for chemicals, phenotypes, taxa, and anatomical descriptors; this module provides unique opportunities to explore cellular and system-level phenotypes of the pre-disease state and allows users to construct predictive adverse outcome pathways (linking chemical–gene molecular initiating events with phenotypic key events, diseases, and population-level health outcomes). We also report a 46% increase in CTD manually curated content, which when integrated with other datasets yields more than 38 million toxicogenomic relationships. We describe new querying and display features for our enhanced chemical–exposure science module, providing greater scope of content and utility. As well, we discuss an updated MEDIC disease vocabulary with over 1700 new terms and accession identifiers. To accommodate these increases in data content and functionality, CTD has upgraded its computational infrastructure. These updates continue to improve CTD and help inform new testable hypotheses about the etiology and mechanisms underlying environmentally influenced diseases.}
 }
 @article{grondin_davis_wiegers_king_wiegers_reif_hoppin_mattingly_2016,
   author       = {Grondin, Cynthia J. and Davis, Allan Peter and Wiegers, Thomas C. and King, Benjamin L. and Wiegers, Jolene A. and Reif, David M. and Hoppin, Jane A. and Mattingly, Carolyn J.},
   title        = {Advancing Exposure Science through Chemical Data Curation and Integration in the {Comparative Toxicogenomics Database}},
   journal      = {Environmental Health Perspectives},
   volume       = {124},
   number       = {10},
   pages        = {1592--1599},
   year         = {2016},
   month        = oct,
   issn         = {0091-6765 1552-9924},
   doi          = {10.1289/ehp174},
   url          = {http://dx.doi.org/10.1289/EHP174},
   publisher    = {Environmental Health Perspectives},
   abstractNote = {Background: Exposure science studies the interactions and outcomes between environmental stressors and human or ecological receptors. To augment its role in understanding human health and the exposome, we aimed to centralize and integrate exposure science data into the broader biological framework of the Comparative Toxicogenomics Database (CTD), a public resource that promotes understanding of environmental chemicals and their effects on human health. Objectives: We integrated exposure data within the CTD to provide a centralized, freely available resource that facilitates identification of connections between real-world exposures, chemicals, genes/proteins, diseases, biological processes, and molecular pathways. Methods: We developed a manual curation paradigm that captures exposure data from the scientific literature using controlled vocabularies and free text within the context of four primary exposure concepts: stressor, receptor, exposure event, and exposure outcome. Using data from the Agricultural Health Study, we have illustrated the benefits of both centralization and integration of exposure information with CTD core data. Results: We have described our curation process, demonstrated how exposure data can be accessed and analyzed in the CTD, and shown how this integration provides a broad biological context for exposure data to promote mechanistic understanding of environmental influences on human health. Conclusions: Curation and integration of exposure data within the CTD provides researchers with new opportunities to correlate exposures with human health outcomes, to identify underlying potential molecular mechanisms, and to improve understanding about the exposome. Citation: Grondin CJ, Davis AP, Wiegers TC, King BL, Wiegers JA, Reif DM, Hoppin JA, Mattingly CJ. 2016. Advancing exposure science through chemical data curation and integration in the Comparative Toxicogenomics Database. Environ Health Perspect 124:1592–1599; http://dx.doi.org/10.1289/EHP174}
 }
 @article{davis_wiegers_king_wiegers_grondin_sciaky_johnson_mattingly_2016,
   author       = {Davis, Allan Peter and Wiegers, Thomas C. and King, Benjamin L. and Wiegers, Jolene and Grondin, Cynthia J. and Sciaky, Daniela and Johnson, Robin J. and Mattingly, Carolyn J.},
   title        = {Generating {Gene Ontology}-Disease Inferences to Explore Mechanisms of Human Disease at the {Comparative Toxicogenomics Database}},
   journal      = {PLOS ONE},
   volume       = {11},
   number       = {5},
   year         = {2016},
   month        = may,
   issn         = {1932-6203},
   doi          = {10.1371/journal.pone.0155530},
   abstractNote = {Strategies for discovering common molecular events among disparate diseases hold promise for improving understanding of disease etiology and expanding treatment options. One technique is to leverage curated datasets found in the public domain. The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) manually curates chemical-gene, chemical-disease, and gene-disease interactions from the scientific literature. The use of official gene symbols in CTD interactions enables this information to be combined with the Gene Ontology (GO) file from NCBI Gene. By integrating these GO-gene annotations with CTD’s gene-disease dataset, we produce 753,000 inferences between 15,700 GO terms and 4,200 diseases, providing opportunities to explore presumptive molecular underpinnings of diseases and identify biological similarities. Through a variety of applications, we demonstrate the utility of this novel resource. As a proof-of-concept, we first analyze known repositioned drugs (e.g., raloxifene and sildenafil) and see that their target diseases have a greater degree of similarity when comparing GO terms vs. genes. Next, a computational analysis predicts seemingly non-intuitive diseases (e.g., stomach ulcers and atherosclerosis) as being similar to bipolar disorder, and these are validated in the literature as reported co-diseases. Additionally, we leverage other CTD content to develop testable hypotheses about thalidomide-gene networks to treat seemingly disparate diseases. Finally, we illustrate how CTD tools can rank a series of drugs as potential candidates for repositioning against B-cell chronic lymphocytic leukemia and predict cisplatin and the small molecule inhibitor JQ1 as lead compounds. The CTD dataset is freely available for users to navigate pathologies within the context of extensive biological processes, molecular functions, and cellular components conferred by GO. This inference set should aid researchers, bioinformaticists, and pharmaceutical drug makers in finding commonalities in disease mechanisms, which in turn could help identify new therapeutics, new indications for existing pharmaceuticals, potential disease comorbidities, and alerts for side effects.}
 }
 @article{davis_grondin_johnson_sciaky_king_mcmorran_wiegers_wiegers_mattingly_2017,
   author       = {Davis, Allan Peter and Grondin, Cynthia J. and Johnson, Robin J. and Sciaky, Daniela and King, Benjamin L. and McMorran, Roy and Wiegers, Jolene and Wiegers, Thomas C. and Mattingly, Carolyn J.},
   title        = {The {Comparative Toxicogenomics Database}: update 2017},
   journal      = {Nucleic Acids Research},
   volume       = {45},
   number       = {D1},
   pages        = {D972--D978},
   year         = {2017},
   month        = jan,
   issn         = {1362-4962},
   doi          = {10.1093/nar/gkw838},
   abstractNote = {The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) provides information about interactions between chemicals and gene products, and their relationships to diseases. Core CTD content (chemical-gene, chemical-disease and gene-disease interactions manually curated from the literature) are integrated with each other as well as with select external datasets to generate expanded networks and predict novel associations. Today, core CTD includes more than 30.5 million toxicogenomic connections relating chemicals/drugs, genes/proteins, diseases, taxa, Gene Ontology (GO) annotations, pathways, and gene interaction modules. In this update, we report a 33% increase in our core data content since 2015, describe our new exposure module (that harmonizes exposure science information with core toxicogenomic data) and introduce a novel dataset of GO-disease inferences (that identify common molecular underpinnings for seemingly unrelated pathologies). These advancements centralize and contextualize real-world chemical exposures with molecular pathways to help scientists generate testable hypotheses in an effort to understand the etiology and mechanisms underlying environmentally influenced diseases.}
 }
 @article{davis_grondin_lennon-hopkins_saraceni-richards_sciaky_king_wiegers_mattingly_2015,
   author       = {Davis, Allan Peter and Grondin, Cynthia J. and Lennon-Hopkins, Kelley and Saraceni-Richards, Cynthia and Sciaky, Daniela and King, Benjamin L. and Wiegers, Thomas C. and Mattingly, Carolyn J.},
   title        = {The {Comparative Toxicogenomics Database}'s 10th year anniversary: update 2015},
   journal      = {Nucleic Acids Research},
   volume       = {43},
   number       = {D1},
   pages        = {D914--D920},
   year         = {2015},
   month        = jan,
   issn         = {1362-4962},
   doi          = {10.1093/nar/gku935},
   abstractNote = {Ten years ago, the Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) was developed out of a need to formalize, harmonize and centralize the information on numerous genes and proteins responding to environmental toxic agents across diverse species. CTD's initial approach was to facilitate comparisons of nucleotide and protein sequences of toxicologically significant genes by curating these sequences and electronically annotating them with chemical terms from their associated references. Since then, however, CTD has vastly expanded its scope to robustly represent a triad of chemical–gene, chemical–disease and gene–disease interactions that are manually curated from the scientific literature by professional biocurators using controlled vocabularies, ontologies and structured notation. Today, CTD includes 24 million toxicogenomic connections relating chemicals/drugs, genes/proteins, diseases, taxa, phenotypes, Gene Ontology annotations, pathways and interaction modules. In this 10th year anniversary update, we outline the evolution of CTD, including our increased data content, new ‘Pathway View’ visualization tool, enhanced curation practices, pilot chemical–phenotype results and impending exposure data set. The prototype database originally described in our first report has transformed into a sophisticated resource used actively today to help scientists develop and test hypotheses about the etiologies of environmentally influenced diseases.}
 }
 @article{davis_wiegers_johnson_lay_lennon-hopkins_saraceni-richards_sciaky_murphy_mattingly_2013,
   author       = {Davis, Allan Peter and Wiegers, Thomas C. and Johnson, Robin J. and Lay, Jean M. and Lennon-Hopkins, Kelley and Saraceni-Richards, Cynthia and Sciaky, Daniela and Murphy, Cynthia Grondin and Mattingly, Carolyn J.},
   title        = {Text Mining Effectively Scores and Ranks the Literature for Improving Chemical-Gene-Disease Curation at the {Comparative Toxicogenomics Database}},
   journal      = {PLOS ONE},
   volume       = {8},
   number       = {4},
   year         = {2013},
   month        = apr,
   issn         = {1932-6203},
   doi          = {10.1371/journal.pone.0058201},
   abstractNote = {The Comparative Toxicogenomics Database (CTD; http://ctdbase.org/) is a public resource that curates interactions between environmental chemicals and gene products, and their relationships to diseases, as a means of understanding the effects of environmental chemicals on human health. CTD provides a triad of core information in the form of chemical-gene, chemical-disease, and gene-disease interactions that are manually curated from scientific articles. To increase the efficiency, productivity, and data coverage of manual curation, we have leveraged text mining to help rank and prioritize the triaged literature. Here, we describe our text-mining process that computes and assigns each article a document relevancy score (DRS), wherein a high DRS suggests that an article is more likely to be relevant for curation at CTD. We evaluated our process by first text mining a corpus of 14,904 articles triaged for seven heavy metals (cadmium, cobalt, copper, lead, manganese, mercury, and nickel). Based upon initial analysis, a representative subset corpus of 3,583 articles was then selected from the 14,094 articles and sent to five CTD biocurators for review. The resulting curation of these 3,583 articles was analyzed for a variety of parameters, including article relevancy, novel data content, interaction yield rate, mean average precision, and biological and toxicological interpretability. We show that for all measured parameters, the DRS is an effective indicator for scoring and improving the ranking of literature for the curation of chemical-gene-disease information at CTD. Here, we demonstrate how fully incorporating text mining-based DRS scoring into our curation pipeline enhances manual curation by prioritizing more relevant articles, thereby increasing data content, productivity, and efficiency.}
 }