% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. "et_al") together with the citing documents.
@article{muratov_amaro_andrade_brown_ekins_fourches_isayev_kozakov_medina-franco_merz_et al._2021,
  author       = {Muratov, Eugene N. and Amaro, Rommie and Andrade, Carolina H. and Brown, Nathan and Ekins, Sean and Fourches, Denis and Isayev, Olexandr and Kozakov, Dima and Medina-Franco, Jose L. and Merz, Kenneth M. and others},
  title        = {A critical overview of computational approaches employed for {COVID-19} drug discovery},
  journal      = {Chemical Society Reviews},
  year         = {2021},
  month        = jul,
  issn         = {1460-4744},
  doi          = {10.1039/d0cs01065k},
  abstractNote = {We cover diverse methodologies, computational approaches, and case studies illustrating the ongoing efforts to develop viable drug candidates for treatment of COVID-19.},
}
% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. "et_al") together with the citing documents.
@article{mansouri_karmaus_fitzpatrick_patlewicz_pradeep_alberga_alepee_allen_allen_alves_et al._2021,
  author       = {Mansouri, Kamel and Karmaus, Agnes L. and Fitzpatrick, Jeremy and Patlewicz, Grace and Pradeep, Prachi and Alberga, Domenico and Alepee, Nathalie and Allen, Timothy E. H. and Allen, Dave and Alves, Vinicius M. and others},
  title        = {{CATMoS}: Collaborative Acute Toxicity Modeling Suite},
  journal      = {Environmental Health Perspectives},
  volume       = {129},
  number       = {4},
  year         = {2021},
  month        = apr,
  issn         = {1552-9924},
  doi          = {10.1289/EHP8495},
  abstractNote = {Humans are exposed to tens of thousands of chemical substances that need to be assessed for their potential toxicity. Acute systemic toxicity testing serves as the basis for regulatory hazard classification, labeling, and risk management. However, it is cost- and time-prohibitive to evaluate all new and existing chemicals using traditional rodent acute toxicity tests. In silico models built using existing data facilitate rapid acute toxicity predictions without using animals. The U.S. Interagency Coordinating Committee on the Validation of Alternative Methods (ICCVAM) Acute Toxicity Workgroup organized an international collaboration to develop in silico models for predicting acute oral toxicity based on five different end points: Lethal Dose 50 (LD50 value, U.S. Environmental Protection Agency hazard (four) categories, Globally Harmonized System for Classification and Labeling hazard (five) categories, very toxic chemicals [LD50 (LD50≤50mg/kg)], and nontoxic chemicals (LD50>2,000mg/kg). An acute oral toxicity data inventory for 11,992 chemicals was compiled, split into training and evaluation sets, and made available to 35 participating international research groups that submitted a total of 139 predictive models. Predictions that fell within the applicability domains of the submitted models were evaluated using external validation sets. These were then combined into consensus models to leverage strengths of individual approaches. The resulting consensus predictions, which leverage the collective strengths of each individual model, form the Collaborative Acute Toxicity Modeling Suite (CATMoS). CATMoS demonstrated high performance in terms of accuracy and robustness when compared with in vivo results. CATMoS is being evaluated by regulatory agencies for its utility and applicability as a potential replacement for in vivo rat acute oral toxicity studies. CATMoS predictions for more than 800,000 chemicals have been made available via the National Toxicology Program's Integrated Chemical Environment tools and data sets (ice.ntp.niehs.nih.gov). The models are also implemented in a free, standalone, open-source tool, OPERA, which allows predictions of new and untested chemicals to be made. https://doi.org/10.1289/EHP8495.},
}
@article{borrel_melander_fourches_2021,
  author       = {Borrel, Alexandre and Melander, Christian and Fourches, Denis},
  title        = {Cheminformatics Analysis of Fluoroquinolones and Their Inhibition Potency Against Four Pathogens},
  journal      = {Molecular Informatics},
  volume       = {40},
  number       = {5},
  year         = {2021},
  month        = may,
  issn         = {1868-1751},
  doi          = {10.1002/minf.202000215},
  abstractNote = {Drug-resistant bacteria are a worldwide public health concern. As the prevalence of multi-drug resistant pathogens outpaces the discovery of new antibacterials, it is of importance to explore the structure-activity relationships for series of known bactericides with proven scaffolds. Herein, we assembled a set of 507 fluoroquinolone analogues all experimentally tested for their inhibition potency against four pathogens: Escherichia coli, Pseudomonas aeruginosa, Staphylococcus aureus, and Streptococcus pneumoniae. We relied on cheminformatics techniques to characterize and cluster them based on their structural similarity and analyzed the structure-activity relationships identified for each cluster of fluoroquinolones. Then, we utilized machine learning techniques to develop and validate predictive QSAR models for computing the inhibition potencies (pMIC) of analogues for each pathogen. These QSAR models afforded reasonable external prediction performances (R2≥0.6, MAE∼0.4). This study confirmed that (i) there are both global and local inter-pathogen concordance regarding the antibacterial potency of fluoroquinolones, (ii) small clusters of fluoroquinolone analogues are characterized by unique patterns of strain selectivity and potency, the latter being potentially useful to design new analogues with enhanced potency and/or selectivity towards a given pathogen, and (iii) robust QSAR models were obtained allowing for future design of new bioactive fluoroquinolones.},
}
@article{li_fourches_2021,
  author       = {Li, Xinhao and Fourches, Denis},
  title        = {{SMILES} Pair Encoding: A Data-Driven Substructure Tokenization Algorithm for Deep Learning},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {61},
  number       = {4},
  pages        = {1560--1569},
  year         = {2021},
  month        = apr,
  issn         = {1549-960X},
  doi          = {10.1021/acs.jcim.0c01127},
  abstractNote = {Simplified molecular input line entry system (SMILES)-based deep learning models are slowly emerging as an important research topic in cheminformatics. In this study, we introduce SMILES pair encoding (SPE), a data-driven tokenization algorithm. SPE first learns a vocabulary of high-frequency SMILES substrings from a large chemical dataset (e.g., ChEMBL) and then tokenizes SMILES based on the learned vocabulary for the actual training of deep learning models. SPE augments the widely used atom-level tokenization by adding human-readable and chemically explainable SMILES substrings as tokens. Case studies show that SPE can achieve superior performances on both molecular generation and quantitative structure–activity relationship (QSAR) prediction tasks. In particular, the SPE-based generative models outperformed the atom-level tokenization model in the aspects of novelty, diversity, and ability to resemble the training set distribution. The performance of SPE-based QSAR prediction models were evaluated using 24 benchmark datasets where SPE consistently either did match or outperform atom-level and k-mer tokenization. Therefore, SPE could be a promising tokenization method for SMILES-based deep learning models. An open-source Python package SmilesPE was developed to implement this algorithm and is now freely available at https://github.com/XinhaoLi74/SmilesPE.},
}
@article{takeda_ikenaka_fourches_tanaka_nakayama_triki_li_igarashi_tanikawa_ishizuka_2021,
  author       = {Takeda, Kazuki and Ikenaka, Yoshinori and Fourches, Denis and Tanaka, Kazuyuki D. and Nakayama, Shouta M. M. and Triki, Dhoha and Li, Xinhao and Igarashi, Manabu and Tanikawa, Tsutomu and Ishizuka, Mayumi},
  title        = {The {VKORC1} {ER-luminal} loop mutation ({Leu76Pro}) leads to a significant resistance to warfarin in black rats ({Rattus} rattus)},
  journal      = {Pesticide Biochemistry and Physiology},
  volume       = {173},
  year         = {2021},
  month        = mar,
  issn         = {1095-9939},
  doi          = {10.1016/j.pestbp.2021.104774},
  abstractNote = {Well-known 4-hydroxycoumarin derivatives, such as warfarin, act as inhibitors of the vitamin K epoxide reductase (VKOR) and are used as anticoagulants. Mutations of the VKOR enzyme can lead to resistance to those compounds. This has been a problem in using them as medicine or rodenticide. Most of these mutations lie in the vicinity of potential warfarin-binding sites within the ER-luminal loop structure (Lys30, Phe55) and the transmembrane helix (Tyr138). However, a VKOR mutation found in Tokyo in warfarin-resistant rats does not follow that pattern (Leu76Pro), and its effect on VKOR function and structure remains unclear. We conducted both in vitro kinetic analyses and in silico docking studies to characterize the VKOR mutant. On the one hand, resistant rats (R-rats) showed a 37.5-fold increased IC50 value to warfarin when compared to susceptible rats (S-rats); on the other hand, R-rats showed a 16.5-fold lower basal VKOR activity (Vmax/Km). Docking calculations exhibited that the mutated VKOR of R-rats has a decreased affinity for warfarin. Molecular dynamics simulations further revealed that VKOR-associated warfarin was more exposed to solvents in R-rats and key interactions between Lys30, Phe55, and warfarin were less favored. This study concludes that a single mutation of VKOR at position 76 leads to a significant resistance to warfarin by modifying the types and numbers of intermolecular interactions between the two.},
}
@article{zin_borrel_fourches_2020,
  author       = {Zin, Phyo Phyo Kyaw and Borrel, Alexandre and Fourches, Denis},
  title        = {Benchmarking {2D/3D/MD-QSAR} Models for Imatinib Derivatives: How Far Can We Predict?},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {60},
  number       = {7},
  pages        = {3342--3360},
  year         = {2020},
  month        = jul,
  issn         = {1549-960X},
  doi          = {10.1021/acs.jcim.0c00200},
  abstractNote = {Imatinib, a 2-phenylaminopyridine-based BCR-ABL tyrosine kinase inhibitor, is a highly effective drug for treating Chronic Myeloid Leukemia (CML). However, cases of drug resistance are constantly e...},
}
% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. "et_al") together with the citing documents.
@article{mansouri_kleinstreuer_abdelaziz_alberga_alves_andersson_andrade_bai_balabin_ballabio_et al._2020,
  author       = {Mansouri, Kamel and Kleinstreuer, Nicole and Abdelaziz, Ahmed M. and Alberga, Domenico and Alves, Vinicius M. and Andersson, Patrik L. and Andrade, Carolina H. and Bai, Fang and Balabin, Ilya and Ballabio, Davide and others},
  title        = {{CoMPARA}: Collaborative Modeling Project for Androgen Receptor Activity},
  journal      = {Environmental Health Perspectives},
  volume       = {128},
  number       = {2},
  year         = {2020},
  month        = feb,
  issn         = {1552-9924},
  doi          = {10.1289/EHP5580},
  abstractNote = {Endocrine disrupting chemicals (EDCs) are xenobiotics that mimic the interaction of natural hormones and alter synthesis, transport, or metabolic pathways. The prospect of EDCs causing adverse health effects in humans and wildlife has led to the development of scientific and regulatory approaches for evaluating bioactivity. This need is being addressed using high-throughput screening (HTS) in vitro approaches and computational modeling. In support of the Endocrine Disruptor Screening Program, the U.S. Environmental Protection Agency (EPA) led two worldwide consortiums to virtually screen chemicals for their potential estrogenic and androgenic activities. Here, we describe the Collaborative Modeling Project for Androgen Receptor Activity (CoMPARA) efforts, which follows the steps of the Collaborative Estrogen Receptor Activity Prediction Project (CERAPP). The CoMPARA list of screened chemicals built on CERAPP's list of 32,464 chemicals to include additional chemicals of interest, as well as simulated ToxCast™ metabolites, totaling 55,450 chemical structures. Computational toxicology scientists from 25 international groups contributed 91 predictive models for binding, agonist, and antagonist activity predictions. Models were underpinned by a common training set of 1,746 chemicals compiled from a combined data set of 11 ToxCast™/Tox21 HTS in vitro assays. The resulting models were evaluated using curated literature data extracted from different sources. To overcome the limitations of single-model approaches, CoMPARA predictions were combined into consensus models that provided averaged predictive accuracy of approximately 80% for the evaluation set. The strengths and limitations of the consensus predictions were discussed with example chemicals; then, the models were implemented into the free and open-source OPERA application to enable screening of new chemicals with a defined applicability domain and accuracy assessment. This implementation was used to screen the entire EPA DSSTox database of ∼875,000 chemicals, and their predicted AR activities have been made available on the EPA CompTox Chemicals dashboard and National Toxicology Program's Integrated Chemical Environment. https://doi.org/10.1289/EHP5580.},
}
@article{li_kleinstreuer_fourches_2020,
  author       = {Li, Xinhao and Kleinstreuer, Nicole C. and Fourches, Denis},
  title        = {Hierarchical Quantitative Structure-Activity Relationship Modeling Approach for Integrating Binary, Multiclass, and Regression Models of Acute Oral Systemic Toxicity},
  journal      = {Chemical Research in Toxicology},
  volume       = {33},
  number       = {2},
  pages        = {353--366},
  year         = {2020},
  month        = feb,
  issn         = {1520-5010},
  doi          = {10.1021/acs.chemrestox.9b00259},
  abstractNote = {Reliable in silico approaches to replace animal testing for the evaluation of potential acute toxic effects are highly demanded by regulatory agencies. In particular, quantitative structure–activit...},
}
@article{reich_guan_fourches_warren_sarnat_chang_2020,
  author       = {Reich, Brian J. and Guan, Yawen and Fourches, Denis and Warren, Joshua L. and Sarnat, Stefanie E. and Chang, Howard H.},
  title        = {Integrative Statistical Methods for Exposure Mixtures and Health},
  journal      = {Annals of Applied Statistics},
  volume       = {14},
  number       = {4},
  pages        = {1945--1963},
  year         = {2020},
  month        = dec,
  issn         = {1941-7330},
  doi          = {10.1214/20-AOAS1364},
  abstractNote = {Humans are concurrently exposed to chemically, structurally and toxicologically diverse chemicals. A critical challenge for environmental epidemiology is to quantify the risk of adverse health outcomes resulting from exposures to such chemical mixtures and to identify which mixture constituents may be driving etiologic associations. A variety of statistical methods have been proposed to address these critical research questions. However, they generally rely solely on measured exposure and health data available within a specific study. Advancements in understanding of the role of mixtures on human health impacts may be better achieved through the utilization of external data and knowledge from multiple disciplines with innovative statistical tools. In this paper we develop new methods for health analyses that incorporate auxiliary information about the chemicals in a mixture, such as physicochemical, structural and/or toxicological data. We expect that the constituents identified using auxiliary information will be more biologically meaningful than those identified by methods that solely utilize observed correlations between measured exposure. We develop flexible Bayesian models by specifying prior distributions for the exposures and their effects that include auxiliary information and examine this idea over a spectrum of analyses from regression to factor analysis. The methods are applied to study the effects of volatile organic compounds on emergency room visits in Atlanta. We find that including cheminformatic information about the exposure variables improves prediction and provides a more interpretable model for emergency room visits for respiratory diseases.},
}
@article{cools_triki_geerts_delputte_fourches_cos_2020,
  author       = {Cools, Freya and Triki, Dhoha and Geerts, Nele and Delputte, Peter and Fourches, Denis and Cos, Paul},
  title        = {In vitro and in vivo Evaluation of in silico Predicted Pneumococcal {UDPG:PP} Inhibitors},
  journal      = {Frontiers in Microbiology},
  volume       = {11},
  year         = {2020},
  month        = jul,
  issn         = {1664-302X},
  doi          = {10.3389/fmicb.2020.01596},
  abstractNote = {Pneumonia, of which Streptococcus pneumoniae is the most common causative agent, is considered one of the three top leading causes of death worldwide. As seen in other bacterial species, antimicrobial resistance is on the rise for this pathogen. Therefore, there is a pressing need for novel antimicrobial strategies to combat these infections. Recently, uridine diphosphate glucose pyrophosphorylase (UDPG:PP) has been put forward as a potential drug target worth investigating. Moreover, earlier research demonstrated that streptococci lacking a functional galU gene (encoding for UDPG:PP) were characterized by significantly reduced in vitro and in vivo virulence. Therefore, in this study we evaluated the anti-virulence activity of potential UDPG:PP inhibitors. They were selected in silico using a tailor-made streptococcal homology model, based on earlier listerial research. While the compounds didn't affect bacterial growth, nor affected in vitro adhesion to and phagocytosis in macrophages, the amount of polysaccharide capsule was significantly reduced after co-incubation with these inhibitors. Moreover, co-incubation proved to have a positive effect on survival in an in vivo Galleria mellonella larval infection model. Therefore, rather than targeting bacterial survival directly, these compounds proved to have an effect on streptococcal virulence by lowering the amount of polysaccharide and thereby probably boosting recognition of this pathogen by the innate immune system. While the compounds need adaptation to broaden their activity to more streptococcal strains rather than being strain-specific, this study consolidates UDPG:PP as a potential novel drug target.},
}
@article{li_fourches_2020,
  author       = {Li, Xinhao and Fourches, Denis},
  title        = {Inductive transfer learning for molecular activity prediction: {Next-Gen} {QSAR} Models with {MolPMoFiT}},
  journal      = {Journal of Cheminformatics},
  volume       = {12},
  number       = {1},
  year         = {2020},
  month        = apr,
  issn         = {1758-2946},
  doi          = {10.1186/s13321-020-00430-x},
  abstractNote = {Deep neural networks can directly learn from chemical structures without extensive, user-driven selection of descriptors in order to predict molecular properties/activities with high reliability. But these approaches typically require large training sets to learn the endpoint-specific structural features and ensure reasonable prediction accuracy. Even though large datasets are becoming the new normal in drug discovery, especially when it comes to high-throughput screening or metabolomics datasets, one should also consider smaller datasets with challenging endpoints to model and forecast. Thus, it would be highly relevant to better utilize the tremendous compendium of unlabeled compounds from publicly-available datasets for improving the model performances for the user’s particular series of compounds. In this study, we propose the Molecular Prediction Model Fine-Tuning (MolPMoFiT) approach, an effective transfer learning method based on self-supervised pre-training + task-specific fine-tuning for QSPR/QSAR modeling. A large-scale molecular structure prediction model is pre-trained using one million unlabeled molecules from ChEMBL in a self-supervised learning manner, and can then be fine-tuned on various QSPR/QSAR tasks for smaller chemical datasets with specific endpoints. Herein, the method is evaluated on four benchmark datasets (lipophilicity, FreeSolv, HIV, and blood–brain barrier penetration). The results showed the method can achieve strong performances for all four datasets compared to other state-of-the-art machine learning modeling techniques reported in the literature so far.},
}
@article{reese_xiao_shanahan_driessche_fourches_carbonell_hall_menegatti_2020,
  author       = {Reese, Hannah R. and Xiao, Xingqing and Shanahan, Calvin C. and Driessche, George A. and Fourches, Denis and Carbonell, Ruben G. and Hall, Carol K. and Menegatti, Stefano},
  title        = {Novel peptide ligands for antibody purification provide superior clearance of host cell protein impurities},
  journal      = {Journal of Chromatography A},
  volume       = {1625},
  year         = {2020},
  month        = aug,
  issn         = {1873-3778},
  doi          = {10.1016/j.chroma.2020.461237},
  abstractNote = {The quest for ligands alternative to Protein A for the purification of monoclonal antibodies (mAbs) has been pursued for almost three decades. Yet, the IgG-binding peptides known to date still fall short of the host cell protein (HCP) logarithmic removal value (LRV) set by Protein A media (2.5-3.1). In this study, we present an integrated computational-experimental approach leading to the discovery of peptide ligands that provide HCP LRVs on par with Protein A. First, the screening of 60,000 peptide variants was performed using a high-throughput search algorithm to identify sequences that ensure IgG affinity binding. Select sequences WQRHGI, MWRGWQ, RHLGWF, and GWLHQR were then negatively screened in silico against a panel of model HCPs to ensure the selection of peptides with high binding selectivity. Candidate ligands WQRHGI and MWRGWQ were conjugated to chromatographic resins and characterized by isothermal binding and breakthrough assays to quantify static and dynamic binding capacity (Qmax and DBC10%), respectively. The resulting Qmax were 52.6 mg of IgG per mL of adsorbent for WQRHGI and 57.48 mg/mL for MWRGWQ, while the DBC10% (2 minutes residence time) were 30.1 mg/mL for WQRHGI and 36.4 mg/mL for MWRGWQ. Evaluation of the peptides by isothermal titration calorimetry (ITC) confirmed the binding energy predicted in silico, and an amino acid scanning study corroborated the affinity-like binding activity of the peptides. WQRHGI-WorkBeads resin was finally characterized by purification of a monoclonal antibody from a Chinese Hamster Ovary (CHO) cell culture harvest, affording a remarkable HCP LRV of 2.7, and consistent product yield and purity over 100 chromatographic cycles. These results demonstrate the potential of WQRHGI as an effective alternative to Protein A for antibody purification.},
}
% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. "et_al") together with the citing documents.
@article{day_schneible_young_pozdin_driessche_gaffney_prodromou_freytes_fourches_daniele_et al._2020,
  author       = {Day, Kevin and Schneible, John D. and Young, Ashlyn T. and Pozdin, Vladimir A. and Driessche, George and Gaffney, Lewis A. and Prodromou, Raphael and Freytes, Donald O. and Fourches, Denis and Daniele, Michael and others},
  title        = {Photoinduced reconfiguration to control the protein-binding affinity of azobenzene-cyclized peptides},
  journal      = {Journal of Materials Chemistry B},
  volume       = {8},
  number       = {33},
  pages        = {7413--7427},
  year         = {2020},
  month        = sep,
  issn         = {2050-7518},
  doi          = {10.1039/d0tb01189d},
  abstractNote = {Light-controlled switching of cell-binding activity of fluorescently-labeled peptides for on-demand cell labeling.},
}
% NOTE(review): the citation key below contains a space ("et al."), which classic BibTeX does not accept, and the same key is reused by the following correction-notice entry (DOI 10.1039/d0cs90041a). Key kept as-is so existing citations are not changed here; rename and disambiguate together with the citing documents.
@article{muratov_bajorath_sheridan_tetko_filimonov_poroikov_oprea_baskin_varnek_roitberg_et al._2020,
  author       = {Muratov, Eugene N. and Bajorath, Juergen and Sheridan, Robert P. and Tetko, Igor V. and Filimonov, Dmitry and Poroikov, Vladimir and Oprea, Tudor I. and Baskin, Igor I. and Varnek, Alexandre and Roitberg, Adrian and others},
  title        = {{QSAR} without borders},
  journal      = {Chemical Society Reviews},
  volume       = {49},
  number       = {11},
  pages        = {3525--3564},
  year         = {2020},
  month        = jun,
  issn         = {1460-4744},
  doi          = {10.1039/d0cs00098a},
  abstractNote = {Word cloud summary of diverse topics associated with QSAR modeling that are discussed in this review.},
}
% NOTE(review): the citation key below contains a space ("et al."), which classic BibTeX does not accept, and it is identical to the key of the preceding "QSAR without borders" entry (DOI 10.1039/d0cs00098a), so the two cannot both be cited. Key kept as-is so existing citations are not changed here; rename and disambiguate together with the citing documents.
@article{muratov_bajorath_sheridan_tetko_filimonov_poroikov_oprea_baskin_varnek_roitberg_et al._2020,
  author       = {Muratov, Eugene N. and Bajorath, Juergen and Sheridan, Robert P. and Tetko, Igor V. and Filimonov, Dmitry and Poroikov, Vladimir and Oprea, Tudor I. and Baskin, Igor I. and Varnek, Alexandre and Roitberg, Adrian and others},
  title        = {{QSAR} without borders (vol 10, pg 531, 2020)},
  journal      = {Chemical Society Reviews},
  volume       = {49},
  number       = {11},
  pages        = {3716},
  year         = {2020},
  month        = jun,
  issn         = {1460-4744},
  doi          = {10.1039/d0cs90041a},
  abstractNote = {Correction for ‘QSAR without borders’ by Eugene N. Muratov et al., Chem. Soc. Rev., 2020, DOI: 10.1039/d0cs00098a.},
}
@article{zin_williams_fourches_2020,
  author       = {Zin, Phyo Phyo Kyaw and Williams, Gavin and Fourches, Denis},
  title        = {{SIME}: synthetic insight-based macrolide enumerator to generate the {V1B} library of 1 billion macrolides},
  journal      = {Journal of Cheminformatics},
  volume       = {12},
  number       = {1},
  publisher    = {Springer Science and Business Media LLC},
  year         = {2020},
  month        = apr,
  issn         = {1758-2946},
  doi          = {10.1186/s13321-020-00427-6},
  url          = {http://dx.doi.org/10.1186/s13321-020-00427-6},
  abstractNote = {We report on a new cheminformatics enumeration technology—SIME, synthetic insight-based macrolide enumerator—a new and improved software technology. SIME can enumerate fully assembled macrolides with synthetic feasibility by utilizing the constitutional and structural knowledge extracted from biosynthetic aspects of macrolides. Taken into account by the software are key information such as positions in macrolide structures at which chemical components can be inserted, and the types of structural motifs and sugars of interest that can be synthesized and incorporated at those positions. Additionally, we report on the chemical distribution analysis of the newly SIME-generated V1B (virtual 1 billion) library of macrolides. Those compounds were built based on the core of the Erythromycin structure, 13 structural motifs and a library of sugars derived from eighteen bioactive macrolides. This new enumeration technology can be coupled with cheminformatics approaches such as QSAR modeling and molecular docking to aid in drug discovery for rational designing of next generation macrolide therapeutics with desirable pharmacokinetic properties.},
}
@article{odenkirk_zin_ash_reif_fourches_baker_2020,
  author       = {Odenkirk, Melanie T. and Zin, Phyo Phyo K. and Ash, Jeremy R. and Reif, David M. and Fourches, Denis and Baker, Erin S.},
  title        = {Structural-based connectivity and omic phenotype evaluations ({SCOPE}): a cheminformatics toolbox for investigating lipidomic changes in complex systems},
  journal      = {Analyst},
  volume       = {145},
  number       = {22},
  pages        = {7197--7209},
  year         = {2020},
  month        = nov,
  issn         = {1364-5528},
  doi          = {10.1039/d0an01638a},
  abstractNote = {SCOPE is a toolbox for expanding upon lipid data interpretation capabilities. Herein we utilize SCOPE to explore how lipid structure, biological connections and metadata linkages contribute to the results observed from lipidomic experiments.},
}
% NOTE(review): the citation key below contains a space ("la merrill"); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. "la_merrill") together with the citing documents.
@article{singam_tachachartvanich_fourches_soshilov_hsieh_la merrill_smith_durkin_2020,
  author       = {Singam, Ettayapuram Ramaprasad Azhagiya and Tachachartvanich, Phum and Fourches, Denis and Soshilov, Anatoly and Hsieh, Jennifer C. Y. and La Merrill, Michele A. and Smith, Martyn T. and Durkin, Kathleen A.},
  title        = {Structure-based virtual screening of perfluoroalkyl and polyfluoroalkyl substances ({PFASs}) as endocrine disruptors of androgen receptor activity using molecular docking and machine learning},
  journal      = {Environmental Research},
  volume       = {190},
  year         = {2020},
  month        = nov,
  issn         = {1096-0953},
  doi          = {10.1016/j.envres.2020.109920},
  abstractNote = {Perfluoroalkyl and polyfluoroalkyl substances (PFASs) pose a substantial threat as endocrine disruptors, and thus early identification of those that may interact with steroid hormone receptors, such as the androgen receptor (AR), is critical. In this study we screened 5,206 PFASs from the CompTox database against the different binding sites on the AR using both molecular docking and machine learning techniques. We developed support vector machine models trained on Tox21 data to classify the active and inactive PFASs for AR using different chemical fingerprints as features. The maximum accuracy was 95.01% and Matthew's correlation coefficient (MCC) was 0.76 respectively, based on MACCS fingerprints (MACCSFP). The combination of docking-based screening and machine learning models identified 29 PFASs that have strong potential for activity against the AR and should be considered priority chemicals for biological toxicity testing.},
}
% NOTE(review): the citation key below contains a space ("et al."); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. "et_al") together with the citing documents.
@article{odenkirk_stratton_gritsenko_bramer_webb-robertson_bloodsworth_weitz_lipton_monroe_ash_et al._2020,
  author       = {Odenkirk, Melanie T. and Stratton, Kelly G. and Gritsenko, Marina A. and Bramer, Lisa M. and Webb-Robertson, Bobbie-Jo M. and Bloodsworth, Kent J. and Weitz, Karl K. and Lipton, Anna K. and Monroe, Matthew E. and Ash, Jeremy R. and others},
  title        = {Unveiling molecular signatures of preeclampsia and gestational diabetes mellitus with multi-omics and innovative cheminformatics visualization tools},
  journal      = {Molecular Omics},
  volume       = {16},
  number       = {6},
  year         = {2020},
  month        = dec,
  issn         = {2515-4184},
  doi          = {10.1039/d0mo00074d},
  abstractNote = {To fully enable the development of diagnostic tools and progressive pharmaceutical drugs, it is imperative to understand the molecular changes occurring before and during disease onset and progression. Systems biology assessments utilizing multi-omic analyses (e.g. the combination of proteomics, lipidomics, genomics, etc.) have shown enormous value in determining molecules prevalent in diseases and their associated mechanisms. Herein, we utilized multi-omic evaluations, multi-dimensional analysis methods, and new cheminformatics-based visualization tools to provide an in depth understanding of the molecular changes taking place in preeclampsia (PRE) and gestational diabetes mellitus (GDM) patients. Since PRE and GDM are two prevalent pregnancy complications that result in adverse health effects for both the mother and fetus during pregnancy and later in life, a better understanding of each is essential. The multi-omic evaluations performed here provide new insight into the end-stage molecular profiles of each disease, thereby supplying information potentially crucial for earlier diagnosis and treatments.},
}
@article{fourches_ash_2019,
  author       = {Fourches, Denis and Ash, Jeremy},
  title        = {{4D-quantitative} structure-activity relationship modeling: making a comeback},
  journal      = {Expert Opinion on Drug Discovery},
  volume       = {14},
  number       = {12},
  pages        = {1227--1235},
  year         = {2019},
  month        = dec,
  issn         = {1746-045X},
  doi          = {10.1080/17460441.2019.1664467},
  abstractNote = {Introduction: Predictive Quantitative Structure–Activity Relationship (QSAR) modeling has become an essential methodology for rapidly assessing various properties of chemicals. The vast majority of these QSAR models utilize numerical descriptors derived from the two- and/or three-dimensional structures of molecules. However, the conformation-dependent characteristics of flexible molecules and their dynamic interactions with biological target(s) is/are not encoded by these descriptors, leading to limited prediction performances and reduced interpretability. 2D/3D QSAR models are successful for virtual screening, but typically suffer at lead optimization stages. That is why conformation-dependent 4D-QSAR modeling methods were developed two decades ago. However, these methods have always suffered from the associated computational cost. Recently, 4D-QSAR has been experiencing a significant come-back due to rapid advances in GPU-accelerated molecular dynamic simulations and modern machine learning techniques. Areas covered: Herein, the authors briefly review the literature regarding 4D-QSAR modeling and describe its modern workflow called MD-QSAR. Challenges and current limitations are also highlighted. Expert opinion: The development of hyper-predictive MD-QSAR models could represent a disruptive technology for analyzing, understanding, and optimizing dynamic protein-ligand interactions with countless applications for drug discovery and chemical toxicity assessment. Therefore, there has never been a better time and relevance for molecular modeling teams to engage in hyper-predictive MD-QSAR modeling.},
}
% NOTE(review): the citation key below contains spaces ("van den driessche", "et al."); classic BibTeX does not accept whitespace inside a key. Key kept as-is so existing citations are not changed here; rename it (e.g. with underscores) together with the citing documents.
@article{wen_wang_van den driessche_chen_zhang_chen_li_soto_liu_ohashi_et al._2019,
  author       = {Wen, Di and Wang, Jinqiang and Van Den Driessche, George and Chen, Qian and Zhang, Yuqi and Chen, Guojun and Li, Hongjun and Soto, Jennifer and Liu, Ming and Ohashi, Masao and others},
  title        = {Adipocytes as Anticancer Drug Delivery Depot},
  journal      = {Matter},
  volume       = {1},
  number       = {5},
  pages        = {1203--1214},
  year         = {2019},
  month        = nov,
  issn         = {2590-2385},
  doi          = {10.1016/j.matt.2019.08.007},
  abstractNote = {Tumor-associated adipocytes promote tumor growth by providing energy and causing chronic inflammation. Here, we have exploited the lipid metabolism to engineer adipocytes that serve as a depot to deliver cancer therapeutics at the tumor site. Rumenic acid (RA), as an anticancer fatty acid, and a doxorubicin prodrug (pDox) with a reactive oxygen species (ROS)-cleavable linker, are encapsulated in adipocytes to deliver therapeutics in a tumor-specific bioresponsive manner. After intratumoral or postsurgical administration, lipolysis releases the RA and pDox that is activated by intracellular ROS-responsive conversion, subsequently promoting antitumor efficacy. Furthermore, downregulation of PD-L1 expression is observed in tumor cells, favoring the emergence of CD4+ and CD8+ T cell-mediated immune responses.},
}
@article{plundrich_cook_maleki_fourches_lila_2019,
  author       = {Plundrich, Nathalie J. and Cook, Bethany T. and Maleki, Soheila J. and Fourches, Denis and Lila, Mary Ann},
  title        = {Binding of peanut allergen {Ara h 2} with {Vaccinium} fruit polyphenols},
  journal      = {Food Chemistry},
  volume       = {284},
  pages        = {287--295},
  year         = {2019},
  month        = jun,
  ISSN         = {1873-7072},
  DOI          = {10.1016/j.foodchem.2019.01.081},
  abstractNote = {The potential for 42 different polyphenols found in Vaccinium fruits to bind to peanut allergen Ara h 2 and inhibit IgE binding epitopes was investigated using cheminformatics techniques. Out of 12 predicted binders, delphinidin-3-glucoside, cyanidin-3-glucoside, procyanidin C1, and chlorogenic acid were further evaluated in vitro. Circular dichroism, UV-Vis spectroscopy, and immunoblotting determined their capacity to (i) bind to Ara h 2, (ii) induce protein secondary structural changes, and (iii) inhibit IgE binding epitopes. UV-Vis spectroscopy clearly indicated that procyanidin C1 and chlorogenic acid interacted with Ara h 2, and circular dichroism results suggested that interactions with these polyphenols resulted in changes to Ara h 2 secondary structures. Immunoblotting showed that procyanidin C1 and chlorogenic acid bound to Ara h 2 significantly decreased the IgE binding capacity by 37% and 50%, respectively. These results suggest that certain polyphenols can inhibit IgE recognition of Ara h 2 by obstructing linear IgE epitopes.},
}
@article{ash_kuenemann_rotroff_motsinger-reif_fourches_2019,
  author       = {Ash, Jeremy R. and Kuenemann, Melaine A. and Rotroff, Daniel and Motsinger-Reif, Alison and Fourches, Denis},
  title        = {Cheminformatics approach to exploring and modeling trait-associated metabolite profiles},
  journal      = {Journal of Cheminformatics},
  volume       = {11},
  year         = {2019},
  month        = jun,
  ISSN         = {1758-2946},
  DOI          = {10.1186/s13321-019-0366-3},
  abstractNote = {Developing predictive and transparent approaches to the analysis of metabolite profiles across patient cohorts is of critical importance for understanding the events that trigger or modulate traits of interest (e.g., disease progression, drug metabolism, chemical risk assessment). However, metabolites’ chemical structures are still rarely used in the statistical modeling workflows that establish these trait-metabolite relationships. Herein, we present a novel cheminformatics-based approach capable of identifying predictive, interpretable, and reproducible trait-metabolite relationships. As a proof-of-concept, we utilize a previously published case study consisting of metabolite profiles from non-small-cell lung cancer (NSCLC) adenocarcinoma patients and healthy controls. By characterizing each structurally annotated metabolite using both computed molecular descriptors and patient metabolite concentration profiles, we show that these complementary features enhance the identification and understanding of key metabolites associated with cancer. Ultimately, we built multi-metabolite classification models for assessing patients’ cancer status using specific groups of metabolites identified based on high structural similarity through chemical clustering. We subsequently performed a metabolic pathway enrichment analysis to identify potential mechanistic relationships between metabolites and NSCLC adenocarcinoma. This cheminformatics-inspired approach relies on the metabolites’ structural features and chemical properties to provide critical information about metabolite-trait associations. This method could ultimately facilitate biological understanding and advance research based on metabolomics data, especially with respect to the identification of novel biomarkers.},
}
@article{menden_wang_mason_szalai_bulusu_guan_yu_kang_jeon_wolfinger_et al._2019,
  author       = {Menden, Michael P. and Wang, Dennis and Mason, Mike J. and Szalai, Bence and Bulusu, Krishna C. and Guan, Yuanfang and Yu, Thomas and Kang, Jaewoo and Jeon, Minji and Wolfinger, Russ and others},
  title        = {Community assessment to advance computational prediction of cancer drug combinations in a pharmacogenomic screen},
  journal      = {Nature Communications},
  volume       = {10},
  year         = {2019},
  month        = jun,
  ISSN         = {2041-1723},
  DOI          = {10.1038/s41467-019-09799-2},
  abstractNote = {The effectiveness of most cancer targeted therapies is short-lived. Tumors often develop resistance that might be overcome with drug combinations. However, the number of possible combinations is vast, necessitating data-driven approaches to find optimal patient-specific treatments. Here we report AstraZeneca's large drug combination dataset, consisting of 11,576 experiments from 910 combinations across 85 molecularly characterized cancer cell lines, and results of a DREAM Challenge to evaluate computational strategies for predicting synergistic drug pairs and biomarkers. 160 teams participated to provide a comprehensive methodological development and benchmarking. Winning methods incorporate prior knowledge of drug-target interactions. Synergy is predicted with an accuracy matching biological replicates for >60% of combinations. However, 20% of drug combinations are poorly predicted by all methods. Genomic rationale for synergy predictions are identified, including ADAM17 inhibitor antagonism when combined with PIK3CB/D inhibition contrasting to synergy when combined with other PI3K-pathway inhibitors in PIK3CA mutant cells.},
}
@article{williams_van den driessche_valery_fourches_freeman_2019,
  author    = {Williams, Tova N. and Van Den Driessche, George A. and Valery, Alain R. B. and Fourches, Denis and Freeman, Harold S.},
  title     = {Corrections to ``Toward the Rational Design of Sustainable Hair Dyes Using Cheminformatics Approaches: Step 2. Identification of Hair Dye Substance Database Analogs in the {Max Weaver Dye Library}''},
  journal   = {ACS Sustainable Chemistry \& Engineering},
  volume    = {7},
  number    = {1},
  pages     = {1806},
  year      = {2019},
  month     = jan,
  ISSN      = {2168-0485},
  DOI       = {10.1021/ACSSUSCHEMENG.8B05545},
  url       = {http://dx.doi.org/10.1021/ACSSUSCHEMENG.8B05545},
  publisher = {American Chemical Society (ACS)},
}
@article{west_lu_rotroff_kuenemann_chang_wu_wagner_buse_motsinger-reif_fourches_et al._2019,
  author       = {West, Rachel Marceau and Lu, Wenbin and Rotroff, Daniel M. and Kuenemann, Melaine A. and Chang, Sheng-Mao and Wu, Michael C. and Wagner, Michael J. and Buse, John B. and Motsinger-Reif, Alison A. and Fourches, Denis and others},
  title        = {Identifying individual risk rare variants using protein structure guided local tests ({POINT})},
  journal      = {{PLOS} Computational Biology},
  volume       = {15},
  number       = {2},
  year         = {2019},
  month        = feb,
  ISSN         = {1553-7358},
  DOI          = {10.1371/journal.pcbi.1006722},
  abstractNote = {Rare variants are of increasing interest to genetic association studies because of their etiological contributions to human complex diseases. Due to the rarity of the mutant events, rare variants are routinely analyzed on an aggregate level. While aggregation analyses improve the detection of global-level signal, they are not able to pinpoint causal variants within a variant set. To perform inference on a localized level, additional information, e.g., biological annotation, is often needed to boost the information content of a rare variant. Following the observation that important variants are likely to cluster together on functional domains, we propose a protein structure guided local test (POINT) to provide variant-specific association information using structure-guided aggregation of signal. Constructed under a kernel machine framework, POINT performs local association testing by borrowing information from neighboring variants in the 3-dimensional protein space in a data-adaptive fashion. Besides merely providing a list of promising variants, POINT assigns each variant a p-value to permit variant ranking and prioritization. We assess the selection performance of POINT using simulations and illustrate how it can be used to prioritize individual rare variants in PCSK9, ANGPTL4 and CETP in the Action to Control Cardiovascular Risk in Diabetes (ACCORD) clinical trial data.},
}
@article{burnum-johnson_zheng_dodds_ash_fourches_nicora_wendler_metz_waters_jansson_et al._2019,
  author       = {Burnum-Johnson, Kristin E. and Zheng, Xueyun and Dodds, James N. and Ash, Jeremy and Fourches, Denis and Nicora, Carrie D. and Wendler, Jason P. and Metz, Thomas O. and Waters, Katrina M. and Jansson, Janet K. and others},
  title        = {Ion mobility spectrometry and the omics: Distinguishing isomers, molecular classes and contaminant ions in complex samples},
  journal      = {{TrAC} Trends in Analytical Chemistry},
  volume       = {116},
  pages        = {292--299},
  year         = {2019},
  month        = jul,
  ISSN         = {1879-3142},
  DOI          = {10.1016/j.trac.2019.04.022},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85065908529&partnerID=MN8TOARS},
  abstractNote = {Ion mobility spectrometry (IMS) is a widely used analytical technique providing rapid gas phase separations. IMS alone is useful, but its coupling with mass spectrometry (IMS-MS) and various front-end separation techniques has greatly increased the molecular information achievable from different omic analyses. IMS-MS analyses are specifically gaining attention for improving metabolomic, lipidomic, glycomic, proteomic and exposomic analyses by increasing measurement sensitivity (e.g. S/N ratio), lowering the detection limit, and amplifying peak capacity. Numerous studies including national security-related analyses, disease screenings and environmental evaluations are illustrating that IMS-MS is able to extract information not possible with MS alone. Furthermore, IMS-MS has shown great utility in salvaging molecular information for low abundance molecules of interest when high concentration contaminant ions are present in the sample by reducing detector suppression. This review highlights how IMS-MS is currently being used in omic analyses to distinguish structurally similar molecules, isomers, molecular classes and contaminant ions.},
}
@article{fourches_feducia_2019,
  author       = {Fourches, Denis and Feducia, Jeremiah},
  title        = {Student-Guided Three-Dimensional Printing Activity in Large Lecture Courses: A Practical Guideline},
  journal      = {Journal of Chemical Education},
  volume       = {96},
  number       = {2},
  pages        = {291--295},
  year         = {2019},
  month        = feb,
  ISSN         = {1938-1328},
  DOI          = {10.1021/acs.jchemed.8b00346},
  abstractNote = {Modern technology stimulates the development of innovative classroom activities. We designed a 3D printing activity in two separate Organic Chemistry lectures of at least 200 students each. This assignment required students to 3D print a molecule of their choice, relying on services made available through the university libraries. Data obtained through a survey at the end of the semester provided key information on the students’ experiences with printing 3D models for the first time. A summary of this feedback and constructive remarks on the best practices regarding 3D printing assignments in large lecture courses are presented.},
}
@article{van den driessche_fourches_2018,
  author  = {Van Den Driessche, G. and Fourches, D.},
  title   = {Adverse drug reactions triggered by the common {HLA-B*57:01} variant: Virtual screening of {DrugBank} using {3D} molecular docking},
  journal = {Journal of Cheminformatics},
  volume  = {10},
  year    = {2018},
}
@article{kuenemann_fourches_2018,
  author       = {Kuenemann, Melaine A. and Fourches, Denis},
  title        = {Cheminformatics Analysis of Dynamic {WNK}-Inhibitor Interactions},
  journal      = {Molecular Informatics},
  volume       = {37},
  number       = {6-7},
  pages        = {1700138},
  year         = {2018},
  month        = feb,
  ISSN         = {1868-1743},
  DOI          = {10.1002/MINF.201700138},
  url          = {http://dx.doi.org/10.1002/MINF.201700138},
  publisher    = {Wiley},
  abstractNote = {The With-No-Lysine (WNK) serine/threonine kinase family constitutes a unique and distinctive branch of the kinome. The four proteins of this family (WNK1/2/3/4) are involved in blood pressure regulation, body fluid, and electrolyte homeostasis. Herein, we modeled and analyzed the binding modes of all publicly-available small orthosteric and allosteric binders (including WNK463 and WNK467) experimentally tested towards any of the WNK family member. To do so, we relied on state-of-the-art cheminformatics approaches including structure-based molecular docking and molecular dynamics simulations. In particular, we computed and analyzed the (i) molecular selectivity of known inhibitors when docked in the binding site of each WNK family member, (ii) the dynamic WNK-inhibitor interactions at both orthosteric and allosteric sites to derive new structure-activity relationships, and (iii) the key specific interactions present in each binding site. This study reports on the first, cheminformatics-powered analysis of the entire chemical space of known WNK inhibitors. We discuss the conservation of critical WNK-inhibitor interactions and the existence of isoform-specific interactions that could enable the rational design of more potent and selective WNK binders.},
}
@article{zin_williams_fourches_2018,
  author       = {Zin, Phyo Phyo Kyaw and Williams, Gavin and Fourches, Denis},
  title        = {Cheminformatics-based enumeration and analysis of large libraries of macrolide scaffolds},
  journal      = {Journal of Cheminformatics},
  volume       = {10},
  number       = {1},
  year         = {2018},
  month        = nov,
  ISSN         = {1758-2946},
  DOI          = {10.1186/s13321-018-0307-6},
  url          = {http://dx.doi.org/10.1186/s13321-018-0307-6},
  publisher    = {Springer Science and Business Media LLC},
  abstractNote = {We report on the development of a cheminformatics enumeration technology and the analysis of a resulting large dataset of virtual macrolide scaffolds. Although macrolides have been shown to have valuable biological properties, there is no ready-to-screen virtual library of diverse macrolides in the public domain. Conducting molecular modeling (especially virtual screening) of these complex molecules is highly relevant as the organic synthesis of these compounds, when feasible, typically requires many synthetic steps, and thus dramatically slows the discovery of new bioactive macrolides. Herein, we introduce a cheminformatics approach and associated software that allows for designing and generating libraries of virtual macrocycle/macrolide scaffolds with user-defined constitutional and structural constraints (e.g., types and numbers of structural motifs to be included in the macrocycle, ring size, maximum number of compounds generated). To study the chemical diversity of such generated molecules, we enumerated V1M (Virtual 1 million Macrolide scaffolds) library, each containing twelve common structural motifs. For each macrolide scaffold, we calculated several key properties, such as molecular weight, hydrogen bond donors/acceptors, topological polar surface area. In this study, we discuss (1) the initial concept and current features of our PKS (polyketides) Enumerator software, (2) the chemical diversity and distribution of structural motifs in V1M library, and (3) the unique opportunities for future virtual screening of such enumerated ensembles of macrolides. Importantly, V1M is provided in the Supplementary Material of this paper allowing other researchers to conduct any type of molecular modeling and virtual screening studies. Therefore, this technology for enumerating extremely large libraries of macrolide scaffolds could hold a unique potential in the field of computational chemistry and drug discovery for rational designing of new antibiotics and anti-cancer agents.},
}
@article{low_alves_fourches_sedykh_andrade_muratov_rusyn_tropsha_2018,
  author       = {Low, Yen S. and Alves, Vinicius M. and Fourches, Denis and Sedykh, Alexander and Andrade, Carolina Horta and Muratov, Eugene N. and Rusyn, Ivan and Tropsha, Alexander},
  title        = {Chemistry-Wide Association Studies ({CWAS}): A Novel Framework for Identifying and Interpreting Structure-Activity Relationships},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {58},
  number       = {11},
  pages        = {2203--2213},
  year         = {2018},
  month        = nov,
  ISSN         = {1549-960X},
  DOI          = {10.1021/acs.jcim.8b00450},
  abstractNote = {Quantitative structure-activity relationships (QSAR) models are often seen as a "black box" because they are considered difficult to interpret. Meanwhile, qualitative approaches, e.g., structural alerts (SA) or read-across, provide mechanistic insight, which is preferred for regulatory purposes, but predictive accuracy of such approaches is often low. Herein, we introduce the chemistry-wide association study (CWAS) approach, a novel framework that both addresses such deficiencies and combines advantages of statistical QSAR and alert-based approaches. The CWAS framework consists of the following steps: (i) QSAR model building for an end point of interest, (ii) identification of key chemical features, (iii) determination of communities of such features disproportionately co-occurring more frequently in the active than in the inactive class, and (iv) assembling these communities to form larger (and not necessarily chemically connected) novel structural alerts with high specificity. As a proof-of-concept, we have applied CWAS to model Ames mutagenicity and Stevens-Johnson Syndrome (SJS). For the well-studied Ames mutagenicity data set, we identified 76 important individual fragments and assembled co-occurring fragments into SA both replicative of known as well as representing novel mutagenicity alerts. For the SJS data set, we identified 29 important fragments and assembled co-occurring communities into SA including both known and novel alerts. In summary, we demonstrate that CWAS provides a new framework to interpret predictive QSAR models and derive refined structural alerts for more effective design and safety assessment of drugs and drug candidates.},
}
@article{mahapatra_franzosa_roell_kuenemann_houck_reif_fourches_kullman_2018,
  author       = {Mahapatra, Debabrata and Franzosa, Jill A. and Roell, Kyle and Kuenemann, Melaine Agnes and Houck, Keith A. and Reif, David M. and Fourches, Denis and Kullman, Seth W.},
  title        = {Confirmation of high-throughput screening data and novel mechanistic insights into {VDR}-xenobiotic interactions by orthogonal assays},
  journal      = {Scientific Reports},
  volume       = {8},
  number       = {1},
  year         = {2018},
  month        = jun,
  ISSN         = {2045-2322},
  DOI          = {10.1038/S41598-018-27055-3},
  url          = {http://dx.doi.org/10.1038/S41598-018-27055-3},
  publisher    = {Springer Science and Business Media LLC},
  abstractNote = {High throughput screening (HTS) programs have demonstrated that the Vitamin D receptor (VDR) is activated and/or antagonized by a wide range of structurally diverse chemicals. In this study, we examined the Tox21 qHTS data set generated against VDR for reproducibility and concordance and elucidated functional insights into VDR-xenobiotic interactions. Twenty-one potential VDR agonists and 19 VDR antagonists were identified from a subset of >400 compounds with putative VDR activity and examined for VDR functionality utilizing select orthogonal assays. Transient transactivation assay (TT) using a human VDR plasmid and Cyp24 luciferase reporter construct revealed 20/21 active VDR agonists and 18/19 active VDR antagonists. Mammalian-2-hybrid assay (M2H) was then used to evaluate VDR interactions with co-activators and co-regulators. With the exception of a select few compounds, VDR agonists exhibited significant recruitment of co-regulators and co-activators whereas antagonists exhibited considerable attenuation of recruitment by VDR. A unique set of compounds exhibiting synergistic activity in antagonist mode and no activity in agonist mode was identified. Cheminformatics modeling of VDR-ligand interactions were conducted and revealed selective ligand VDR interaction. Overall, data emphasizes the molecular complexity of ligand-mediated interactions with VDR and suggest that VDR transactivation may be a target site of action for diverse xenobiotics.},
}
@article{borrel_kleinstreuer_fourches_2018,
  author       = {Borrel, Alexandre and Kleinstreuer, Nicole C. and Fourches, Denis},
  title        = {Exploring drug space with {ChemMaps.com}},
  journal      = {Bioinformatics},
  volume       = {34},
  number       = {21},
  pages        = {3773--3775},
  year         = {2018},
  month        = nov,
  ISSN         = {1460-2059},
  DOI          = {10.1093/bioinformatics/bty412},
  abstractNote = {Motivation: Easily navigating chemical space has become more important due to the increasing size and diversity of publicly-accessible databases such as DrugBank, ChEMBL or Tox21. To do so, modelers typically rely on complex projection techniques using molecular descriptors computed for all the chemicals to be visualized. However, the multiple cheminformatics steps required to prepare, characterize, compute and explore those molecules, are technical, typically necessitate scripting skills, and thus represent a real obstacle for non-specialists. Results: We developed the ChemMaps.com webserver to easily browse, navigate and mine chemical space. The first version of ChemMaps.com features more than 8000 approved, in development, and rejected drugs, as well as over 47 000 environmental chemicals. Availability and implementation: The webserver is freely available at http://www.chemmaps.com.},
}
@article{kuenemann_spears_orndorff_fourches_2018,
  author       = {Kuenemann, Melaine A. and Spears, Patricia A. and Orndorff, Paul E. and Fourches, Denis},
  title        = {In silico Predicted Glucose-1-phosphate Uridylyltransferase ({GalU}) Inhibitors Block a Key Pathway Required for {Listeria} Virulence},
  journal      = {Molecular Informatics},
  volume       = {37},
  number       = {6-7},
  pages        = {1800004},
  year         = {2018},
  month        = mar,
  ISSN         = {1868-1743},
  DOI          = {10.1002/MINF.201800004},
  url          = {http://dx.doi.org/10.1002/MINF.201800004},
  publisher    = {Wiley},
  abstractNote = {Peptidoglycan walls of gram positive bacteria are functionalized by glycopolymers called wall teichoic acid (WTA). In Listeria monocytogenes, multiple enzymes including the glucose-1-phosphate uridylyltransferase (GalU) were identified as mandatory for WTA galactosylation, so that the inhibition of GalU is associated with a significant attenuation of Listeria virulence. Herein, we report on a series of in silico predicted GalU inhibitors identified using structure-based virtual screening and experimentally validated to be effective in blocking the WTA galactosylation pathway in vitro. Several hits such as C04, a pyrimidinyl benzamide, afforded promising experimental potencies. This proof-of-concept study opens new perspectives for the development of potent and selective GalU inhibitors of high interest to attenuate Listeria virulence. It also underscores the high relevance of using molecular modeling for facilitating the identification of bacterial virulence attenuators and more generally antibacterials.},
}
@article{sanabria-ojeda_fukuyama_fourches_baumer_2018,
  author  = {Sanabria-Ojeda, L. and Fukuyama, T. and Fourches, D. and Baumer, W.},
  title   = {Janus kinase inhibitors differ in their affinity to the {TRPV1} receptor - implications for their use in itch and pain},
  journal = {Journal of Veterinary Pharmacology and Therapeutics},
  volume  = {41},
  pages   = {160},
  year    = {2018},
}
@article{la_sedykh_fourches_muratov_tropsha_2018,
  author       = {La, Mary K. and Sedykh, Alexander and Fourches, Denis and Muratov, Eugene and Tropsha, Alexander},
  title        = {Predicting Adverse Drug Effects from Literature- and Database-Mined Assertions},
  journal      = {Drug Safety},
  volume       = {41},
  number       = {11},
  pages        = {1059--1072},
  year         = {2018},
  month        = nov,
  ISSN         = {1179-1942},
  DOI          = {10.1007/s40264-018-0688-5},
  abstractNote = {Given that adverse drug effects (ADEs) have led to post-market patient harm and subsequent drug withdrawal, failure of candidate agents in the drug development process, and other negative outcomes, it is essential to attempt to forecast ADEs and other relevant drug–target–effect relationships as early as possible. Current pharmacologic data sources, providing multiple complementary perspectives on the drug–target–effect paradigm, can be integrated to facilitate the inference of relationships between these entities. This study aims to identify both existing and unknown relationships between chemicals (C), protein targets (T), and ADEs (E) based on evidence in the literature. Cheminformatics and data mining approaches were employed to integrate and analyze publicly available clinical pharmacology data and literature assertions interrelating drugs, targets, and ADEs. Based on these assertions, a C–T–E relationship knowledge base was developed. Known pairwise relationships between chemicals, targets, and ADEs were collected from several pharmacological and biomedical data sources. These relationships were curated and integrated according to Swanson’s paradigm to form C–T–E triangles. Missing C–E edges were then inferred as C–E relationships. Unreported associations between drugs, targets, and ADEs were inferred, and inferences were prioritized as testable hypotheses. Several C–E inferences, including testosterone → myocardial infarction, were identified using inferences based on the literature sources published prior to confirmatory case reports. Timestamping approaches confirmed the predictive ability of this inference strategy on a larger scale. The presented workflow, based on free-access databases and an association-based inference scheme, provided novel C–E relationships that have been validated post hoc in case reports. With refinement of prioritization schemes for the generated C–E inferences, this workflow may provide an effective computational method for the early detection of potential drug candidate ADEs that can be followed by targeted experimental investigations.},
}
@article{williams_kuenemann_driessche_williams_fourches_freeman_2018,
  author       = {Williams, Tova N. and Kuenemann, Melaine A. and Van Den Driessche, George A. and Williams, Antony J. and Fourches, Denis and Freeman, Harold S.},
  title        = {Toward the Rational Design of Sustainable Hair Dyes Using Cheminformatics Approaches: Step 1. Database Development and Analysis},
  journal      = {ACS Sustainable Chemistry \& Engineering},
  volume       = {6},
  number       = {2},
  pages        = {2344--2352},
  year         = {2018},
  month        = feb,
  ISSN         = {2168-0485},
  DOI          = {10.1021/acssuschemeng.7b03795},
  url          = {https://doi.org/10.1021/acssuschemeng.7b03795},
  publisher    = {American Chemical Society (ACS)},
  abstractNote = {Herein, we report on the initial step of the design process of new hair dyes with the desired properties. The first step is dedicated to the development of the largest, publicly available database of hair dye substances (containing temporary and semipermanent hair dyes as well as permanent hair dye precursors) used in commercial hair dye formulations. The database was utilized to perform a cheminformatics study assessing the computed physicochemical properties of the different hair dye substances, especially within each cluster of structurally similar dyes. The various substances could be differentiated based on their average molecular weight, hydrophobicity, topological polar surface area, and number of hydrogen bond acceptors, with some overlap also observed. In particular, we found that dyes such as C.I. Basic Orange 1 and 2 were clustered among the precursors, suggesting that their diffusion behavior is similar to that of permanent hair dye precursors. We anticipate taking advantage of this interesting knowledge in the second design phase of our investigation. As a step in that direction, we used QSAR models and noted that 65% of the substances were predicted to be mutagenic (22 with confidence thresholds >90%), whereas 79% were predicted to be skin sensitizers (37 with confidence thresholds >90%). We discuss the relevance of these preliminary calculations in view of literature-extracted experimental data.},
}
@article{williams_driessche_valery_fourches_freeman_2018,
  author       = {Williams, Tova N. and Van Den Driessche, George A. and Valery, Alain R. B. and Fourches, Denis and Freeman, Harold S.},
  title        = {Toward the Rational Design of Sustainable Hair Dyes Using Cheminformatics Approaches: Step 2. Identification of Hair Dye Substance Database Analogs in the {Max Weaver Dye Library}},
  journal      = {ACS Sustainable Chemistry \& Engineering},
  volume       = {6},
  number       = {11},
  pages        = {14248--14256},
  year         = {2018},
  month        = nov,
  ISSN         = {2168-0485},
  DOI          = {10.1021/acssuschemeng.8b02882},
  url          = {https://doi.org/10.1021/acssuschemeng.8b02882},
  publisher    = {American Chemical Society (ACS)},
  abstractNote = {We report on part 2 of the cheminformatics-assisted development of sustainable hair dyes with enhanced technical and toxicological properties. In this study, an initial similarity search analysis was performed using two reference probes (C.I. Basic Orange 1 and Orange 2) as structural templates for the identification of potential analogs among the Max Weaver Dye Library (MWDL). The analysis revealed an interesting subset of 158 MWDL compounds that were close analogs of the classical aminoazobenzene dyes. A more detailed similarity search analysis of this subset ultimately led to the selection of four dyes for further in silico quantum calculations and experimental dye uptake (color depth on hair) studies. Results from quantum calculations indicated that the ESP surface properties of these dyes were consistent with nonionic interactions between dye and keratin. Among the four dye analogs, 2-amino-6-methyl-5-(phenyldiazenyl)pyrimidin-4-ol and 2-amino-4-chloro-1,6-dimethyl-5-(phenyldiazenyl)-pyrimidin-1-ium methyl sulfate achieved the best dye uptake on hair (∑K/S 227.31 and 149.26). The results of this study show that cheminformatics-based tools can be used to both build and screen dye databases containing potential alternatives to colorants believed to pose environmental concerns, providing a more sustainable (green) approach to hair dye design, by reducing the number of compounds requiring synthesis and analysis before suitable replacements are identified.},
}
@article{driessche_fourches_2017,
  author  = {Van Den Driessche, G. and Fourches, D.},
  title   = {Adverse drug reactions triggered by the common {HLA-B*57:01} variant: A molecular docking study},
  journal = {Journal of Cheminformatics},
  volume  = {9},
  year    = {2017},
}
@article{ash_fourches_2017,
  author       = {Ash, Jeremy and Fourches, Denis},
  title        = {Characterizing the Chemical Space of {ERK2} Kinase Inhibitors Using Descriptors Computed from Molecular Dynamics Trajectories},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {57},
  number       = {6},
  pages        = {1286--1299},
  year         = {2017},
  month        = jun,
  ISSN         = {1549-960X},
  DOI          = {10.1021/acs.jcim.7b00048},
  abstractNote = {Quantitative Structure-Activity Relationship (QSAR) models typically rely on 2D and 3D molecular descriptors to characterize chemicals and forecast their experimental activities. Previously, we showed that even the most reliable 2D QSAR models and structure-based 3D molecular docking techniques were not capable of accurately ranking a set of known inhibitors for the ERK2 kinase, a key player in various types of cancer. Herein, we calculated and analyzed a series of chemical descriptors computed from the molecular dynamics (MD) trajectories of ERK2-ligand complexes. First, the docking of 87 ERK2 ligands with known binding affinities was accomplished using Schrodinger's Glide software; then, solvent-explicit MD simulations (20 ns, NPT, 300 K, TIP3P, 1 fs) were performed using the GPU-accelerated Desmond program. Second, we calculated a series of MD descriptors based on the distributions of 3D descriptors computed for representative samples of the ligand's conformations over the MD simulations. Third, we analyzed the data set of 87 inhibitors in the MD chemical descriptor space. We showed that MD descriptors (i) had little correlation with conventionally used 2D/3D descriptors, (ii) were able to distinguish the most active ERK2 inhibitors from the moderate/weak actives and inactives, and (iii) provided key and complementary information about the unique characteristics of active ligands. This study represents the largest attempt to utilize MD-extracted chemical descriptors to characterize and model a series of bioactive molecules. MD descriptors could enable the next generation of hyperpredictive MD-QSAR models for computer-aided lead optimization and analogue prioritization.}
}
@comment{A second, less detailed record of this paper that reused the same citation key has been merged into the entry below.}
@article{kuenemann_fourches_2017,
  author       = {Kuenemann, Melaine A. and Fourches, Denis},
  title        = {Cheminformatics Modeling of Amine Solutions for Assessing their {CO2} Absorption Properties},
  journal      = {Molecular Informatics},
  volume       = {36},
  number       = {7},
  pages        = {1600143},
  year         = {2017},
  month        = mar,
  publisher    = {Wiley},
  ISSN         = {1868-1743},
  DOI          = {10.1002/MINF.201600143},
  url          = {http://dx.doi.org/10.1002/MINF.201600143},
  abstractNote = {As stricter regulations on CO2 emissions are adopted worldwide, identifying efficient chemical processes to capture and recycle CO2 is of critical importance for industry. The most common process known as amine scrubbing suffers from the lack of available amine solutions capable of capturing CO2 efficiently. Tertiary amines characterized by low heats of reaction are considered good candidates but their absorption properties can significantly differ from one analogue to another despite high structural similarity. Herein, after collecting and curating experimental data from the literature, we have built a modeling set of 41 amine structures with their absorption properties. Then we analyzed their chemical composition using molecular descriptors and non-supervised clustering. Furthermore, we developed a series of quantitative structure-property relationships (QSPR) to assess amines’ CO2 absorption properties from their structural characteristics. These models afforded reasonable prediction performances (e. g., Q2LOO=0.63 for CO2 absorption amount) even though they are solely based on 2D chemical descriptors and individual machine learning techniques (random forest and neural network). Overall, we believe the chemical analysis and the series of QSPR models presented in this proof-of-concept study represent new knowledge and innovative tools that could be very useful for screening and prioritizing hypothetical amines to be synthesized and tested experimentally for their CO2 absorption properties.}
}
@article{muratov_lewis_fourches_tropsha_cox_2017,
  author       = {Muratov, E. and Lewis, M. and Fourches, D. and Tropsha, A. and Cox, W. C.},
  title        = {Computer-assisted decision support for student admissions based on their predicted academic performance},
  journal      = {American Journal of Pharmaceutical Education},
  volume       = {81},
  number       = {3},
  year         = {2017}
}
@article{legge_hamshere_ripke_pardinas_goldstein_rees_richards_leonenko_jorskog_fourches_et_al._2017,
  author       = {Legge, S E and Hamshere, M L and Ripke, S and Pardinas, A F and Goldstein, J I and Rees, E and Richards, A L and Leonenko, G and Jorskog, L F and Fourches, Denis and others},
  title        = {Genome-wide common and rare variant analysis provides novel insights into clozapine-associated neutropenia},
  journal      = {Molecular Psychiatry},
  volume       = {22},
  number       = {10},
  pages        = {1502--1508},
  year         = {2017},
  publisher    = {Springer Science and Business Media LLC},
  ISSN         = {1359-4184 1476-5578},
  DOI          = {10.1038/MP.2016.97},
  url          = {http://dx.doi.org/10.1038/MP.2016.97},
  abstractNote = {The antipsychotic clozapine is uniquely effective in the management of schizophrenia; however, its use is limited by its potential to induce agranulocytosis. The causes of this, and of its precursor neutropenia, are largely unknown, although genetic factors have an important role. We sought risk alleles for clozapine-associated neutropenia in a sample of 66 cases and 5583 clozapine-treated controls, through a genome-wide association study (GWAS), imputed human leukocyte antigen (HLA) alleles, exome array and copy-number variation (CNV) analyses. We then combined associated variants in a meta-analysis with data from the Clozapine-Induced Agranulocytosis Consortium (up to 163 cases and 7970 controls). In the largest combined sample to date, we identified a novel association with rs149104283 (odds ratio (OR)=4.32, P=1.79 × 10^−8), intronic to transcripts of SLCO1B3 and SLCO1B7, members of a family of hepatic transporter genes previously implicated in adverse drug reactions including simvastatin-induced myopathy and docetaxel-induced neutropenia. Exome array analysis identified gene-wide associations of uncommon non-synonymous variants within UBAP2 and STARD9. We additionally provide independent replication of a previously identified variant in HLA-DQB1 (OR=15.6, P=0.015, positive predictive value=35.1%). These results implicate biological pathways through which clozapine may act to cause this serious adverse effect.}
}
@article{fourches_2017,
  author       = {Fourches, Denis},
  title        = {Reaction: Molecular Modeling for Novel Antibacterials},
  journal      = {Chem},
  volume       = {3},
  number       = {1},
  pages        = {13--14},
  year         = {2017},
  month        = jul,
  publisher    = {Elsevier BV},
  ISSN         = {2451-9294},
  DOI          = {10.1016/J.CHEMPR.2017.06.016},
  url          = {http://dx.doi.org/10.1016/J.CHEMPR.2017.06.016},
  abstractNote = {Denis Fourches, PhD, is a molecular modeler and expert in cheminformatics in the Department of Chemistry and the Bioinformatics Research Center at North Carolina State University. His research focuses on the development and applications of novel predictive cheminformatics methods.}
}
@article{borrel_fourches_2017,
  author       = {Borrel, Alexandre and Fourches, Denis},
  title        = {{RealityConvert}: a tool for preparing {3D} models of biochemical structures for augmented and virtual reality},
  journal      = {Bioinformatics},
  volume       = {33},
  number       = {23},
  pages        = {3816--3818},
  year         = {2017},
  month        = dec,
  ISSN         = {1460-2059},
  DOI          = {10.1093/bioinformatics/btx485},
  abstractNote = {Motivation: There is a growing interest for the broad use of Augmented Reality (AR) and Virtual Reality (VR) in the fields of bioinformatics and cheminformatics to visualize complex biological and chemical structures. AR and VR technologies allow for stunning and immersive experiences, offering untapped opportunities for both research and education purposes. However, preparing 3D models ready to use for AR and VR is time-consuming and requires a technical expertise that severely limits the development of new contents of potential interest for structural biologists, medicinal chemists, molecular modellers and teachers. Results: Herein we present the RealityConvert software tool and associated website, which allow users to easily convert molecular objects to high quality 3D models directly compatible for AR and VR applications. For chemical structures, in addition to the 3D model generation, RealityConvert also generates image trackers, useful to universally call and anchor that particular 3D model when used in AR applications. The ultimate goal of RealityConvert is to facilitate and boost the development and accessibility of AR and VR contents for bioinformatics and cheminformatics applications. Availability and implementation: http://www.realityconvert.com Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{kuenemann_szymczyk_chen_sultana_hinks_freeman_williams_fourches_vinueza_2017,
  author       = {Kuenemann, Melaine A. and Szymczyk, Malgorzata and Chen, Yufei and Sultana, Nadia and Hinks, David and Freeman, Harold S. and Williams, Antony J. and Fourches, Denis and Vinueza, Nelson R.},
  title        = {{Weaver's} historic accessible collection of synthetic dyes: a cheminformatics analysis},
  journal      = {Chemical Science},
  volume       = {8},
  number       = {6},
  pages        = {4334--4339},
  year         = {2017},
  month        = jun,
  ISSN         = {2041-6539},
  DOI          = {10.1039/c7sc00567a},
  abstractNote = {The Max Weaver Dye Library is presented to the scientific community with a cheminformatics approach to enhance research opportunities with this unique collection of ∼98 000 vials of custom-made dyes.}
}
@article{borysov_hannig_marron_muratov_fourches_tropsha_2016,
  author       = {Borysov, Petro and Hannig, Jan and Marron, J. S. and Muratov, Eugene and Fourches, Denis and Tropsha, Alexander},
  title        = {Activity prediction and identification of mis-annotated chemical compounds using extreme descriptors},
  journal      = {Journal of Chemometrics},
  volume       = {30},
  number       = {3},
  pages        = {99--108},
  year         = {2016},
  month        = feb,
  publisher    = {Wiley},
  ISSN         = {0886-9383},
  DOI          = {10.1002/CEM.2776},
  url          = {http://dx.doi.org/10.1002/CEM.2776},
  abstractNote = {Data pre‐processing that includes removal of descriptors with low variance is a standard first step in quantitative structure–activity relationship modeling. In this paper, we study low‐variance descriptors and show that some of them contain significant amounts of useful information. In particular, we define the notion of extreme descriptors (those variables that have the same value for almost all compounds and only a few values that are different from the common median). We show that extreme descriptors can be helpful for activity prediction in a standard binary classification setting. Moreover, we demonstrate using two case studies (M2 muscarinic receptors and skin sensitization) that extreme descriptors can be used for the identification of possibly mislabeled compounds. Because of these previously unknown, but important, properties, extreme descriptors should be considered in quantitative structure–activity relationship modeling studies. Copyright © 2016 John Wiley & Sons, Ltd.}
}
@article{alves_muratov_capuzzi_politi_low_braga_zakharov_sedykh_mokshyna_farag_et_al._2016,
  author       = {Alves, Vinicius M. and Muratov, Eugene N. and Capuzzi, Stephen J. and Politi, Regina and Low, Yen and Braga, Rodolpho C. and Zakharov, Alexey V. and Sedykh, Alexander and Mokshyna, Elena and Farag, Sherif and others},
  title        = {Alarms about structural alerts},
  journal      = {Green Chemistry},
  volume       = {18},
  number       = {16},
  pages        = {4348--4360},
  year         = {2016},
  ISSN         = {1463-9270},
  DOI          = {10.1039/c6gc01492e},
  abstractNote = {Integrative approach for safety assessment of new chemicals by combining structural alerts and QSAR models.}
}
@article{elkins_fedele_szklarz_azeez_salah_mikolajczyk_romanov_sepetov_huang_roth_et_al._2016,
  author       = {Elkins, J. M. and Fedele, V. and Szklarz, M. and Azeez, K. R. A. and Salah, E. and Mikolajczyk, J. and Romanov, S. and Sepetov, N. and Huang, X. P. and Roth, B. L. and others},
  title        = {Comprehensive characterization of the {Published Kinase Inhibitor Set}},
  journal      = {Nature Biotechnology},
  volume       = {34},
  number       = {1},
  pages        = {95--103},
  year         = {2016}
}
@article{fourches_pu_li_zhou_mu_su_yan_tropsha_2016,
  author       = {Fourches, Denis and Pu, Dongqiuye and Li, Liwen and Zhou, Hongyu and Mu, Qingxin and Su, Gaoxing and Yan, Bing and Tropsha, Alexander},
  title        = {Computer-aided design of carbon nanotubes with the desired bioactivity and safety profiles},
  journal      = {Nanotoxicology},
  volume       = {10},
  number       = {3},
  pages        = {374--383},
  year         = {2016},
  month        = mar,
  ISSN         = {1743-5404},
  DOI          = {10.3109/17435390.2015.1073397},
  abstractNote = {Growing experimental evidences suggest the existence of direct relationships between the surface chemistry of nanomaterials and their biological effects. Herein, we have employed computational approaches to design a set of biologically active carbon nanotubes (CNTs) with controlled protein binding and cytotoxicity. Quantitative structure–activity relationship (QSAR) models were built and validated using a dataset of 83 surface-modified CNTs. A subset of a combinatorial virtual library of 240 000 ligands potentially attachable to CNTs was selected to include molecules that were within the chemical similarity threshold with respect to the modeling set compounds. QSAR models were then employed to virtually screen this subset and prioritize CNTs for chemical synthesis and biological evaluation. Ten putatively active and 10 putatively inactive CNTs decorated with the ligands prioritized by virtual screening for either protein-binding or cytotoxicity assay were synthesized and tested. We found that all 10 putatively inactive and 7 of 10 putatively active CNTs were confirmed in the protein-binding assay, whereas all 10 putatively inactive and 6 of 10 putatively active CNTs were confirmed in the cytotoxicity assay. This proof-of-concept study shows that computational models can be employed to guide the design of surface-modified nanomaterials with the desired biological and safety profiles.}
}
@article{zakharov_varlamova_lagunin_dmitriev_muratov_fourches_kuz'min_poroikov_tropsha_nicklaus_2016,
  author       = {Zakharov, Alexey V. and Varlamova, Ekaterina V. and Lagunin, Alexey A. and Dmitriev, Alexander V. and Muratov, Eugene N. and Fourches, Denis and Kuz'min, Victor E. and Poroikov, Vladimir V. and Tropsha, Alexander and Nicklaus, Marc C.},
  title        = {{QSAR} Modeling and Prediction of Drug-Drug Interactions},
  journal      = {Molecular Pharmaceutics},
  volume       = {13},
  number       = {2},
  pages        = {545--556},
  year         = {2016},
  month        = feb,
  ISSN         = {1543-8392},
  DOI          = {10.1021/acs.molpharmaceut.5b00762},
  abstractNote = {Severe adverse drug reactions (ADRs) are the fourth leading cause of fatality in the U.S. with more than 100,000 deaths per year. As up to 30% of all ADRs are believed to be caused by drug-drug interactions (DDIs), typically mediated by cytochrome P450s, possibilities to predict DDIs from existing knowledge are important. We collected data from public sources on 1485, 2628, 4371, and 27,966 possible DDIs mediated by four cytochrome P450 isoforms 1A2, 2C9, 2D6, and 3A4 for 55, 73, 94, and 237 drugs, respectively. For each of these data sets, we developed and validated QSAR models for the prediction of DDIs. As a unique feature of our approach, the interacting drug pairs were represented as binary chemical mixtures in a 1:1 ratio. We used two types of chemical descriptors: quantitative neighborhoods of atoms (QNA) and simplex descriptors. Radial basis functions with self-consistent regression (RBF-SCR) and random forest (RF) were utilized to build QSAR models predicting the likelihood of DDIs for any pair of drug molecules. Our models showed balanced accuracy of 72-79% for the external test sets with a coverage of 81.36-100% when a conservative threshold for the model's applicability domain was applied. We generated virtually all possible binary combinations of marketed drugs and employed our models to identify drug pairs predicted to be instances of DDI. More than 4500 of these predicted DDIs that were not found in our training sets were confirmed by data from the DrugBank database.}
}
@article{alves_capuzzi_muratov_braga_thornton_fourches_strickland_kleinstreuer_andrade_tropsha_2016,
  author       = {Alves, Vinicius M. and Capuzzi, Stephen J. and Muratov, Eugene N. and Braga, Rodolpho C. and Thornton, Thomas E. and Fourches, Denis and Strickland, Judy and Kleinstreuer, Nicole and Andrade, Carolina H. and Tropsha, Alexander},
  title        = {{QSAR} models of human data can enrich or replace {LLNA} testing for human skin sensitization},
  journal      = {Green Chemistry},
  volume       = {18},
  number       = {24},
  pages        = {6501--6515},
  year         = {2016},
  ISSN         = {1463-9270},
  DOI          = {10.1039/c6gc01836j},
  abstractNote = {Skin sensitization is a major environmental and occupational health hazard. Although many chemicals have been evaluated in humans, there have been no efforts to model these data to date. We have compiled, curated, analyzed, and compared the available human and LLNA data. Using these data, we have developed reliable computational models and applied them for virtual screening of chemical libraries to identify putative skin sensitizers. The overall concordance between murine LLNA and human skin sensitization responses for a set of 135 unique chemicals was low (R = 28-43%), although several chemical classes had high concordance. We have succeeded to develop predictive QSAR models of all available human data with the external correct classification rate of 71%. A consensus model integrating concordant QSAR predictions and LLNA results afforded a higher CCR of 82% but at the expense of the reduced external dataset coverage (52%). We used the developed QSAR models for virtual screening of CosIng database and identified 1061 putative skin sensitizers; for seventeen of these compounds, we found published evidence of their skin sensitization effects. Models reported herein provide more accurate alternative to LLNA testing for human skin sensitization assessment across diverse chemical data. In addition, they can also be used to guide the structural optimization of toxic compounds to reduce their skin sensitization potential.}
}
@article{fourches_muratov_tropsha_2016,
  author       = {Fourches, Denis and Muratov, Eugene and Tropsha, Alexander},
  title        = {Trust, but Verify {II}: A Practical Guide to Chemogenomics Data Curation},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {56},
  number       = {7},
  pages        = {1243--1252},
  year         = {2016},
  month        = jul,
  ISSN         = {1549-960X},
  DOI          = {10.1021/acs.jcim.6b00129},
  abstractNote = {There is a growing public concern about the lack of reproducibility of experimental data published in peer-reviewed scientific literature. Herein, we review the most recent alerts regarding experimental data quality and discuss initiatives taken thus far to address this problem, especially in the area of chemical genomics. Going beyond just acknowledging the issue, we propose a chemical and biological data curation workflow that relies on existing cheminformatics approaches to flag, and when appropriate, correct possibly erroneous entries in large chemogenomics data sets. We posit that the adherence to the best practices for data curation is important for both experimental scientists who generate primary data and deposit them in chemical genomics databases and computational researchers who rely on these data for model development.}
}
@article{elkins_fedele_szklarz_abdul_azeez_salah_mikolajczyk_romanov_sepetov_huang_roth_et_al._2015,
  author       = {Elkins, Jonathan M and Fedele, Vita and Szklarz, Marta and Abdul Azeez, Kamal R and Salah, Eidarus and Mikolajczyk, Jowita and Romanov, Sergei and Sepetov, Nikolai and Huang, Xi-Ping and Roth, Bryan L and others},
  title        = {Comprehensive characterization of the {Published Kinase Inhibitor Set}},
  journal      = {Nature Biotechnology},
  volume       = {34},
  number       = {1},
  pages        = {95--103},
  year         = {2015},
  month        = oct,
  publisher    = {Springer Science and Business Media LLC},
  ISSN         = {1087-0156 1546-1696},
  DOI          = {10.1038/NBT.3374},
  url          = {http://dx.doi.org/10.1038/NBT.3374},
  abstractNote = {Despite the success of protein kinase inhibitors as approved therapeutics, drug discovery has focused on a small subset of kinase targets. Here we provide a thorough characterization of the Published Kinase Inhibitor Set (PKIS), a set of 367 small-molecule ATP-competitive kinase inhibitors that was recently made freely available with the aim of expanding research in this field and as an experiment in open-source target validation. We screen the set in activity assays with 224 recombinant kinases and 24 G protein-coupled receptors and in cellular assays of cancer cell proliferation and angiogenesis. We identify chemical starting points for designing new chemical probes of orphan kinases and illustrate the utility of these leads by developing a selective inhibitor for the previously untargeted kinases LOK and SLK. Our cellular screens reveal compounds that modulate cancer cell growth and angiogenesis in vitro. These reagents and associated data illustrate an efficient way forward to increasing understanding of the historically untargeted kinome.}
}
@comment{Typed as an article because the record carries journal, volume, number and pages, which standard misc formatting does not print.}
@article{fourches_muratov_tropsha_2015,
  author       = {Fourches, Denis and Muratov, Eugene and Tropsha, Alexander},
  title        = {Curation of chemogenomics data},
  journal      = {Nature Chemical Biology},
  volume       = {11},
  number       = {8},
  pages        = {535},
  year         = {2015},
  month        = aug,
  ISSN         = {1552-4469},
  DOI          = {10.1038/nchembio.1881}
}
@article{baker_fourches_tropsha_2015,
  author       = {Baker, Nancy C. and Fourches, Denis and Tropsha, Alexander},
  title        = {Drug Side Effect Profiles as Molecular Descriptors for Predictive Modeling of Target Bioactivity},
  journal      = {Molecular Informatics},
  volume       = {34},
  number       = {2--3},
  pages        = {160--170},
  year         = {2015},
  month        = feb,
  publisher    = {Wiley},
  ISSN         = {1868-1743},
  DOI          = {10.1002/MINF.201400134},
  url          = {http://dx.doi.org/10.1002/MINF.201400134},
  abstractNote = {We have explored the potential of using side effect profiles of drugs to predict their bioactivities at the receptor level. Serotonin 5-HT6 binding and dopamine antagonism were investigated in separate studies. A set of 5-HT6 binders and non-binders was retrieved from the PDSP Ki database, whereas dopamine antagonists were retrieved from the MeSH Pharmaceutical Action file. The side effect data was extracted from ChemoText, a data repository containing MeSH annotations pulled from MEDLINE records. These side effects profiles were treated as molecular descriptors enabling a QSAR-like approach to build models that could reliably discriminate different classes of molecules, e.g., binders versus non-binders, and dopamine antagonists versus non-antagonists. Selected models with the best external prediction performances were applied to a library of ca. 1000 chemicals with known side effects profiles in order to predict their potential 5-HT6 binding and/or dopamine antagonism. In each case the virtual screening process was able to identify putatively active compounds that through subsequent literature-based validation were found to be likely or known 5-HT6 binders or dopamine antagonists. These results demonstrate that side effect profiles can be utilized to predict a drug's unknown molecular activity, thus representing a valuable opportunity in repositioning the drug for a new indications.}
}
@article{isayev_fourches_muratov_oses_rasch_tropsha_curtarolo_2015,
  author       = {Isayev, Olexandr and Fourches, Denis and Muratov, Eugene N. and Oses, Corey and Rasch, Kevin and Tropsha, Alexander and Curtarolo, Stefano},
  title        = {Materials Cartography: Representing and Mining Materials Space Using Structural and Electronic Fingerprints},
  journal      = {Chemistry of Materials},
  volume       = {27},
  number       = {3},
  pages        = {735--743},
  year         = {2015},
  month        = jan,
  publisher    = {American Chemical Society (ACS)},
  ISSN         = {0897-4756 1520-5002},
  DOI          = {10.1021/CM503507H},
  url          = {http://dx.doi.org/10.1021/CM503507H},
  abstractNote = {As the proliferation of high-throughput approaches in materials science is increasing the wealth of data in the field, the gap between accumulated-information and derived-knowledge widens. We address the issue of scientific discovery in materials databases by introducing novel analytical approaches based on structural and electronic materials fingerprints. The framework is employed to (i) query large databases of materials using similarity concepts, (ii) map the connectivity of the materials space (i.e., as a materials cartogram) for rapidly identifying regions with unique organizations/properties, and (iii) develop predictive Quantitative Materials Structure-Property Relationships (QMSPR) models for guiding materials design. In this study, we test these fingerprints by seeking target material properties. As a quantitative example, we model the critical temperatures of known superconductors. Our novel materials fingerprinting and materials cartography approaches contribute to the emerging field of materials informatics by enabling effective computational tools to analyze, visualize, model, and design new materials.}
}
@article{braga_alves_silva_muratov_fourches_liao_tropsha_andrade_2015,
  author       = {Braga, Rodolpho C. and Alves, Vinicius M. and Silva, Meryck F. B. and Muratov, Eugene and Fourches, Denis and Liao, Luciano M. and Tropsha, Alexander and Andrade, Carolina H.},
  title        = {{Pred-hERG}: A Novel web-Accessible Computational Tool for Predicting Cardiac Toxicity},
  journal      = {Molecular Informatics},
  volume       = {34},
  number       = {10},
  pages        = {698--701},
  year         = {2015},
  month        = oct,
  ISSN         = {1868-1751},
  DOI          = {10.1002/minf.201500040},
  abstractNote = {The blockage of the hERG K+ channels is closely associated with lethal cardiac arrhythmia. The notorious ligand promiscuity of this channel earmarked hERG as one of the most important antitargets to be considered in early stages of drug development process. Herein we report on the development of an innovative and freely accessible web server for early identification of putative hERG blockers and non-blockers in chemical libraries. We have collected the largest publicly available curated hERG dataset of 5,984 compounds. We succeed in developing robust and externally predictive binary (CCR≈0.8) and multiclass models (accuracy≈0.7). These models are available as a web-service freely available for public at http://labmol.farmacia.ufg.br/predherg/. Three following outcomes are available for the users: prediction by binary model, prediction by multi-class model, and the probability maps of atomic contribution. The Pred-hERG will be continuously updated and upgraded as new information became available.}
}
@article{alves_muratov_fourches_strickland_kleinstreuer_andrade_tropsha_2015,
  author       = {Alves, Vinicius M. and Muratov, Eugene and Fourches, Denis and Strickland, Judy and Kleinstreuer, Nicole and Andrade, Carolina H. and Tropsha, Alexander},
  title        = {Predicting chemically-induced skin reactions. {Part I}: {QSAR} models of skin sensitization and their application to identify potentially hazardous compounds},
  journal      = {Toxicology and Applied Pharmacology},
  volume       = {284},
  number       = {2},
  pages        = {262--272},
  year         = {2015},
  month        = apr,
  publisher    = {Elsevier BV},
  ISSN         = {0041-008X},
  DOI          = {10.1016/J.TAAP.2014.12.014},
  url          = {http://dx.doi.org/10.1016/J.TAAP.2014.12.014},
  abstractNote = {Repetitive exposure to a chemical agent can induce an immune reaction in inherently susceptible individuals that leads to skin sensitization. Although many chemicals have been reported as skin sensitizers, there have been very few rigorously validated QSAR models with defined applicability domains (AD) that were developed using a large group of chemically diverse compounds. In this study, we have aimed to compile, curate, and integrate the largest publicly available dataset related to chemically-induced skin sensitization, use this data to generate rigorously validated and QSAR models for skin sensitization, and employ these models as a virtual screening tool for identifying putative sensitizers among environmental chemicals. We followed best practices for model building and validation implemented with our predictive QSAR workflow using Random Forest modeling technique in combination with SiRMS and Dragon descriptors. The Correct Classification Rate (CCR) for QSAR models discriminating sensitizers from non-sensitizers was 71–88% when evaluated on several external validation sets, within a broad AD, with positive (for sensitizers) and negative (for non-sensitizers) predicted rates of 85% and 79% respectively. When compared to the skin sensitization module included in the OECD QSAR Toolbox as well as to the skin sensitization model in publicly available VEGA software, our models showed a significantly higher prediction accuracy for the same sets of external compounds as evaluated by Positive Predicted Rate, Negative Predicted Rate, and CCR. These models were applied to identify putative chemical hazards in the Scorecard database of possible skin or sense organ toxicants as primary candidates for experimental validation.}
}
@comment{The key below carries a trailing b because the Part I paper by the same authors, earlier in this file, already uses the unsuffixed key.}
@article{alves_muratov_fourches_strickland_kleinstreuer_andrade_tropsha_2015b,
  author       = {Alves, Vinicius M. and Muratov, Eugene and Fourches, Denis and Strickland, Judy and Kleinstreuer, Nicole and Andrade, Carolina H. and Tropsha, Alexander},
  title        = {Predicting chemically-induced skin reactions. {Part II}: {QSAR} models of skin permeability and the relationships between skin permeability and skin sensitization},
  journal      = {Toxicology and Applied Pharmacology},
  volume       = {284},
  number       = {2},
  pages        = {273--280},
  year         = {2015},
  month        = apr,
  publisher    = {Elsevier BV},
  ISSN         = {0041-008X},
  DOI          = {10.1016/J.TAAP.2014.12.013},
  url          = {http://dx.doi.org/10.1016/J.TAAP.2014.12.013},
  abstractNote = {Skin permeability is widely considered to be mechanistically implicated in chemically-induced skin sensitization. Although many chemicals have been identified as skin sensitizers, there have been very few reports analyzing the relationships between molecular structure and skin permeability of sensitizers and non-sensitizers. The goals of this study were to: (i) compile, curate, and integrate the largest publicly available dataset of chemicals studied for their skin permeability; (ii) develop and rigorously validate QSAR models to predict skin permeability; and (iii) explore the complex relationships between skin sensitization and skin permeability. Based on the largest publicly available dataset compiled in this study, we found no overall correlation between skin permeability and skin sensitization. In addition, cross-species correlation coefficient between human and rodent permeability data was found to be as low as R2 = 0.44. Human skin permeability models based on the random forest method have been developed and validated using OECD-compliant QSAR modeling workflow. Their external accuracy was high (Q2ext = 0.73 for 63% of external compounds inside the applicability domain). The extended analysis using both experimentally-measured and QSAR-imputed data still confirmed the absence of any overall concordance between skin permeability and skin sensitization. This observation suggests that chemical modifications that affect skin permeability should not be presumed a priori to modulate the sensitization potential of chemicals. The models reported herein as well as those developed in the companion paper on skin sensitization suggest that it may be possible to rationally design compounds with the desired high skin permeability but low sensitization potential.}
}
@article{mu_jiang_chen_zhou_fourches_tropsha_yan_2014,
  author       = {Mu, Qingxin and Jiang, Guibin and Chen, Lingxin and Zhou, Hongyu and Fourches, Denis and Tropsha, Alexander and Yan, Bing},
  title        = {Chemical Basis of Interactions Between Engineered Nanoparticles and Biological Systems},
  journal      = {Chemical Reviews},
  publisher    = {American Chemical Society (ACS)},
  volume       = {114},
  number       = {15},
  pages        = {7740--7781},
  year         = {2014},
  month        = jun,
  ISSN         = {0009-2665 1520-6890},
  DOI          = {10.1021/CR400295A},
  url          = {http://dx.doi.org/10.1021/CR400295A},
}
@article{goldstein_fredrik jarskog_hilliard_alfirevic_duncan_fourches_huang_lek_neale_ripke_et al._2014,
  author       = {Goldstein, Jacqueline I. and Jarskog, L. Fredrik and Hilliard, Chris and Alfirevic, Ana and Duncan, Laramie and Fourches, Denis and Huang, Hailiang and Lek, Monkol and Neale, Benjamin M. and Ripke, Stephan and others},
  title        = {Clozapine-induced agranulocytosis is associated with rare {HLA-DQB1} and {HLA-B} alleles},
  journal      = {Nature Communications},
  publisher    = {Springer Science and Business Media LLC},
  volume       = {5},
  number       = {1},
  year         = {2014},
  month        = sep,
  ISSN         = {2041-1723},
  DOI          = {10.1038/NCOMMS5757},
  url          = {http://dx.doi.org/10.1038/NCOMMS5757},
  abstractNote = {Clozapine is a particularly effective antipsychotic medication but its use is curtailed by the risk of clozapine-induced agranulocytosis/granulocytopenia (CIAG), a severe adverse drug reaction occurring in up to 1% of treated individuals. Identifying genetic risk factors for CIAG could enable safer and more widespread use of clozapine. Here we perform the largest and most comprehensive genetic study of CIAG to date by interrogating 163 cases using genome-wide genotyping and whole-exome sequencing. We find that two loci in the major histocompatibility complex are independently associated with CIAG: a single amino acid in HLA-DQB1 (126Q) (P=4.7 × 10−14, odds ratio (OR)=0.19, 95% confidence interval (CI)=0.12–0.29) and an amino acid change in the extracellular binding pocket of HLA-B (158T) (P=6.4 × 10−10, OR=3.3, 95% CI=2.3–4.9). These associations dovetail with the roles of these genes in immunogenetic phenotypes and adverse drug responses for other medications, and provide insight into the pathophysiology of CIAG. Clozapine-induced agranulocytosis/granulocytopenia, or CIAG, is characterised by a rare and potentially fatal reaction to antipsychotic drugs. Here, the authors identify genetic variants in two immune-related genes that may contribute to the pathophysiology of CIAG.},
}
@article{golbraikh_muratov_fourches_tropsha_2014,
  author       = {Golbraikh, Alexander and Muratov, Eugene and Fourches, Denis and Tropsha, Alexander},
  title        = {Data Set Modelability by {QSAR}},
  journal      = {Journal of Chemical Information and Modeling},
  publisher    = {American Chemical Society (ACS)},
  volume       = {54},
  number       = {1},
  pages        = {1--4},
  year         = {2014},
  month        = jan,
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/CI400572X},
  url          = {http://dx.doi.org/10.1021/CI400572X},
  abstractNote = {We introduce a simple MODelability Index (MODI) that estimates the feasibility of obtaining predictive QSAR models (correct classification rate above 0.7) for a binary data set of bioactive compounds. MODI is defined as an activity class-weighted ratio of the number of nearest-neighbor pairs of compounds with the same activity class versus the total number of pairs. The MODI values were calculated for more than 100 data sets, and the threshold of 0.65 was found to separate the nonmodelable and modelable data sets.},
}
@article{blatt_farag_corey_sarrimanolis_muratov_fourches_tropsha_janzen_2014,
  author       = {Blatt, Julie and Farag, Sherif and Corey, Seth J. and Sarrimanolis, Zafeira and Muratov, Eugene and Fourches, Denis and Tropsha, Alexander and Janzen, William P.},
  title        = {Expanding the scope of drug repurposing in pediatrics: The {Children's Pharmacy Collaborative}™},
  journal      = {Drug Discovery Today},
  publisher    = {Elsevier BV},
  volume       = {19},
  number       = {11},
  pages        = {1696--1698},
  year         = {2014},
  month        = nov,
  ISSN         = {1359-6446},
  DOI          = {10.1016/J.DRUDIS.2014.08.003},
  url          = {http://dx.doi.org/10.1016/J.DRUDIS.2014.08.003},
  abstractNote = {Drug repurposing is the use of 'old' drugs for new indications, avoiding the need for time- and cost-intensive toxicity studies. This approach should be particularly attractive for pediatrics, but its use in this population has been limited. One obstacle has been the lack of a comprehensive database of drugs for which there already is at least one indication in children. We describe the development of The Children's Pharmacy Collaborative, which should grow over time, serve as a resource for professionals and families, and stimulate drug-repurposing efforts for a range of pediatric disorders.},
}
@article{cherkasov_muratov_fourches_varnek_baskin_cronin_dearden_gramatica_martin_todeschini_et al._2014,
  author       = {Cherkasov, Artem and Muratov, Eugene N. and Fourches, Denis and Varnek, Alexandre and Baskin, Igor I. and Cronin, Mark and Dearden, John and Gramatica, Paola and Martin, Yvonne C. and Todeschini, Roberto and others},
  title        = {{QSAR} Modeling: Where Have You Been? {Where} Are You Going To?},
  journal      = {Journal of Medicinal Chemistry},
  publisher    = {American Chemical Society (ACS)},
  volume       = {57},
  number       = {12},
  pages        = {4977--5010},
  year         = {2014},
  month        = jan,
  ISSN         = {0022-2623 1520-4804},
  DOI          = {10.1021/JM4004285},
  url          = {http://dx.doi.org/10.1021/JM4004285},
  abstractNote = {Quantitative structure–activity relationship modeling is one of the major computational tools employed in medicinal chemistry. However, throughout its entire history it has drawn both praise and criticism concerning its reliability, limitations, successes, and failures. In this paper, we discuss (i) the development and evolution of QSAR; (ii) the current trends, unsolved problems, and pressing challenges; and (iii) several novel and emerging applications of QSAR modeling. Throughout this discussion, we provide guidelines for QSAR development, validation, and application, which are summarized in best practices for building rigorously validated and externally predictive QSAR models. We hope that this Perspective will help communications between computational and experimental chemists toward collaborative development and use of QSAR models. We also believe that the guidelines presented here will help journal editors and reviewers apply more stringent scientific standards to manuscripts reporting new QSAR studies, as well as encourage the use of high quality, validated QSARs for regulatory decision making.},
}
@article{zhang_fourches_sedykh_zhu_golbraikh_ekins_clark_connelly_sigal_hodges_et al._2013,
  author       = {Zhang, Liying and Fourches, Denis and Sedykh, Alexander and Zhu, Hao and Golbraikh, Alexander and Ekins, Sean and Clark, Julie and Connelly, Michele C. and Sigal, Martina and Hodges, Dena and others},
  title        = {Discovery of Novel Antimalarial Compounds Enabled by {QSAR}-Based Virtual Screening},
  journal      = {Journal of Chemical Information and Modeling},
  publisher    = {American Chemical Society (ACS)},
  volume       = {53},
  number       = {2},
  pages        = {475--492},
  year         = {2013},
  month        = jan,
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/ci300421n},
  url          = {http://dx.doi.org/10.1021/ci300421n},
  abstractNote = {Quantitative structure-activity relationship (QSAR) models have been developed for a data set of 3133 compounds defined as either active or inactive against P. falciparum. Because the data set was strongly biased toward inactive compounds, different sampling approaches were employed to balance the ratio of actives versus inactives, and models were rigorously validated using both internal and external validation approaches. The balanced accuracy for assessing the antimalarial activities of 70 external compounds was between 87% and 100% depending on the approach used to balance the data set. Virtual screening of the ChemBridge database using QSAR models identified 176 putative antimalarial compounds that were submitted for experimental validation, along with 42 putative inactives as negative controls. Twenty five (14.2%) computational hits were found to have antimalarial activities with minimal cytotoxicity to mammalian cells, while all 42 putative inactives were confirmed experimentally. Structural inspection of confirmed active hits revealed novel chemical scaffolds, which could be employed as starting points to discover novel antimalarial agents.},
}
@article{low_sedykh_fourches_golbraikh_whelan_rusyn_tropsha_2013,
  author       = {Low, Yen and Sedykh, Alexander and Fourches, Denis and Golbraikh, Alexander and Whelan, Maurice and Rusyn, Ivan and Tropsha, Alexander},
  title        = {Integrative Chemical–Biological Read-Across Approach for Chemical Hazard Classification},
  journal      = {Chemical Research in Toxicology},
  publisher    = {American Chemical Society (ACS)},
  volume       = {26},
  number       = {8},
  pages        = {1199--1208},
  year         = {2013},
  month        = aug,
  ISSN         = {0893-228X 1520-5010},
  DOI          = {10.1021/TX400110F},
  url          = {http://dx.doi.org/10.1021/TX400110F},
  abstractNote = {Traditional read-across approaches typically rely on the chemical similarity principle to predict chemical toxicity; however, the accuracy of such predictions is often inadequate due to the underlying complex mechanisms of toxicity. Here, we report on the development of a hazard classification and visualization method that draws upon both chemical structural similarity and comparisons of biological responses to chemicals measured in multiple short-term assays (“biological” similarity). The Chemical–Biological Read-Across (CBRA) approach infers each compound’s toxicity from both chemical and biological analogues whose similarities are determined by the Tanimoto coefficient. Classification accuracy of CBRA was compared to that of classical RA and other methods using chemical descriptors alone or in combination with biological data. Different types of adverse effects (hepatotoxicity, hepatocarcinogenicity, mutagenicity, and acute lethality) were classified using several biological data types (gene expression profiling and cytotoxicity screening). CBRA-based hazard classification exhibited consistently high external classification accuracy and applicability to diverse chemicals. Transparency of the CBRA approach is aided by the use of radial plots that show the relative contribution of analogous chemical and biological neighbors. Identification of both chemical and biological features that give rise to the high accuracy of CBRA-based toxicity prediction facilitates mechanistic interpretation of the models.},
}
@article{fourches_muratov_ding_dokholyan_tropsha_2013,
  author       = {Fourches, Denis and Muratov, Eugene and Ding, Feng and Dokholyan, Nikolay V. and Tropsha, Alexander},
  title        = {Predicting Binding Affinity of {CSAR} Ligands Using Both Structure-Based and Ligand-Based Approaches},
  journal      = {Journal of Chemical Information and Modeling},
  publisher    = {American Chemical Society (ACS)},
  volume       = {53},
  number       = {8},
  pages        = {1915--1922},
  year         = {2013},
  month        = jul,
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/CI400216Q},
  url          = {http://dx.doi.org/10.1021/CI400216Q},
  abstractNote = {We report on the prediction accuracy of ligand-based (2D QSAR) and structure-based (MedusaDock) methods used both independently and in consensus for ranking the congeneric series of ligands binding to three protein targets (UK, ERK2, and CHK1) from the CSAR 2011 benchmark exercise. An ensemble of predictive QSAR models was developed using known binders of these three targets extracted from the publicly available ChEMBL database. Selected models were used to predict the binding affinity of CSAR compounds toward the corresponding targets and rank them accordingly; the overall ranking accuracy evaluated by Spearman correlation was as high as 0.78 for UK, 0.60 for ERK2, and 0.56 for CHK1, placing our predictions in the top 10% among all the participants. In parallel, MedusaDock, designed to predict reliable docking poses, was also used for ranking the CSAR ligands according to their docking scores; the resulting accuracy (Spearman correlation) for UK, ERK2, and CHK1 were 0.76, 0.31, and 0.26, respectively. In addition, performance of several consensus approaches combining MedusaDock- and QSAR-predicted ranks altogether has been explored; the best approach yielded Spearman correlation coefficients for UK, ERK2, and CHK1 of 0.82, 0.50, and 0.45, respectively. This study shows that (i) externally validated 2D QSAR models were capable of ranking CSAR ligands at least as accurately as more computationally intensive structure-based approaches used both by us and by other groups and (ii) ligand-based QSAR models can complement structure-based approaches by boosting the prediction performances when used in consensus.},
}
@article{fourches_tropsha_2013,
  author       = {Fourches, Denis and Tropsha, Alexander},
  title        = {Using Graph Indices for the Analysis and Comparison of Chemical Datasets},
  journal      = {Molecular Informatics},
  publisher    = {Wiley},
  volume       = {32},
  number       = {9--10},
  pages        = {827--842},
  year         = {2013},
  month        = sep,
  ISSN         = {1868-1743},
  DOI          = {10.1002/MINF.201300076},
  url          = {http://dx.doi.org/10.1002/MINF.201300076},
  abstractNote = {In cheminformatics, compounds are represented as points in multidimensional space of chemical descriptors. When all pairs of points found within certain distance threshold in the original high dimensional chemistry space are connected by distance-labeled edges, the resulting data structure can be defined as Dataset Graph (DG). We show that, similarly to the conventional description of organic molecules, many graph indices can be computed for DGs as well. We demonstrate that chemical datasets can be effectively characterized and compared by computing simple graph indices such as the average vertex degree or Randic connectivity index. This approach is used to characterize and quantify the similarity between different datasets or subsets of the same dataset (e.g., training, test, and external validation sets used in QSAR modeling). The freely available ADDAGRA program has been implemented to build and visualize DGs. The approach proposed and discussed in this report could be further explored and utilized for different cheminformatics applications such as dataset diversification by acquiring external compounds, dataset processing prior to QSAR modeling, or (dis)similarity modeling of multiple datasets studied in chemical genomics applications.},
}
@article{sedykh_fourches_duan_hucke_garneau_zhu_bonneau_tropsha_2012,
  author       = {Sedykh, Alexander and Fourches, Denis and Duan, Jianmin and Hucke, Oliver and Garneau, Michel and Zhu, Hao and Bonneau, Pierre and Tropsha, Alexander},
  title        = {Human Intestinal Transporter Database: {QSAR} Modeling and Virtual Profiling of Drug Uptake, Efflux and Interactions},
  journal      = {Pharmaceutical Research},
  publisher    = {Springer Science and Business Media LLC},
  volume       = {30},
  number       = {4},
  pages        = {996--1007},
  year         = {2012},
  month        = dec,
  ISSN         = {0724-8741 1573-904X},
  DOI          = {10.1007/S11095-012-0935-X},
  url          = {http://dx.doi.org/10.1007/S11095-012-0935-X},
  abstractNote = {Membrane transporters mediate many biological effects of chemicals and play a major role in pharmacokinetics and drug resistance. The selection of viable drug candidates among biologically active compounds requires the assessment of their transporter interaction profiles. Using public sources, we have assembled and curated the largest, to our knowledge, human intestinal transporter database (>5,000 interaction entries for >3,700 molecules). This data was used to develop thoroughly validated classification Quantitative Structure-Activity Relationship (QSAR) models of transport and/or inhibition of several major transporters including MDR1, BCRP, MRP1-4, PEPT1, ASBT, OATP2B1, OCT1, and MCT1. QSAR models have been developed with advanced machine learning techniques such as Support Vector Machines, Random Forest, and k Nearest Neighbors using Dragon and MOE chemical descriptors. These models afforded high external prediction accuracies of 71-100% estimated by 5-fold external validation, and showed hit retrieval rates with up to 20-fold enrichment in the virtual screening of DrugBank compounds. The compendium of predictive QSAR models developed in this study can be used for virtual profiling of drug candidates and/or environmental agents with the optimal transporter profiles.},
}
@article{low_uehara_minowa_yamada_ohno_urushidani_sedykh_muratov_kuz’min_fourches_et al._2011,
  author       = {Low, Yen and Uehara, Takeki and Minowa, Yohsuke and Yamada, Hiroshi and Ohno, Yasuo and Urushidani, Tetsuro and Sedykh, Alexander and Muratov, Eugene and Kuz’min, Viktor and Fourches, Denis and others},
  title        = {Predicting Drug-Induced Hepatotoxicity Using {QSAR} and Toxicogenomics Approaches},
  journal      = {Chemical Research in Toxicology},
  publisher    = {American Chemical Society (ACS)},
  volume       = {24},
  number       = {8},
  pages        = {1251--1262},
  year         = {2011},
  month        = aug,
  ISSN         = {0893-228X 1520-5010},
  DOI          = {10.1021/tx200148a},
  url          = {http://dx.doi.org/10.1021/tx200148a},
  abstractNote = {Quantitative structure–activity relationship (QSAR) modeling and toxicogenomics are typically used independently as predictive tools in toxicology. In this study, we evaluated the power of several statistical models for predicting drug hepatotoxicity in rats using different descriptors of drug molecules, namely, their chemical descriptors and toxicogenomics profiles. The records were taken from the Toxicogenomics Project rat liver microarray database containing information on 127 drugs (http://toxico.nibio.go.jp/datalist.html). The model end point was hepatotoxicity in the rat following 28 days of continuous exposure, established by liver histopathology and serum chemistry. First, we developed multiple conventional QSAR classification models using a comprehensive set of chemical descriptors and several classification methods (k nearest neighbor, support vector machines, random forests, and distance weighted discrimination). With chemical descriptors alone, external predictivity (correct classification rate, CCR) from 5-fold external cross-validation was 61%. Next, the same classification methods were employed to build models using only toxicogenomics data (24 h after a single exposure) treated as biological descriptors. The optimized models used only 85 selected toxicogenomics descriptors and had CCR as high as 76%. Finally, hybrid models combining both chemical descriptors and transcripts were developed; their CCRs were between 68 and 77%. Although the accuracy of hybrid models did not exceed that of the models based on toxicogenomics data alone, the use of both chemical and biological descriptors enriched the interpretation of the models. In addition to finding 85 transcripts that were predictive and highly relevant to the mechanisms of drug-induced liver injury, chemical structural alerts for hepatotoxicity were identified. These results suggest that concurrent exploration of the chemical features and acute treatment-induced changes in transcript levels will both enrich the mechanistic understanding of subchronic liver injury and afford models capable of accurate prediction of hepatotoxicity from chemical structure and short-term assay results.},
}
@article{sushko_novotarskyi_körner_pandey_cherkasov_li_gramatica_hansen_schroeter_müller_et al._2010,
  author       = {Sushko, Iurii and Novotarskyi, Sergii and Körner, Robert and Pandey, Anil Kumar and Cherkasov, Artem and Li, Jiazhong and Gramatica, Paola and Hansen, Katja and Schroeter, Timon and Müller, Klaus-Robert and others},
  title        = {Applicability Domains for Classification Problems: Benchmarking of Distance to Models for {Ames} Mutagenicity Set},
  journal      = {Journal of Chemical Information and Modeling},
  publisher    = {American Chemical Society (ACS)},
  volume       = {50},
  number       = {12},
  pages        = {2094--2111},
  year         = {2010},
  month        = oct,
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/ci100253r},
  url          = {http://dx.doi.org/10.1021/ci100253r},
  abstractNote = {The estimation of accuracy and applicability of QSAR and QSPR models for biological and physicochemical properties represents a critical problem. The developed parameter of "distance to model" (DM) is defined as a metric of similarity between the training and test set compounds that have been subjected to QSAR/QSPR modeling. In our previous work, we demonstrated the utility and optimal performance of DM metrics that have been based on the standard deviation within an ensemble of QSAR models. The current study applies such analysis to 30 QSAR models for the Ames mutagenicity data set that were previously reported within the 2009 QSAR challenge. We demonstrate that the DMs based on an ensemble (consensus) model provide systematically better performance than other DMs. The presented approach identifies 30-60% of compounds having an accuracy of prediction similar to the interlaboratory accuracy of the Ames test, which is estimated to be 90%. Thus, the in silico predictions can be used to halve the cost of experimental measurements by providing a similar prediction accuracy. The developed model has been made publicly available at http://ochem.eu/models/1 .},
}
@article{fourches_barnes_day_bradley_reed_tropsha_2010,
  author       = {Fourches, Denis and Barnes, Julie C. and Day, Nicola C. and Bradley, Paul and Reed, Jane Z. and Tropsha, Alexander},
  title        = {Cheminformatics Analysis of Assertions Mined from Literature That Describe Drug-Induced Liver Injury in Different Species},
  journal      = {Chemical Research in Toxicology},
  publisher    = {American Chemical Society (ACS)},
  volume       = {23},
  number       = {1},
  pages        = {171--183},
  year         = {2010},
  month        = jan,
  ISSN         = {0893-228X 1520-5010},
  DOI          = {10.1021/tx900326k},
  url          = {http://dx.doi.org/10.1021/tx900326k},
  abstractNote = {Drug-induced liver injury is one of the main causes of drug attrition. The ability to predict the liver effects of drug candidates from their chemical structures is critical to help guide experimental drug discovery projects toward safer medicines. In this study, we have compiled a data set of 951 compounds reported to produce a wide range of effects in the liver in different species, comprising humans, rodents, and nonrodents. The liver effects for this data set were obtained as assertional metadata, generated from MEDLINE abstracts using a unique combination of lexical and linguistic methods and ontological rules. We have analyzed this data set using conventional cheminformatics approaches and addressed several questions pertaining to cross-species concordance of liver effects, chemical determinants of liver effects in humans, and the prediction of whether a given compound is likely to cause a liver effect in humans. We found that the concordance of liver effects was relatively low (ca. 39-44%) between different species, raising the possibility that species specificity could depend on specific features of chemical structure. Compounds were clustered by their chemical similarity, and similar compounds were examined for the expected similarity of their species-dependent liver effect profiles. In most cases, similar profiles were observed for members of the same cluster, but some compounds appeared as outliers. The outliers were the subject of focused assertion regeneration from MEDLINE as well as other data sources. In some cases, additional biological assertions were identified, which were in line with expectations based on compounds' chemical similarities. The assertions were further converted to binary annotations of underlying chemicals (i.e., liver effect vs no liver effect), and binary quantitative structure-activity relationship (QSAR) models were generated to predict whether a compound would be expected to produce liver effects in humans. Despite the apparent heterogeneity of data, models have shown good predictive power assessed by external 5-fold cross-validation procedures. The external predictive power of binary QSAR models was further confirmed by their application to compounds that were retrieved or studied after the model was developed. To the best of our knowledge, this is the first study for chemical toxicity prediction that applied QSAR modeling and other cheminformatics techniques to observational data generated by the means of automated text mining with limited manual curation, opening up new opportunities for generating and modeling chemical toxicology data.},
}
@article{rodgers_zhu_fourches_rusyn_tropsha_2010,
  author       = {Rodgers, Amie D. and Zhu, Hao and Fourches, Denis and Rusyn, Ivan and Tropsha, Alexander},
  title        = {Modeling Liver-Related Adverse Effects of Drugs Using k Nearest Neighbor Quantitative Structure−Activity Relationship Method},
  journal      = {Chemical Research in Toxicology},
  publisher    = {American Chemical Society (ACS)},
  volume       = {23},
  number       = {4},
  pages        = {724--732},
  year         = {2010},
  month        = apr,
  ISSN         = {0893-228X 1520-5010},
  DOI          = {10.1021/tx900451r},
  url          = {http://dx.doi.org/10.1021/tx900451r},
  abstractNote = {Adverse effects of drugs (AEDs) continue to be a major cause of drug withdrawals in both development and postmarketing. While liver-related AEDs are a major concern for drug safety, there are few in silico models for predicting human liver toxicity for drug candidates. We have applied the quantitative structure−activity relationship (QSAR) approach to model liver AEDs. In this study, we aimed to construct a QSAR model capable of binary classification (active vs inactive) of drugs for liver AEDs based on chemical structure. To build QSAR models, we have employed an FDA spontaneous reporting database of human liver AEDs (elevations in activity of serum liver enzymes), which contains data on approximately 500 approved drugs. Approximately 200 compounds with wide clinical data coverage, structural similarity, and balanced (40/60) active/inactive ratios were selected for modeling and divided into multiple training/test and external validation sets. QSAR models were developed using the k nearest neighbor method and validated using external data sets. Models with high sensitivity (>73%) and specificity (>94%) for the prediction of liver AEDs in external validation sets were developed. To test applicability of the models, three chemical databases (World Drug Index, Prestwick Chemical Library, and Biowisdom Liver Intelligence Module) were screened in silico, and the validity of predictions was determined, where possible, by comparing model-based classification with assertions in publicly available literature. Validated QSAR models of liver AEDs based on the data from the FDA spontaneous reporting system can be employed as sensitive and specific predictors of AEDs in preclinical screening of drug candidates for potential hepatotoxicity in humans.},
}
@article{fourches_pu_tassa_weissleder_shaw_mumper_tropsha_2010,
  author       = {Fourches, Denis and Pu, Dongqiuye and Tassa, Carlos and Weissleder, Ralph and Shaw, Stanley Y. and Mumper, Russell J. and Tropsha, Alexander},
  title        = {Quantitative Nanostructure−Activity Relationship Modeling},
  journal      = {ACS Nano},
  publisher    = {American Chemical Society (ACS)},
  volume       = {4},
  number       = {10},
  pages        = {5703--5712},
  year         = {2010},
  month        = sep,
  ISSN         = {1936-0851 1936-086X},
  DOI          = {10.1021/nn1013484},
  url          = {http://dx.doi.org/10.1021/nn1013484},
  abstractNote = {Evaluation of biological effects, both desired and undesired, caused by manufactured nanoparticles (MNPs) is of critical importance for nanotechnology. Experimental studies, especially toxicological, are time-consuming, costly, and often impractical, calling for the development of efficient computational approaches capable of predicting biological effects of MNPs. To this end, we have investigated the potential of cheminformatics methods such as quantitative structure−activity relationship (QSAR) modeling to establish statistically significant relationships between measured biological activity profiles of MNPs and their physical, chemical, and geometrical properties, either measured experimentally or computed from the structure of MNPs. To reflect the context of the study, we termed our approach quantitative nanostructure−activity relationship (QNAR) modeling. We have employed two representative sets of MNPs studied recently using in vitro cell-based assays: (i) 51 various MNPs with diverse metal cores (Proc. Natl. Acad. Sci. 2008, 105, 7387−7392) and (ii) 109 MNPs with similar core but diverse surface modifiers (Nat. Biotechnol. 2005, 23, 1418−1423). We have generated QNAR models using machine learning approaches such as support vector machine (SVM)-based classification and k nearest neighbors (kNN)-based regression; their external prediction power was shown to be as high as 73% for classification modeling and having an R2 of 0.72 for regression modeling. Our results suggest that QNAR models can be employed for: (i) predicting biological activity profiles of novel nanomaterials, and (ii) prioritizing the design and manufacturing of nanomaterials toward better and safer products.},
}
@article{fourches_muratov_tropsha_2010,
  author    = {Fourches, Denis and Muratov, Eugene and Tropsha, Alexander},
  title     = {Trust, But Verify: On the Importance of Chemical Structure Curation in Cheminformatics and {QSAR} Modeling Research},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {50},
  number    = {7},
  pages     = {1189--1204},
  year      = {2010},
  month     = jun,
  publisher = {American Chemical Society (ACS)},
  ISSN      = {1549-9596 1549-960X},
  DOI       = {10.1021/ci100176x},
  url       = {http://dx.doi.org/10.1021/ci100176x}
}
@article{zhu_tropsha_fourches_varnek_papa_gramatica_öberg_dao_cherkasov_tetko_2008,
  author       = {Zhu, Hao and Tropsha, Alexander and Fourches, Denis and Varnek, Alexandre and Papa, Ester and Gramatica, Paola and Öberg, Tomas and Dao, Phuong and Cherkasov, Artem and Tetko, Igor V.},
  title        = {Combinatorial {QSAR} Modeling of Chemical Toxicants Tested against {Tetrahymena} pyriformis},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {48},
  number       = {4},
  pages        = {766--784},
  year         = {2008},
  month        = mar,
  publisher    = {American Chemical Society (ACS)},
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/ci700443v},
  url          = {http://dx.doi.org/10.1021/ci700443v},
  abstractNote = {Selecting most rigorous quantitative structure-activity relationship (QSAR) approaches is of great importance in the development of robust and predictive models of chemical toxicity. To address this issue in a systematic way, we have formed an international virtual collaboratory consisting of six independent groups with shared interests in computational chemical toxicology. We have compiled an aqueous toxicity data set containing 983 unique compounds tested in the same laboratory over a decade against Tetrahymena pyriformis. A modeling set including 644 compounds was selected randomly from the original set and distributed to all groups that used their own QSAR tools for model development. The remaining 339 compounds in the original set (external set I) as well as 110 additional compounds (external set II) published recently by the same laboratory (after this computational study was already in progress) were used as two independent validation sets to assess the external predictive power of individual models. In total, our virtual collaboratory has developed 15 different types of QSAR models of aquatic toxicity for the training set. The internal prediction accuracy for the modeling set ranged from 0.76 to 0.93 as measured by the leave-one-out cross-validation correlation coefficient ( Q abs2). The prediction accuracy for the external validation sets I and II ranged from 0.71 to 0.85 (linear regression coefficient R absI2) and from 0.38 to 0.83 (linear regression coefficient R absII2), respectively. 
The use of an applicability domain threshold implemented in most models generally improved the external prediction accuracy but at the same time led to a decrease in chemical space coverage. Finally, several consensus models were developed by averaging the predicted aquatic toxicity for every compound using all 15 models, with or without taking into account their respective applicability domains. We find that consensus models afford higher prediction accuracy for the external validation data sets with the highest space coverage as compared to individual constituent models. Our studies prove the power of a collaborative and consensual approach to QSAR model development. The best validated models of aquatic toxicity developed by our collaboratory (both individual and consensus) can be used as reliable computational predictors of aquatic toxicity and are available from any of the participating laboratories.}
}
@article{tetko_sushko_pandey_zhu_tropsha_papa_öberg_todeschini_fourches_varnek_2008,
  author       = {Tetko, Igor V. and Sushko, Iurii and Pandey, Anil Kumar and Zhu, Hao and Tropsha, Alexander and Papa, Ester and Öberg, Tomas and Todeschini, Roberto and Fourches, Denis and Varnek, Alexandre},
  title        = {Critical Assessment of {QSAR} Models of Environmental Toxicity against {Tetrahymena} pyriformis: Focusing on Applicability Domain and Overfitting by Variable Selection},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {48},
  number       = {9},
  pages        = {1733--1746},
  year         = {2008},
  month        = aug,
  publisher    = {American Chemical Society (ACS)},
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/ci800151m},
  url          = {http://dx.doi.org/10.1021/ci800151m},
  abstractNote = {The estimation of the accuracy of predictions is a critical problem in QSAR modeling. The “distance to model” can be defined as a metric that defines the similarity between the training set molecules and the test set compound for the given property in the context of a specific model. It could be expressed in many different ways, e.g., using Tanimoto coefficient, leverage, correlation in space of models, etc. In this paper we have used mixtures of Gaussian distributions as well as statistical tests to evaluate six types of distances to models with respect to their ability to discriminate compounds with small and large prediction errors. The analysis was performed for twelve QSAR models of aqueous toxicity against T. pyriformis obtained with different machine-learning methods and various types of descriptors. The distances to model based on standard deviation of predicted toxicity calculated from the ensemble of models afforded the best results. This distance also successfully discriminated molecules with low and large prediction errors for a mechanism-based model developed using log P and the Maximum Acceptor Superdelocalizability descriptors. Thus, the distance to model metric could also be used to augment mechanistic QSAR models by estimating their prediction errors. Moreover, the accuracy of prediction is mainly determined by the training set data distribution in the chemistry and activity spaces but not by QSAR approaches used to develop the models. 
We have shown that incorrect validation of a model may result in the wrong estimation of its performance and suggested how this problem could be circumvented. The toxicity of 3182 and 48774 molecules from the EPA High Production Volume (HPV) Challenge Program and EINECS (European chemical Substances Information System), respectively, was predicted, and the accuracy of prediction was estimated. The developed models are available online at http://www.qspr.org site.}
}
@article{varnek_fourches_sieffert_solov'ev_hill_lecomte_2007,
  author       = {Varnek, Alexandre and Fourches, D. and Sieffert, N. and Solov'ev, V. P. and Hill, C. and Lecomte, M.},
  title        = {{QSPR} Modeling of the {AmIII}/{EuIII} Separation Factor: How Far Can we Predict?},
  journal      = {Solvent Extraction and Ion Exchange},
  volume       = {25},
  number       = {1},
  pages        = {1--26},
  year         = {2007},
  month        = mar,
  publisher    = {Informa UK Limited},
  ISSN         = {0736-6299 1532-2262},
  DOI          = {10.1080/07366290601067481},
  url          = {http://dx.doi.org/10.1080/07366290601067481},
  abstractNote = {Exhaustive quantitative structure‐property relationship (QSPR) modeling of the separation factor logSF for 46 polyazaheterocyclic ligands extracting Am3+ and Eu3+ from nitric acid aqueous solution to the 1,1,2,2–tetrachloroethane phase has been done using different computational approaches. Modeling methods included Multiple Linear Regression, Radial Basis Function Neural Networks, and Associated Neural Networks; two types of descriptors (substructural molecular fragments and molecular descriptors) and different techniques of variable selection have been employed. The developed QSPR models applied for novel t‐Bu‐hemi‐BTP ligand resulted in logSF=1.07−1.46; these predicted values somewhat exceed the experimental value logSF=1.0. Several hypothetical extractants potentially possessing high logSF values are proposed. An influence of uncertainties in initial experimental data as well as the choice of the theoretical approach on the performance of QSPR models is discussed.}
}
@article{varnek_fourches_solov'ev_klimchuk_ouadi_billard_2007,
  author       = {Varnek, A. and Fourches, D. and Solov'ev, V. and Klimchuk, O. and Ouadi, A. and Billard, I.},
  title        = {Successful “In Silico” Design of New Efficient Uranyl Binders},
  journal      = {Solvent Extraction and Ion Exchange},
  volume       = {25},
  number       = {4},
  pages        = {433--462},
  year         = {2007},
  month        = jun,
  publisher    = {Informa UK Limited},
  ISSN         = {0736-6299 1532-2262},
  DOI          = {10.1080/07366290701415820},
  url          = {http://dx.doi.org/10.1080/07366290701415820},
  abstractNote = {ISIDA (In Silico Design and Data Analysis) software have been used for computer‐aided molecular design of novel monoamides that efficiently extract U(VI). A set of available experimental uranyl partition coefficients (logD) in a water/toluene system for 19 monoamides has been used in order to establish quantitative relationships between the structure of the molecules and their extraction properties using different machine‐learning methods (multi‐linear regression analysis, associated neural networks, support vector machine). Then, developed structure‐property models have been applied to screen a virtual combinatorial library containing about 10,500 molecules. Hits' selection has been performed taking into account for the extraction property of molecules, their aqueous solubility (potential extractants must not be soluble in water), and synthetic feasibility. Selected 21 hits have been synthesized and studied experimentally as uranyl extractants using the same protocol as for the molecules from th...}
}
@article{tetko_solov'ev_antonov_yao_doucet_fan_hoonakker_fourches_jost_lachiche_et al._2006,
  author       = {Tetko, Igor V. and Solov'ev, Vitaly P. and Antonov, Alexey V. and Yao, Xiaojun and Doucet, Jean Pierre and Fan, Botao and Hoonakker, Frank and Fourches, Denis and Jost, Piere and Lachiche, Nicolas and others},
  title        = {Benchmarking of Linear and Nonlinear Approaches for Quantitative Structure-Property Relationship Studies of Metal Complexation with Ionophores},
  journal      = {Journal of Chemical Information and Modeling},
  volume       = {46},
  number       = {2},
  pages        = {808--819},
  year         = {2006},
  month        = mar,
  publisher    = {American Chemical Society (ACS)},
  ISSN         = {1549-9596 1549-960X},
  DOI          = {10.1021/ci0504216},
  url          = {http://dx.doi.org/10.1021/ci0504216},
  abstractNote = {A benchmark of several popular methods, Associative Neural Networks (ANN), Support Vector Machines (SVM), k Nearest Neighbors (kNN), Maximal Margin Linear Programming (MMLP), Radial Basis Function Neural Network (RBFNN), and Multiple Linear Regression (MLR), is reported for quantitative-structure property relationships (QSPR) of stability constants logK1 for the 1:1 (M:L) and logbeta2 for 1:2 complexes of metal cations Ag+ and Eu3+ with diverse sets of organic molecules in water at 298 K and ionic strength 0.1 M. The methods were tested on three types of descriptors: molecular descriptors including E-state values, counts of atoms determined for E-state atom types, and substructural molecular fragments (SMF). Comparison of the models was performed using a 5-fold external cross-validation procedure. Robust statistical tests (bootstrap and Kolmogorov-Smirnov statistics) were employed to evaluate the significance of calculated models. The Wilcoxon signed-rank test was used to compare the performance of methods. Individual structure-complexation property models obtained with nonlinear methods demonstrated a significantly better performance than the models built using multilinear regression analysis (MLRA). However, the averaging of several MLRA models based on SMF descriptors provided as good of a prediction as the most efficient nonlinear techniques. Support Vector Machines and Associative Neural Networks contributed in the largest number of significant models. Models based on fragments (SMF descriptors and E-state counts) had higher prediction ability than those based on E-state indices. 
The use of SMF descriptors and E-state counts provided similar results, whereas E-state indices lead to less significant models. The current study illustrates the difficulties of quantitative comparison of different methods: conclusions based only on one data set without appropriate statistical tests could be wrong.}
}
@article{varnek_fourches_hoonakker_solov’ev_2005,
  author    = {Varnek, A. and Fourches, D. and Hoonakker, F. and Solov'ev, V. P.},
  title     = {Substructural fragments: an universal language to encode reactions, molecular and supramolecular structures},
  journal   = {Journal of Computer-Aided Molecular Design},
  volume    = {19},
  number    = {9-10},
  pages     = {693--703},
  year      = {2005},
  month     = sep,
  publisher = {Springer Science and Business Media LLC},
  ISSN      = {0920-654X 1573-4951},
  DOI       = {10.1007/s10822-005-9008-0},
  url       = {http://dx.doi.org/10.1007/s10822-005-9008-0}
}
@article{varnek_fourches_solov'e_baulin_turanov_karandashev_fara_katritzky_2004,
  author       = {Varnek, A. and Fourches, D. and Solov'ev, V. P. and Baulin, V. E. and Turanov, A. N. and Karandashev, V. K. and Fara, D. and Katritzky, A. R.},
  title        = {“In Silico” Design of New Uranyl Extractants Based on Phosphoryl-Containing Podands: {QSPR} Studies, Generation and Screening of Virtual Combinatorial Library, and Experimental Tests},
  journal      = {Journal of Chemical Information and Computer Sciences},
  volume       = {44},
  number       = {4},
  pages        = {1365--1382},
  year         = {2004},
  month        = jul,
  publisher    = {American Chemical Society (ACS)},
  ISSN         = {0095-2338},
  DOI          = {10.1021/ci049976b},
  url          = {http://dx.doi.org/10.1021/ci049976b},
  abstractNote = {This paper is devoted to computer-aided design of new extractants of the uranyl cation involving three main steps: (i) a QSPR study, (ii) generation and screening of a virtual combinatorial library, and (iii) synthesis of several predicted compounds and their experimental extraction studies. First, we performed a QSPR modeling of the distribution coefficient (logD) of uranyl extracted by phosphoryl-containing podands from water to 1,2-dichloroethane. Two different approaches were used: one based on classical structural and physicochemical descriptors (implemented in the CODESSA PRO program) and another one based on fragment descriptors (implemented in the TRAIL program). Three statistically significant models obtained with TRAIL involve as descriptors either sequences of atoms and bonds or atoms with their close environment (augmented atoms). The best models of CODESSA PRO include its own molecular descriptors as well as fragment descriptors obtained with TRAIL. At the second step, a virtual combinatorial library of 2024 podands has been generated with the CombiLib program, followed by the assessment of logD values using developed QSPR models. At the third step, eight of these hypothetical compounds were synthesized and tested experimentally. Comparison with experiment shows that developed QSPR models successfully predict logD values for 7 of 8 compounds from that "blind test" set.}
}