@article{wu_yu_suresh_gale-day_alteen_woo_millbern_johnson_carroll_partch_et al._2024, title={Protein-adaptive differential scanning fluorimetry using conformationally responsive dyes}, volume={5}, ISSN={["1546-1696"]}, DOI={10.1038/s41587-024-02158-7}, journal={NATURE BIOTECHNOLOGY}, author={Wu, Taiasean and Yu, Joshua C. and Suresh, Arundhati and Gale-Day, Zachary J. and Alteen, Matthew G. and Woo, Amanda S. and Millbern, Zoe and Johnson, Oleta T. and Carroll, Emma C. and Partch, Carrie L. and et al.}, year={2024}, month={May} } @article{muratov_amaro_andrade_brown_ekins_fourches_isayev_kozakov_medina-franco_merz_et al._2021, title={A critical overview of computational approaches employed for COVID-19 drug discovery}, ISSN={["1460-4744"]}, DOI={10.1039/d0cs01065k}, abstractNote={We cover diverse methodologies, computational approaches, and case studies illustrating the ongoing efforts to develop viable drug candidates for treatment of COVID-19.}, journal={CHEMICAL SOCIETY REVIEWS}, author={Muratov, Eugene N. and Amaro, Rommie and Andrade, Carolina H. and Brown, Nathan and Ekins, Sean and Fourches, Denis and Isayev, Olexandr and Kozakov, Dima and Medina-Franco, Jose L. and Merz, Kenneth M. and et al.}, year={2021}, month={Jul} } @article{mansouri_karmaus_fitzpatrick_patlewicz_pradeep_alberga_alepee_allen_allen_alves_et al._2021, title={CATMoS: Collaborative Acute Toxicity Modeling Suite}, volume={129}, ISSN={["1552-9924"]}, DOI={10.1289/EHP8495}, abstractNote={Background: Humans are exposed to tens of thousands of chemical substances that need to be assessed for their potential toxicity. Acute systemic toxicity testing serves as the basis for regulatory hazard classification, labeling, and risk management. However, it is cost- and time-prohibitive to evaluate all new and existing chemicals using traditional rodent acute toxicity tests. In silico models built using existing data facilitate rapid acute toxicity predictions without using animals. Objectives: The U.S. 
Interagency Coordinating Committee on the Validation of Alternative Methods (ICCVAM) Acute Toxicity Workgroup organized an international collaboration to develop in silico models for predicting acute oral toxicity based on five different end points: Lethal Dose 50 (LD50 value, U.S. Environmental Protection Agency hazard (four) categories, Globally Harmonized System for Classification and Labeling hazard (five) categories, very toxic chemicals [LD50 (LD50≤50mg/kg)], and nontoxic chemicals (LD50>2,000mg/kg). Methods: An acute oral toxicity data inventory for 11,992 chemicals was compiled, split into training and evaluation sets, and made available to 35 participating international research groups that submitted a total of 139 predictive models. Predictions that fell within the applicability domains of the submitted models were evaluated using external validation sets. These were then combined into consensus models to leverage strengths of individual approaches. Results: The resulting consensus predictions, which leverage the collective strengths of each individual model, form the Collaborative Acute Toxicity Modeling Suite (CATMoS). CATMoS demonstrated high performance in terms of accuracy and robustness when compared with in vivo results. Discussion: CATMoS is being evaluated by regulatory agencies for its utility and applicability as a potential replacement for in vivo rat acute oral toxicity studies. CATMoS predictions for more than 800,000 chemicals have been made available via the National Toxicology Program’s Integrated Chemical Environment tools and data sets (ice.ntp.niehs.nih.gov). The models are also implemented in a free, standalone, open-source tool, OPERA, which allows predictions of new and untested chemicals to be made. https://doi.org/10.1289/EHP8495}, number={4}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, author={Mansouri, Kamel and Karmaus, Agnes L. 
and Fitzpatrick, Jeremy and Patlewicz, Grace and Pradeep, Prachi and Alberga, Domenico and Alepee, Nathalie and Allen, Timothy E. H. and Allen, Dave and Alves, Vinicius M. and et al.}, year={2021}, month={Apr} } @article{borrel_melander_fourches_2021, title={Cheminformatics Analysis of Fluoroquinolones and Their Inhibition Potency Against Four Pathogens}, volume={40}, ISSN={["1868-1751"]}, DOI={10.1002/minf.202000215}, abstractNote={Abstract}, number={5}, journal={MOLECULAR INFORMATICS}, author={Borrel, Alexandre and Melander, Christian and Fourches, Denis}, year={2021}, month={May} } @article{li_fourches_2021, title={SMILES Pair Encoding: A Data-Driven Substructure Tokenization Algorithm for Deep Learning}, volume={61}, ISSN={["1549-960X"]}, DOI={10.1021/acs.jcim.0c01127}, abstractNote={Simplified molecular input line entry system (SMILES)-based deep learning models are slowly emerging as an important research topic in cheminformatics. In this study, we introduce SMILES pair encoding (SPE), a data-driven tokenization algorithm. SPE first learns a vocabulary of high-frequency SMILES substrings from a large chemical dataset (e.g., ChEMBL) and then tokenizes SMILES based on the learned vocabulary for the actual training of deep learning models. SPE augments the widely used atom-level tokenization by adding human-readable and chemically explainable SMILES substrings as tokens. Case studies show that SPE can achieve superior performances on both molecular generation and quantitative structure–activity relationship (QSAR) prediction tasks. In particular, the SPE-based generative models outperformed the atom-level tokenization model in the aspects of novelty, diversity, and ability to resemble the training set distribution. The performance of SPE-based QSAR prediction models were evaluated using 24 benchmark datasets where SPE consistently either did match or outperform atom-level and k-mer tokenization. 
Therefore, SPE could be a promising tokenization method for SMILES-based deep learning models. An open-source Python package SmilesPE was developed to implement this algorithm and is now freely available at https://github.com/XinhaoLi74/SmilesPE.}, number={4}, journal={JOURNAL OF CHEMICAL INFORMATION AND MODELING}, author={Li, Xinhao and Fourches, Denis}, year={2021}, month={Apr}, pages={1560–1569} } @article{takeda_ikenaka_fourches_tanaka_nakayama_triki_li_igarashi_tanikawa_ishizuka_2021, title={The VKORC1 ER-luminal loop mutation (Leu76Pro) leads to a significant resistance to warfarin in black rats (Rattus rattus)}, volume={173}, ISSN={["1095-9939"]}, DOI={10.1016/j.pestbp.2021.104774}, abstractNote={Well-known 4-hydroxycoumarin derivatives, such as warfarin, act as inhibitors of the vitamin K epoxide reductase (VKOR) and are used as anticoagulants. Mutations of the VKOR enzyme can lead to resistance to those compounds. This has been a problem in using them as medicine or rodenticide. Most of these mutations lie in the vicinity of potential warfarin-binding sites within the ER-luminal loop structure (Lys30, Phe55) and the transmembrane helix (Tyr138). However, a VKOR mutation found in Tokyo in warfarin-resistant rats does not follow that pattern (Leu76Pro), and its effect on VKOR function and structure remains unclear. We conducted both in vitro kinetic analyses and in silico docking studies to characterize the VKOR mutant. On the one hand, resistant rats (R-rats) showed a 37.5-fold increased IC50 value to warfarin when compared to susceptible rats (S-rats); on the other hand, R-rats showed a 16.5-fold lower basal VKOR activity (Vmax/Km). Docking calculations exhibited that the mutated VKOR of R-rats has a decreased affinity for warfarin. Molecular dynamics simulations further revealed that VKOR-associated warfarin was more exposed to solvents in R-rats and key interactions between Lys30, Phe55, and warfarin were less favored. 
This study concludes that a single mutation of VKOR at position 76 leads to a significant resistance to warfarin by modifying the types and numbers of intermolecular interactions between the two.}, journal={PESTICIDE BIOCHEMISTRY AND PHYSIOLOGY}, author={Takeda, Kazuki and Ikenaka, Yoshinori and Fourches, Denis and Tanaka, Kazuyuki D. and Nakayama, Shouta M. M. and Triki, Dhoha and Li, Xinhao and Igarashi, Manabu and Tanikawa, Tsutomu and Ishizuka, Mayumi}, year={2021}, month={Mar} } @article{zin_borrel_fourches_2020, title={Benchmarking 2D/3D/MD-QSAR Models for Imatinib Derivatives: How Far Can We Predict?}, volume={60}, ISSN={["1549-960X"]}, DOI={10.1021/acs.jcim.0c00200}, abstractNote={Imatinib, a 2-phenylaminopyridine-based BCR-ABL tyrosine kinase inhibitor, is a highly effective drug for treating Chronic Myeloid Leukemia (CML). However, cases of drug resistance are constantly emerging due to various mutations in the ABL kinase domain; thus, it is crucial to identify novel bioactive analogues. Reliable QSAR models and molecular docking protocols have been shown to facilitate the discovery of new compounds from chemical libraries prior to experimental testing. However, as the vast majority of QSAR models strictly relies on 2D descriptors, the rise of 3D descriptors directly computed from molecular dynamics simulations offers new opportunities to potentially augment the reliability of QSAR models. Herein, we employed molecular docking and molecular dynamics on a large series of Imatinib derivatives and developed an ensemble of QSAR models relying on deep neural nets (DNN) and hybrid sets of 2D/3D/MD descriptors in order to predict the binding affinity and inhibition potencies of those compounds. Through rigorous validation tests, we showed that our DNN regression models achieved excellent external prediction performances for the pKi data set (n = 555, R2 ≥ 0.71. and MAE ≤ 0.85), and the pIC50 data set (n = 306, R2 ≥ 0.54. 
and MAE ≤ 0.71) with strict validation protocols based on external test sets and 10-fold native and nested cross validations. Interestingly, the best DNN and random forest models performed similarly across all descriptor sets. In fact, for this particular series of compounds, our external test results suggest that incorporating additional 3D protein-ligand binding site fingerprint, descriptors, or even MD time-series descriptors did not significantly improve the overall R2 but lowered the MAE of DNN QSAR models. Those augmented models could still help in identifying and understanding the key dynamic protein-ligand interactions to be optimized for further molecular design.}, number={7}, journal={JOURNAL OF CHEMICAL INFORMATION AND MODELING}, author={Zin, Phyo Phyo Kyaw and Borrel, Alexandre and Fourches, Denis}, year={2020}, month={Jul}, pages={3342–3360} } @article{mansouri_kleinstreuer_abdelaziz_alberga_alves_andersson_andrade_bai_balabin_ballabio_et al._2020, title={CoMPARA: Collaborative Modeling Project for Androgen Receptor Activity}, volume={128}, ISSN={["1552-9924"]}, DOI={10.1289/EHP5580}, abstractNote={Background: Endocrine disrupting chemicals (EDCs) are xenobiotics that mimic the interaction of natural hormones and alter synthesis, transport, or metabolic pathways. The prospect of EDCs causing adverse health effects in humans and wildlife has led to the development of scientific and regulatory approaches for evaluating bioactivity. This need is being addressed using high-throughput screening (HTS) in vitro approaches and computational modeling. Objectives: In support of the Endocrine Disruptor Screening Program, the U.S. Environmental Protection Agency (EPA) led two worldwide consortiums to virtually screen chemicals for their potential estrogenic and androgenic activities. 
Here, we describe the Collaborative Modeling Project for Androgen Receptor Activity (CoMPARA) efforts, which follows the steps of the Collaborative Estrogen Receptor Activity Prediction Project (CERAPP). Methods: The CoMPARA list of screened chemicals built on CERAPP’s list of 32,464 chemicals to include additional chemicals of interest, as well as simulated ToxCast™ metabolites, totaling 55,450 chemical structures. Computational toxicology scientists from 25 international groups contributed 91 predictive models for binding, agonist, and antagonist activity predictions. Models were underpinned by a common training set of 1,746 chemicals compiled from a combined data set of 11 ToxCast™/Tox21 HTS in vitro assays. Results: The resulting models were evaluated using curated literature data extracted from different sources. To overcome the limitations of single-model approaches, CoMPARA predictions were combined into consensus models that provided averaged predictive accuracy of approximately 80% for the evaluation set. Discussion: The strengths and limitations of the consensus predictions were discussed with example chemicals; then, the models were implemented into the free and open-source OPERA application to enable screening of new chemicals with a defined applicability domain and accuracy assessment. This implementation was used to screen the entire EPA DSSTox database of ∼875,000 chemicals, and their predicted AR activities have been made available on the EPA CompTox Chemicals dashboard and National Toxicology Program’s Integrated Chemical Environment. https://doi.org/10.1289/EHP5580}, number={2}, journal={ENVIRONMENTAL HEALTH PERSPECTIVES}, author={Mansouri, Kamel and Kleinstreuer, Nicole and Abdelaziz, Ahmed M. and Alberga, Domenico and Alves, Vinicius M. and Andersson, Patrik L. and Andrade, Carolina H. 
and Bai, Fang and Balabin, Ilya and Ballabio, Davide and et al.}, year={2020}, month={Feb} } @article{li_kleinstreuer_fourches_2020, title={Hierarchical Quantitative Structure-Activity Relationship Modeling Approach for Integrating Binary, Multiclass, and Regression Models of Acute Oral Systemic Toxicity}, volume={33}, ISSN={["1520-5010"]}, DOI={10.1021/acs.chemrestox.9b00259}, abstractNote={Reliable in silico approaches to replace animal testing for the evaluation of potential acute toxic effects are highly demanded by regulatory agencies. In particular, quantitative structure-activity relationships (QSAR) models have been used to rapidly assess chemical induced toxicity using either continuous (regression) or discrete (classification) predictions. However, it is often unclear how those different types of models can complement and potentially help each other to afford the best prediction accuracy for a given chemical. This paper presents a novel, dual-layer hierarchical modeling method to fully integrate regression and classification QSAR models for assessing rat acute oral systemic toxicity, with respect to regulatory classifications of concern. The first layer of independent regression, binary and multiclass models (base models) were solely built using computed chemical descriptors/fingerprints. Then, a second layer of models (hierarchical models) were built by stacking all the cross-validated out-of-fold predictions from the base models. All models were validated using an external test set and we found that the hierarchical models did outperform the base models for all the three endpoints. The H-QSAR modeling method represents a promising approach for chemical toxicity prediction and more generally for stacking and blending individual QSAR models into more predictive ensemble models.}, number={2}, journal={CHEMICAL RESEARCH IN TOXICOLOGY}, author={Li, Xinhao and Kleinstreuer, Nicole C. 
and Fourches, Denis}, year={2020}, month={Feb}, pages={353–366} } @article{reich_guan_fourches_warren_sarnat_chang_2020, title={INTEGRATIVE STATISTICAL METHODS FOR EXPOSURE MIXTURES AND HEALTH}, volume={14}, ISSN={["1941-7330"]}, DOI={10.1214/20-AOAS1364}, abstractNote={Humans are concurrently exposed to chemically, structurally and toxicologically diverse chemicals. A critical challenge for environmental epidemiology is to quantify the risk of adverse health outcomes resulting from exposures to such chemical mixtures and to identify which mixture constituents may be driving etiologic associations. A variety of statistical methods have been proposed to address these critical research questions. However, they generally rely solely on measured exposure and health data available within a specific study. Advancements in understanding of the role of mixtures on human health impacts may be better achieved through the utilization of external data and knowledge from multiple disciplines with innovative statistical tools. In this paper we develop new methods for health analyses that incorporate auxiliary information about the chemicals in a mixture, such as physicochemical, structural and/or toxicological data. We expect that the constituents identified using auxiliary information will be more biologically meaningful than those identified by methods that solely utilize observed correlations between measured exposure. We develop flexible Bayesian models by specifying prior distributions for the exposures and their effects that include auxiliary information and examine this idea over a spectrum of analyses from regression to factor analysis. The methods are applied to study the effects of volatile organic compounds on emergency room visits in Atlanta. 
We find that including cheminformatic information about the exposure variables improves prediction and provides a more interpretable model for emergency room visits for respiratory diseases.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. and Guan, Yawen and Fourches, Denis and Warren, Joshua L. and Sarnat, Stefanie E. and Chang, Howard H.}, year={2020}, month={Dec}, pages={1945–1963} } @article{cools_triki_geerts_delputte_fourches_cos_2020, title={In vitroandin vivoEvaluation ofin silicoPredicted Pneumococcal UDPG:PP Inhibitors}, volume={11}, ISSN={["1664-302X"]}, DOI={10.3389/fmicb.2020.01596}, abstractNote={Pneumonia, of which Streptococcus pneumoniae is the most common causative agent, is considered one of the three top leading causes of death worldwide. As seen in other bacterial species, antimicrobial resistance is on the rise for this pathogen. Therefore, there is a pressing need for novel antimicrobial strategies to combat these infections. Recently, uridine diphosphate glucose pyrophosphorylase (UDPG:PP) has been put forward as a potential drug target worth investigating. Moreover, earlier research demonstrated that streptococci lacking a functional galU gene (encoding for UDPG:PP) were characterized by significantly reduced in vitro and in vivo virulence. Therefore, in this study we evaluated the anti-virulence activity of potential UDPG:PP inhibitors. They were selected in silico using a tailor-made streptococcal homology model, based on earlier listerial research. While the compounds didn’t affect bacterial growth, nor affected in vitro adhesion to and phagocytosis in macrophages, the amount of polysaccharide capsule was significantly reduced after co-incubation with these inhibitors. Moreover, co-incubation proved to have a positive effect on survival in an in vivo Galleria mellonella larval infection model. 
Therefore, rather than targeting bacterial survival directly, these compounds proved to have an effect on streptococcal virulence by lowering the amount of polysaccharide and thereby probably boosting recognition of this pathogen by the innate immune system. While the compounds need adaptation to broaden their activity to more streptococcal strains rather than being strain-specific, this study consolidates UDPG:PP as a potential novel drug target.}, journal={FRONTIERS IN MICROBIOLOGY}, author={Cools, Freya and Triki, Dhoha and Geerts, Nele and Delputte, Peter and Fourches, Denis and Cos, Paul}, year={2020}, month={Jul} } @article{li_fourches_2020, title={Inductive transfer learning for molecular activity prediction: Next-Gen QSAR Models with MolPMoFiT}, volume={12}, ISSN={["1758-2946"]}, DOI={10.1186/s13321-020-00430-x}, abstractNote={Abstract}, number={1}, journal={JOURNAL OF CHEMINFORMATICS}, author={Li, Xinhao and Fourches, Denis}, year={2020}, month={Apr} } @article{reese_xiao_shanahan_driessche_fourches_carbonell_hall_menegatti_2020, title={Novel peptide ligands for antibody purification provide superior clearance of host cell protein impurities}, volume={1625}, ISSN={["1873-3778"]}, DOI={10.1016/j.chroma.2020.461237}, abstractNote={The quest for ligands alternative to Protein A for the purification of monoclonal antibodies (mAbs) has been pursued for almost three decades. Yet, the IgG-binding peptides known to date still fall short of the host cell protein (HCP) logarithmic removal value (LRV) set by Protein A media (2.5-3.1). In this study, we present an integrated computational-experimental approach leading to the discovery of peptide ligands that provide HCP LRVs on par with Protein A. First, the screening of 60,000 peptide variants was performed using a high-throughput search algorithm to identify sequences that ensure IgG affinity binding. 
Select sequences WQRHGI, MWRGWQ, RHLGWF, and GWLHQR were then negatively screened in silico against a panel of model HCPs to ensure the selection of peptides with high binding selectivity. Candidate ligands WQRHGI and MWRGWQ were conjugated to chromatographic resins and characterized by isothermal binding and breakthrough assays to quantify static and dynamic binding capacity (Qmax and DBC10%), respectively. The resulting Qmax were 52.6 mg of IgG per mL of adsorbent for WQRHGI and 57.48 mg/mL for MWRGWQ, while the DBC10% (2 minutes residence time) were 30.1 mg/mL for WQRHGI and 36.4 mg/mL for MWRGWQ. Evaluation of the peptides by isothermal titration calorimetry (ITC) confirmed the binding energy predicted in silico, and an amino acid scanning study corroborated the affinity-like binding activity of the peptides. WQRHGI-WorkBeads resin was finally characterized by purification of a monoclonal antibody from a Chinese Hamster Ovary (CHO) cell culture harvest, affording a remarkable HCP LRV of 2.7, and consistent product yield and purity over 100 chromatographic cycles. These results demonstrate the potential of WQRHGI as an effective alternative to Protein A for antibody purification.}, journal={JOURNAL OF CHROMATOGRAPHY A}, author={Reese, Hannah R. and Xiao, Xingqing and Shanahan, Calvin C. and Driessche, George A. and Fourches, Denis and Carbonell, Ruben G. and Hall, Carol K. and Menegatti, Stefano}, year={2020}, month={Aug} } @article{day_schneible_young_pozdin_driessche_gaffney_prodromou_freytes_fourches_daniele_et al._2020, title={Photoinduced reconfiguration to control the protein-binding affinity of azobenzene-cyclized peptides}, volume={8}, ISSN={["2050-7518"]}, DOI={10.1039/d0tb01189d}, abstractNote={Light-controlled switching of cell-binding activity of fluorescently-labeled peptides for on-demand cell labeling.}, number={33}, journal={JOURNAL OF MATERIALS CHEMISTRY B}, author={Day, Kevin and Schneible, John D. and Young, Ashlyn T. and Pozdin, Vladimir A. 
and Driessche, George and Gaffney, Lewis A. and Prodromou, Raphael and Freytes, Donald O. and Fourches, Denis and Daniele, Michael and et al.}, year={2020}, month={Sep}, pages={7413–7427} } @misc{muratov_bajorath_sheridan_tetko_filimonov_poroikov_oprea_baskin_varnek_roitberg_et al._2020, title={QSAR without borders}, volume={49}, ISSN={["1460-4744"]}, DOI={10.1039/d0cs00098a}, abstractNote={Word cloud summary of diverse topics associated with QSAR modeling that are discussed in this review.}, number={11}, journal={CHEMICAL SOCIETY REVIEWS}, author={Muratov, Eugene N. and Bajorath, Jurgen and Sheridan, Robert P. and Tetko, Igor V and Filimonov, Dmitry and Poroikov, Vladimir and Oprea, Tudor I and Baskin, Igor I. and Varnek, Alexandre and Roitberg, Adrian and et al.}, year={2020}, month={Jun}, pages={3525–3564} } @article{muratov_bajorath_sheridan_tetko_filimonov_poroikov_oprea_baskin_varnek_roitberg_et al._2020, title={QSAR without borders (vol 10, pg 531, 2020)}, volume={49}, ISSN={["1460-4744"]}, DOI={10.1039/d0cs90041a}, abstractNote={Correction for ‘QSAR without borders’ by Eugene N. Muratov et al., Chem. Soc. Rev., 2020, DOI: 10.1039/d0cs00098a.}, number={11}, journal={CHEMICAL SOCIETY REVIEWS}, author={Muratov, Eugene N. and Bajorath, Juergen and Sheridan, Robert P. and Tetko, Igor V. and Filimonov, Dmitry and Poroikov, Vladimir and Oprea, Tudor I. and Baskin, Igor I. 
and Varnek, Alexandre and Roitberg, Adrian and et al.}, year={2020}, month={Jun}, pages={3716–3716} } @article{zin_williams_fourches_2020, title={SIME: synthetic insight-based macrolide enumerator to generate the V1B library of 1 billion macrolides}, volume={12}, ISSN={1758-2946}, url={http://dx.doi.org/10.1186/s13321-020-00427-6}, DOI={10.1186/s13321-020-00427-6}, abstractNote={Abstract}, number={1}, journal={Journal of Cheminformatics}, publisher={Springer Science and Business Media LLC}, author={Zin, Phyo Phyo Kyaw and Williams, Gavin and Fourches, Denis}, year={2020}, month={Apr} } @article{odenkirk_zin_ash_reif_fourches_baker_2020, title={Structural-based connectivity and omic phenotype evaluations (SCOPE): a cheminformatics toolbox for investigating lipidomic changes in complex systems}, volume={145}, ISSN={["1364-5528"]}, DOI={10.1039/d0an01638a}, abstractNote={SCOPE is a toolbox for expanding upon lipid data interpretation capabilities. Herein we utilize SCOPE to explore how lipid structure, biological connections and metadata linkages contribute to the results observed from lipidomic experiments.}, number={22}, journal={ANALYST}, author={Odenkirk, Melanie T. and Zin, Phyo Phyo K. and Ash, Jeremy R. and Reif, David M. and Fourches, Denis and Baker, Erin S.}, year={2020}, month={Nov}, pages={7197–7209} } @article{singam_tachachartvanich_fourches_soshilov_hsieh_la merrill_smith_durkin_2020, title={Structure-based virtual screening of perfluoroalkyl and polyfluoroalkyl substances (PFASs) as endocrine disruptors of androgen receptor activity using molecular docking and machine learning}, volume={190}, ISSN={["1096-0953"]}, DOI={10.1016/j.envres.2020.109920}, abstractNote={Perfluoroalkyl and polyfluoroalkyl substances (PFASs) pose a substantial threat as endocrine disruptors, and thus early identification of those that may interact with steroid hormone receptors, such as the androgen receptor (AR), is critical. 
In this study we screened 5,206 PFASs from the CompTox database against the different binding sites on the AR using both molecular docking and machine learning techniques. We developed support vector machine models trained on Tox21 data to classify the active and inactive PFASs for AR using different chemical fingerprints as features. The maximum accuracy was 95.01% and Matthew's correlation coefficient (MCC) was 0.76 respectively, based on MACCS fingerprints (MACCSFP). The combination of docking-based screening and machine learning models identified 29 PFASs that have strong potential for activity against the AR and should be considered priority chemicals for biological toxicity testing.}, journal={ENVIRONMENTAL RESEARCH}, author={Singam, Ettayapuram Ramaprasad Azhagiya and Tachachartvanich, Phum and Fourches, Denis and Soshilov, Anatoly and Hsieh, Jennifer C. Y. and La Merrill, Michele A. and Smith, Martyn T. and Durkin, Kathleen A.}, year={2020}, month={Nov} } @article{odenkirk_stratton_gritsenko_bramer_webb-robertson_bloodsworth_weitz_lipton_monroe_ash_et al._2020, title={Unveiling molecular signatures of preeclampsia and gestational diabetes mellitus with multi-omics and innovative cheminformatics visualization tools}, volume={16}, ISSN={["2515-4184"]}, DOI={10.1039/d0mo00074d}, abstractNote={Specific lipid and protein changes characterized term preeclampsia (PRE) and gestational diabetes mellitus (GDM) and novel visualization tools were created to aid in the process.}, number={6}, journal={MOLECULAR OMICS}, author={Odenkirk, Melanie T. and Stratton, Kelly G. and Gritsenko, Marina A. and Bramer, Lisa M. and Webb-Robertson, Bobbie-Jo M. and Bloodsworth, Kent J. and Weitz, Karl K. and Lipton, Anna K. and Monroe, Matthew E. and Ash, Jeremy R. 
and et al.}, year={2020}, month={Dec} } @misc{fourches_ash_2019, title={4D-quantitative structure-activity relationship modeling: making a comeback}, volume={14}, ISSN={["1746-045X"]}, DOI={10.1080/17460441.2019.1664467}, abstractNote={ABSTRACT Introduction: Predictive Quantitative Structure–Activity Relationship (QSAR) modeling has become an essential methodology for rapidly assessing various properties of chemicals. The vast majority of these QSAR models utilize numerical descriptors derived from the two- and/or three-dimensional structures of molecules. However, the conformation-dependent characteristics of flexible molecules and their dynamic interactions with biological target(s) is/are not encoded by these descriptors, leading to limited prediction performances and reduced interpretability. 2D/3D QSAR models are successful for virtual screening, but typically suffer at lead optimization stages. That is why conformation-dependent 4D-QSAR modeling methods were developed two decades ago. However, these methods have always suffered from the associated computational cost. Recently, 4D-QSAR has been experiencing a significant come-back due to rapid advances in GPU-accelerated molecular dynamic simulations and modern machine learning techniques. Areas covered: Herein, the authors briefly review the literature regarding 4D-QSAR modeling and describe its modern workflow called MD-QSAR. Challenges and current limitations are also highlighted. Expert opinion: The development of hyper-predictive MD-QSAR models could represent a disruptive technology for analyzing, understanding, and optimizing dynamic protein-ligand interactions with countless applications for drug discovery and chemical toxicity assessment. 
Therefore, there has never been a better time and relevance for molecular modeling teams to engage in hyper-predictive MD-QSAR modeling.}, number={12}, journal={EXPERT OPINION ON DRUG DISCOVERY}, author={Fourches, Denis and Ash, Jeremy}, year={2019}, month={Dec}, pages={1227–1235} } @article{wen_wang_van den driessche_chen_zhang_chen_li_soto_liu_ohashi_et al._2019, title={Adipocytes as Anticancer Drug Delivery Depot}, volume={1}, ISSN={["2590-2385"]}, DOI={10.1016/j.matt.2019.08.007}, abstractNote={

Summary

Tumor-associated adipocytes promote tumor growth by providing energy and causing chronic inflammation. Here, we have exploited the lipid metabolism to engineer adipocytes that serve as a depot to deliver cancer therapeutics at the tumor site. Rumenic acid (RA), as an anticancer fatty acid, and a doxorubicin prodrug (pDox) with a reactive oxygen species (ROS)-cleavable linker, are encapsulated in adipocytes to deliver therapeutics in a tumor-specific bioresponsive manner. After intratumoral or postsurgical administration, lipolysis releases the RA and pDox that is activated by intracellular ROS-responsive conversion, subsequently promoting antitumor efficacy. Furthermore, downregulation of PD-L1 expression is observed in tumor cells, favoring the emergence of CD4+ and CD8+ T cell-mediated immune responses.}, number={5}, journal={MATTER}, author={Wen, Di and Wang, Jinqiang and Van Den Driessche, George and Chen, Qian and Zhang, Yuqi and Chen, Guojun and Li, Hongjun and Soto, Jennifer and Liu, Ming and Ohashi, Masao and et al.}, year={2019}, month={Nov}, pages={1203–1214} } @article{plundrich_cook_maleki_fourches_lila_2019, title={Binding of peanut allergen Ara h 2 with Vaccinium fruit polyphenols}, volume={284}, ISSN={["1873-7072"]}, DOI={10.1016/j.foodchem.2019.01.081}, abstractNote={The potential for 42 different polyphenols found in Vaccinium fruits to bind to peanut allergen Ara h 2 and inhibit IgE binding epitopes was investigated using cheminformatics techniques. Out of 12 predicted binders, delphinidin-3-glucoside, cyanidin-3-glucoside, procyanidin C1, and chlorogenic acid were further evaluated in vitro. Circular dichroism, UV-Vis spectroscopy, and immunoblotting determined their capacity to (i) bind to Ara h 2, (ii) induce protein secondary structural changes, and (iii) inhibit IgE binding epitopes. 
UV-Vis spectroscopy clearly indicated that procyanidin C1 and chlorogenic acid interacted with Ara h 2, and circular dichroism results suggested that interactions with these polyphenols resulted in changes to Ara h 2 secondary structures. Immunoblotting showed that procyanidin C1 and chlorogenic acid bound to Ara h 2 significantly decreased the IgE binding capacity by 37% and 50%, respectively. These results suggest that certain polyphenols can inhibit IgE recognition of Ara h 2 by obstructing linear IgE epitopes.}, journal={FOOD CHEMISTRY}, author={Plundrich, Nathalie J. and Cook, Bethany T. and Maleki, Soheila J. and Fourches, Denis and Lila, Mary Ann}, year={2019}, month={Jun}, pages={287–295} } @article{ash_kuenemann_rotroff_motsinger-reif_fourches_2019, title={Cheminformatics approach to exploring and modeling trait-associated metabolite profiles}, volume={11}, ISSN={1758-2946}, DOI={10.1186/s13321-019-0366-3}, abstractNote={Developing predictive and transparent approaches to the analysis of metabolite profiles across patient cohorts is of critical importance for understanding the events that trigger or modulate traits of interest (e.g., disease progression, drug metabolism, chemical risk assessment). However, metabolites' chemical structures are still rarely used in the statistical modeling workflows that establish these trait-metabolite relationships. Herein, we present a novel cheminformatics-based approach capable of identifying predictive, interpretable, and reproducible trait-metabolite relationships. As a proof-of-concept, we utilize a previously published case study consisting of metabolite profiles from non-small-cell lung cancer (NSCLC) adenocarcinoma patients and healthy controls.
By characterizing each structurally annotated metabolite using both computed molecular descriptors and patient metabolite concentration profiles, we show that these complementary features enhance the identification and understanding of key metabolites associated with cancer. Ultimately, we built multi-metabolite classification models for assessing patients' cancer status using specific groups of metabolites identified based on high structural similarity through chemical clustering. We subsequently performed a metabolic pathway enrichment analysis to identify potential mechanistic relationships between metabolites and NSCLC adenocarcinoma. This cheminformatics-inspired approach relies on the metabolites' structural features and chemical properties to provide critical information about metabolite-trait associations. This method could ultimately facilitate biological understanding and advance research based on metabolomics data, especially with respect to the identification of novel biomarkers.}, journal={Journal of Cheminformatics}, author={Ash, Jeremy R. and Kuenemann, Melaine A. and Rotroff, Daniel and Motsinger-Reif, Alison and Fourches, Denis}, year={2019}, month={Jun} } @article{menden_wang_mason_szalai_bulusu_guan_yu_kang_jeon_wolfinger_et al._2019, title={Community assessment to advance computational prediction of cancer drug combinations in a pharmacogenomic screen}, volume={10}, ISSN={2041-1723}, DOI={10.1038/s41467-019-09799-2}, journal={NATURE COMMUNICATIONS}, author={Menden, Michael P. and Wang, Dennis and Mason, Mike J. and Szalai, Bence and Bulusu, Krishna C. and Guan, Yuanfang and Yu, Thomas and Kang, Jaewoo and Jeon, Minji and Wolfinger, Russ and others}, year={2019}, month={Jun} } @article{williams_van den driessche_valery_fourches_freeman_2019, title={Corrections to “Toward the Rational Design of Sustainable Hair Dyes Using Cheminformatics Approaches: Step 2.
Identification of Hair Dye Substance Database Analogs in the Max Weaver Dye Library”}, volume={7}, ISSN={2168-0485}, url={http://dx.doi.org/10.1021/ACSSUSCHEMENG.8B05545}, DOI={10.1021/ACSSUSCHEMENG.8B05545}, number={1}, journal={ACS Sustainable Chemistry \& Engineering}, publisher={American Chemical Society (ACS)}, author={Williams, Tova N. and Van Den Driessche, George A. and Valery, Alain R. B. and Fourches, Denis and Freeman, Harold S.}, year={2019}, month={Jan}, pages={1806} } @article{west_lu_rotroff_kuenemann_chang_wu_wagner_buse_motsinger-reif_fourches_et al._2019, title={Identifying individual risk rare variants using protein structure guided local tests (POINT)}, volume={15}, ISSN={1553-7358}, DOI={10.1371/journal.pcbi.1006722}, abstractNote={Rare variants are of increasing interest to genetic association studies because of their etiological contributions to human complex diseases. Due to the rarity of the mutant events, rare variants are routinely analyzed on an aggregate level. While aggregation analyses improve the detection of global-level signal, they are not able to pinpoint causal variants within a variant set. To perform inference on a localized level, additional information, e.g., biological annotation, is often needed to boost the information content of a rare variant.
Following the observation that important variants are likely to cluster together on functional domains, we propose a protein structure guided local test (POINT) to provide variant-specific association information using structure-guided aggregation of signal. Constructed under a kernel machine framework, POINT performs local association testing by borrowing information from neighboring variants in the 3-dimensional protein space in a data-adaptive fashion. Besides merely providing a list of promising variants, POINT assigns each variant a p-value to permit variant ranking and prioritization. We assess the selection performance of POINT using simulations and illustrate how it can be used to prioritize individual rare variants in PCSK9, ANGPTL4 and CETP in the Action to Control Cardiovascular Risk in Diabetes (ACCORD) clinical trial data.}, number={2}, journal={PLOS COMPUTATIONAL BIOLOGY}, author={West, Rachel Marceau and Lu, Wenbin and Rotroff, Daniel M. and Kuenemann, Melaine A. and Chang, Sheng-Mao and Wu, Michael C. and Wagner, Michael J. and Buse, John B. and Motsinger-Reif, Alison A. and Fourches, Denis and others}, year={2019}, month={Feb} } @article{burnum-johnson_zheng_dodds_ash_fourches_nicora_wendler_metz_waters_jansson_et al._2019, title={Ion mobility spectrometry and the omics: Distinguishing isomers, molecular classes and contaminant ions in complex samples}, volume={116}, ISSN={1879-3142}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85065908529&partnerID=MN8TOARS}, DOI={10.1016/j.trac.2019.04.022}, abstractNote={Ion mobility spectrometry (IMS) is a widely used analytical technique providing rapid gas phase separations. IMS alone is useful, but its coupling with mass spectrometry (IMS-MS) and various front-end separation techniques has greatly increased the molecular information achievable from different omic analyses.
IMS-MS analyses are specifically gaining attention for improving metabolomic, lipidomic, glycomic, proteomic and exposomic analyses by increasing measurement sensitivity (e.g. S/N ratio), reducing the detection limit, and amplifying peak capacity. Numerous studies including national security-related analyses, disease screenings and environmental evaluations are illustrating that IMS-MS is able to extract information not possible with MS alone. Furthermore, IMS-MS has shown great utility in salvaging molecular information for low abundance molecules of interest when high concentration contaminant ions are present in the sample by reducing detector suppression. This review highlights how IMS-MS is currently being used in omic analyses to distinguish structurally similar molecules, isomers, molecular classes and contaminant ions.}, journal={TRAC-TRENDS IN ANALYTICAL CHEMISTRY}, author={Burnum-Johnson, Kristin E. and Zheng, Xueyun and Dodds, James N. and Ash, Jeremy and Fourches, Denis and Nicora, Carrie D. and Wendler, Jason P. and Metz, Thomas O. and Waters, Katrina M. and Jansson, Janet K. and others}, year={2019}, month={Jul}, pages={292–299} } @article{fourches_feducia_2019, title={Student-Guided Three-Dimensional Printing Activity in Large Lecture Courses: A Practical Guideline}, volume={96}, ISSN={1938-1328}, DOI={10.1021/acs.jchemed.8b00346}, abstractNote={Modern technology stimulates the development of innovative classroom activities. We designed a 3D printing activity in two separate Organic Chemistry lectures of at least 200 students each. This assignment required students to 3D print a molecule of their choice, relying on services made available through the university libraries. Data obtained through a survey at the end of the semester provided key information on the students’ experiences with printing 3D models for the first time.
A summary of this feedback and constructive remarks on the best practices regarding 3D printing assignments in large lecture courses are presented.}, number={2}, journal={JOURNAL OF CHEMICAL EDUCATION}, author={Fourches, Denis and Feducia, Jeremiah}, year={2019}, month={Feb}, pages={291–295} } @article{van den driessche_fourches_2018, title={Adverse drug reactions triggered by the common HLA-B*57:01 variant: Virtual screening of DrugBank using 3D molecular docking}, volume={10}, journal={Journal of Cheminformatics}, author={Van Den Driessche, G. and Fourches, D.}, year={2018} } @article{kuenemann_fourches_2018, title={Cheminformatics Analysis of Dynamic WNK-Inhibitor Interactions}, volume={37}, ISSN={1868-1743}, url={http://dx.doi.org/10.1002/MINF.201700138}, DOI={10.1002/MINF.201700138}, number={6-7}, journal={Molecular Informatics}, publisher={Wiley}, author={Kuenemann, Melaine A. and Fourches, Denis}, year={2018}, month={Feb}, pages={1700138} } @article{zin_williams_fourches_2018, title={Cheminformatics-based enumeration and analysis of large libraries of macrolide scaffolds}, volume={10}, ISSN={1758-2946}, url={http://dx.doi.org/10.1186/s13321-018-0307-6}, DOI={10.1186/s13321-018-0307-6}, abstractNote={We report on the development of a cheminformatics enumeration technology and the analysis of a resulting large dataset of virtual macrolide scaffolds. Although macrolides have been shown to have valuable biological properties, there is no ready-to-screen virtual library of diverse macrolides in the public domain. Conducting molecular modeling (especially virtual screening) of these complex molecules is highly relevant as the organic synthesis of these compounds, when feasible, typically requires many synthetic steps, and thus dramatically slows the discovery of new bioactive macrolides.
Herein, we introduce a cheminformatics approach and associated software that allows for designing and generating libraries of virtual macrocycle/macrolide scaffolds with user-defined constitutional and structural constraints (e.g., types and numbers of structural motifs to be included in the macrocycle, ring size, maximum number of compounds generated). To study the chemical diversity of such generated molecules, we enumerated V1M (Virtual 1 million Macrolide scaffolds) library, each containing twelve common structural motifs. For each macrolide scaffold, we calculated several key properties, such as molecular weight, hydrogen bond donors/acceptors, topological polar surface area. In this study, we discuss (1) the initial concept and current features of our PKS (polyketides) Enumerator software, (2) the chemical diversity and distribution of structural motifs in V1M library, and (3) the unique opportunities for future virtual screening of such enumerated ensembles of macrolides. Importantly, V1M is provided in the Supplementary Material of this paper allowing other researchers to conduct any type of molecular modeling and virtual screening studies. 
Therefore, this technology for enumerating extremely large libraries of macrolide scaffolds could hold a unique potential in the field of computational chemistry and drug discovery for rational designing of new antibiotics and anti-cancer agents.}, number={1}, journal={Journal of Cheminformatics}, publisher={Springer Science and Business Media LLC}, author={Zin, Phyo Phyo Kyaw and Williams, Gavin and Fourches, Denis}, year={2018}, month={Nov} } @article{low_alves_fourches_sedykh_andrade_muratov_rusyn_tropsha_2018, title={Chemistry-Wide Association Studies (CWAS): A Novel Framework for Identifying and Interpreting Structure-Activity Relationships}, volume={58}, ISSN={1549-960X}, DOI={10.1021/acs.jcim.8b00450}, abstractNote={Quantitative structure-activity relationships (QSAR) models are often seen as a "black box" because they are considered difficult to interpret. Meanwhile, qualitative approaches, e.g., structural alerts (SA) or read-across, provide mechanistic insight, which is preferred for regulatory purposes, but predictive accuracy of such approaches is often low. Herein, we introduce the chemistry-wide association study (CWAS) approach, a novel framework that both addresses such deficiencies and combines advantages of statistical QSAR and alert-based approaches. The CWAS framework consists of the following steps: (i) QSAR model building for an end point of interest, (ii) identification of key chemical features, (iii) determination of communities of such features disproportionately co-occurring more frequently in the active than in the inactive class, and (iv) assembling these communities to form larger (and not necessarily chemically connected) novel structural alerts with high specificity. As a proof-of-concept, we have applied CWAS to model Ames mutagenicity and Stevens-Johnson Syndrome (SJS).
For the well-studied Ames mutagenicity data set, we identified 76 important individual fragments and assembled co-occurring fragments into SA both replicative of known as well as representing novel mutagenicity alerts. For the SJS data set, we identified 29 important fragments and assembled co-occurring communities into SA including both known and novel alerts. In summary, we demonstrate that CWAS provides a new framework to interpret predictive QSAR models and derive refined structural alerts for more effective design and safety assessment of drugs and drug candidates.}, number={11}, journal={JOURNAL OF CHEMICAL INFORMATION AND MODELING}, author={Low, Yen S. and Alves, Vinicius M. and Fourches, Denis and Sedykh, Alexander and Andrade, Carolina Horta and Muratov, Eugene N. and Rusyn, Ivan and Tropsha, Alexander}, year={2018}, month={Nov}, pages={2203–2213} } @article{mahapatra_franzosa_roell_kuenemann_houck_reif_fourches_kullman_2018, title={Confirmation of high-throughput screening data and novel mechanistic insights into VDR-xenobiotic interactions by orthogonal assays}, volume={8}, ISSN={2045-2322}, url={http://dx.doi.org/10.1038/S41598-018-27055-3}, DOI={10.1038/S41598-018-27055-3}, number={1}, journal={Scientific Reports}, publisher={Springer Science and Business Media LLC}, author={Mahapatra, Debabrata and Franzosa, Jill A. and Roell, Kyle and Kuenemann, Melaine Agnes and Houck, Keith A. and Reif, David M. and Fourches, Denis and Kullman, Seth W.}, year={2018}, month={Jun} } @article{borrel_kleinstreuer_fourches_2018, title={Exploring drug space with ChemMaps.com}, volume={34}, ISSN={1460-2059}, DOI={10.1093/bioinformatics/bty412}, number={21}, journal={BIOINFORMATICS}, author={Borrel, Alexandre and Kleinstreuer, Nicole C.
and Fourches, Denis}, year={2018}, month={Nov}, pages={3773–3775} } @article{kuenemann_spears_orndorff_fourches_2018, title={In silico Predicted Glucose-1-phosphate Uridylyltransferase (GalU) Inhibitors Block a Key Pathway Required for Listeria Virulence}, volume={37}, ISSN={1868-1743}, url={http://dx.doi.org/10.1002/MINF.201800004}, DOI={10.1002/MINF.201800004}, number={6-7}, journal={Molecular Informatics}, publisher={Wiley}, author={Kuenemann, Melaine A. and Spears, Patricia A. and Orndorff, Paul E. and Fourches, Denis}, year={2018}, month={Mar}, pages={1800004} } @article{sanabria-ojeda_fukuyama_fourches_baumer_2018, title={Janus kinase inhibitors differ in their affinity to the TRPV1 receptor - implications for their use in itch and pain}, volume={41}, journal={Journal of Veterinary Pharmacology and Therapeutics}, author={Sanabria-Ojeda, L. and Fukuyama, T. and Fourches, D. and Baumer, W.}, year={2018}, pages={160} } @article{la_sedykh_fourches_muratov_tropsha_2018, title={Predicting Adverse Drug Effects from Literature- and Database-Mined Assertions}, volume={41}, ISSN={1179-1942}, DOI={10.1007/s40264-018-0688-5}, abstractNote={Given that adverse drug effects (ADEs) have led to post-market patient harm and subsequent drug withdrawal, failure of candidate agents in the drug development process, and other negative outcomes, it is essential to attempt to forecast ADEs and other relevant drug–target–effect relationships as early as possible. Current pharmacologic data sources, providing multiple complementary perspectives on the drug–target–effect paradigm, can be integrated to facilitate the inference of relationships between these entities. This study aims to identify both existing and unknown relationships between chemicals (C), protein targets (T), and ADEs (E) based on evidence in the literature.
Cheminformatics and data mining approaches were employed to integrate and analyze publicly available clinical pharmacology data and literature assertions interrelating drugs, targets, and ADEs. Based on these assertions, a C–T–E relationship knowledge base was developed. Known pairwise relationships between chemicals, targets, and ADEs were collected from several pharmacological and biomedical data sources. These relationships were curated and integrated according to Swanson’s paradigm to form C–T–E triangles. Missing C–E edges were then inferred as C–E relationships. Unreported associations between drugs, targets, and ADEs were inferred, and inferences were prioritized as testable hypotheses. Several C–E inferences, including testosterone → myocardial infarction, were identified using inferences based on the literature sources published prior to confirmatory case reports. Timestamping approaches confirmed the predictive ability of this inference strategy on a larger scale. The presented workflow, based on free-access databases and an association-based inference scheme, provided novel C–E relationships that have been validated post hoc in case reports. With refinement of prioritization schemes for the generated C–E inferences, this workflow may provide an effective computational method for the early detection of potential drug candidate ADEs that can be followed by targeted experimental investigations.}, number={11}, journal={DRUG SAFETY}, author={La, Mary K. and Sedykh, Alexander and Fourches, Denis and Muratov, Eugene and Tropsha, Alexander}, year={2018}, month={Nov}, pages={1059–1072} } @article{williams_kuenemann_driessche_williams_fourches_freeman_2018, title={Toward the Rational Design of Sustainable Hair Dyes Using Cheminformatics Approaches: Step 1. 
Database Development and Analysis}, volume={6}, ISSN={2168-0485}, url={https://doi.org/10.1021/acssuschemeng.7b03795}, DOI={10.1021/acssuschemeng.7b03795}, abstractNote={Herein, we report on the initial step of the design process of new hair dyes with the desired properties. The first step is dedicated to the development of the largest, publicly available database of hair dye substances (containing temporary and semipermanent hair dyes as well as permanent hair dye precursors) used in commercial hair dye formulations. The database was utilized to perform a cheminformatics study assessing the computed physicochemical properties of the different hair dye substances, especially within each cluster of structurally similar dyes. The various substances could be differentiated based on their average molecular weight, hydrophobicity, topological polar surface area, and number of hydrogen bond acceptors, with some overlap also observed. In particular, we found that dyes such as C.I. Basic Orange 1 and 2 were clustered among the precursors, suggesting that their diffusion behavior is similar to that of permanent hair dye precursors. We anticipate taking advantage of this interestin...}, number={2}, journal={ACS Sustainable Chemistry \& Engineering}, publisher={American Chemical Society (ACS)}, author={Williams, Tova N. and Kuenemann, Melaine A. and Van Den Driessche, George A. and Williams, Antony J. and Fourches, Denis and Freeman, Harold S.}, year={2018}, month={Feb}, pages={2344–2352} } @article{williams_driessche_valery_fourches_freeman_2018, title={Toward the Rational Design of Sustainable Hair Dyes Using Cheminformatics Approaches: Step 2.
Identification of Hair Dye Substance Database Analogs in the Max Weaver Dye Library}, volume={6}, ISSN={2168-0485}, url={https://doi.org/10.1021/acssuschemeng.8b02882}, DOI={10.1021/acssuschemeng.8b02882}, abstractNote={We report on part 2 of the cheminformatics-assisted development of sustainable hair dyes with enhanced technical and toxicological properties. In this study, an initial similarity search analysis was performed using two reference probes (C.I. Basic Orange 1 and Orange 2) as structural templates for the identification of potential analogs among the Max Weaver Dye Library (MWDL). The analysis revealed an interesting subset of 158 MWDL compounds that were close analogs of the classical aminoazobenzene dyes. A more detailed similarity search analysis of this subset ultimately led to the selection of four dyes for further in silico quantum calculations and experimental dye uptake (color depth on hair) studies. Results from quantum calculations indicated that the ESP surface properties of these dyes were consistent with nonionic interactions between dye and keratin. Among the four dye analogs, 2-amino-6-methyl-5-(phenyldiazenyl)pyrimidin-4-ol and 2-amino-4-chloro-1,6-dimethyl-5-(phenyldiazenyl)-pyrimidin-1-ium ...}, number={11}, journal={ACS Sustainable Chemistry \& Engineering}, publisher={American Chemical Society (ACS)}, author={Williams, Tova N. and Van Den Driessche, George A. and Valery, Alain R. B. and Fourches, Denis and Freeman, Harold S.}, year={2018}, month={Nov}, pages={14248–14256} } @article{driessche_fourches_2017, title={Adverse drug reactions triggered by the common HLA-B*57:01 variant: A molecular docking study}, volume={9}, journal={Journal of Cheminformatics}, author={Van Den Driessche, G.
and Fourches, D.}, year={2017} } @article{ash_fourches_2017, title={Characterizing the Chemical Space of ERK2 Kinase Inhibitors Using Descriptors Computed from Molecular Dynamics Trajectories}, volume={57}, ISSN={1549-960X}, DOI={10.1021/acs.jcim.7b00048}, abstractNote={Quantitative Structure-Activity Relationship (QSAR) models typically rely on 2D and 3D molecular descriptors to characterize chemicals and forecast their experimental activities. Previously, we showed that even the most reliable 2D QSAR models and structure-based 3D molecular docking techniques were not capable of accurately ranking a set of known inhibitors for the ERK2 kinase, a key player in various types of cancer. Herein, we calculated and analyzed a series of chemical descriptors computed from the molecular dynamics (MD) trajectories of ERK2-ligand complexes. First, the docking of 87 ERK2 ligands with known binding affinities was accomplished using Schrodinger's Glide software; then, solvent-explicit MD simulations (20 ns, NPT, 300 K, TIP3P, 1 fs) were performed using the GPU-accelerated Desmond program. Second, we calculated a series of MD descriptors based on the distributions of 3D descriptors computed for representative samples of the ligand's conformations over the MD simulations. Third, we analyzed the data set of 87 inhibitors in the MD chemical descriptor space. We showed that MD descriptors (i) had little correlation with conventionally used 2D/3D descriptors, (ii) were able to distinguish the most active ERK2 inhibitors from the moderate/weak actives and inactives, and (iii) provided key and complementary information about the unique characteristics of active ligands. This study represents the largest attempt to utilize MD-extracted chemical descriptors to characterize and model a series of bioactive molecules.
MD descriptors could enable the next generation of hyperpredictive MD-QSAR models for computer-aided lead optimization and analogue prioritization.}, number={6}, journal={JOURNAL OF CHEMICAL INFORMATION AND MODELING}, author={Ash, Jeremy and Fourches, Denis}, year={2017}, month={Jun}, pages={1286–1299} } @article{kuenemann_fourches_2017, title={Cheminformatics Modeling of Amine Solutions for Assessing their CO2 Absorption Properties}, volume={36}, ISSN={1868-1743}, url={http://dx.doi.org/10.1002/MINF.201600143}, DOI={10.1002/MINF.201600143}, abstractNote={As stricter regulations on CO2 emissions are adopted worldwide, identifying efficient chemical processes to capture and recycle CO2 is of critical importance for industry. The most common process known as amine scrubbing suffers from the lack of available amine solutions capable of capturing CO2 efficiently. Tertiary amines characterized by low heats of reaction are considered good candidates but their absorption properties can significantly differ from one analogue to another despite high structural similarity. Herein, after collecting and curating experimental data from the literature, we have built a modeling set of 41 amine structures with their absorption properties. Then we analyzed their chemical composition using molecular descriptors and non‐supervised clustering. Furthermore, we developed a series of quantitative structure‐property relationships (QSPR) to assess amines’ CO2 absorption properties from their structural characteristics. These models afforded reasonable prediction performances (e. g., Q2LOO=0.63 for CO2 absorption amount) even though they are solely based on 2D chemical descriptors and individual machine learning techniques (random forest and neural network).
Overall, we believe the chemical analysis and the series of QSPR models presented in this proof‐of‐concept study represent new knowledge and innovative tools that could be very useful for screening and prioritizing hypothetical amines to be synthesized and tested experimentally for their CO2 absorption properties.}, number={7}, journal={Molecular Informatics}, publisher={Wiley}, author={Kuenemann, Melaine A. and Fourches, Denis}, year={2017}, month={Mar}, pages={1600143} } @article{muratov_lewis_fourches_tropsha_cox_2017, title={Computer-assisted decision support for student admissions based on their predicted academic performance}, volume={81}, number={3}, journal={American Journal of Pharmaceutical Education}, author={Muratov, E. and Lewis, M. and Fourches, D. and Tropsha, A. and Cox, W.
C.}, year={2017} } @article{legge_hamshere_ripke_pardinas_goldstein_rees_richards_leonenko_jorskog_fourches_et al._2017, title={Genome-wide common and rare variant analysis provides novel insights into clozapine-associated neutropenia}, volume={22}, ISSN={1359-4184 1476-5578}, url={http://dx.doi.org/10.1038/MP.2016.97}, DOI={10.1038/MP.2016.97}, number={10}, journal={Molecular Psychiatry}, publisher={Springer Science and Business Media LLC}, author={Legge, S E and Hamshere, M L and Ripke, S and Pardinas, A F and Goldstein, J I and Rees, E and Richards, A L and Leonenko, G and Jorskog, L F and Fourches, Denis and others}, year={2017}, pages={1502–1508} } @article{fourches_2017, title={Reaction: Molecular Modeling for Novel Antibacterials}, volume={3}, ISSN={2451-9294}, url={http://dx.doi.org/10.1016/J.CHEMPR.2017.06.016}, DOI={10.1016/J.CHEMPR.2017.06.016}, abstractNote={Denis Fourches, PhD, is a molecular modeler and expert in cheminformatics in the Department of Chemistry and the Bioinformatics Research Center at North Carolina State University. His research focuses on the development and applications of novel predictive cheminformatics methods. Denis Fourches, PhD, is a molecular modeler and expert in cheminformatics in the Department of Chemistry and the Bioinformatics Research Center at North Carolina State University. His research focuses on the development and applications of novel predictive cheminformatics methods. I read the Catalysis piece by Dr. Micha Fridman with great interest. The burden of drug-resistant pathogens is already affecting healthcare systems to a great extent and will continue to do so if the research community at large cannot succeed in developing and delivering new antibacterial agents in the near future. Dr. Fridman properly underlines the need to develop new organic synthetic routes for complex molecules, especially the derivatives of natural products.
Because their chemical synthesis is long (frequently more than 20 steps) and usually associated with low yields, such synthetic efforts become rapidly impractical and thus drastically limit the development and in-depth study of large series of congeneric analogs. Meanwhile, these series are highly useful for medicinal chemists to establish structure-activity relationships and enable the rational design of new molecules with enhanced potency and/or selectivity toward a particular pathogen. Herein, I would like to emphasize how cheminformatic and molecular-modeling approaches should be considered key methods and tools for achieving the overarching goal of developing the next generation of antibacterials. Developed over the past 30 years and benefiting from both the increase in computational power and the skyrocketing availability of chemogenomic data, these computational chemistry methods have become an essential element in the drug-discovery pipeline. Several well-known marketed drugs (e.g., imatinib, zanamivir, and nelfinavir) and countless drug candidates currently in clinical trials have been discovered and/or optimized by computational chemistry techniques. Essentially, these techniques allow for the rapid and inexpensive analysis, visualization, modeling, and in silico screening of virtual libraries containing either tens of millions of diverse molecules or smaller focused sets of congeneric series centered on known actives (e.g., fluoroquinolones and macrolides). In my opinion, two main categories of computational approaches are of strategic importance in the race toward new antimicrobials. Often, the main biological target of antibiotics is unknown, unavailable, or very dissimilar to all other known structures from the Protein Data Bank (PDB). For all such cases, ligand-based modeling methods are relevant. 
Such techniques rely solely on the 2D and/or 3D molecular structures of the compounds included in the screening library without taking into account any structural information relative to the target protein. Recent improvements and successful case studies have involved (1) 3D pharmacophore modeling, which superimposes the conformations of a set of ligands and attempts to identify the key pharmacophoric features (e.g., aromatic ring, hydrophobic substituent, and H-bond donor) present in all actives but absent in inactives; (2) clustering algorithms that group compounds with high structural and/or conformational similarity into small clusters, allowing for the identification of activity cliffs (i.e., extremely similar compounds with dissimilar experimental activity) and local structure-activity relationships of high interest for rational drug design; and (3) quantitative structure-activity relationships (QSARs) [1: Cherkasov A. Muratov E.N. Fourches D. Varnek A. Baskin I.I. Cronin M. Dearden J. Gramatica P. Martin Y.C. Todeschini R. et al. J. Med. Chem. 2014; 57: 4977-5010], which use 2D and 3D molecular descriptors and machine-learning techniques to establish quantified links between the structural features of ligands and their experimental activity (e.g., pKi, half maximal inhibitory concentration, minimum inhibitory concentration, and efflux) [2: Ghate M. Devi P. Parikh J. Vyas V.K. Med. Chem. 2013; 9: 474-485]. Importantly, QSAR modeling techniques have benefitted highly from the rapid development of artificial intelligence relying on modern learning algorithms (e.g., deep-learning neural networks, random forests, and support vector machines) and complex hierarchical architectures. 
When the 3D structure of the biological target is known (either experimentally determined by X-ray or nuclear magnetic resonance or computationally derived by homology modeling), additional structure-based modeling techniques can be considered. The most common is 3D molecular docking, which allows for the prediction and scoring of the 3D binding mode of a small molecule in the binding pocket of a target protein [3: Anand R.S. Somasundaram S. Doble M. Paramasivan C.N. BMC Struct. Biol. 2011; 11: 47-60]. From fully rigid to flexible, ensemble based, or induced fit, molecular-docking protocols and associated scoring functions have drastically evolved and improved over these past 20 years. They have done so to the point that conducting molecular docking toward biological targets as complex as the bacterial ribosome (see Figure 1 with the modeling of linezolid [PDB: 3CPW]) has become feasible. Moreover, the rise of graphics processing unit (GPU) acceleration is further revolutionizing the way massively parallel computations are conducted, especially molecular-dynamics simulations (MDSs). MDSs enable cheminformaticians and medicinal chemists to simulate the motions of every single atom of a solvent-explicit system (e.g., protein-ligand complex fully solvated in water) over biological time (typically 100 ns). Not that long ago, such types of computations used to take several months while running on very expensive supercomputers; now they can be conducted in just several hours on a desktop-sized GPU workstation at a fraction of the cost. MDSs can thus be done for series of protein-ligand complexes [4: Ash J. Fourches D. J. Chem. Inf. Model. 2017; https://doi.org/10.1021/acs.jcim.7b00048], allowing for a better characterization and overall understanding of the dynamic molecular interactions that antibacterials can undergo once docked in the binding site of a given target. 
Again, this is critical for the rational design of compounds with enhanced potency and selectivity. In summary, molecular modeling is poised to play a strategic and critical role in the discovery, development, and optimization of the next generation of antibacterial leads. Disrupting ligand- and structure-based technologies [5: Fourches D. Muratov E. Ding F. Dokholyan N.V. Tropsha A. J. Chem. Inf. Model. 2013; 53: 1915-1922] allows for the fast screening of millions of potentially testable molecules in a matter of hours of CPU time. Obviously, computational hits need to be confirmed experimentally, but only a small fraction of the library (e.g., top 1% of compounds predicted to have strong binding affinity toward the target) ends up being synthesized and tested in vitro and/or in vivo, resulting in significant cost reduction (especially in the case of complex derivatives of natural products). Academia has a particular role to play by developing the methods that accurately forecast the antibacterial properties of new untested compounds and rationally generate novel molecular candidates for synthesis and testing. However, Big Pharma should also look toward creating new partnerships with academic laboratories conducting innovative molecular modeling to boost the rational development of novel antibacterials. 
}, number={1}, journal={Chem}, publisher={Elsevier BV}, author={Fourches, Denis}, year={2017}, month=jul, pages={13--14} }
@article{borrel_fourches_2017, title={{RealityConvert}: a tool for preparing {3D} models of biochemical structures for augmented and virtual reality}, volume={33}, ISSN={1460-2059}, DOI={10.1093/bioinformatics/btx485}, number={23}, journal={Bioinformatics}, author={Borrel, Alexandre and Fourches, Denis}, year={2017}, month=dec, pages={3816--3818} }
@article{kuenemann_szymczyk_chen_sultana_hinks_freeman_williams_fourches_vinueza_2017, title={{Weaver's} historic accessible collection of synthetic dyes: a cheminformatics analysis}, volume={8}, ISSN={2041-6539}, DOI={10.1039/c7sc00567a}, abstractNote={The Max Weaver Dye Library is presented to the scientific community with a cheminformatics approach to enhance research opportunities with this unique collection of ∼98 000 vials of custom-made dyes.}, number={6}, journal={Chemical Science}, author={Kuenemann, Melaine A. and Szymczyk, Malgorzata and Chen, Yufei and Sultana, Nadia and Hinks, David and Freeman, Harold S. and Williams, Antony J. and Fourches, Denis and Vinueza, Nelson R.}, year={2017}, month=jun, pages={4334--4339} }
@article{borysov_hannig_marron_muratov_fourches_tropsha_2016, title={Activity prediction and identification of mis-annotated chemical compounds using extreme descriptors}, volume={30}, ISSN={0886-9383}, url={http://dx.doi.org/10.1002/CEM.2776}, DOI={10.1002/CEM.2776}, abstractNote={Data pre‐processing that includes removal of descriptors with low variance is a standard first step in quantitative structure–activity relationship modeling. In this paper, we study low‐variance descriptors and show that some of them contain significant amounts of useful information. 
In particular, we define the notion of extreme descriptors (those variables that have the same value for almost all compounds and only a few values that are different from the common median). We show that extreme descriptors can be helpful for activity prediction in a standard binary classification setting. Moreover, we demonstrate using two case studies (M2 muscarinic receptors and skin sensitization) that extreme descriptors can be used for the identification of possibly mislabeled compounds. Because of these previously unknown, but important, properties, extreme descriptors should be considered in quantitative structure–activity relationship modeling studies. Copyright © 2016 John Wiley & Sons, Ltd.}, number={3}, journal={Journal of Chemometrics}, publisher={Wiley}, author={Borysov, Petro and Hannig, Jan and Marron, J. S. and Muratov, Eugene and Fourches, Denis and Tropsha, Alexander}, year={2016}, month=feb, pages={99--108} }
@article{alves_muratov_capuzzi_politi_low_braga_zakharov_sedykh_mokshyna_farag_et al._2016, title={Alarms about structural alerts}, volume={18}, ISSN={1463-9270}, DOI={10.1039/c6gc01492e}, abstractNote={Integrative approach for safety assessment of new chemicals by combining structural alerts and QSAR models.}, number={16}, journal={Green Chemistry}, author={Alves, Vinicius M. and Muratov, Eugene N. and Capuzzi, Stephen J. and Politi, Regina and Low, Yen and Braga, Rodolpho C. and Zakharov, Alexey V. and Sedykh, Alexander and Mokshyna, Elena and Farag, Sherif and others}, year={2016}, pages={4348--4360} }
@comment{The next entry is the same work as the elkins ... 2015 entry further down (identical title, journal, volume, issue and pages); its DOI is copied from that entry.}
@article{elkins_fedele_szklarz_azeez_salah_mikolajczyk_romanov_sepetov_huang_roth_et al._2016, title={Comprehensive characterization of the {Published Kinase Inhibitor Set}}, volume={34}, DOI={10.1038/NBT.3374}, number={1}, journal={Nature Biotechnology}, author={Elkins, J. M. and Fedele, V. and Szklarz, M. and Azeez, K. R. A. and Salah, E. and Mikolajczyk, J. and Romanov, S. and Sepetov, N. and Huang, X. P. and Roth, B. L. 
and others}, year={2016}, pages={95--103} }
@article{fourches_pu_li_zhou_mu_su_yan_tropsha_2016, title={Computer-aided design of carbon nanotubes with the desired bioactivity and safety profiles}, volume={10}, ISSN={1743-5404}, DOI={10.3109/17435390.2015.1073397}, abstractNote={Growing experimental evidences suggest the existence of direct relationships between the surface chemistry of nanomaterials and their biological effects. Herein, we have employed computational approaches to design a set of biologically active carbon nanotubes (CNTs) with controlled protein binding and cytotoxicity. Quantitative structure–activity relationship (QSAR) models were built and validated using a dataset of 83 surface-modified CNTs. A subset of a combinatorial virtual library of 240 000 ligands potentially attachable to CNTs was selected to include molecules that were within the chemical similarity threshold with respect to the modeling set compounds. QSAR models were then employed to virtually screen this subset and prioritize CNTs for chemical synthesis and biological evaluation. Ten putatively active and 10 putatively inactive CNTs decorated with the ligands prioritized by virtual screening for either protein-binding or cytotoxicity assay were synthesized and tested. We found that all 10 putatively inactive and 7 of 10 putatively active CNTs were confirmed in the protein-binding assay, whereas all 10 putatively inactive and 6 of 10 putatively active CNTs were confirmed in the cytotoxicity assay. 
This proof-of-concept study shows that computational models can be employed to guide the design of surface-modified nanomaterials with the desired biological and safety profiles.}, number={3}, journal={Nanotoxicology}, author={Fourches, Denis and Pu, Dongqiuye and Li, Liwen and Zhou, Hongyu and Mu, Qingxin and Su, Gaoxing and Yan, Bing and Tropsha, Alexander}, year={2016}, month=mar, pages={374--383} }
@article{zakharov_varlamova_lagunin_dmitriev_muratov_fourches_kuz'min_poroikov_tropsha_nicklaus_2016, title={{QSAR} Modeling and Prediction of Drug-Drug Interactions}, volume={13}, ISSN={1543-8392}, DOI={10.1021/acs.molpharmaceut.5b00762}, abstractNote={Severe adverse drug reactions (ADRs) are the fourth leading cause of fatality in the U.S. with more than 100,000 deaths per year. As up to 30% of all ADRs are believed to be caused by drug-drug interactions (DDIs), typically mediated by cytochrome P450s, possibilities to predict DDIs from existing knowledge are important. We collected data from public sources on 1485, 2628, 4371, and 27,966 possible DDIs mediated by four cytochrome P450 isoforms 1A2, 2C9, 2D6, and 3A4 for 55, 73, 94, and 237 drugs, respectively. For each of these data sets, we developed and validated QSAR models for the prediction of DDIs. As a unique feature of our approach, the interacting drug pairs were represented as binary chemical mixtures in a 1:1 ratio. We used two types of chemical descriptors: quantitative neighborhoods of atoms (QNA) and simplex descriptors. Radial basis functions with self-consistent regression (RBF-SCR) and random forest (RF) were utilized to build QSAR models predicting the likelihood of DDIs for any pair of drug molecules. Our models showed balanced accuracy of 72-79% for the external test sets with a coverage of 81.36-100% when a conservative threshold for the model's applicability domain was applied. 
We generated virtually all possible binary combinations of marketed drugs and employed our models to identify drug pairs predicted to be instances of DDI. More than 4500 of these predicted DDIs that were not found in our training sets were confirmed by data from the DrugBank database.}, number={2}, journal={Molecular Pharmaceutics}, author={Zakharov, Alexey V. and Varlamova, Ekaterina V. and Lagunin, Alexey A. and Dmitriev, Alexander V. and Muratov, Eugene N. and Fourches, Denis and Kuz'min, Victor E. and Poroikov, Vladimir V. and Tropsha, Alexander and Nicklaus, Marc C.}, year={2016}, month=feb, pages={545--556} }
@article{alves_capuzzi_muratov_braga_thornton_fourches_strickland_kleinstreuer_andrade_tropsha_2016, title={{QSAR} models of human data can enrich or replace {LLNA} testing for human skin sensitization}, volume={18}, ISSN={1463-9270}, DOI={10.1039/c6gc01836j}, abstractNote={An example of structural transformation of human skin sensitizers into various non-sensitizers based on interpretation of QSAR models.}, number={24}, journal={Green Chemistry}, author={Alves, Vinicius M. and Capuzzi, Stephen J. and Muratov, Eugene N. and Braga, Rodolpho C. and Thornton, Thomas E. and Fourches, Denis and Strickland, Judy and Kleinstreuer, Nicole and Andrade, Carolina H. and Tropsha, Alexander}, year={2016}, pages={6501--6515} }
@article{fourches_muratov_tropsha_2016, title={Trust, but Verify {II}: A Practical Guide to Chemogenomics Data Curation}, volume={56}, ISSN={1549-960X}, DOI={10.1021/acs.jcim.6b00129}, abstractNote={There is a growing public concern about the lack of reproducibility of experimental data published in peer-reviewed scientific literature. Herein, we review the most recent alerts regarding experimental data quality and discuss initiatives taken thus far to address this problem, especially in the area of chemical genomics. 
Going beyond just acknowledging the issue, we propose a chemical and biological data curation workflow that relies on existing cheminformatics approaches to flag, and when appropriate, correct possibly erroneous entries in large chemogenomics data sets. We posit that the adherence to the best practices for data curation is important for both experimental scientists who generate primary data and deposit them in chemical genomics databases and computational researchers who rely on these data for model development.}, number={7}, journal={Journal of Chemical Information and Modeling}, author={Fourches, Denis and Muratov, Eugene and Tropsha, Alexander}, year={2016}, month=jul, pages={1243--1252} }
@article{elkins_fedele_szklarz_abdul azeez_salah_mikolajczyk_romanov_sepetov_huang_roth_et al._2015, title={Comprehensive characterization of the {Published Kinase Inhibitor Set}}, volume={34}, ISSN={1087-0156 1546-1696}, url={http://dx.doi.org/10.1038/NBT.3374}, DOI={10.1038/NBT.3374}, abstractNote={Despite the success of protein kinase inhibitors as approved therapeutics, drug discovery has focused on a small subset of kinase targets. Here we provide a thorough characterization of the Published Kinase Inhibitor Set (PKIS), a set of 367 small-molecule ATP-competitive kinase inhibitors that was recently made freely available with the aim of expanding research in this field and as an experiment in open-source target validation. We screen the set in activity assays with 224 recombinant kinases and 24 G protein-coupled receptors and in cellular assays of cancer cell proliferation and angiogenesis. We identify chemical starting points for designing new chemical probes of orphan kinases and illustrate the utility of these leads by developing a selective inhibitor for the previously untargeted kinases LOK and SLK. Our cellular screens reveal compounds that modulate cancer cell growth and angiogenesis in vitro. 
These reagents and associated data illustrate an efficient way forward to increasing understanding of the historically untargeted kinome.}, number={1}, journal={Nature Biotechnology}, publisher={Springer Science and Business Media LLC}, author={Elkins, Jonathan M and Fedele, Vita and Szklarz, Marta and Abdul Azeez, Kamal R and Salah, Eidarus and Mikolajczyk, Jowita and Romanov, Sergei and Sepetov, Nikolai and Huang, Xi-Ping and Roth, Bryan L and others}, year={2015}, month=oct, pages={95--103} }
@comment{fourches_muratov_tropsha_2015 is a journal correspondence: it is typed as article so that styles print the journal, volume, number and pages, which the misc type does not use.}
@article{fourches_muratov_tropsha_2015, title={Curation of chemogenomics data}, volume={11}, ISSN={1552-4469}, DOI={10.1038/nchembio.1881}, number={8}, journal={Nature Chemical Biology}, author={Fourches, Denis and Muratov, Eugene and Tropsha, Alexander}, year={2015}, month=aug, pages={535} }
@article{baker_fourches_tropsha_2015, title={Drug Side Effect Profiles as Molecular Descriptors for Predictive Modeling of Target Bioactivity}, volume={34}, ISSN={1868-1743}, url={http://dx.doi.org/10.1002/MINF.201400134}, DOI={10.1002/MINF.201400134}, number={2-3}, journal={Molecular Informatics}, publisher={Wiley}, author={Baker, Nancy C. and Fourches, Denis and Tropsha, Alexander}, year={2015}, month=feb, pages={160--170} }
@article{isayev_fourches_muratov_oses_rasch_tropsha_curtarolo_2015, title={Materials Cartography: Representing and Mining Materials Space Using Structural and Electronic Fingerprints}, volume={27}, ISSN={0897-4756 1520-5002}, url={http://dx.doi.org/10.1021/CM503507H}, DOI={10.1021/CM503507H}, abstractNote={As the proliferation of high-throughput approaches in materials science is increasing the wealth of data in the field, the gap between accumulated-information and derived-knowledge widens. We address the issue of scientific discovery in materials databases by introducing novel analytical approaches based on structural and electronic materials fingerprints. 
The framework is employed to (i) query large databases of materials using similarity concepts, (ii) map the connectivity of materials space (i.e., as a materials cartograms) for rapidly identifying regions with unique organizations/properties, and (iii) develop predictive Quantitative Materials Structure–Property Relationship models for guiding materials design. In this study, we test these fingerprints by seeking target material properties. As a quantitative example, we model the critical temperatures of known superconductors. Our novel materials fingerprinting and materials cartography approaches contribute to the emerging field of materials informati...}, number={3}, journal={Chemistry of Materials}, publisher={American Chemical Society (ACS)}, author={Isayev, Olexandr and Fourches, Denis and Muratov, Eugene N. and Oses, Corey and Rasch, Kevin and Tropsha, Alexander and Curtarolo, Stefano}, year={2015}, month=jan, pages={735--743} }
@article{braga_alves_silva_muratov_fourches_liao_tropsha_andrade_2015, title={{Pred-hERG}: A Novel web-Accessible Computational Tool for Predicting Cardiac Toxicity}, volume={34}, ISSN={1868-1751}, DOI={10.1002/minf.201500040}, number={10}, journal={Molecular Informatics}, author={Braga, Rodolpho C. and Alves, Vinicius M. and Silva, Meryck F. B. and Muratov, Eugene and Fourches, Denis and Liao, Luciano M. and Tropsha, Alexander and Andrade, Carolina H.}, year={2015}, month=oct, pages={698--701} }
@article{alves_muratov_fourches_strickland_kleinstreuer_andrade_tropsha_2015, title={Predicting chemically-induced skin reactions. Part {I}: {QSAR} models of skin sensitization and their application to identify potentially hazardous compounds}, volume={284}, ISSN={0041-008X}, url={http://dx.doi.org/10.1016/J.TAAP.2014.12.014}, DOI={10.1016/J.TAAP.2014.12.014}, abstractNote={Repetitive exposure to a chemical agent can induce an immune reaction in inherently susceptible individuals that leads to skin sensitization. 
Although many chemicals have been reported as skin sensitizers, there have been very few rigorously validated QSAR models with defined applicability domains (AD) that were developed using a large group of chemically diverse compounds. In this study, we have aimed to compile, curate, and integrate the largest publicly available dataset related to chemically-induced skin sensitization, use this data to generate rigorously validated and QSAR models for skin sensitization, and employ these models as a virtual screening tool for identifying putative sensitizers among environmental chemicals. We followed best practices for model building and validation implemented with our predictive QSAR workflow using Random Forest modeling technique in combination with SiRMS and Dragon descriptors. The Correct Classification Rate (CCR) for QSAR models discriminating sensitizers from non-sensitizers was 71-88% when evaluated on several external validation sets, within a broad AD, with positive (for sensitizers) and negative (for non-sensitizers) predicted rates of 85% and 79% respectively. When compared to the skin sensitization module included in the OECD QSAR Toolbox as well as to the skin sensitization model in publicly available VEGA software, our models showed a significantly higher prediction accuracy for the same sets of external compounds as evaluated by Positive Predicted Rate, Negative Predicted Rate, and CCR. These models were applied to identify putative chemical hazards in the Scorecard database of possible skin or sense organ toxicants as primary candidates for experimental validation.}, number={2}, journal={Toxicology and Applied Pharmacology}, publisher={Elsevier BV}, author={Alves, Vinicius M. and Muratov, Eugene and Fourches, Denis and Strickland, Judy and Kleinstreuer, Nicole and Andrade, Carolina H. 
and Tropsha, Alexander}, year={2015}, month=apr, pages={262--272} }
@comment{Part II originally reused the Part I citation key (a repeated-entry error that left Part II unreachable); the b suffix below makes its key unique while the Part I key is unchanged.}
@article{alves_muratov_fourches_strickland_kleinstreuer_andrade_tropsha_2015b, title={Predicting chemically-induced skin reactions. Part {II}: {QSAR} models of skin permeability and the relationships between skin permeability and skin sensitization}, volume={284}, ISSN={0041-008X}, url={http://dx.doi.org/10.1016/J.TAAP.2014.12.013}, DOI={10.1016/J.TAAP.2014.12.013}, abstractNote={Skin permeability is widely considered to be mechanistically implicated in chemically-induced skin sensitization. Although many chemicals have been identified as skin sensitizers, there have been very few reports analyzing the relationships between molecular structure and skin permeability of sensitizers and non-sensitizers. The goals of this study were to: (i) compile, curate, and integrate the largest publicly available dataset of chemicals studied for their skin permeability; (ii) develop and rigorously validate QSAR models to predict skin permeability; and (iii) explore the complex relationships between skin sensitization and skin permeability. Based on the largest publicly available dataset compiled in this study, we found no overall correlation between skin permeability and skin sensitization. In addition, cross-species correlation coefficient between human and rodent permeability data was found to be as low as R(2)=0.44. Human skin permeability models based on the random forest method have been developed and validated using OECD-compliant QSAR modeling workflow. Their external accuracy was high (Q(2)ext=0.73 for 63% of external compounds inside the applicability domain). The extended analysis using both experimentally-measured and QSAR-imputed data still confirmed the absence of any overall concordance between skin permeability and skin sensitization. This observation suggests that chemical modifications that affect skin permeability should not be presumed a priori to modulate the sensitization potential of chemicals. 
The models reported herein as well as those developed in the companion paper on skin sensitization suggest that it may be possible to rationally design compounds with the desired high skin permeability but low sensitization potential.}, number={2}, journal={Toxicology and Applied Pharmacology}, publisher={Elsevier BV}, author={Alves, Vinicius M. and Muratov, Eugene and Fourches, Denis and Strickland, Judy and Kleinstreuer, Nicole and Andrade, Carolina H. and Tropsha, Alexander}, year={2015}, month={Apr}, pages={273–280} } @article{mu_jiang_chen_zhou_fourches_tropsha_yan_2014, title={Chemical Basis of Interactions Between Engineered Nanoparticles and Biological Systems}, volume={114}, ISSN={0009-2665 1520-6890}, url={http://dx.doi.org/10.1021/CR400295A}, DOI={10.1021/CR400295A}, abstractNote={As defined by the European Commission, nanomaterial is a natural, incidental or manufactured material containing particles in an unbound state or as an aggregate or agglomerate in which ≥ 50% of the particles in the number size distribution have one or more external dimensions in the size range 1 to 100 nm. In specific cases and where warranted by concerns for the environment, health, safety or competition, the number size distribution threshold of 50% may be replaced with a threshold between 1 and 50%.1 Engineered nanomaterials (ENMs) refer to man-made nanomaterials. Materials in the nanometer range often possess unique physical, optical, electronic, and biological properties compared with larger particles, such as the strength of graphene,2 the electronic properties of carbon nanotubes (CNTs),3 the antibacterial activity of silver nanoparticles4 and the optical properties of quantum dots (QDs).5 The unique and advanced properties of ENMs have led to a rapid increase in their application. These applications include aerospace and airplanes, energy, architecture, chemicals and coatings, catalysts, environmental protection, computer memory, biomedicine and consumer products. 
Driven by these demands, the worldwide ENM production volume in 2016 is conservatively estimated in a market report by Future Markets to be 44,267 tons or ≥ $5 billion.6 As the production and applications of ENMs rapidly expand, their environmental impacts and effects on human health are becoming increasingly significant.7 Due to their small sizes, ENMs are easily made airborne.8 However, no accurate method to quantitatively measure their concentration in air currently exists. A recently reported incident of severe pulmonary fibrosis caused by inhaled polymer nanoparticles in seven female workers obtained much attention.9 In addition to the release of ENM waste from industrial sites, a major release of ENMs to environmental water occurs due to home and personal use of appliances, cosmetics and personal products, such as shampoo and sunscreen.10 Airborne and aqueous ENMs pose immediate danger to the human respiratory and gastrointestinal systems. ENMs may enter other human organs after they are absorbed into the bloodstream through the gastrointestinal or respiratory systems.11,12 Furthermore, ENMs in cosmetics and personal care products, such as lotion, sunscreen and shampoo may enter human circulation through skin penetration.13 ENMs are very persistent in the environment and are slowly degraded. The dissolved metal ions from ENMs can also revert back to nanoparticles under natural conditions.14 ENMs are stored in plants, microbes and animal organs and can be transferred and accumulated through the food chain.15,16 In addition to the accidental entry of ENMs into human and biological systems, ENMs are also purposefully injected into or enter humans for medicinal and diagnostic purposes.17 Therefore, interactions of ENMs with biological systems are inevitable. In addition to engineered nanomaterials, there are also naturally existing nanomaterials such as proteins and DNA molecules, which are key components of biological systems. 
These materials, combined with lipids and organic and inorganic small molecules, form the basic units of living systems –cells.18 To elucidate how nanomaterials affect organs and physiological functions, a thorough understanding of how nanomaterials perturb cells and biological molecules is required (Figure 1). Rapidly accumulating evidence indicates that ENMs interact with the basic components of biological systems, such as proteins, DNA molecules and cells.19-21 The driving forces for such interactions are quite complex and include the size, shape and surface properties (e.g., hydrophobicity, hydrogen-bonding capability, pi-bonds and stereochemical interactions) of ENMs.22-25 Figure 1 Interactions of nanoparticles with biological systems at different levels. Nanoparticles enter the human body through various pathways, reaching different organs and contacting tissues and cells. All of these interactions are based on nanoparticle-biomacromolecule ... Evidence also indicates that chemical modifications on a nanoparticle’s surface alter its interactions with biological systems.26-28 These observations not only support the hypothesis that basic nano-bio interactions are mainly physicochemical in nature but also provide a powerful approach to controlling the nature and strength of a nanoparticle’s interactions with biological systems. Practically, a thorough understanding of the fundamental chemical interactions between nanoparticles and biological systems has two direct impacts. First, this knowledge will encourage and assist experimental approaches to chemically modify nanoparticle surfaces for various industrial or medicinal applications. Second, a range of chemical information can be combined with computational methods to investigate nano-biological properties and predict desired nanoparticle properties to direct experiments.29-31 The literature regarding nanoparticle-biological system interactions has increased exponentially in the past decade (Figure 2). 
However, a mechanistic understanding of the chemical basis for such complex interactions is still lacking. This review intends to explore such an understanding in the context of recent publications. Figure 2 An analysis of literature statistics indicates growing concern for the topics that are the focus of this review. The number of publications and citations were obtained using the keywords “nanoparticles” and “biological systems” ... A breakthrough technology cannot prosper without wide acceptance from the public and society; that is, it must pose minimal harm to human health and the environment. Nanotechnology is now facing such a critical challenge. We must elucidate the effects of ENMs on biological systems (such as biological molecules, human cells, organs and physiological systems). Accumulating experimental evidence suggests that nanoparticles interact with biological systems at nearly every level, often causing unwanted physiological consequences. Elucidating these interactions is the goal of this review. This endeavor will help regulate the proper application of ENMs in various products and their release into the environment. 
A more significant mission of this review is to direct the development of “safe-by-design” ENMs, as their demands for and applications continue to increase.}, number={15}, journal={Chemical Reviews}, publisher={American Chemical Society (ACS)}, author={Mu, Qingxin and Jiang, Guibin and Chen, Lingxin and Zhou, Hongyu and Fourches, Denis and Tropsha, Alexander and Yan, Bing}, year={2014}, month={Jun}, pages={7740–7781} } @article{goldstein_fredrik jarskog_hilliard_alfirevic_duncan_fourches_huang_lek_neale_ripke_et al._2014, title={Clozapine-induced agranulocytosis is associated with rare {HLA-DQB1} and {HLA-B} alleles}, volume={5}, ISSN={2041-1723}, url={http://dx.doi.org/10.1038/NCOMMS5757}, DOI={10.1038/NCOMMS5757}, abstractNote={Clozapine is a particularly effective antipsychotic medication but its use is curtailed by the risk of clozapine-induced agranulocytosis/granulocytopenia (CIAG), a severe adverse drug reaction occurring in up to 1% of treated individuals. Identifying genetic risk factors for CIAG could enable safer and more widespread use of clozapine. Here we perform the largest and most comprehensive genetic study of CIAG to date by interrogating 163 cases using genome-wide genotyping and whole-exome sequencing. We find that two loci in the major histocompatibility complex are independently associated with CIAG: a single amino acid in HLA-DQB1 (126Q) (P=4.7 × 10−14, odds ratio (OR)=0.19, 95% confidence interval (CI)=0.12–0.29) and an amino acid change in the extracellular binding pocket of HLA-B (158T) (P=6.4 × 10−10, OR=3.3, 95% CI=2.3–4.9). These associations dovetail with the roles of these genes in immunogenetic phenotypes and adverse drug responses for other medications, and provide insight into the pathophysiology of CIAG. Clozapine-induced agranulocytosis/granulocytopenia, or CIAG, is characterised by a rare and potentially fatal reaction to antipsychotic drugs. 
Here, the authors identify genetic variants in two immune-related genes that may contribute to the pathophysiology of CIAG.}, number={1}, journal={Nature Communications}, publisher={{Springer Science and Business Media LLC}}, author={Goldstein, Jacqueline I. and Jarskog, L. Fredrik and Hilliard, Chris and Alfirevic, Ana and Duncan, Laramie and Fourches, Denis and Huang, Hailiang and Lek, Monkol and Neale, Benjamin M. and Ripke, Stephan and others}, year={2014}, month=sep } @article{golbraikh_muratov_fourches_tropsha_2014, title={Data Set Modelability by {QSAR}}, volume={54}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/CI400572X}, DOI={10.1021/CI400572X}, abstractNote={We introduce a simple MODelability Index (MODI) that estimates the feasibility of obtaining predictive QSAR models (correct classification rate above 0.7) for a binary data set of bioactive compounds. MODI is defined as an activity class-weighted ratio of the number of nearest-neighbor pairs of compounds with the same activity class versus the total number of pairs. The MODI values were calculated for more than 100 data sets, and the threshold of 0.65 was found to separate the nonmodelable and modelable data sets.}, number={1}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Golbraikh, Alexander and Muratov, Eugene and Fourches, Denis and Tropsha, Alexander}, year={2014}, month=jan, pages={1--4} } @article{blatt_farag_corey_sarrimanolis_muratov_fourches_tropsha_janzen_2014, title={Expanding the scope of drug repurposing in pediatrics: The {Children's Pharmacy Collaborative}™}, volume={19}, ISSN={1359-6446}, url={http://dx.doi.org/10.1016/J.DRUDIS.2014.08.003}, DOI={10.1016/J.DRUDIS.2014.08.003}, abstractNote={Drug repurposing is the use of 'old' drugs for new indications, avoiding the need for time- and cost-intensive toxicity studies. 
This approach should be particularly attractive for pediatrics, but its use in this population has been limited. One obstacle has been the lack of a comprehensive database of drugs for which there already is at least one indication in children. We describe the development of The Children's Pharmacy Collaborative, which should grow over time, serve as a resource for professionals and families, and stimulate drug-repurposing efforts for a range of pediatric disorders.}, number={11}, journal={Drug Discovery Today}, publisher={Elsevier BV}, author={Blatt, Julie and Farag, Sherif and Corey, Seth J. and Sarrimanolis, Zafeira and Muratov, Eugene and Fourches, Denis and Tropsha, Alexander and Janzen, William P.}, year={2014}, month=nov, pages={1696--1698} } @article{cherkasov_muratov_fourches_varnek_baskin_cronin_dearden_gramatica_martin_todeschini_et al._2014, title={{QSAR} Modeling: Where Have You Been? Where Are You Going To?}, volume={57}, ISSN={0022-2623 1520-4804}, url={http://dx.doi.org/10.1021/JM4004285}, DOI={10.1021/JM4004285}, abstractNote={Quantitative structure-activity relationship modeling is one of the major computational tools employed in medicinal chemistry. However, throughout its entire history it has drawn both praise and criticism concerning its reliability, limitations, successes, and failures. In this paper, we discuss (i) the development and evolution of QSAR; (ii) the current trends, unsolved problems, and pressing challenges; and (iii) several novel and emerging applications of QSAR modeling. Throughout this discussion, we provide guidelines for QSAR development, validation, and application, which are summarized in best practices for building rigorously validated and externally predictive QSAR models. We hope that this Perspective will help communications between computational and experimental chemists toward collaborative development and use of QSAR models. 
We also believe that the guidelines presented here will help journal editors and reviewers apply more stringent scientific standards to manuscripts reporting new QSAR studies, as well as encourage the use of high quality, validated QSARs for regulatory decision making.}, number={12}, journal={Journal of Medicinal Chemistry}, publisher={American Chemical Society (ACS)}, author={Cherkasov, Artem and Muratov, Eugene N. and Fourches, Denis and Varnek, Alexandre and Baskin, Igor I. and Cronin, Mark and Dearden, John and Gramatica, Paola and Martin, Yvonne C. and Todeschini, Roberto and others}, year={2014}, month=jan, pages={4977--5010} } @article{zhang_fourches_sedykh_zhu_golbraikh_ekins_clark_connelly_sigal_hodges_et al._2013, title={Discovery of Novel Antimalarial Compounds Enabled by {QSAR}-Based Virtual Screening}, volume={53}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/ci300421n}, DOI={10.1021/ci300421n}, abstractNote={Quantitative structure-activity relationship (QSAR) models have been developed for a data set of 3133 compounds defined as either active or inactive against P. falciparum. Because the data set was strongly biased toward inactive compounds, different sampling approaches were employed to balance the ratio of actives versus inactives, and models were rigorously validated using both internal and external validation approaches. The balanced accuracy for assessing the antimalarial activities of 70 external compounds was between 87% and 100% depending on the approach used to balance the data set. Virtual screening of the ChemBridge database using QSAR models identified 176 putative antimalarial compounds that were submitted for experimental validation, along with 42 putative inactives as negative controls. Twenty five (14.2%) computational hits were found to have antimalarial activities with minimal cytotoxicity to mammalian cells, while all 42 putative inactives were confirmed experimentally. 
Structural inspection of confirmed active hits revealed novel chemical scaffolds, which could be employed as starting points to discover novel antimalarial agents.}, number={2}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Zhang, Liying and Fourches, Denis and Sedykh, Alexander and Zhu, Hao and Golbraikh, Alexander and Ekins, Sean and Clark, Julie and Connelly, Michele C. and Sigal, Martina and Hodges, Dena and others}, year={2013}, month=jan, pages={475--492} } @article{low_sedykh_fourches_golbraikh_whelan_rusyn_tropsha_2013, title={Integrative Chemical–Biological Read-Across Approach for Chemical Hazard Classification}, volume={26}, ISSN={0893-228X 1520-5010}, url={http://dx.doi.org/10.1021/TX400110F}, DOI={10.1021/TX400110F}, abstractNote={Traditional read-across approaches typically rely on the chemical similarity principle to predict chemical toxicity; however, the accuracy of such predictions is often inadequate due to the underlying complex mechanisms of toxicity. Here, we report on the development of a hazard classification and visualization method that draws upon both chemical structural similarity and comparisons of biological responses to chemicals measured in multiple short-term assays ("biological" similarity). The Chemical-Biological Read-Across (CBRA) approach infers each compound's toxicity from both chemical and biological analogues whose similarities are determined by the Tanimoto coefficient. Classification accuracy of CBRA was compared to that of classical RA and other methods using chemical descriptors alone or in combination with biological data. Different types of adverse effects (hepatotoxicity, hepatocarcinogenicity, mutagenicity, and acute lethality) were classified using several biological data types (gene expression profiling and cytotoxicity screening). 
CBRA-based hazard classification exhibited consistently high external classification accuracy and applicability to diverse chemicals. Transparency of the CBRA approach is aided by the use of radial plots that show the relative contribution of analogous chemical and biological neighbors. Identification of both chemical and biological features that give rise to the high accuracy of CBRA-based toxicity prediction facilitates mechanistic interpretation of the models.}, number={8}, journal={Chemical Research in Toxicology}, publisher={American Chemical Society (ACS)}, author={Low, Yen and Sedykh, Alexander and Fourches, Denis and Golbraikh, Alexander and Whelan, Maurice and Rusyn, Ivan and Tropsha, Alexander}, year={2013}, month=aug, pages={1199--1208} } @article{fourches_muratov_ding_dokholyan_tropsha_2013, title={Predicting Binding Affinity of {CSAR} Ligands Using Both Structure-Based and Ligand-Based Approaches}, volume={53}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/CI400216Q}, DOI={10.1021/CI400216Q}, abstractNote={We report on the prediction accuracy of ligand-based (2D QSAR) and structure-based (MedusaDock) methods used both independently and in consensus for ranking the congeneric series of ligands binding to three protein targets (UK, ERK2, and CHK1) from the CSAR 2011 benchmark exercise. An ensemble of predictive QSAR models was developed using known binders of these three targets extracted from the publicly available ChEMBL database. Selected models were used to predict the binding affinity of CSAR compounds toward the corresponding targets and rank them accordingly; the overall ranking accuracy evaluated by Spearman correlation was as high as 0.78 for UK, 0.60 for ERK2, and 0.56 for CHK1, placing our predictions in the top 10% among all the participants. 
In parallel, MedusaDock, designed to predict reliable docking poses, was also used for ranking the CSAR ligands according to their docking scores; the resulting accuracy (Spearman correlation) for UK, ERK2, and CHK1 were 0.76, 0.31, and 0.26, respectively. In addition, performance of several consensus approaches combining MedusaDock- and QSAR-predicted ranks altogether has been explored; the best approach yielded Spearman correlation coefficients for UK, ERK2, and CHK1 of 0.82, 0.50, and 0.45, respectively. This study shows that (i) externally validated 2D QSAR models were capable of ranking CSAR ligands at least as accurately as more computationally intensive structure-based approaches used both by us and by other groups and (ii) ligand-based QSAR models can complement structure-based approaches by boosting the prediction performances when used in consensus.}, number={8}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Fourches, Denis and Muratov, Eugene and Ding, Feng and Dokholyan, Nikolay V. 
and Tropsha, Alexander}, year={2013}, month=jul, pages={1915--1922} } @article{fourches_tropsha_2013, title={Using Graph Indices for the Analysis and Comparison of Chemical Datasets}, volume={32}, ISSN={1868-1743}, url={http://dx.doi.org/10.1002/MINF.201300076}, DOI={10.1002/MINF.201300076}, number={9-10}, journal={Molecular Informatics}, publisher={Wiley}, author={Fourches, Denis and Tropsha, Alexander}, year={2013}, month=sep, pages={827--842} } @article{sedykh_fourches_duan_hucke_garneau_zhu_bonneau_tropsha_2012, title={Human Intestinal Transporter Database: {QSAR} Modeling and Virtual Profiling of Drug Uptake, Efflux and Interactions}, volume={30}, ISSN={0724-8741 1573-904X}, url={http://dx.doi.org/10.1007/S11095-012-0935-X}, DOI={10.1007/S11095-012-0935-X}, abstractNote={Membrane transporters mediate many biological effects of chemicals and play a major role in pharmacokinetics and drug resistance. The selection of viable drug candidates among biologically active compounds requires the assessment of their transporter interaction profiles. Using public sources, we have assembled and curated the largest, to our knowledge, human intestinal transporter database (>5,000 interaction entries for >3,700 molecules). This data was used to develop thoroughly validated classification Quantitative Structure-Activity Relationship (QSAR) models of transport and/or inhibition of several major transporters including MDR1, BCRP, MRP1-4, PEPT1, ASBT, OATP2B1, OCT1, and MCT1. QSAR models have been developed with advanced machine learning techniques such as Support Vector Machines, Random Forest, and k Nearest Neighbors using Dragon and MOE chemical descriptors. These models afforded high external prediction accuracies of 71–100% estimated by 5-fold external validation, and showed hit retrieval rates with up to 20-fold enrichment in the virtual screening of DrugBank compounds. 
The compendium of predictive QSAR models developed in this study can be used for virtual profiling of drug candidates and/or environmental agents with the optimal transporter profiles.}, number={4}, journal={Pharmaceutical Research}, publisher={{Springer Science and Business Media LLC}}, author={Sedykh, Alexander and Fourches, Denis and Duan, Jianmin and Hucke, Oliver and Garneau, Michel and Zhu, Hao and Bonneau, Pierre and Tropsha, Alexander}, year={2012}, month=dec, pages={996--1007} } @article{low_uehara_minowa_yamada_ohno_urushidani_sedykh_muratov_kuz’min_fourches_et al._2011, title={Predicting Drug-Induced Hepatotoxicity Using {QSAR} and Toxicogenomics Approaches}, volume={24}, ISSN={0893-228X 1520-5010}, url={http://dx.doi.org/10.1021/tx200148a}, DOI={10.1021/tx200148a}, abstractNote={Quantitative structure-activity relationship (QSAR) modeling and toxicogenomics are typically used independently as predictive tools in toxicology. In this study, we evaluated the power of several statistical models for predicting drug hepatotoxicity in rats using different descriptors of drug molecules, namely, their chemical descriptors and toxicogenomics profiles. The records were taken from the Toxicogenomics Project rat liver microarray database containing information on 127 drugs ( http://toxico.nibio.go.jp/datalist.html ). The model end point was hepatotoxicity in the rat following 28 days of continuous exposure, established by liver histopathology and serum chemistry. First, we developed multiple conventional QSAR classification models using a comprehensive set of chemical descriptors and several classification methods (k nearest neighbor, support vector machines, random forests, and distance weighted discrimination). With chemical descriptors alone, external predictivity (correct classification rate, CCR) from 5-fold external cross-validation was 61%. 
Next, the same classification methods were employed to build models using only toxicogenomics data (24 h after a single exposure) treated as biological descriptors. The optimized models used only 85 selected toxicogenomics descriptors and had CCR as high as 76%. Finally, hybrid models combining both chemical descriptors and transcripts were developed; their CCRs were between 68 and 77%. Although the accuracy of hybrid models did not exceed that of the models based on toxicogenomics data alone, the use of both chemical and biological descriptors enriched the interpretation of the models. In addition to finding 85 transcripts that were predictive and highly relevant to the mechanisms of drug-induced liver injury, chemical structural alerts for hepatotoxicity were identified. These results suggest that concurrent exploration of the chemical features and acute treatment-induced changes in transcript levels will both enrich the mechanistic understanding of subchronic liver injury and afford models capable of accurate prediction of hepatotoxicity from chemical structure and short-term assay results.}, number={8}, journal={Chemical Research in Toxicology}, publisher={American Chemical Society (ACS)}, author={Low, Yen and Uehara, Takeki and Minowa, Yohsuke and Yamada, Hiroshi and Ohno, Yasuo and Urushidani, Tetsuro and Sedykh, Alexander and Muratov, Eugene and Kuz’min, Viktor and Fourches, Denis and others}, year={2011}, month=aug, pages={1251--1262} } @article{sushko_novotarskyi_körner_pandey_cherkasov_li_gramatica_hansen_schroeter_müller_et al._2010, title={Applicability Domains for Classification Problems: Benchmarking of Distance to Models for {Ames} Mutagenicity Set}, volume={50}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/ci100253r}, DOI={10.1021/ci100253r}, abstractNote={The estimation of accuracy and applicability of QSAR and QSPR models for biological and physicochemical properties represents a critical problem. 
The developed parameter of "distance to model" (DM) is defined as a metric of similarity between the training and test set compounds that have been subjected to QSAR/QSPR modeling. In our previous work, we demonstrated the utility and optimal performance of DM metrics that have been based on the standard deviation within an ensemble of QSAR models. The current study applies such analysis to 30 QSAR models for the Ames mutagenicity data set that were previously reported within the 2009 QSAR challenge. We demonstrate that the DMs based on an ensemble (consensus) model provide systematically better performance than other DMs. The presented approach identifies 30-60% of compounds having an accuracy of prediction similar to the interlaboratory accuracy of the Ames test, which is estimated to be 90%. Thus, the in silico predictions can be used to halve the cost of experimental measurements by providing a similar prediction accuracy. The developed model has been made publicly available at http://ochem.eu/models/1 .}, number={12}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Sushko, Iurii and Novotarskyi, Sergii and Körner, Robert and Pandey, Anil Kumar and Cherkasov, Artem and Li, Jiazhong and Gramatica, Paola and Hansen, Katja and Schroeter, Timon and Müller, Klaus-Robert and others}, year={2010}, month=oct, pages={2094--2111} } @article{fourches_barnes_day_bradley_reed_tropsha_2010, title={Cheminformatics Analysis of Assertions Mined from Literature That Describe Drug-Induced Liver Injury in Different Species}, volume={23}, ISSN={0893-228X 1520-5010}, url={http://dx.doi.org/10.1021/tx900326k}, DOI={10.1021/tx900326k}, abstractNote={Drug-induced liver injury is one of the main causes of drug attrition. The ability to predict the liver effects of drug candidates from their chemical structures is critical to help guide experimental drug discovery projects toward safer medicines. 
In this study, we have compiled a data set of 951 compounds reported to produce a wide range of effects in the liver in different species, comprising humans, rodents, and nonrodents. The liver effects for this data set were obtained as assertional metadata, generated from MEDLINE abstracts using a unique combination of lexical and linguistic methods and ontological rules. We have analyzed this data set using conventional cheminformatics approaches and addressed several questions pertaining to cross-species concordance of liver effects, chemical determinants of liver effects in humans, and the prediction of whether a given compound is likely to cause a liver effect in humans. We found that the concordance of liver effects was relatively low (ca. 39-44%) between different species, raising the possibility that species specificity could depend on specific features of chemical structure. Compounds were clustered by their chemical similarity, and similar compounds were examined for the expected similarity of their species-dependent liver effect profiles. In most cases, similar profiles were observed for members of the same cluster, but some compounds appeared as outliers. The outliers were the subject of focused assertion regeneration from MEDLINE as well as other data sources. In some cases, additional biological assertions were identified, which were in line with expectations based on compounds' chemical similarities. The assertions were further converted to binary annotations of underlying chemicals (i.e., liver effect vs no liver effect), and binary quantitative structure-activity relationship (QSAR) models were generated to predict whether a compound would be expected to produce liver effects in humans. Despite the apparent heterogeneity of data, models have shown good predictive power assessed by external 5-fold cross-validation procedures. 
The external predictive power of binary QSAR models was further confirmed by their application to compounds that were retrieved or studied after the model was developed. To the best of our knowledge, this is the first study for chemical toxicity prediction that applied QSAR modeling and other cheminformatics techniques to observational data generated by the means of automated text mining with limited manual curation, opening up new opportunities for generating and modeling chemical toxicology data.}, number={1}, journal={Chemical Research in Toxicology}, publisher={American Chemical Society (ACS)}, author={Fourches, Denis and Barnes, Julie C. and Day, Nicola C. and Bradley, Paul and Reed, Jane Z. and Tropsha, Alexander}, year={2010}, month=jan, pages={171--183} } @article{rodgers_zhu_fourches_rusyn_tropsha_2010, title={Modeling Liver-Related Adverse Effects of Drugs Using {k} Nearest Neighbor Quantitative Structure-Activity Relationship Method}, volume={23}, ISSN={0893-228X 1520-5010}, url={http://dx.doi.org/10.1021/tx900451r}, DOI={10.1021/tx900451r}, abstractNote={Adverse effects of drugs (AEDs) continue to be a major cause of drug withdrawals in both development and postmarketing. While liver-related AEDs are a major concern for drug safety, there are few in silico models for predicting human liver toxicity for drug candidates. We have applied the quantitative structure-activity relationship (QSAR) approach to model liver AEDs. In this study, we aimed to construct a QSAR model capable of binary classification (active vs inactive) of drugs for liver AEDs based on chemical structure. To build QSAR models, we have employed an FDA spontaneous reporting database of human liver AEDs (elevations in activity of serum liver enzymes), which contains data on approximately 500 approved drugs. 
Approximately 200 compounds with wide clinical data coverage, structural similarity, and balanced (40/60) active/inactive ratios were selected for modeling and divided into multiple training/test and external validation sets. QSAR models were developed using the k nearest neighbor method and validated using external data sets. Models with high sensitivity (>73%) and specificity (>94%) for the prediction of liver AEDs in external validation sets were developed. To test applicability of the models, three chemical databases (World Drug Index, Prestwick Chemical Library, and Biowisdom Liver Intelligence Module) were screened in silico, and the validity of predictions was determined, where possible, by comparing model-based classification with assertions in publicly available literature. Validated QSAR models of liver AEDs based on the data from the FDA spontaneous reporting system can be employed as sensitive and specific predictors of AEDs in preclinical screening of drug candidates for potential hepatotoxicity in humans.}, number={4}, journal={Chemical Research in Toxicology}, publisher={American Chemical Society (ACS)}, author={Rodgers, Amie D. and Zhu, Hao and Fourches, Denis and Rusyn, Ivan and Tropsha, Alexander}, year={2010}, month=apr, pages={724--732} } @article{fourches_pu_tassa_weissleder_shaw_mumper_tropsha_2010, title={Quantitative Nanostructure-Activity Relationship Modeling}, volume={4}, ISSN={1936-0851 1936-086X}, url={http://dx.doi.org/10.1021/nn1013484}, DOI={10.1021/nn1013484}, abstractNote={Evaluation of biological effects, both desired and undesired, caused by manufactured nanoparticles (MNPs) is of critical importance for nanotechnology. Experimental studies, especially toxicological, are time-consuming, costly, and often impractical, calling for the development of efficient computational approaches capable of predicting biological effects of MNPs. 
To this end, we have investigated the potential of cheminformatics methods such as quantitative structure-activity relationship (QSAR) modeling to establish statistically significant relationships between measured biological activity profiles of MNPs and their physical, chemical, and geometrical properties, either measured experimentally or computed from the structure of MNPs. To reflect the context of the study, we termed our approach quantitative nanostructure-activity relationship (QNAR) modeling. We have employed two representative sets of MNPs studied recently using in vitro cell-based assays: (i) 51 various MNPs with diverse metal cores (Proc. Natl. Acad. Sci. 2008, 105, 7387-7392) and (ii) 109 MNPs with similar core but diverse surface modifiers (Nat. Biotechnol. 2005, 23, 1418-1423). We have generated QNAR models using machine learning approaches such as support vector machine (SVM)-based classification and k nearest neighbors (kNN)-based regression; their external prediction power was shown to be as high as 73% for classification modeling and having an R(2) of 0.72 for regression modeling. Our results suggest that QNAR models can be employed for: (i) predicting biological activity profiles of novel nanomaterials, and (ii) prioritizing the design and manufacturing of nanomaterials toward better and safer products.}, number={10}, journal={ACS Nano}, publisher={American Chemical Society (ACS)}, author={Fourches, Denis and Pu, Dongqiuye and Tassa, Carlos and Weissleder, Ralph and Shaw, Stanley Y. and Mumper, Russell J. 
and Tropsha, Alexander}, year={2010}, month=sep, pages={5703--5712} } @article{fourches_muratov_tropsha_2010, title={Trust, But Verify: On the Importance of Chemical Structure Curation in Cheminformatics and {QSAR} Modeling Research}, volume={50}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/ci100176x}, DOI={10.1021/ci100176x}, abstractNote={Molecular modelers and cheminformaticians typically analyze experimental data generated by other scientists. Consequently, when it comes to data accuracy, cheminformaticians are always at the mercy of data providers who may inadvertently publish (partially) erroneous data. Thus, dataset curation is crucial for any cheminformatics analysis such as similarity searching, clustering, QSAR modeling, virtual screening, etc., especially nowadays when the availability of chemical datasets in public domain has skyrocketed in recent years. Despite the obvious importance of this preliminary step in the computational analysis of any dataset, there appears to be no commonly accepted guidance or set of procedures for chemical data curation. The main objective of this paper is to emphasize the need for a standardized chemical data curation strategy that should be followed at the onset of any molecular modeling investigation. Herein, we discuss several simple but important steps for cleaning chemical records in a database including the removal of a fraction of the data that cannot be appropriately handled by conventional cheminformatics techniques. Such steps include the removal of inorganic and organometallic compounds, counterions, salts and mixtures; structure validation; ring aromatization; normalization of specific chemotypes; curation of tautomeric forms; and the deletion of duplicates. 
To emphasize the importance of data curation as a mandatory step in data analysis, we discuss several case studies where chemical curation of the original “raw” database enabled the successful modeling study (specifically, QSAR analysis) or resulted in a significant improvement of model's prediction accuracy. We also demonstrate that in some cases rigorously developed QSAR models could be even used to correct erroneous biological data associated with chemical compounds. We believe that good practices for curation of chemical records outlined in this paper will be of value to all scientists working in the fields of molecular modeling, cheminformatics, and QSAR studies.}, number={7}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Fourches, Denis and Muratov, Eugene and Tropsha, Alexander}, year={2010}, month=jun, pages={1189--1204} } @article{zhu_tropsha_fourches_varnek_papa_gramatica_öberg_dao_cherkasov_tetko_2008, title={Combinatorial {QSAR} Modeling of Chemical Toxicants Tested against {Tetrahymena} pyriformis}, volume={48}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/ci700443v}, DOI={10.1021/ci700443v}, abstractNote={Selecting most rigorous quantitative structure-activity relationship (QSAR) approaches is of great importance in the development of robust and predictive models of chemical toxicity. To address this issue in a systematic way, we have formed an international virtual collaboratory consisting of six independent groups with shared interests in computational chemical toxicology. We have compiled an aqueous toxicity data set containing 983 unique compounds tested in the same laboratory over a decade against Tetrahymena pyriformis. A modeling set including 644 compounds was selected randomly from the original set and distributed to all groups that used their own QSAR tools for model development. 
The remaining 339 compounds in the original set (external set I) as well as 110 additional compounds (external set II) published recently by the same laboratory (after this computational study was already in progress) were used as two independent validation sets to assess the external predictive power of individual models. In total, our virtual collaboratory has developed 15 different types of QSAR models of aquatic toxicity for the training set. The internal prediction accuracy for the modeling set ranged from 0.76 to 0.93 as measured by the leave-one-out cross-validation correlation coefficient ( Q abs2). The prediction accuracy for the external validation sets I and II ranged from 0.71 to 0.85 (linear regression coefficient R absI2) and from 0.38 to 0.83 (linear regression coefficient R absII2), respectively. The use of an applicability domain threshold implemented in most models generally improved the external prediction accuracy but at the same time led to a decrease in chemical space coverage. Finally, several consensus models were developed by averaging the predicted aquatic toxicity for every compound using all 15 models, with or without taking into account their respective applicability domains. We find that consensus models afford higher prediction accuracy for the external validation data sets with the highest space coverage as compared to individual constituent models. Our studies prove the power of a collaborative and consensual approach to QSAR model development. 
The best validated models of aquatic toxicity developed by our collaboratory (both individual and consensus) can be used as reliable computational predictors of aquatic toxicity and are available from any of the participating laboratories.}, number={4}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Zhu, Hao and Tropsha, Alexander and Fourches, Denis and Varnek, Alexandre and Papa, Ester and Gramatica, Paola and Öberg, Tomas and Dao, Phuong and Cherkasov, Artem and Tetko, Igor V.}, year={2008}, month={Mar}, pages={766–784} } @article{tetko_sushko_pandey_zhu_tropsha_papa_öberg_todeschini_fourches_varnek_2008, title={Critical Assessment of QSAR Models of Environmental Toxicity against Tetrahymena pyriformis: Focusing on Applicability Domain and Overfitting by Variable Selection}, volume={48}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/ci800151m}, DOI={10.1021/ci800151m}, abstractNote={The estimation of the accuracy of predictions is a critical problem in QSAR modeling. The "distance to model" can be defined as a metric that defines the similarity between the training set molecules and the test set compound for the given property in the context of a specific model. It could be expressed in many different ways, e.g., using Tanimoto coefficient, leverage, correlation in space of models, etc. In this paper we have used mixtures of Gaussian distributions as well as statistical tests to evaluate six types of distances to models with respect to their ability to discriminate compounds with small and large prediction errors. The analysis was performed for twelve QSAR models of aqueous toxicity against T. pyriformis obtained with different machine-learning methods and various types of descriptors. The distances to model based on standard deviation of predicted toxicity calculated from the ensemble of models afforded the best results. 
This distance also successfully discriminated molecules with low and large prediction errors for a mechanism-based model developed using log P and the Maximum Acceptor Superdelocalizability descriptors. Thus, the distance to model metric could also be used to augment mechanistic QSAR models by estimating their prediction errors. Moreover, the accuracy of prediction is mainly determined by the training set data distribution in the chemistry and activity spaces but not by QSAR approaches used to develop the models. We have shown that incorrect validation of a model may result in the wrong estimation of its performance and suggested how this problem could be circumvented. The toxicity of 3182 and 48774 molecules from the EPA High Production Volume (HPV) Challenge Program and EINECS (European chemical Substances Information System), respectively, was predicted, and the accuracy of prediction was estimated. The developed models are available online at http://www.qspr.org site.}, number={9}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Tetko, Igor V. and Sushko, Iurii and Pandey, Anil Kumar and Zhu, Hao and Tropsha, Alexander and Papa, Ester and Öberg, Tomas and Todeschini, Roberto and Fourches, Denis and Varnek, Alexandre}, year={2008}, month={Aug}, pages={1733–1746} } @article{varnek_fourches_sieffert_solov'ev_hill_lecomte_2007, title={QSPR Modeling of the AmIII/EuIIISeparation Factor: How Far Can we Predict ?}, volume={25}, ISSN={0736-6299 1532-2262}, url={http://dx.doi.org/10.1080/07366290601067481}, DOI={10.1080/07366290601067481}, abstractNote={Abstract Exhaustive quantitative structure‐property relationship (QSPR) modeling of the separation factor logSF for 46 polyazaheterocyclic ligands extracting Am3+ and Eu3+ from nitric acid aqueous solution to the 1,1,2,2–tetrachloroethane phase has been done using different computational approaches. 
Modeling methods included Multiple Linear Regression, Radial Basis Function Neural Networks, and Associated Neural Networks; two types of descriptors (substructural molecular fragments and molecular descriptors) and different techniques of variable selection have been employed. The developed QSPR models applied for novel t‐Bu‐hemi‐BTP ligand resulted in logSF=1.07−1.46; these predicted values somewhat exceed the experimental value logSF=1.0. Several hypothetical extractants potentially possessing high logSF values are proposed. An influence of uncertainties in initial experimental data as well as the choice of the theoretical approach on the performance of QSPR models is discussed.}, number={1}, journal={Solvent Extraction and Ion Exchange}, publisher={Informa UK Limited}, author={Varnek, Alexandre and Fourches, D. and Sieffert, N. and Solov'ev, V. P. and Hill, C. and Lecomte, M.}, year={2007}, month={Mar}, pages={1–26} } @article{varnek_fourches_solov'ev_klimchuk_ouadi_billard_2007, title={Successful “In Silico” Design of New Efficient Uranyl Binders}, volume={25}, ISSN={0736-6299 1532-2262}, url={http://dx.doi.org/10.1080/07366290701415820}, DOI={10.1080/07366290701415820}, abstractNote={Abstract ISIDA (In Silico Design and Data Analysis) software have been used for computer‐aided molecular design of novel monoamides that efficiently extract U(VI). A set of available experimental uranyl partition coefficients (logD) in a water/toluene system for 19 monoamides has been used in order to establish quantitative relationships between the structure of the molecules and their extraction properties using different machine‐learning methods (multi‐linear regression analysis, associated neural networks, support vector machine). Then, developed structure‐property models have been applied to screen a virtual combinatorial library containing about 10,500 molecules. 
Hits' selection has been performed taking into account for the extraction property of molecules, their aqueous solubility (potential extractants must not be soluble in water), and synthetic feasibility. Selected 21 hits have been synthesized and studied experimentally as uranyl extractants using the same protocol as for the molecules from the initial data set. Experiment shows that the theoretical calculations reasonably well predict logD values for novel compounds. The data set of novel monoamides has been significantly enriched by efficient uranyl binders. One of the novel molecules displays a slightly larger affinity for uranyl than previously known extractants.}, number={4}, journal={Solvent Extraction and Ion Exchange}, publisher={Informa UK Limited}, author={Varnek, A. and Fourches, D. and Solov'ev, V. and Klimchuk, O. and Ouadi, A. and Billard, I.}, year={2007}, month={Jun}, pages={433–462} } @article{tetko_solov'ev_antonov_yao_doucet_fan_hoonakker_fourches_jost_lachiche_et al._2006, title={Benchmarking of Linear and Nonlinear Approaches for Quantitative Structure−Property Relationship Studies of Metal Complexation with Ionophores}, volume={46}, ISSN={1549-9596 1549-960X}, url={http://dx.doi.org/10.1021/ci0504216}, DOI={10.1021/ci0504216}, abstractNote={A benchmark of several popular methods, Associative Neural Networks (ANN), Support Vector Machines (SVM), k Nearest Neighbors (kNN), Maximal Margin Linear Programming (MMLP), Radial Basis Function Neural Network (RBFNN), and Multiple Linear Regression (MLR), is reported for quantitative-structure property relationships (QSPR) of stability constants logK1 for the 1:1 (M:L) and logbeta2 for 1:2 complexes of metal cations Ag+ and Eu3+ with diverse sets of organic molecules in water at 298 K and ionic strength 0.1 M. The methods were tested on three types of descriptors: molecular descriptors including E-state values, counts of atoms determined for E-state atom types, and substructural molecular fragments (SMF). 
Comparison of the models was performed using a 5-fold external cross-validation procedure. Robust statistical tests (bootstrap and Kolmogorov-Smirnov statistics) were employed to evaluate the significance of calculated models. The Wilcoxon signed-rank test was used to compare the performance of methods. Individual structure-complexation property models obtained with nonlinear methods demonstrated a significantly better performance than the models built using multilinear regression analysis (MLRA). However, the averaging of several MLRA models based on SMF descriptors provided as good of a prediction as the most efficient nonlinear techniques. Support Vector Machines and Associative Neural Networks contributed in the largest number of significant models. Models based on fragments (SMF descriptors and E-state counts) had higher prediction ability than those based on E-state indices. The use of SMF descriptors and E-state counts provided similar results, whereas E-state indices lead to less significant models. The current study illustrates the difficulties of quantitative comparison of different methods: conclusions based only on one data set without appropriate statistical tests could be wrong.}, number={2}, journal={Journal of Chemical Information and Modeling}, publisher={American Chemical Society (ACS)}, author={Tetko, Igor V. and Solov'ev, Vitaly P. and Antonov, Alexey V. 
and Yao, Xiaojun and Doucet, Jean Pierre and Fan, Botao and Hoonakker, Frank and Fourches, Denis and Jost, Piere and Lachiche, Nicolas and et al.}, year={2006}, month={Mar}, pages={808–819} } @article{varnek_fourches_hoonakker_solov’ev_2005, title={Substructural fragments: an universal language to encode reactions, molecular and supramolecular structures}, volume={19}, ISSN={0920-654X 1573-4951}, url={http://dx.doi.org/10.1007/s10822-005-9008-0}, DOI={10.1007/s10822-005-9008-0}, abstractNote={Substructural fragments are proposed as a simple and safe way to encode molecular structures in a matrix containing the occurrence of fragments of a given type. The knowledge retrieved from QSPR modelling can also be stored in that matrix in addition to the information about fragments. Complex supramolecular systems (using special bond types) and chemical reactions (represented as Condensed Graphs of Reactions, CGR) can be treated similarly. The efficiency of fragments as descriptors has been demonstrated in QSPR studies of aqueous solubility for a diverse set of organic compounds as well as in the analysis of thermodynamic parameters for hydrogen-bonding in some supramolecular complexes. It has also been shown that CGR may be an interesting opportunity to perform similarity searches for chemical reactions. The relationship between the density of information in descriptors/knowledge matrices and the robustness of QSPR models is discussed.}, number={9-10}, journal={Journal of Computer-Aided Molecular Design}, publisher={Springer Science and Business Media LLC}, author={Varnek, A. and Fourches, D. and Hoonakker, F. and Solov’ev, V. 
P.}, year={2005}, month={Sep}, pages={693–703} } @article{varnek_fourches_solov'e_baulin_turanov_karandashev_fara_katritzky_2004, title={“In Silico” Design of New Uranyl Extractants Based on Phosphoryl-Containing Podands:  QSPR Studies, Generation and Screening of Virtual Combinatorial Library, and Experimental Tests}, volume={44}, ISSN={0095-2338}, url={http://dx.doi.org/10.1021/ci049976b}, DOI={10.1021/ci049976b}, abstractNote={This paper is devoted to computer-aided design of new extractants of the uranyl cation involving three main steps: (i) a QSPR study, (ii) generation and screening of a virtual combinatorial library, and (iii) synthesis of several predicted compounds and their experimental extraction studies. First, we performed a QSPR modeling of the distribution coefficient (logD) of uranyl extracted by phosphoryl-containing podands from water to 1,2-dichloroethane. Two different approaches were used: one based on classical structural and physicochemical descriptors (implemented in the CODESSA PRO program) and another one based on fragment descriptors (implemented in the TRAIL program). Three statistically significant models obtained with TRAIL involve as descriptors either sequences of atoms and bonds or atoms with their close environment (augmented atoms). The best models of CODESSA PRO include its own molecular descriptors as well as fragment descriptors obtained with TRAIL. At the second step, a virtual combinatorial library of 2024 podands has been generated with the CombiLib program, followed by the assessment of logD values using developed QSPR models. At the third step, eight of these hypothetical compounds were synthesized and tested experimentally. Comparison with experiment shows that developed QSPR models successfully predict logD values for 7 of 8 compounds from that "blind test" set.}, number={4}, journal={Journal of Chemical Information and Computer Sciences}, publisher={American Chemical Society (ACS)}, author={Varnek, A. and Fourches, D. 
and Solov'e, V. P. and Baulin, V. E. and Turanov, A. N. and Karandashev, V. K. and Fara, D. and Katritzky, A. R.}, year={2004}, month={Jul}, pages={1365–1382} }