@article{li_kleinstreuer_fourches_2020, title={Hierarchical Quantitative Structure-Activity Relationship Modeling Approach for Integrating Binary, Multiclass, and Regression Models of Acute Oral Systemic Toxicity}, volume={33}, ISSN={1520-5010}, DOI={10.1021/acs.chemrestox.9b00259}, abstractNote={Reliable in silico approaches to replace animal testing for the evaluation of potential acute toxic effects are highly demanded by regulatory agencies. In particular, quantitative structure–activit...}, number={2}, journal={CHEMICAL RESEARCH IN TOXICOLOGY}, author={Li, Xinhao and Kleinstreuer, Nicole C. and Fourches, Denis}, year={2020}, month={Feb}, pages={353–366} }

@article{li_fourches_2020, title={Inductive transfer learning for molecular activity prediction: Next-Gen QSAR Models with MolPMoFiT}, volume={12}, ISSN={1758-2946}, DOI={10.1186/s13321-020-00430-x}, abstractNote={Deep neural networks can directly learn from chemical structures without extensive, user-driven selection of descriptors in order to predict molecular properties/activities with high reliability. But these approaches typically require large training sets to learn the endpoint-specific structural features and ensure reasonable prediction accuracy. Even though large datasets are becoming the new normal in drug discovery, especially when it comes to high-throughput screening or metabolomics datasets, one should also consider smaller datasets with challenging endpoints to model and forecast. Thus, it would be highly relevant to better utilize the tremendous compendium of unlabeled compounds from publicly-available datasets for improving the model performances for the user’s particular series of compounds. In this study, we propose the Molecular Prediction Model Fine-Tuning (MolPMoFiT) approach, an effective transfer learning method based on self-supervised pre-training + task-specific fine-tuning for QSPR/QSAR modeling. A large-scale molecular structure prediction model is pre-trained using one million unlabeled molecules from ChEMBL in a self-supervised learning manner, and can then be fine-tuned on various QSPR/QSAR tasks for smaller chemical datasets with specific endpoints. Herein, the method is evaluated on four benchmark datasets (lipophilicity, FreeSolv, HIV, and blood–brain barrier penetration). The results showed the method can achieve strong performances for all four datasets compared to other state-of-the-art machine learning modeling techniques reported in the literature so far.}, number={1}, journal={JOURNAL OF CHEMINFORMATICS}, author={Li, Xinhao and Fourches, Denis}, year={2020}, month={Apr} }