@comment{
  Publication list, cleaned up from a reference-manager export.
  Conventions used in this file:
    - one field per line, lowercase field names, fixed field order
      (author, title, journal, volume, number, pages, year, month,
      issn, doi, url, abstract);
    - month uses the bare three-letter BibTeX macros (jan .. dec);
    - page ranges use a double hyphen;
    - truncated author lists end with the token "and others";
    - words in titles that must keep their case are brace-protected;
    - abstracts are stored in the "abstract" field (was "abstractNote").
  Citation keys are ASCII without whitespace. Four keys from the export
  contained a space ("et al." and "van dooren") and could not be parsed
  by classic BibTeX; they now use "etal" and "van_dooren" instead.
  Documents citing the old spellings must be updated.
}

@article{ash_hughes-oliver_2022,
  author   = {Ash, Jeremy R. and Hughes-Oliver, Jacqueline M.},
  title    = {Confidence bands and hypothesis tests for hit enrichment curves},
  journal  = {Journal of Cheminformatics},
  volume   = {14},
  number   = {1},
  year     = {2022},
  month    = jul,
  issn     = {1758-2946},
  doi      = {10.1186/s13321-022-00629-0},
  abstract = {In virtual screening for drug discovery, hit enrichment curves are widely used to assess the performance of ranking algorithms with regard to their ability to identify early enrichment. Unfortunately, researchers almost never consider the uncertainty associated with estimating such curves before declaring differences between performance of competing algorithms. Uncertainty is often large because the testing fractions of interest to researchers are small. Appropriate inference is complicated by two sources of correlation that are often overlooked: correlation across different testing fractions within a single algorithm, and correlation between competing algorithms. Additionally, researchers are often interested in making comparisons along the entire curve, not only at a few testing fractions. We develop inferential procedures to address both the needs of those interested in a few testing fractions, as well as those interested in the entire curve. For the former, four hypothesis testing and (pointwise) confidence intervals are investigated, and a newly developed EmProc approach is found to be most effective. For inference along entire curves, EmProc-based confidence bands are recommended for simultaneous coverage and minimal width. While we focus on the hit enrichment curve, this work is also appropriate for lift curves that are used throughout the machine learning community. Our inferential procedures trivially extend to enrichment factors, as well.},
}

@article{akhtari_havener_hertz_ash_larson_carey_mcleod_motsinger-reif_2021,
  author   = {Akhtari, Farida S. and Havener, Tammy M. and Hertz, Daniel L. and Ash, Jeremy and Larson, Alexandra and Carey, Lisa A. and McLeod, Howard L. and Motsinger-Reif, Alison A.},
  title    = {Race and smoking status associated with paclitaxel drug response in patient-derived lymphoblastoid cell lines},
  journal  = {Pharmacogenetics and Genomics},
  volume   = {31},
  number   = {2},
  pages    = {48--52},
  year     = {2021},
  month    = feb,
  issn     = {1744-6880},
  doi      = {10.1097/FPC.0000000000000419},
  abstract = {The use of ex-vivo model systems to provide a level of forecasting for in-vivo characteristics remains an important need for cancer therapeutics. The use of lymphoblastoid cell lines (LCLs) is an attractive approach for pharmacogenomics and toxicogenomics, due to their scalability, efficiency, and cost-effectiveness. There is little data on the impact of demographic or clinical covariates on LCL response to chemotherapy. Paclitaxel sensitivity was determined in LCLs from 93 breast cancer patients from the University of North Carolina Lineberger Comprehensive Cancer Center Breast Cancer Database to test for potential associations and/or confounders in paclitaxel dose-response assays. Measures of paclitaxel cell viability were associated with patient data included treatment regimens, cancer status, demographic and environmental variables, and clinical outcomes. We used multivariate analysis of variance to identify the in-vivo variables associated with ex-vivo dose-response. In this unique dataset that includes both in-vivo and ex-vivo data from breast cancer patients, race (P = 0.0049) and smoking status (P = 0.0050) were found to be significantly associated with ex-vivo dose-response in LCLs. Racial differences in clinical dose-response have been previously described, but the smoking association has not been reported. Our results indicate that in-vivo smoking status can influence ex-vivo dose-response in LCLs, and more precise measures of covariates may allow for more precise forecasting of clinical effect. In addition, understanding the mechanism by which exposure to smoking in-vivo effects ex-vivo dose-response in LCLs may open up new avenues in the quest for better therapeutic prediction.},
}

@article{odenkirk_zin_ash_reif_fourches_baker_2020,
  author   = {Odenkirk, Melanie T. and Zin, Phyo Phyo K. and Ash, Jeremy R. and Reif, David M. and Fourches, Denis and Baker, Erin S.},
  title    = {Structural-based connectivity and omic phenotype evaluations ({SCOPE}): a cheminformatics toolbox for investigating lipidomic changes in complex systems},
  journal  = {Analyst},
  volume   = {145},
  number   = {22},
  pages    = {7197--7209},
  year     = {2020},
  month    = nov,
  issn     = {1364-5528},
  doi      = {10.1039/d0an01638a},
  abstract = {SCOPE is a toolbox for expanding upon lipid data interpretation capabilities. Herein we utilize SCOPE to explore how lipid structure, biological connections and metadata linkages contribute to the results observed from lipidomic experiments.},
}

@article{odenkirk_stratton_gritsenko_bramer_webb-robertson_bloodsworth_weitz_lipton_monroe_ash_etal_2020,
  author   = {Odenkirk, Melanie T. and Stratton, Kelly G. and Gritsenko, Marina A. and Bramer, Lisa M. and Webb-Robertson, Bobbie-Jo M. and Bloodsworth, Kent J. and Weitz, Karl K. and Lipton, Anna K. and Monroe, Matthew E. and Ash, Jeremy R. and others},
  title    = {Unveiling molecular signatures of preeclampsia and gestational diabetes mellitus with multi-omics and innovative cheminformatics visualization tools},
  journal  = {Molecular Omics},
  volume   = {16},
  number   = {6},
  year     = {2020},
  month    = dec,
  issn     = {2515-4184},
  doi      = {10.1039/d0mo00074d},
  abstract = {Specific lipid and protein changes characterized term preeclampsia (PRE) and gestational diabetes mellitus (GDM) and novel visualization tools were created to aid in the process.},
}

@article{fourches_ash_2019,
  author   = {Fourches, Denis and Ash, Jeremy},
  title    = {{4D}-quantitative structure-activity relationship modeling: making a comeback},
  journal  = {Expert Opinion on Drug Discovery},
  volume   = {14},
  number   = {12},
  pages    = {1227--1235},
  year     = {2019},
  month    = dec,
  issn     = {1746-045X},
  doi      = {10.1080/17460441.2019.1664467},
  abstract = {Introduction: Predictive Quantitative Structure--Activity Relationship (QSAR) modeling has become an essential methodology for rapidly assessing various properties of chemicals. The vast majority of these QSAR models utilize numerical descriptors derived from the two- and/or three-dimensional structures of molecules. However, the conformation-dependent characteristics of flexible molecules and their dynamic interactions with biological target(s) is/are not encoded by these descriptors, leading to limited prediction performances and reduced interpretability. 2D/3D QSAR models are successful for virtual screening, but typically suffer at lead optimization stages. That is why conformation-dependent 4D-QSAR modeling methods were developed two decades ago. However, these methods have always suffered from the associated computational cost. Recently, 4D-QSAR has been experiencing a significant come-back due to rapid advances in GPU-accelerated molecular dynamic simulations and modern machine learning techniques. Areas covered: Herein, the authors briefly review the literature regarding 4D-QSAR modeling and describe its modern workflow called MD-QSAR. Challenges and current limitations are also highlighted. Expert opinion: The development of hyper-predictive MD-QSAR models could represent a disruptive technology for analyzing, understanding, and optimizing dynamic protein-ligand interactions with countless applications for drug discovery and chemical toxicity assessment. Therefore, there has never been a better time and relevance for molecular modeling teams to engage in hyper-predictive MD-QSAR modeling.},
}

@article{ash_kuenemann_rotroff_motsinger-reif_fourches_2019,
  author   = {Ash, Jeremy R. and Kuenemann, Melaine A. and Rotroff, Daniel and Motsinger-Reif, Alison and Fourches, Denis},
  title    = {Cheminformatics approach to exploring and modeling trait-associated metabolite profiles},
  journal  = {Journal of Cheminformatics},
  volume   = {11},
  year     = {2019},
  month    = jun,
  issn     = {1758-2946},
  doi      = {10.1186/s13321-019-0366-3},
  abstract = {Developing predictive and transparent approaches to the analysis of metabolite profiles across patient cohorts is of critical importance for understanding the events that trigger or modulate traits of interest (e.g., disease progression, drug metabolism, chemical risk assessment). However, metabolites' chemical structures are still rarely used in the statistical modeling workflows that establish these trait-metabolite relationships. Herein, we present a novel cheminformatics-based approach capable of identifying predictive, interpretable, and reproducible trait-metabolite relationships. As a proof-of-concept, we utilize a previously published case study consisting of metabolite profiles from non-small-cell lung cancer (NSCLC) adenocarcinoma patients and healthy controls. By characterizing each structurally annotated metabolite using both computed molecular descriptors and patient metabolite concentration profiles, we show that these complementary features enhance the identification and understanding of key metabolites associated with cancer. Ultimately, we built multi-metabolite classification models for assessing patients' cancer status using specific groups of metabolites identified based on high structural similarity through chemical clustering. We subsequently performed a metabolic pathway enrichment analysis to identify potential mechanistic relationships between metabolites and NSCLC adenocarcinoma. This cheminformatics-inspired approach relies on the metabolites' structural features and chemical properties to provide critical information about metabolite-trait associations. This method could ultimately facilitate biological understanding and advance research based on metabolomics data, especially with respect to the identification of novel biomarkers.},
}

@article{menden_wang_mason_szalai_bulusu_guan_yu_kang_jeon_wolfinger_etal_2019,
  author   = {Menden, Michael P. and Wang, Dennis and Mason, Mike J. and Szalai, Bence and Bulusu, Krishna C. and Guan, Yuanfang and Yu, Thomas and Kang, Jaewoo and Jeon, Minji and Wolfinger, Russ and others},
  title    = {Community assessment to advance computational prediction of cancer drug combinations in a pharmacogenomic screen},
  journal  = {Nature Communications},
  volume   = {10},
  year     = {2019},
  month    = jun,
  issn     = {2041-1723},
  doi      = {10.1038/s41467-019-09799-2},
  abstract = {The effectiveness of most cancer targeted therapies is short-lived. Tumors often develop resistance that might be overcome with drug combinations. However, the number of possible combinations is vast, necessitating data-driven approaches to find optimal patient-specific treatments. Here we report AstraZeneca's large drug combination dataset, consisting of 11,576 experiments from 910 combinations across 85 molecularly characterized cancer cell lines, and results of a DREAM Challenge to evaluate computational strategies for predicting synergistic drug pairs and biomarkers. 160 teams participated to provide a comprehensive methodological development and benchmarking. Winning methods incorporate prior knowledge of drug-target interactions. Synergy is predicted with an accuracy matching biological replicates for $>$60\% of combinations. However, 20\% of drug combinations are poorly predicted by all methods. Genomic rationale for synergy predictions are identified, including ADAM17 inhibitor antagonism when combined with PIK3CB/D inhibition contrasting to synergy when combined with other PI3K-pathway inhibitors in PIK3CA mutant cells.},
}

@article{burnum-johnson_zheng_dodds_ash_fourches_nicora_wendler_metz_waters_jansson_etal_2019,
  author   = {Burnum-Johnson, Kristin E. and Zheng, Xueyun and Dodds, James N. and Ash, Jeremy and Fourches, Denis and Nicora, Carrie D. and Wendler, Jason P. and Metz, Thomas O. and Waters, Katrina M. and Jansson, Janet K. and others},
  title    = {Ion mobility spectrometry and the omics: Distinguishing isomers, molecular classes and contaminant ions in complex samples},
  journal  = {{TrAC} Trends in Analytical Chemistry},
  volume   = {116},
  pages    = {292--299},
  year     = {2019},
  month    = jul,
  issn     = {1879-3142},
  doi      = {10.1016/j.trac.2019.04.022},
  url      = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85065908529&partnerID=MN8TOARS},
  abstract = {Ion mobility spectrometry (IMS) is a widely used analytical technique providing rapid gas phase separations. IMS alone is useful, but its coupling with mass spectrometry (IMS-MS) and various front-end separation techniques has greatly increased the molecular information achievable from different omic analyses. IMS-MS analyses are specifically gaining attention for improving metabolomic, lipidomic, glycomic, proteomic and exposomic analyses by increasing measurement sensitivity (e.g. S/N ratio), reducing the detection limit, and amplifying peak capacity. Numerous studies including national security-related analyses, disease screenings and environmental evaluations are illustrating that IMS-MS is able to extract information not possible with MS alone. Furthermore, IMS-MS has shown great utility in salvaging molecular information for low abundance molecules of interest when high concentration contaminant ions are present in the sample by reducing detector suppression. This review highlights how IMS-MS is currently being used in omic analyses to distinguish structurally similar molecules, isomers, molecular classes and contaminant ions.},
}

@article{ash_hughes-oliver_2018,
  author   = {Ash, Jeremy R. and Hughes-Oliver, Jacqueline M.},
  title    = {{chemmodlab}: a cheminformatics modeling laboratory {R} package for fitting and assessing machine learning models},
  journal  = {Journal of Cheminformatics},
  volume   = {10},
  year     = {2018},
  month    = nov,
  issn     = {1758-2946},
  doi      = {10.1186/s13321-018-0309-4},
  abstract = {The goal of chemmodlab is to streamline the fitting and assessment pipeline for many machine learning models in R, making it easy for researchers to compare the utility of these models. While focused on implementing methods for model fitting and assessment that have been accepted by experts in the cheminformatics field, all of the methods in chemmodlab have broad utility for the machine learning community. chemmodlab contains several assessment utilities, including a plotting function that constructs accumulation curves and a function that computes many performance measures. The most novel feature of chemmodlab is the ease with which statistically significant performance differences for many machine learning models is presented by means of the multiple comparisons similarity plot. Differences are assessed using repeated k-fold cross validation, where blocking increases precision and multiplicity adjustments are applied. chemmodlab is freely available on CRAN at https://cran.r-project.org/web/packages/chemmodlab/index.html .},
}

@article{ash_fourches_2017,
  author   = {Ash, Jeremy and Fourches, Denis},
  title    = {Characterizing the Chemical Space of {ERK2} Kinase Inhibitors Using Descriptors Computed from Molecular Dynamics Trajectories},
  journal  = {Journal of Chemical Information and Modeling},
  volume   = {57},
  number   = {6},
  pages    = {1286--1299},
  year     = {2017},
  month    = jun,
  issn     = {1549-960X},
  doi      = {10.1021/acs.jcim.7b00048},
  abstract = {Quantitative Structure-Activity Relationship (QSAR) models typically rely on 2D and 3D molecular descriptors to characterize chemicals and forecast their experimental activities. Previously, we showed that even the most reliable 2D QSAR models and structure-based 3D molecular docking techniques were not capable of accurately ranking a set of known inhibitors for the ERK2 kinase, a key player in various types of cancer. Herein, we calculated and analyzed a series of chemical descriptors computed from the molecular dynamics (MD) trajectories of ERK2-ligand complexes. First, the docking of 87 ERK2 ligands with known binding affinities was accomplished using Schrodinger's Glide software; then, solvent-explicit MD simulations (20 ns, NPT, 300 K, TIP3P, 1 fs) were performed using the GPU-accelerated Desmond program. Second, we calculated a series of MD descriptors based on the distributions of 3D descriptors computed for representative samples of the ligand's conformations over the MD simulations. Third, we analyzed the data set of 87 inhibitors in the MD chemical descriptor space. We showed that MD descriptors (i) had little correlation with conventionally used 2D/3D descriptors, (ii) were able to distinguish the most active ERK2 inhibitors from the moderate/weak actives and inactives, and (iii) provided key and complementary information about the unique characteristics of active ligands. This study represents the largest attempt to utilize MD-extracted chemical descriptors to characterize and model a series of bioactive molecules. MD descriptors could enable the next generation of hyperpredictive MD-QSAR models for computer-aided lead optimization and analogue prioritization.},
}

@article{huang_zhou_marchand_ash_morris_van_dooren_brown_gallivan_wilgenbusch_2016,
  author   = {Huang, Wen and Zhou, Guifang and Marchand, Melissa and Ash, Jeremy R. and Morris, David and Van Dooren, Paul and Brown, Jeremy M. and Gallivan, Kyle A. and Wilgenbusch, Jim C.},
  title    = {{TreeScaper}: Visualizing and Extracting Phylogenetic Signal from Sets of Trees},
  journal  = {Molecular Biology and Evolution},
  volume   = {33},
  number   = {12},
  pages    = {3314--3316},
  year     = {2016},
  month    = dec,
  issn     = {1537-1719},
  doi      = {10.1093/molbev/msw196},
  abstract = {Modern phylogenomic analyses often result in large collections of phylogenetic trees representing uncertainty in individual gene trees, variation across genes, or both. Extracting phylogenetic signal from these tree sets can be challenging, as they are difficult to visualize, explore, and quantify. To overcome some of these challenges, we have developed TreeScaper, an application for tree set visualization as well as the identification of distinct phylogenetic signals. GUI and command-line versions of TreeScaper and a manual with tutorials can be downloaded from https://github.com/whuang08/TreeScaper/releases. TreeScaper is distributed under the GNU General Public License.},
}