% Journal-article records, cleaned up from a machine export.
% Citation keys are unchanged. Field names are lower-case, months use the
% standard three-letter macros, ISSNs are bare, and acronyms or product
% names in titles are brace-protected so sentence-casing styles keep them.

% NOTE(review): volume is 8 and month is Aug -- the volume looks like the
% month number copied in by the exporter, and number/pages are absent.
% Confirm volume, issue and pages against the DOI before citing.
@article{chappel_king_fleming_eberlin_reif_baker_2023,
  author   = {Chappel, Jessie R. and King, Mary E. and Fleming, Jonathon and Eberlin, Livia S. and Reif, David M. and Baker, Erin S.},
  title    = {Aggregated Molecular Phenotype Scores: Enhancing Assessment and Visualization of Mass Spectrometry Imaging Data for Tissue-Based Diagnostics},
  journal  = {Analytical Chemistry},
  year     = {2023},
  month    = aug,
  volume   = {8},
  issn     = {1520-6882},
  doi      = {10.1021/acs.analchem.3c02389},
  abstract = {Mass spectrometry imaging (MSI) has gained increasing popularity for tissue-based diagnostics due to its ability to identify and visualize molecular characteristics unique to different phenotypes within heterogeneous samples. Data from MSI experiments are often assessed and visualized using various supervised and unsupervised statistical approaches. However, these approaches tend to fall short in identifying and concisely visualizing subtle, phenotype-relevant molecular changes. To address these shortcomings, we developed aggregated molecular phenotype (AMP) scores. AMP scores are generated using an ensemble machine learning approach to first select features differentiating phenotypes, weight the features using logistic regression, and combine the weights and feature abundances. AMP scores are then scaled between 0 and 1, with lower values generally corresponding to class 1 phenotypes (typically control) and higher scores relating to class 2 phenotypes. AMP scores, therefore, allow the evaluation of multiple features simultaneously and showcase the degree to which these features correlate with various phenotypes. Due to the ensembled approach, AMP scores are able to overcome limitations associated with individual models, leading to high diagnostic accuracy and interpretability. Here, AMP score performance was evaluated using metabolomic data collected from desorption electrospray ionization MSI. Initial comparisons of cancerous human tissues to their normal or benign counterparts illustrated that AMP scores distinguished phenotypes with high accuracy, sensitivity, and specificity. Furthermore, when combined with spatial coordinates, AMP scores allow visualization of tissue sections in one map with distinguished phenotypic borders, highlighting their diagnostic utility.},
}

% NOTE(review): volume is 5 and month is May -- same suspected export
% artifact as above; number/pages are absent. Confirm against the DOI.
@article{hao_fleming_petterson_lyons_edger_pires_thorne_conant_2022,
  author   = {Hao, Yue and Fleming, Jonathon and Petterson, Joanna and Lyons, Eric and Edger, Patrick P. and Pires, J. Chris and Thorne, Jeffrey L. and Conant, Gavin C.},
  title    = {Convergent evolution of polyploid genomes from across the eukaryotic tree of life},
  journal  = {{G3}: Genes, Genomes, Genetics},
  year     = {2022},
  month    = may,
  volume   = {5},
  issn     = {2160-1836},
  doi      = {10.1093/g3journal/jkac094},
  abstract = {By modeling the homoeologous gene losses that occurred in 50 genomes deriving from ten distinct polyploidy events, we show that the evolutionary forces acting on polyploids are remarkably similar, regardless of whether they occur in flowering plants, ciliates, fishes, or yeasts. We show that many of the events show a relative rate of duplicate gene loss before the first postpolyploidy speciation that is significantly higher than in later phases of their evolution. The relatively weak selective constraint experienced by the single-copy genes these losses produced leads us to suggest that most of the purely selectively neutral duplicate gene losses occur in the immediate postpolyploid period. Nearly all of the events show strong evidence of biases in the duplicate losses, consistent with them being allopolyploidies, with 2 distinct progenitors contributing to the modern species. We also find ongoing and extensive reciprocal gene losses (alternative losses of duplicated ancestral genes) between these genomes. With the exception of a handful of closely related taxa, all of these polyploid organisms are separated from each other by tens to thousands of reciprocal gene losses. As a result, it is very unlikely that viable diploid hybrid species could form between these taxa, since matings between such hybrids would tend to produce offspring lacking essential genes. It is, therefore, possible that the relatively high frequency of recurrent polyploidies in some lineages may be due to the ability of new polyploidies to bypass reciprocal gene loss barriers.},
}

% NOTE(review): volume is 4 and month is Apr -- same suspected export
% artifact as above; number/pages are absent. Confirm against the DOI.
@article{fleming_marvel_supak_motsinger-reif_reif_2022,
  author   = {Fleming, Jonathon and Marvel, Skylar W. and Supak, Stacy and Motsinger-Reif, Alison A. and Reif, David M.},
  title    = {{ToxPi*GIS Toolkit}: creating, viewing, and sharing integrative visualizations for geospatial data using {ArcGIS}},
  journal  = {Journal of Exposure Science and Environmental Epidemiology},
  year     = {2022},
  month    = apr,
  volume   = {4},
  issn     = {1559-064X},
  doi      = {10.1038/s41370-022-00433-w},
  abstract = {Background: Presenting a comprehensive picture of geographic data comprising multiple factors is an inherently integrative undertaking. Visualizing such data in an interactive form is essential for public sharing and geographic information systems (GIS) analysis. The Toxicological Prioritization Index (ToxPi) framework offers a visual analytic integrating data that is compatible with geographic data. ArcGIS is a predominant geospatial software available for presenting and communicating geographic data, yet to our knowledge there is no methodology for integrating ToxPi profiles into ArcGIS maps. Objective: We introduce an actively developed suite of software, the ToxPi*GIS Toolkit, for creating, viewing, sharing, and analyzing interactive ToxPi profiles in ArcGIS to allow for new GIS analysis and an avenue for providing geospatial results to the public. Methods: The ToxPi*GIS Toolkit is a collection of methods for creating interactive feature layers that contain ToxPi profiles. It currently includes an ArcGIS Toolbox (ToxPiToolbox.tbx) for drawing location-specific ToxPi profiles in a single feature layer, a collection of modular Python scripts that create predesigned layer files containing ToxPi feature layers from the command line, and a collection of Python routines for useful data manipulation and preprocessing. We present workflows documenting ToxPi feature layer creation, sharing, and embedding for both novice and advanced users looking for additional customizability. Results: Map visualizations created with the ToxPi*GIS Toolkit can be made freely available on public URLs, allowing users without ArcGIS Pro access or expertise to view and interact with them. Novice users with ArcGIS Pro access can create de novo custom maps, and advanced users can exploit additional customization options. The ArcGIS Toolbox provides a simple means for generating ToxPi feature layers. We illustrate its usage with current COVID-19 data to compare drivers of pandemic vulnerability in counties across the United States. Significance: The integration of ToxPi profiles with ArcGIS provides new avenues for geospatial analysis, visualization, and public sharing of multi-factor data. This allows for comparison of data across a region, which can support decisions that help address issues such as disease prevention, environmental health, natural disaster prevention, chemical risk, and many others. Development of new features, which will advance the interests of the scientific community in many fields, is ongoing for the ToxPi*GIS Toolkit, which can be accessed from www.toxpi.org.},
}

% The url below is the DOI resolver link for the same DOI; it is kept so
% that styles which print url but not doi still show a link.
@article{kirkwood_fleming_nguyen_reif_baker_belcher_2022,
  author    = {Kirkwood, Kaylie I. and Fleming, Jonathon and Nguyen, Helen and Reif, David M. and Baker, Erin S. and Belcher, Scott M.},
  title     = {Utilizing Pine Needles to Temporally and Spatially Profile Per- and Polyfluoroalkyl Substances ({PFAS})},
  journal   = {Environmental Science \& Technology},
  year      = {2022},
  month     = mar,
  volume    = {56},
  number    = {6},
  pages     = {3441--3451},
  publisher = {American Chemical Society (ACS)},
  issn      = {1520-5851},
  doi       = {10.1021/acs.est.1c06483},
  url       = {https://doi.org/10.1021/acs.est.1c06483},
  abstract  = {As concerns over exposure to per- and polyfluoroalkyl substances (PFAS) are continually increasing, novel methods to monitor their presence and modifications are greatly needed, as some have known toxic and bioaccumulative characteristics while most have unknown effects. This task however is not simple, as the Environmental Protection Agency (EPA) CompTox PFAS list contains more than 9000 substances as of September 2020 with additional substances added continually. Nontargeted analyses are therefore crucial to investigating the presence of this immense list of possible PFAS. Here, we utilized archived and field-sampled pine needles as widely available passive samplers and a novel nontargeted, multidimensional analytical method coupling liquid chromatography, ion mobility spectrometry, and mass spectrometry (LC-IMS-MS) to evaluate the temporal and spatial presence of numerous PFAS. Over 70 PFAS were detected in the pine needles from this study, including both traditionally monitored legacy perfluoroalkyl acids (PFAAs) and their emerging replacements such as chlorinated derivatives, ultrashort chain PFAAs, perfluoroalkyl ether acids including hexafluoropropylene oxide dimer acid (HFPO-DA, ``GenX'') and Nafion byproduct 2, and a cyclic perfluorooctanesulfonic acid (PFOS) analog. Results from this study provide critical insight related to PFAS transport, contamination, and reduction efforts over the past six decades.},
}