% Bibliography entries (journal articles). Conventions used below:
%   - one field per line, author/title/journal first, identifiers after;
%   - month uses the predefined three-letter macros, unquoted;
%   - issn/doi hold bare identifiers (no resolver prefix, no list syntax);
%   - words in titles that must keep their case are brace-protected;
%   - abstractNote is a non-standard field kept verbatim from the export.

% Fleming et al. (2024): semi-automated optimization of ToxPi feature weights.
@article{fleming_house_chappel_motsinger-reif_reif_2024,
  author       = {Fleming, Jonathon F. and House, John S. and Chappel, Jessie R. and Motsinger-Reif, Alison A. and Reif, David M.},
  title        = {Guided optimization of {ToxPi} model weights using a Semi-Automated approach},
  journal      = {Computational Toxicology},
  year         = {2024},
  month        = mar,
  volume       = {29},
  issn         = {2468-1113},
  doi          = {10.1016/j.comtox.2023.100294},
  abstractNote = {The Toxicological Prioritization Index (ToxPi) is a visual analysis and decision support tool for dimension reduction and visualization of high throughput, multi-dimensional feature data. ToxPi was originally developed for assessing the relative toxicity of multiple chemicals or stressors by synthesizing complex toxicological data to provide a single comprehensive view of the potential health effects. It continues to be used for profiling chemicals and has since been applied to other types of “sample” entities, including geospatial (e.g. county-level Covid-19 risk and sites of historical PFAS exposure) and other profiling applications. For any set of features (data collected on a set of sample entities), ToxPi integrates the data into a set of weighted slices that provide a visual profile and a score metric for comparison. This scoring system is highly dependent on user-provided feature weights, yet users often lack knowledge of how to define these feature weights. Common methods for predicting feature weights are generally unusable due to inappropriate statistical assumptions and lack of global distributional expectation. However, users often have an inherent understanding of expected results for a small subset of samples. For example, in chemical toxicity, prior knowledge can often place subsets of chemicals into categories of low, moderate or high toxicity (reference chemicals). Ordinal regression can be used to predict weights based on these response levels that are applicable to the entire feature set, analogous to using positive and negative controls to contextualize an empirical distribution. 
We propose a semi-supervised method utilizing ordinal regression to predict a set of feature weights that produces the best fit for the known response (“reference”) data and subsequently fine-tunes the weights via a customized genetic algorithm. We conduct a simulation study to show when this method can improve the results of ordinal regression, allowing for accurate feature weight prediction and sample ranking in scenarios with minimal response data. To ground-truth the guided weight optimization, we test this method on published data to build a ToxPi model for comparison against expert-knowledge-driven weight assignments.},
}

% Chappel et al. (2023): aggregated molecular phenotype (AMP) scores for MSI data.
@article{chappel_king_fleming_eberlin_reif_baker_2023,
  author        = {Chappel, Jessie R. and King, Mary E. and Fleming, Jonathon and Eberlin, Livia S. and Reif, David M. and Baker, Erin S.},
  title         = {Aggregated Molecular Phenotype Scores: Enhancing Assessment and Visualization of Mass Spectrometry Imaging Data for Tissue-Based Diagnostics},
  journal       = {Analytical Chemistry},
  year          = {2023},
  month         = aug,
  volume        = {8},
  issn          = {1520-6882},
  doi           = {10.1021/acs.analchem.3c02389},
  internal-note = {REVIEW: volume "8" equals the month number (Aug); possibly an export artifact. Confirm volume against the publisher record.},
  abstractNote  = {Mass spectrometry imaging (MSI) has gained increasing popularity for tissue-based diagnostics due to its ability to identify and visualize molecular characteristics unique to different phenotypes within heterogeneous samples. Data from MSI experiments are often assessed and visualized using various supervised and unsupervised statistical approaches. However, these approaches tend to fall short in identifying and concisely visualizing subtle, phenotype-relevant molecular changes. To address these shortcomings, we developed aggregated molecular phenotype (AMP) scores. AMP scores are generated using an ensemble machine learning approach to first select features differentiating phenotypes, weight the features using logistic regression, and combine the weights and feature abundances. 
AMP scores are then scaled between 0 and 1, with lower values generally corresponding to class 1 phenotypes (typically control) and higher scores relating to class 2 phenotypes. AMP scores, therefore, allow the evaluation of multiple features simultaneously and showcase the degree to which these features correlate with various phenotypes. Due to the ensembled approach, AMP scores are able to overcome limitations associated with individual models, leading to high diagnostic accuracy and interpretability. Here, AMP score performance was evaluated using metabolomic data collected from desorption electrospray ionization MSI. Initial comparisons of cancerous human tissues to their normal or benign counterparts illustrated that AMP scores distinguished phenotypes with high accuracy, sensitivity, and specificity. Furthermore, when combined with spatial coordinates, AMP scores allow visualization of tissue sections in one map with distinguished phenotypic borders, highlighting their diagnostic utility.},
}

% Chappel et al. (2023): statistical and bioinformatic approaches for lipidomics.
@article{chappel_kirkwood-donelson_reif_baker_2023,
  author        = {Chappel, Jessie R. and Kirkwood-Donelson, Kaylie I. and Reif, David M. and Baker, Erin S.},
  title         = {From big data to big insights: statistical and bioinformatic approaches for exploring the lipidome},
  journal       = {Analytical and Bioanalytical Chemistry},
  year          = {2023},
  month         = oct,
  volume        = {10},
  issn          = {1618-2650},
  doi           = {10.1007/s00216-023-04991-2},
  internal-note = {REVIEW: volume "10" equals the month number (Oct); possibly an export artifact. Confirm volume against the publisher record.},
}