% Bibliography of journal articles (entries dated 2013 to 2021), one record per work.
% Conventions used below: lowercase entry types and field names, one field per
% line, unquoted three-letter month macros, ASCII double hyphen for ranges, plain
% ISSN and bare DOI values, and braces around title words whose capitalisation
% must survive sentence-casing styles.

@article{nardini_baker_simpson_flores_2021,
  author   = {Nardini, John T. and Baker, Ruth E. and Simpson, Matthew J. and Flores, Kevin B.},
  title    = {Learning differential equation models from stochastic agent-based model simulations},
  journal  = {Journal of the Royal Society Interface},
  volume   = {18},
  number   = {176},
  year     = {2021},
  month    = mar,
  issn     = {1742-5662},
  doi      = {10.1098/rsif.2020.0987},
  abstract = {Agent-based models provide a flexible framework that is frequently used for modelling many biological systems, including cell migration, molecular dynamics, ecology and epidemiology. Analysis of the model dynamics can be challenging due to their inherent stochasticity and heavy computational requirements. Common approaches to the analysis of agent-based models include extensive Monte Carlo simulation of the model or the derivation of coarse-grained differential equation models to predict the expected or averaged output from the agent-based model. Both of these approaches have limitations, however, as extensive computation of complex agent-based models may be infeasible, and coarse-grained differential equation models can fail to accurately describe model dynamics in certain parameter regimes. We propose that methods from the equation learning field provide a promising, novel and unifying approach for agent-based model analysis. Equation learning is a recent field of research from data science that aims to infer differential equation models directly from data. We use this tutorial to review how methods from equation learning can be used to learn differential equation models from agent-based model simulations. We demonstrate that this framework is easy to use, requires few model simulations, and accurately predicts model dynamics in parameter regions where coarse-grained differential equation models fail to do so. We highlight these advantages through several case studies involving two agent-based models that are broadly applicable to biological phenomena: a birth--death--migration model commonly used to explore cell biology experiments and a susceptible--infected--recovered model of infectious disease spread.},
}

@article{nardini_stolz_flores_harrington_byrne_2021,
  author   = {Nardini, John T. and Stolz, Bernadette J. and Flores, Kevin B. and Harrington, Heather A. and Byrne, Helen M.},
  title    = {Topological data analysis distinguishes parameter regimes in the {Anderson-Chaplain} model of angiogenesis},
  journal  = {{PLOS} Computational Biology},
  volume   = {17},
  number   = {6},
  year     = {2021},
  month    = jun,
  issn     = {1553-7358},
  doi      = {10.1371/journal.pcbi.1009094},
  abstract = {Angiogenesis is the process by which blood vessels form from pre-existing vessels. It plays a key role in many biological processes, including embryonic development and wound healing, and contributes to many diseases including cancer and rheumatoid arthritis. The structure of the resulting vessel networks determines their ability to deliver nutrients and remove waste products from biological tissues. Here we simulate the Anderson-Chaplain model of angiogenesis at different parameter values and quantify the vessel architectures of the resulting synthetic data. Specifically, we propose a topological data analysis (TDA) pipeline for systematic analysis of the model. TDA is a vibrant and relatively new field of computational mathematics for studying the shape of data. We compute topological and standard descriptors of model simulations generated by different parameter values. We show that TDA of model simulation data stratifies parameter space into regions with similar vessel morphology. The methodologies proposed here are widely applicable to other synthetic and experimental data including wound healing, development, and plant biology.},
}

@article{everett_flores_henscheid_lagergren_larripa_li_nardini_nguyen_pitman_rutter_2020,
  author   = {Everett, Rebecca and Flores, Kevin B. and Henscheid, Nick and Lagergren, John and Larripa, Kamila and Li, Ding and Nardini, John T. and Nguyen, Phuong T. T. and Pitman, E. Bruce and Rutter, Erica M.},
  title    = {A tutorial review of mathematical techniques for quantifying tumor heterogeneity},
  journal  = {Mathematical Biosciences and Engineering},
  volume   = {17},
  number   = {4},
  pages    = {3660--3709},
  year     = {2020},
  issn     = {1551-0018},
  doi      = {10.3934/mbe.2020207},
  abstract = {Intra-tumor and inter-patient heterogeneity are two challenges in developing mathematical models for precision medicine diagnostics. Here we review several techniques that can be used to aid the mathematical modeller in inferring and quantifying both sources of heterogeneity from patient data. These techniques include virtual populations, nonlinear mixed effects modeling, non-parametric estimation, Bayesian techniques, and machine learning. We create simulated virtual populations in this study and then apply the four remaining methods to these datasets to highlight the strengths and weaknesses of each technique. We provide all code used in this review at https://github.com/jtnardin/Tumor-Heterogeneity/ so that this study may serve as a tutorial for the mathematical modelling community. This review article was a product of a Tumor Heterogeneity Working Group as part of the 2018-2019 Program on Statistical, Mathematical, and Computational Methods for Precision Medicine which took place at the Statistical and Applied Mathematical Sciences Institute.},
}

@article{lagergren_nardini_baker_simpson_flores_2020,
  author   = {Lagergren, John H. and Nardini, John T. and Baker, Ruth E. and Simpson, Matthew J. and Flores, Kevin B.},
  title    = {Biologically-informed neural networks guide mechanistic modeling from sparse experimental data},
  journal  = {{PLOS} Computational Biology},
  volume   = {16},
  number   = {12},
  year     = {2020},
  month    = dec,
  issn     = {1553-7358},
  doi      = {10.1371/journal.pcbi.1008462},
  abstract = {Biologically-informed neural networks (BINNs), an extension of physics-informed neural networks [1], are introduced and used to discover the underlying dynamics of biological systems from sparse experimental data. In the present work, BINNs are trained in a supervised learning framework to approximate in vitro cell biology assay experiments while respecting a generalized form of the governing reaction-diffusion partial differential equation (PDE). By allowing the diffusion and reaction terms to be multilayer perceptrons (MLPs), the nonlinear forms of these terms can be learned while simultaneously converging to the solution of the governing PDE. Further, the trained MLPs are used to guide the selection of biologically interpretable mechanistic forms of the PDE terms which provides new insights into the biological and physical mechanisms that govern the dynamics of the observed system. The method is evaluated on sparse real-world data from wound healing assays with varying initial cell densities [2].},
}

@article{nardini_lagergren_hawkins-daarud_curtin_morris_rutter_swanson_flores_2020,
  author   = {Nardini, John T. and Lagergren, John H. and Hawkins-Daarud, Andrea and Curtin, Lee and Morris, Bethan and Rutter, Erica M. and Swanson, Kristin R. and Flores, Kevin B.},
  title    = {Learning Equations from Biological Data with Limited Time Samples},
  journal  = {Bulletin of Mathematical Biology},
  volume   = {82},
  number   = {9},
  year     = {2020},
  month    = sep,
  issn     = {1522-9602},
  doi      = {10.1007/s11538-020-00794-z},
  abstract = {Equation learning methods present a promising tool to aid scientists in the modeling process for biological data. Previous equation learning studies have demonstrated that these methods can infer models from rich datasets; however, the performance of these methods in the presence of common challenges from biological data has not been thoroughly explored. We present an equation learning methodology comprised of data denoising, equation learning, model selection and post-processing steps that infers a dynamical systems model from noisy spatiotemporal data. The performance of this methodology is thoroughly investigated in the face of several common challenges presented by biological data, namely, sparse data sampling, large noise levels, and heterogeneity between datasets. We find that this methodology can accurately infer the correct underlying equation and predict unobserved system dynamics from a small number of time samples when the data are sampled over a time interval exhibiting both linear and nonlinear dynamics. Our findings suggest that equation learning methods can be used for model discovery and selection in many areas of biology when an informative dataset is used. We focus on glioblastoma multiforme modeling as a case study in this work to highlight how these results are informative for data-driven modeling-based tumor invasion predictions.},
}

% NOTE: the key of the next entry replaces an earlier one that contained a space
% (between "michael" and "lavigne"), which BibTeX cannot parse as a citation key.
% Update any citation that referred to the old spelling.
% NOTE(review): the third author was exported as surname "Michael Lavigne" with
% given name "G."; it is re-entered here as Lavigne, G. Michael. Confirm against
% the DOI record.
@article{lagergren_nardini_lavigne_rutter_flores_2020,
  author   = {Lagergren, John H. and Nardini, John T. and Lavigne, G. Michael and Rutter, Erica M. and Flores, Kevin B.},
  title    = {Learning partial differential equations for biological transport models from noisy spatio-temporal data},
  journal  = {Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences},
  volume   = {476},
  number   = {2234},
  year     = {2020},
  month    = feb,
  issn     = {1471-2946},
  doi      = {10.1098/rspa.2019.0800},
  abstract = {We investigate methods for learning partial differential equation (PDE) models from spatio-temporal data under biologically realistic levels and forms of noise. Recent progress in learning PDEs from data have used sparse regression to select candidate terms from a denoised set of data, including approximated partial derivatives. We analyse the performance in using previous methods to denoise data for the task of discovering the governing system of PDEs. We also develop a novel methodology that uses artificial neural networks (ANNs) to denoise data and approximate partial derivatives. We test the methodology on three PDE models for biological transport, i.e. the advection--diffusion, classical Fisher--Kolmogorov--Petrovsky--Piskunov (Fisher--KPP) and nonlinear Fisher--KPP equations. We show that the ANN methodology outperforms previous denoising methods, including finite differences and both local and global polynomial regression splines, in the ability to accurately approximate partial derivatives and learn the correct PDE model.},
}

@article{nardini_bortz_2019,
  author   = {Nardini, John T. and Bortz, D. M.},
  title    = {The influence of numerical error on parameter estimation and uncertainty quantification for advective {PDE} models},
  journal  = {Inverse Problems},
  volume   = {35},
  number   = {6},
  year     = {2019},
  month    = jun,
  issn     = {1361-6420},
  doi      = {10.1088/1361-6420/ab10bb},
  abstract = {Advective partial differential equations can be used to describe many scientific processes. Two significant sources of error that can cause difficulties in inferring parameters from experimental data on these processes include (i) noise from the measurement and collection of experimental data and (ii) numerical error in approximating the forward solution to the advection equation. How this second source of error alters parameter estimation and uncertainty quantification during an inverse problem methodology is not well understood. As a step towards a better understanding of this problem, we present both analytical and computational results concerning how a least squares cost function and parameter estimator behave in the presence of numerical error in approximating solutions to the underlying advection equation. We investigate residual patterns to derive an autocorrelative statistical model that can improve parameter estimation and confidence interval computation for first order methods. Building on our results and their general nature, we provide guidelines for practitioners to determine when numerical or experimental error is the main source of error in their inference, along with suggestions of how to efficiently improve their results.},
}

@article{banks_choi_huffman_nardini_poag_thompson_2013,
  author   = {Banks, H. T. and Choi, A. and Huffman, T. and Nardini, J. and Poag, L. and Thompson, W. C.},
  title    = {Quantifying {CFSE} label decay in flow cytometry data},
  journal  = {Applied Mathematics Letters},
  volume   = {26},
  number   = {5},
  pages    = {571--577},
  year     = {2013},
  month    = may,
  issn     = {0893-9659},
  doi      = {10.1016/j.aml.2012.12.010},
  abstract = {We developed a series of models for the label decay in cell proliferation assays when the intracellular dye carboxyfluorescein succinimidyl ester (CFSE) is used as a staining agent. Data collected from two healthy patients were used to validate the models and to compare the models with the Akaike Information Criteria. The distinguishing features of multiple decay rates in the data are readily characterized and explained via time dependent decay models such as the logistic and Gompertz models.},
}