% Conventions for the entries below:
%   - one field per line, lowercase names for the standard fields;
%   - month given as an unquoted three-letter macro (jan ... dec);
%   - page ranges written with a double hyphen;
%   - acronyms and trade names in titles are brace-protected so that
%     sentence-casing styles do not lowercase them;
%   - abstractNote is a non-standard field; unknown fields are ignored by
%     standard styles, so it is kept purely as a stored annotation.

% Chen et al. 2021 -- dust-associated microbial communities.
@article{chen_gebert_faith_dunn_fierer_barberan_2021,
  author       = {Chen, Yongjian and Gebert, Matthew J. and Faith, Seth A. and Dunn, Robert R. and Fierer, Noah and Barberan, Albert},
  title        = {Global Patterns and Climatic Controls of Dust-Associated Microbial Communities},
  journal      = {Microbiology Spectrum},
  volume       = {9},
  number       = {2},
  year         = {2021},
  month        = oct,
  issn         = {2165-0497},
  doi          = {10.1128/Spectrum.01447-21},
  abstractNote = {A broad diversity of microorganisms can be found in dust, with some of these microorganisms capable of causing allergenic disease in human via inhalation or affecting plant health by acting as plant pathogens. However, the spatial variation in dust microbiomes and the environmental factors associated with this variation have not been comprehensively assessed at the global scale.},
}

% Grantham et al. 2020 -- the DeepSpace geolocation algorithm.
@article{grantham_reich_laber_pacifici_dunn_fierer_gebert_allwood_faith_2020,
  author       = {Grantham, Neal S. and Reich, Brian J. and Laber, Eric B. and Pacifici, Krishna and Dunn, Robert R. and Fierer, Noah and Gebert, Matthew and Allwood, Julia S. and Faith, Seth A.},
  title        = {Global forensic geolocation with deep neural networks},
  journal      = {Journal of the Royal Statistical Society Series C-Applied Statistics},
  volume       = {69},
  number       = {4},
  pages        = {909--929},
  year         = {2020},
  month        = aug,
  issn         = {1467-9876},
  doi          = {10.1111/rssc.12427},
  abstractNote = {An important problem in modern forensic analyses is identifying the provenance of materials at a crime scene, such as biological material on a piece of clothing. This procedure, which is known as geolocation, is conventionally guided by expert knowledge of the biological evidence and therefore tends to be application specific, labour intensive and often subjective. Purely data-driven methods have yet to be fully realized in this domain, because in part of the lack of a sufficiently rich source of data. However, high throughput sequencing technologies can identify tens of thousands of fungi and bacteria taxa by using DNA recovered from a single swab collected from nearly any object or surface. This microbial community, or microbiome, may be highly informative of the provenance of the sample, but data on the spatial variation of microbiomes are sparse and high dimensional and have a complex dependence structure that render them difficult to model with standard statistical tools. Deep learning algorithms have generated a tremendous amount of interest within the machine learning community for their predictive performance in high dimensional problems. We present DeepSpace: a new algorithm for geolocation that aggregates over an ensemble of deep neural network classifiers trained on randomly generated Voronoi partitions of a spatial domain. The DeepSpace algorithm makes remarkably good point predictions; for example, when applied to the microbiomes of over 1300 dust samples collected across continental USA, more than half of geolocation predictions produced by this model fall less than 100 km from their true origin, which is a 60\% reduction in error from competing geolocation methods. Moreover, we apply DeepSpace to a novel data set of global dust samples collected from nearly 30 countries, finding that dust-associated fungi alone predict a sample's country of origin with nearly 90\% accuracy.},
}

% Allwood et al. 2020 -- standardized bioinformatics for fungal DNA provenance.
% No issue number or page range is recorded for this entry.
@article{allwood_fierer_dunn_breen_reich_laber_clifton_grantham_faith_2020,
  author       = {Allwood, Julia S. and Fierer, Noah and Dunn, Robert R. and Breen, Matthew and Reich, Brian J. and Laber, Eric B. and Clifton, Jesse and Grantham, Neal S. and Faith, Seth A.},
  title        = {Use of standardized bioinformatics for the analysis of fungal {DNA} signatures applied to sample provenance},
  journal      = {Forensic Science International},
  volume       = {310},
  year         = {2020},
  month        = may,
  issn         = {1872-6283},
  doi          = {10.1016/j.forsciint.2020.110250},
  abstractNote = {The use of environmental trace material to aid criminal investigations is an ongoing field of research within forensic science. The application of environmental material thus far has focused upon a variety of different objectives relevant to forensic biology, including sample provenance (also referred to as sample attribution). The capability to predict the provenance or origin of an environmental DNA sample would be an advantageous addition to the suite of investigative tools currently available. A metabarcoding approach is often used to predict sample provenance, through the extraction and comparison of the DNA signatures found within different environmental materials, such as the bacteria within soil or fungi within dust. Such approaches are combined with bioinformatics workflows and statistical modelling, often as part of large-scale study, with less emphasis on the investigation of the adaptation of these methods to a smaller scale method for forensic use. The present work was investigating a small-scale approach as an adaptation of a larger metabarcoding study to develop a model for global sample provenance using fungal DNA signatures collected from dust swabs. This adaptation was to facilitate a standardized method for consistent, reproducible sample treatment, including bioinformatics processing and final application of resulting data to the available prediction model. To investigate this small-scale method, 76 DNA samples were treated as anonymous test samples and analyzed using the standardized process to demonstrate and evaluate processing and customized sequence data analysis. This testing included samples originating from countries previously used to train the model, samples artificially mixed to represent multiple or mixed countries, as well as outgroup samples. Positive controls were also developed to monitor laboratory processing and bioinformatics analysis. Through this evaluation we were able to demonstrate that the samples could be processed and analyzed in a consistent manner, facilitated by a relatively user-friendly bioinformatic pipeline for sequence data analysis. Such investigation into standardized analyses and application of metabarcoding data is of key importance for the future use of applied microbiology in forensic science.},
}

% Silva et al. 2018 -- paternity testing with massively parallel sequencing.
% The trademark sign is written as \texttrademark so the entry stays ASCII-only.
@article{silva_sawitzki_scheible_bailey_alho_faith_2018,
  author       = {Silva, Deborah S. B. S. and Sawitzki, Fernanda R. and Scheible, Melissa K. R. and Bailey, Sarah F. and Alho, Clarice S. and Faith, Seth A.},
  title        = {Paternity testing using massively parallel sequencing and the {PowerSeq\texttrademark} {AUTO/Y} system for short tandem repeat sequencing},
  journal      = {Electrophoresis},
  volume       = {39},
  number       = {21},
  pages        = {2669--2673},
  year         = {2018},
  month        = nov,
  issn         = {1522-2683},
  doi          = {10.1002/elps.201800072},
  abstractNote = {Massively parallel sequencing (MPS) is gaining attention as a new technology for routine forensic casework, including paternity testing. Recently released MPS multiplex panels provide many more loci compared to CE methods, plus provide sequence-based alleles that together improve the statistical power of the genetic testing. Here, an MPS system (PowerSeq\texttrademark{} AUTO/Y) was applied for STR sequencing in the study of first-degree STR sequence allele inheritance from families in Southern Brazil. In 29 trios (mother-child-father) analyzed, the paternity index values generally increased when data from sequence-based analysis were used in comparison to length-based data. Further, allele inconsistencies (e.g., single repeat mutation events) between child and parents could be resolved with MPS by assessing the core repeat and flanking region sequences. Lastly, the sequence information allowed for identification of isoalleles (alleles of the same size, but different sequence) to determine specific paternal and maternal inheritances. The results from this study showed advantages of implementing sequence-based analysis, MPS, in paternity testing with improved statistical calculations and a greater resolution for the trios/families tested.},
}

% Gettings et al. 2016 -- sequence variation of autosomal STR loci.
% NOTE(review): this entry records no DOI, ISSN, issue number or month, and
% the authors' given names are stored as initials only -- TODO complete from
% the publisher record.
@article{gettings_kiesler_faith_montano_baker_young_guerrieri_vallone_2016,
  author       = {Gettings, K. B. and Kiesler, K. M. and Faith, S. A. and Montano, E. and Baker, C. H. and Young, B. A. and Guerrieri, R. A. and Vallone, P. M.},
  title        = {Sequence variation of 22 autosomal {STR} loci detected by next generation sequencing},
  journal      = {Forensic Science International-Genetics},
  volume       = {21},
  pages        = {15--21},
  year         = {2016},
}