% Bibliography database: three journal articles, one entry each.
% Conventions used below:
%   - citation keys are lowercase surnames joined by underscores, then the year;
%     keys must not contain whitespace (classic BibTeX ends the key at the first blank).
%   - month uses the predefined three-letter macros, unquoted.
%   - journal holds the full journal name in its normal capitalisation.
%   - DOI holds the bare identifier; url holds the resolver link.
%   - abstractNote is not a standard field and is ignored by bibliography styles;
%     it is kept for tools that read it.

% NOTE(review): volume is 2 and there is no number field in this entry; this looks like an
% issue number stored in the volume slot. Confirm against the publisher record before relying on it.
@article{grinevich_harden_thakur_callahan_2024,
  author       = {Grinevich, Dmitry and Harden, Lyndy and Thakur, Siddhartha and Callahan, Benjamin},
  editor       = {Langille, Morgan G. I.},
  title        = {Serovar-level identification of bacterial foodborne pathogens from full-length {16S} {rRNA} gene sequencing},
  journal      = {mSystems},
  volume       = {2},
  year         = {2024},
  month        = feb,
  issn         = {2379-5077},
  doi          = {10.1128/msystems.00757-23},
  url          = {https://doi.org/10.1128/msystems.00757-23},
  abstractNote = {The resolution of variation within species is critical for interpreting and acting on many microbial measurements. In the key foodborne pathogens Salmonella and Escherichia coli, the primary subspecies classification scheme used is serotyping: differentiating variants within these species by surface antigen profiles. Serotype prediction from whole-genome sequencing (WGS) of isolates is now seen as comparable or preferable to traditional laboratory methods where WGS is available. However, laboratory and WGS methods depend on an isolation step that is time-consuming and incompletely represents the sample when multiple strains are present. Community sequencing approaches that skip the isolation step are, therefore, of interest for pathogen surveillance. Here, we evaluated the viability of amplicon sequencing of the full-length 16S rRNA gene for serotyping Salmonella enterica and E. coli. We developed a novel algorithm for serotype prediction, implemented as an R package (Seroplacer), which takes as input full-length 16S rRNA gene sequences and outputs serovar predictions after phylogenetic placement into a reference phylogeny. We achieved over 89% accuracy in predicting Salmonella serotypes on in silico test data and identified key pathogenic serovars of Salmonella and E. coli in isolate and environmental test samples. Although serotype prediction from 16S rRNA gene sequences is not as accurate as serotype prediction from WGS of isolates, the potential to identify dangerous serovars directly from amplicon sequencing of environmental samples is intriguing for pathogen surveillance. The capabilities developed here are also broadly relevant to other applications where intraspecies variation and direct sequencing from environmental samples could be valuable. IMPORTANCE: In order to prevent and stop outbreaks of foodborne pathogens, it is important that we can detect when pathogenic bacteria are present in a food or food-associated site and identify connections between specific pathogenic bacteria present in different samples. In this work, we develop a new computational technology that allows the important foodborne pathogens Escherichia coli and Salmonella enterica to be serotyped (a subspecies level classification) from sequencing of a single-marker gene, and the 16S rRNA gene often used to surveil bacterial communities. Our results suggest current limitations to serotyping from 16S rRNA gene sequencing alone but set the stage for further progress that we consider likely given the rapid advance in the long-read sequencing technologies and genomic databases our work leverages. If this research direction succeeds, it could enable better detection of foodborne pathogens before they reach the public and speed the resolution of foodborne pathogen outbreaks.},
}

% The last author's surname is the two-word "Ben Yehezkel"; the comma form in the author
% field keeps it together, and the key joins the two words with an underscore.
@article{callahan_grinevich_thakur_balamotis_ben_yehezkel_2021,
  author       = {Callahan, Benjamin J. and Grinevich, Dmitry and Thakur, Siddhartha and Balamotis, Michael A. and Ben Yehezkel, Tuval},
  title        = {Ultra-accurate microbial amplicon sequencing with synthetic long reads},
  journal      = {Microbiome},
  volume       = {9},
  number       = {1},
  year         = {2021},
  month        = jun,
  issn         = {2049-2618},
  doi          = {10.1186/s40168-021-01072-3},
  url          = {https://doi.org/10.1186/s40168-021-01072-3},
  abstractNote = {Background: Out of the many pathogenic bacterial species that are known, only a fraction are readily identifiable directly from a complex microbial community using standard next generation DNA sequencing. Long-read sequencing offers the potential to identify a wider range of species and to differentiate between strains within a species, but attaining sufficient accuracy in complex metagenomes remains a challenge. Methods: Here, we describe and analytically validate LoopSeq, a commercially available synthetic long-read (SLR) sequencing technology that generates highly accurate long reads from standard short reads. Results: LoopSeq reads are sufficiently long and accurate to identify microbial genes and species directly from complex samples. LoopSeq perfectly recovered the full diversity of 16S rRNA genes from known strains in a synthetic microbial community. Full-length LoopSeq reads had a per-base error rate of 0.005%, which exceeds the accuracy reported for other long-read sequencing technologies. 18S-ITS and genomic sequencing of fungal and bacterial isolates confirmed that LoopSeq sequencing maintains that accuracy for reads up to 6 kb in length. LoopSeq full-length 16S rRNA reads could accurately classify organisms down to the species level in rinsate from retail meat samples, and could differentiate strains within species identified by the CDC as potential foodborne pathogens. Conclusions: The order-of-magnitude improvement in length and accuracy over standard Illumina amplicon sequencing achieved with LoopSeq enables accurate species-level and strain identification from complex- to low-biomass microbiome samples. The ability to generate accurate and long microbiome sequencing reads using standard short read sequencers will accelerate the building of quality microbial sequence databases and removes a significant hurdle on the path to precision microbial genomics.},
}

% abstractNote here holds the text that the record labels "Significance".
@article{desai_lawas_valente_leman_grinevich_jagadish_doherty_2021,
  author       = {Desai, Jigar S. and Lawas, Lovely Mae F. and Valente, Ashlee M. and Leman, Adam R. and Grinevich, Dmitry O. and Jagadish, S. V. Krishna and Doherty, Colleen J.},
  title        = {Warm nights disrupt transcriptome rhythms in field-grown rice panicles},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  publisher    = {Proceedings of the National Academy of Sciences},
  volume       = {118},
  number       = {25},
  year         = {2021},
  month        = jun,
  issn         = {0027-8424},
  doi          = {10.1073/pnas.2025899118},
  url          = {https://doi.org/10.1073/pnas.2025899118},
  abstractNote = {Significance: The effects of warmer nighttime temperatures (WNT) on crops are one poorly understood dimension of climate change. WNT result from the asymmetrical increase in nighttime versus daytime temperatures. In rice, WNT reduce grain yield and quality. WNT reduce the amplitude of daily temperature cycles plants use to set their circadian clock. Therefore, we examined how WNT affect the timing of molecular activities. In field-grown plants, WNT alter the daily pattern of the transcriptome. Genes with strong rhythmic expression and those under circadian control are affected most by WNT. Many candidate regulators of the disrupted genes are circadian clock associated, emphasizing the altered timing under WNT. The pathways and mechanisms identified can assist efforts to identify lines tolerant to WNT.},
}