@article{berman_goltsman_anderson_relman_callahan_2024,
  title={{Gardnerella} diversity and ecology in pregnancy and preterm birth},
  url={https://doi.org/10.1128/msystems.01339-23},
  DOI={10.1128/msystems.01339-23},
  abstractNote={The vaginal microbiome has been linked to negative health outcomes including preterm birth. Specific taxa, including Gardnerella spp., have been identified as risk factors for these conditions. Historically, microbiome analysis methods have treated all Gardnerella spp. as one species, but the broad diversity of Gardnerella has become more apparent. We explore the diversity of Gardnerella clades and genomic species in the vaginal microbiome of pregnant women and their associations with microbiome composition and preterm birth. Relative abundance of Gardnerella clades and genomic species and other taxa was quantified in shotgun metagenomic sequencing data from three distinct cohorts of pregnant women. We also assessed the diversity and abundance of Gardnerella variants in 16S rRNA gene amplicon sequencing data from seven previously conducted studies in differing populations. Individual microbiomes often contained multiple Gardnerella variants, and the number of clades was associated with increased microbial load, or the ratio of non-human reads to human reads. Taxon co-occurrence patterns were largely consistent across Gardnerella clades and among cohorts. Some variants previously described as rare were prevalent in other cohorts, highlighting the importance of surveying a diverse set of populations to fully capture the diversity of Gardnerella. The diversity of Gardnerella both across populations and within individual vaginal microbiomes has long been unappreciated, as has been the intra-species diversity of many other members of the vaginal microbiome.
The broad genomic diversity of Gardnerella has led to its reclassification as multiple species; here we demonstrate the diversity of Gardnerella found within and between vaginal microbiomes. IMPORTANCE: The present study shows that single microbiomes can contain all currently known species of Gardnerella and that multiple similar species can exist within the same environment. Furthermore, surveys of demographically distinct populations suggest that some species appear more commonly in certain populations. Further studies in broad and diverse populations will be necessary to fully understand the ecological roles of each Gardnerella sp., how they can co-exist, and their distinct impacts on microbial communities, preterm birth, and other health outcomes.},
  journal={mSystems},
  author={Berman, Hanna L. and Goltsman, Daniela S. Aliaga and Anderson, Megan and Relman, David A. and Callahan, Benjamin J.},
  editor={Cope, Emily K.},
  year={2024},
  month=jun
}

@article{halleran_sylvester_jacob_callahan_baynes_foster_2024,
  title={Impact of florfenicol dosing regimen on the phenotypic and genotypic resistance of enteric bacteria in steers},
  volume={14},
  ISSN={2045-2322},
  DOI={10.1038/s41598-024-55591-8},
  abstractNote={The food animal sector’s use of antimicrobials is heavily critiqued for its role in allowing resistance to develop against critically important antimicrobials in human health. The WHO recommends using lower tier antimicrobials such as florfenicol for disease treatment. The primary objective of this study was to assess the differences in resistance profiles of enteric microbes following administration of florfenicol to steers using both FDA-approved dosing regimens and two different detection methods. Our hypothesis was that we would identify an increased prevalence of resistance in the steers administered the repeated, lower dose of florfenicol; additionally, we hypothesized resistance profiles would be similar between both detection methods.
Twelve steers were administered either two intramuscular (20 mg/kg q 48 h; n = 6) or a single subcutaneous dose (40 mg/kg, n = 6). Fecal samples were collected for 38 days, and E. coli and Enterococcus were isolated and tested for resistance. Fecal samples were submitted for metagenomic sequencing analysis. Metagenomics revealed genes conferring resistance to aminoglycosides as the most abundant drug class. Most multidrug resistance genes contained phenicols. The genotypic and phenotypic patterns of resistance were not similar between drug classes. Observed increases in resistant isolates and relative abundance of resistance genes peaked after drug administration and returned to baseline by the end of the sampling period. The use of a “lower tier” antimicrobial, such as florfenicol, may cause an increased amount of resistance to critically important antimicrobials for a brief period, but these changes largely resolve by the end of the drug withdrawal period.},
  number={1},
  journal={Scientific Reports},
  author={Halleran, Jennifer and Sylvester, Hannah and Jacob, Megan and Callahan, Benjamin and Baynes, Ronald and Foster, Derek},
  year={2024},
  month=feb
}

@article{maier_gin_callahan_sheriff_duerkop_kleiner_2024,
  title={Pseudo-pac site sequences used by phage {P22} in generalized transduction of {Salmonella}},
  volume={20},
  ISSN={1553-7374},
  url={https://doi.org/10.1371/journal.ppat.1012301},
  DOI={10.1371/journal.ppat.1012301},
  abstractNote={Salmonella enterica Serovar Typhimurium (Salmonella) and its bacteriophage P22 are a model system for the study of horizontal gene transfer by generalized transduction. Typically, the P22 DNA packaging machinery initiates packaging when a short sequence of DNA, known as the pac site, is recognized on the P22 genome. However, sequences similar to the pac site in the host genome, called pseudo-pac sites, lead to erroneous packaging and subsequent generalized transduction of Salmonella DNA.
While the general genomic locations of the Salmonella pseudo-pac sites are known, the sequences themselves have not been determined. We used visualization of P22 sequencing reads mapped to host Salmonella genomes to define regions of generalized transduction initiation and the likely locations of pseudo-pac sites. We searched each genome region for the sequence with the highest similarity to the P22 pac site and aligned the resulting sequences. We built a regular expression (sequence match pattern) from the alignment and used it to search the genomes of two P22-susceptible Salmonella strains—LT2 and 14028S—for sequence matches. The final regular expression successfully identified pseudo-pac sites in both LT2 and 14028S that correspond with generalized transduction initiation sites in mapped read coverages. The pseudo-pac site sequences identified in this study can be used to predict locations of generalized transduction in other P22-susceptible hosts or to initiate generalized transduction at specific locations in P22-susceptible hosts with genetic engineering. Furthermore, the bioinformatics approach used to identify the Salmonella pseudo-pac sites in this study could be applied to other phage-host systems.},
  number={6},
  journal={PLOS Pathogens},
  author={Maier, Jessie L. and Gin, Craig and Callahan, Benjamin and Sheriff, Emma K. and Duerkop, Breck A. and Kleiner, Manuel},
  editor={Secor, Patrick},
  year={2024},
  month=jun
}

@article{grinevich_harden_thakur_callahan_2024,
  title={Serovar-level identification of bacterial foodborne pathogens from full-length {16S} {rRNA} gene sequencing},
  volume={2},
  ISSN={2379-5077},
  url={https://doi.org/10.1128/msystems.00757-23},
  DOI={10.1128/msystems.00757-23},
  abstractNote={The resolution of variation within species is critical for interpreting and acting on many microbial measurements.
In the key foodborne pathogens Salmonella and Escherichia coli, the primary subspecies classification scheme used is serotyping: differentiating variants within these species by surface antigen profiles. Serotype prediction from whole-genome sequencing (WGS) of isolates is now seen as comparable or preferable to traditional laboratory methods where WGS is available. However, laboratory and WGS methods depend on an isolation step that is time-consuming and incompletely represents the sample when multiple strains are present. Community sequencing approaches that skip the isolation step are, therefore, of interest for pathogen surveillance. Here, we evaluated the viability of amplicon sequencing of the full-length 16S rRNA gene for serotyping Salmonella enterica and E. coli. We developed a novel algorithm for serotype prediction, implemented as an R package (Seroplacer), which takes as input full-length 16S rRNA gene sequences and outputs serovar predictions after phylogenetic placement into a reference phylogeny. We achieved over 89% accuracy in predicting Salmonella serotypes on in silico test data and identified key pathogenic serovars of Salmonella and E. coli in isolate and environmental test samples. Although serotype prediction from 16S rRNA gene sequences is not as accurate as serotype prediction from WGS of isolates, the potential to identify dangerous serovars directly from amplicon sequencing of environmental samples is intriguing for pathogen surveillance. The capabilities developed here are also broadly relevant to other applications where intraspecies variation and direct sequencing from environmental samples could be valuable. IMPORTANCE In order to prevent and stop outbreaks of foodborne pathogens, it is important that we can detect when pathogenic bacteria are present in a food or food-associated site and identify connections between specific pathogenic bacteria present in different samples. 
In this work, we develop a new computational technology that allows the important foodborne pathogens Escherichia coli and Salmonella enterica to be serotyped (a subspecies level classification) from sequencing of a single-marker gene, and the 16S rRNA gene often used to surveil bacterial communities. Our results suggest current limitations to serotyping from 16S rRNA gene sequencing alone but set the stage for further progress that we consider likely given the rapid advance in the long-read sequencing technologies and genomic databases our work leverages. If this research direction succeeds, it could enable better detection of foodborne pathogens before they reach the public and speed the resolution of foodborne pathogen outbreaks.},
  journal={mSystems},
  author={Grinevich, Dmitry and Harden, Lyndy and Thakur, Siddhartha and Callahan, Benjamin},
  editor={Langille, Morgan G. I.},
  year={2024},
  month=feb
}

@comment{Key below normalised: the exported key contained a space ("et al."), which BibTeX cannot parse as a citation key.}
@article{belotserkovsky_stabryla_hunter_allegretti_callahan_carlson_daschner_goudarzi_guyard_jackson_et_al._2024,
  title={Standards for fecal microbiota transplant: Tools and therapeutic advances},
  volume={86},
  ISSN={1095-8320},
  DOI={10.1016/j.biologicals.2024.101758},
  abstractNote={Fecal microbiota transplantation (FMT) has been demonstrated to be efficacious in preventing recurrent Clostridioides difficile (C. difficile) infections, and is being investigated for treatment of several other diseases including inflammatory bowel disease, cancer, obesity, liver disease, and diabetes. To speed up the translation of FMT into clinical practice as a safe and standardized therapeutic intervention, additional evidence-based technical and regulatory guidance is needed. To this end in May of 2022, the International Alliance for Biological Standardization (IABS) and the BIOASTER Microbiology Technology Institute hosted a second webinar to discuss key issues still impeding the advancement and standardization of FMT.
The goal of this two-day webinar was to provide a forum for scientific experts to share and discuss data and key challenges with one another. Discussion included a focus on the evaluation of safety, efficacy, clinical trial design, reproducibility and accuracy in obtained microbiome measurements and data reporting, and the potential for standardization across these areas. It also focused on increasing the application potential and visibility of FMT beyond treating C. difficile infections.},
  journal={Biologicals},
  author={Belotserkovsky, Ilia and Stabryla, Lisa M. and Hunter, Monique and Allegretti, Jessica and Callahan, Benjamin J. and Carlson, Paul E. and Daschner, Phillip J. and Goudarzi, Maryam and Guyard, Cyril and Jackson, Scott A. and others},
  year={2024},
  month=may
}

@article{scheible_stinson_breen_callahan_thomas_meiklejohn_2024,
  title={The development of non-destructive sampling methods of parchment skins for genetic species identification},
  volume={19},
  ISSN={1932-6203},
  url={https://doi.org/10.1371/journal.pone.0299524},
  DOI={10.1371/journal.pone.0299524},
  abstractNote={Parchment, the skins of animals prepared for use as writing surfaces, offers a valuable source of genetic information. Many have clearly defined provenance, allowing for the genetic findings to be evaluated in temporal and spatial context. While these documents can yield evidence of the animal sources, the DNA contained within these aged skins is often damaged and fragmented. Previously, genetic studies targeting parchment have used destructive sampling techniques and so the development and validation of non-destructive sampling methods would expand opportunities and facilitate testing of more precious documents, especially those with historical significance. Here we present genetic data obtained by non-destructive sampling of eight parchments spanning the 15th century to the modern day.
We define a workflow for enriching the mitochondrial genome (mtGenome), generating next-generation sequencing reads to permit species identification, and providing interpretation guidance. Using sample replication, comparisons to destructively sampled controls, and by establishing authentication criteria, we were able to confidently assign full/near full mtGenome sequences to 56.3% of non-destructively sampled parchments, each with greater than 90% of the mtGenome reference covered. Six of eight parchments passed all four established thresholds with at least one non-destructive sample, highlighting promise for future studies.},
  number={3},
  journal={PLOS ONE},
  author={Scheible, Melissa and Stinson, Timothy L. and Breen, Matthew and Callahan, Benjamin J. and Thomas, Rachael and Meiklejohn, Kelly A.},
  editor={Shakoori, Abdul Rauf},
  year={2024},
  month=mar
}

@comment{Key below normalised: the exported key contained spaces ("abdala asbun", "brandstrom durling", "et al."), which BibTeX cannot parse as a citation key.}
@article{hakimzadeh_abdala_asbun_albanese_bernard_buchner_callahan_caporaso_curd_djemiel_brandstrom_durling_et_al._2023,
  title={A pile of pipelines: An overview of the bioinformatics software for metabarcoding data analyses},
  volume={8},
  ISSN={1755-0998},
  DOI={10.1111/1755-0998.13847},
  abstractNote={Environmental DNA (eDNA) metabarcoding has gained growing attention as a strategy for monitoring biodiversity in ecology. However, taxa identifications produced through metabarcoding require sophisticated processing of high‐throughput sequencing data from taxonomically informative DNA barcodes. Various sets of universal and taxon‐specific primers have been developed, extending the usability of metabarcoding across archaea, bacteria and eukaryotes. Accordingly, a multitude of metabarcoding data analysis tools and pipelines have also been developed. Often, several developed workflows are designed to process the same amplicon sequencing data, making it somewhat puzzling to choose one among the plethora of existing pipelines.
However, each pipeline has its own specific philosophy, strengths and limitations, which should be considered depending on the aims of any specific study, as well as the bioinformatics expertise of the user. In this review, we outline the input data requirements, supported operating systems and particular attributes of thirty‐two amplicon processing pipelines with the goal of helping users to select a pipeline for their metabarcoding projects.},
  journal={Molecular Ecology Resources},
  author={Hakimzadeh, Ali and Abdala Asbun, Alejandro and Albanese, Davide and Bernard, Maria and Buchner, Dominik and Callahan, Benjamin and Caporaso, J. Gregory and Curd, Emily and Djemiel, Christophe and Brandstrom Durling, Mikael and others},
  year={2023},
  month=aug
}

@comment{Key below normalised: the exported key contained a space ("et al."), which BibTeX cannot parse as a citation key.}
@article{gin_petzold_uthappa_neighbors_borough_gin_lashnits_sempowski_denny_bienzle_et_al._2023,
  title={Evaluation of {SARS-CoV-2} identification methods through surveillance of companion animals in {SARS-CoV-2}-positive homes in {North Carolina}, {March} to {December} 2020},
  volume={11},
  ISSN={2167-8359},
  DOI={10.7717/peerj.16310},
  abstractNote={We collected oral and/or rectal swabs and serum from dogs and cats living in homes with SARS-CoV-2-PCR-positive persons for SARS-CoV-2 PCR and serology testing. Pre-COVID-19 serum samples from dogs and cats were used as negative controls, and samples were tested in duplicate at different timepoints. Raw ELISA results scrutinized relative to known negative samples suggested that cut-offs for IgG seropositivity may require adjustment relative to previously proposed values, while proposed cut-offs for IgM require more extensive validation. A small number of pet dogs (2/43, 4.7%) and one cat (1/21, 4.8%) were positive for SARS-CoV-2 RNA, and 28.6 and 37.5% of cats and dogs were positive for anti-SARS-CoV-2 IgG, respectively.},
  journal={PeerJ},
  author={Gin, Taylor E. and Petzold, Elizabeth A. and Uthappa, Diya M. and Neighbors, Coralei E. and Borough, Anna R.
and Gin, Craig and Lashnits, Erin and Sempowski, Gregory D. and Denny, Thomas and Bienzle, Dorothee and others},
  year={2023},
  month=oct
}

@comment{Key below normalised: the exported key contained a space ("et al."), which BibTeX cannot parse as a citation key.}
@article{gookin_hartley_aicher_mathews_cullen_cullen_callahan_stowe_seiler_jacob_et_al._2023,
  title={Gallbladder microbiota in healthy dogs and dogs with mucocele formation},
  volume={18},
  ISSN={1932-6203},
  DOI={10.1371/journal.pone.0281432},
  abstractNote={To date studies have not investigated the culture-independent microbiome of bile from dogs, a species where aseptic collection of bile under ultrasound guidance is somewhat routine. Despite frequent collection of bile for culture-based diagnosis of bacterial cholecystitis, it is unknown whether bile from healthy dogs harbors uncultivable bacteria or a core microbiota. The answer to this question is critical to understanding the pathogenesis of biliary infection and as a baseline to exploration of other biliary diseases in dogs where uncultivable bacteria could play a pathogenic role. A pressing example of such a disease would be gallbladder mucocele formation in dogs. This prevalent and deadly condition is characterized by excessive secretion of abnormal mucus by the gallbladder epithelium that can eventually lead to rupture of the gallbladder or obstruction of bile flow. The cause of mucocele formation is unknown as is whether uncultivable, and therefore unrecognized, bacteria play any systematic role in pathogenesis. In this study we applied next-generation 16S rRNA gene sequencing to identify the culture-negative bacterial community of gallbladder bile from healthy dogs and gallbladder mucus from dogs with mucocele formation. Integral to our study was the use of 2 separate DNA isolations on each sample using different extraction methods and sequencing of negative control samples enabling recognition and curation of contaminating sequences.
Microbiota findings were validated by simultaneous culture-based identification, cytological examination of bile, and fluorescence in-situ hybridization (FISH) performed on gallbladder mucosa. Using culture-dependent, cytological, FISH, and 16S rRNA sequencing approaches, results of our study do not support existence of a core microbiome in the bile of healthy dogs or gallbladder mucus from dogs with mucocele formation. Our findings further document how contaminating sequences can significantly contribute to the results of sequencing analysis when performed on samples with low bacterial biomass.},
  number={2},
  journal={PLOS ONE},
  author={Gookin, Jody L. and Hartley, Ashley N. and Aicher, Kathleen M. and Mathews, Kyle G. and Cullen, Rachel and Cullen, John M. and Callahan, Benjamin J. and Stowe, Devorah M. and Seiler, Gabriela S. and Jacob, Megan E. and others},
  year={2023},
  month=feb
}

@comment{The entry below has no journal field and a 10.1101 DOI, so it is typed as misc. It is presumably the bioRxiv preprint of berman_goltsman_anderson_relman_callahan_2024 (same authors and title) -- TODO confirm.}
@misc{berman_goltsman_anderson_relman_callahan_2023,
  title={{Gardnerella} Diversity and Ecology in Pregnancy and Preterm Birth},
  howpublished={bioRxiv preprint},
  url={https://doi.org/10.1101/2023.02.03.527032},
  DOI={10.1101/2023.02.03.527032},
  abstractNote={The vaginal microbiome has been linked to numerous negative health outcomes including preterm birth. Specific taxa, including Gardnerella spp., have been identified as risk factors for these conditions. Historically, microbiome analysis methods have treated all Gardnerella spp. as one species, but the broad diversity of Gardnerella has recently become more apparent. In the present study, we explore the diversity of Gardnerella clades and genomic species in the vaginal microbiome of pregnant women and their impacts on microbiome composition and associations with preterm birth. Shotgun metagenomic sequencing data collected longitudinally from three distinct cohorts of pregnant women were assessed.
Relative abundance of Gardnerella clades and genomic species and other taxa was quantified, and associations between Gardnerella clades and signatures of the vaginal microbiome were measured. We also assessed the diversity and abundance of Gardnerella variants in 16S rRNA gene amplicon sequencing data from seven previously conducted studies in differing populations on the vaginal microbiome and preterm birth. Individual microbiomes often contained multiple Gardnerella variants, and the number of clades was associated with increased microbial load. The genus Gardnerella was also associated with increased microbial load, or the ratio of non-human reads to human reads. Taxon co-occurrence patterns matched previously described community structures, and were largely consistent across Gardnerella clades and among cohorts. Some variants previously described as rare were prevalent in other cohorts, highlighting the importance of surveying a diverse set of populations to fully capture the diversity of Gardnerella. The diversity of Gardnerella both across populations and within individual vaginal microbiomes has long been unappreciated, as has been the intra-species diversity of many other members of the vaginal microbiome. The broad genomic diversity of Gardnerella has led to its reclassification as multiple species; here we demonstrate the diversity of Gardnerella found within and between vaginal microbiomes. Further studies should investigate the phenotypes of Gardnerella variants that may underlie the mechanisms by which Gardnerella species may differentially shape the vaginal microbiome.}, author={Berman, Hanna L. and Goltsman, Daniela S. Aliaga and Anderson, Megan and Relman, David A. 
and Callahan, Benjamin J.},
  year={2023},
  month=feb
}

@article{templeton_fefer_case_roach_azcarate-peril_gruen_callahan_olby_2023,
  title={Longitudinal Analysis of Canine Oral Microbiome Using Whole Genome Sequencing in Aging Companion Dogs},
  volume={13},
  ISSN={2076-2615},
  DOI={10.3390/ani13243846},
  abstractNote={Aged companion dogs have a high prevalence of periodontal disease and canine cognitive dysfunction syndrome (CCDS) and the two disorders are correlated. Similarly, periodontal disease and Alzheimer’s Disease are correlated in people. However, little is known about the oral microbiota of aging dogs. The goal of this project was to characterize the longitudinal changes in oral microbiota in aged dogs. Oral swabs were taken from ten senior client-owned dogs on 2–3 occasions spanning 24 months and they underwent whole genome shotgun (WGS) sequencing. Cognitive status was established at each sampling time. A statistically significant increase in alpha diversity for bacterial and fungal species was observed between the first and last study visits. Bacteroidetes and proteobacteria were the most abundant bacterial phyla. Porphyromonas gulae was the most abundant bacterial species (11.6% of total reads). The species Lactobacillus gasseri had a statistically significant increase in relative abundance with age whereas Leptotrichia sp. oral taxon 212 had a statistically significant positive longitudinal association with cognition score. There is an increased fungal and bacterial alpha diversity in aging dogs over time and nearly universal oral dysbiosis. The role of the oral microbiota, particularly Leptotrichia and P. gulae and P. gingivalis, in aging and CCDS warrants further investigation.},
  number={24},
  journal={Animals},
  author={Templeton, Ginger B. and Fefer, Gilad and Case, Beth C. and Roach, Jeff and Azcarate-Peril, M. Andrea and Gruen, Margaret E. and Callahan, Benjamin J.
and Olby, Natasha J.},
  year={2023},
  month=dec
}

@article{huang_gin_fettweis_foxman_gelaye_macintyre_subramaniam_fraser_tabatabaei_callahan_2023,
  title={Meta-analysis reveals the vaginal microbiome is a better predictor of earlier than later preterm birth},
  url={https://doi.org/10.1186/s12915-023-01702-2},
  DOI={10.1186/s12915-023-01702-2},
  abstractNote={Background: High-throughput sequencing measurements of the vaginal microbiome have yielded intriguing potential relationships between the vaginal microbiome and preterm birth (PTB; live birth prior to 37 weeks of gestation). However, results across studies have been inconsistent. Results: Here, we perform an integrated analysis of previously published datasets from 12 cohorts of pregnant women whose vaginal microbiomes were measured by 16S rRNA gene sequencing. Of 2039 women included in our analysis, 586 went on to deliver prematurely. Substantial variation between these datasets existed in their definition of preterm birth, characteristics of the study populations, and sequencing methodology. Nevertheless, a small group of taxa comprised a vast majority of the measured microbiome in all cohorts. We trained machine learning (ML) models to predict PTB from the composition of the vaginal microbiome, finding low to modest predictive accuracy (0.28–0.79). Predictive accuracy was typically lower when ML models trained in one dataset predicted PTB in another dataset. Earlier preterm birth (< 32 weeks, < 34 weeks) was more predictable from the vaginal microbiome than late preterm birth (34–37 weeks), both within and across datasets. Integrated differential abundance analysis revealed a highly significant negative association between L. crispatus and PTB that was consistent across almost all studies. The presence of the majority (18 out of 25) of genera was associated with a higher risk of PTB, with L. iners, Prevotella, and Gardnerella showing particularly consistent and significant associations.
Some example discrepancies between studies could be attributed to specific methodological differences but not most study-to-study variations in the relationship between the vaginal microbiome and preterm birth. Conclusions: We believe future studies of the vaginal microbiome and PTB will benefit from a focus on earlier preterm births and improved reporting of specific patient metadata shown to influence the vaginal microbiome and/or birth outcomes.},
  journal={BMC Biology},
  author={Huang, Caizhi and Gin, Craig and Fettweis, Jennifer and Foxman, Betsy and Gelaye, Bizu and MacIntyre, David A. and Subramaniam, Akila and Fraser, William and Tabatabaei, Negar and Callahan, Benjamin},
  year={2023},
  month=sep
}

@article{slead_callahan_schreeg_seiler_stowe_azcarate-peril_jacob_gookin_2023,
  title={Microbiome analysis of bile from apparently healthy cats and cats with suspected hepatobiliary disease},
  volume={9},
  ISSN={1939-1676},
  url={https://doi.org/10.1111/jvim.16852},
  DOI={10.1111/jvim.16852},
  abstractNote={Background: Bacterial infection of bile is a common cause of hepatobiliary disease in cats. Whether bile harbors a core microbiota in health or in cases of suspected hepatobiliary disease in cats is unknown. Objectives: Establish if gallbladder bile in apparently healthy cats harbors a core microbiota composed of bacterial taxa common to many individuals. Compare results of bile cytology, bile culture, and 16S rRNA gene amplicon sequencing in apparently healthy cats and cats with suspected hepatobiliary disease. Animals: Forty‐three client‐owned cats with suspected hepatobiliary disease and 17 control cats. Methods: Bile was collected by ultrasound guided cholecystocentesis (cats with suspected hepatobiliary disease) or laparotomy after euthanasia (controls).
Bile samples underwent cytologic examination, aerobic and anaerobic culture, and DNA was extracted for 16S rRNA gene amplification and sequencing. Results: Microbiome sequencing did not identify a core microbiota in control cats or cats having bile sampled because of clinical suspicion for hepatobiliary disease. Microbiome profiles from control cats were indistinguishable from profiles obtained from sampling instruments and reagents that were not exposed to bile (technical controls). Bacterial taxa that could not be explained by contamination or off‐target amplification were identified only in samples from cats with bactibilia and positive bile culture results for Escherichia coli. In several E. coli positive samples, microbiome sequencing also identified a small number of potentially co‐infecting bacterial genera not identified by culture. Conclusions and Clinical Importance: Cat bile does not harbor a core microbiota. Uncultured bacteria may contribute to pathogenesis of hepatobiliary disease in cats with bile E. coli infection.},
  journal={Journal of Veterinary Internal Medicine},
  author={Slead, Tanner S. and Callahan, Benjamin J. and Schreeg, Megan E. and Seiler, Gabriela S. and Stowe, Devorah M. and Azcarate-Peril, Maria Andrea and Jacob, Megan E. and Gookin, Jody L.},
  year={2023},
  month=sep
}

@article{switzer_callahan_costello_bik_fontaine_gulland_relman_2023,
  title={Rookery through rehabilitation: Microbial community assembly in newborn harbour seals after maternal separation},
  volume={6},
  ISSN={1462-2920},
  DOI={10.1111/1462-2920.16444},
  abstractNote={Microbial community assembly remains largely unexplored in marine mammals, despite its potential importance for conservation and management. Here, neonatal microbiota assembly was studied in harbour seals (Phoca vitulina richardii) at a rehabilitation facility soon after maternal separation, through weaning, to the time of release back to their native environment.
We found that the gingival and rectal communities of rehabilitated harbour seals were distinct from the microbiotas of formula and pool water, and became increasingly diverse and dissimilar over time, ultimately resembling the gingival and rectal communities of local wild harbour seals. Harbour seal microbiota assembly was compared to that of human infants, revealing the rapid emergence of host specificity and evidence of phylosymbiosis even though these harbour seals had been raised by humans. Early life prophylactic antibiotics were associated with changes in the composition of the harbour seal gingival and rectal communities and surprisingly, with transient increases in alpha diversity, perhaps because of microbiota sharing during close cohabitation with other harbour seals. Antibiotic‐associated effects dissipated over time. These results suggest that while early life maternal contact may provide seeding for microbial assembly, co‐housing of conspecifics during rehabilitation may help neonatal mammals achieve a healthy host‐specific microbiota with features of resilience.},
  journal={Environmental Microbiology},
  author={Switzer, Alexandra D. and Callahan, Benjamin J. and Costello, Elizabeth K. and Bik, Elisabeth M. and Fontaine, Christine and Gulland, Frances M. D. and Relman, David A.},
  year={2023},
  month=jun
}

@comment{The entry below has no journal field and a 10.1101 DOI, so it is typed as misc. It is presumably the bioRxiv preprint of grinevich_harden_thakur_callahan_2024 (same authors and title) -- TODO confirm.}
@misc{grinevich_harden_thakur_callahan_2023,
  title={Serovar-level Identification of Bacterial Foodborne Pathogens From Full-length {16S} {rRNA} Gene Sequencing},
  howpublished={bioRxiv preprint},
  url={https://doi.org/10.1101/2023.06.28.546915},
  DOI={10.1101/2023.06.28.546915},
  abstractNote={The resolution of variation within species is critical for interpreting and acting on many microbial measurements. In the key foodborne pathogens Escherichia coli and Salmonella, the primary sub-species classification scheme used is serotyping: differentiating variants within these species by surface antigen profiles.
Serotype prediction from whole-genome sequencing (WGS) of isolates is now seen as comparable or preferable to traditional laboratory methods where WGS is available. However, laboratory and WGS methods depend on an isolation step that is time-consuming and incompletely represents the sample when multiple strains are present. Community sequencing approaches that skip the isolation step are therefore of interest for pathogen surveillance. Here we evaluated the viability of amplicon sequencing of the full-length 16S rRNA gene for serotyping S. enterica and E. coli. We developed a novel algorithm for serotype prediction, implemented as an R package (Seroplacer), which takes as input full-length 16S rRNA gene sequences and outputs serovar predictions after phylogenetic placement into a reference phylogeny. We achieved over 89% accuracy in predicting Salmonella serotypes on in silico test data, and identified key pathogenic serovars of Salmonella and E. coli in isolate and environmental test samples. Although serotype prediction from 16S sequences is not as accurate as serotype prediction from WGS of isolates, the potential to identify dangerous serovars directly from amplicon sequencing of environmental samples is intriguing for pathogen surveillance. The capabilities developed here are also broadly relevant to other applications where intra-species variation and direct sequencing from environmental samples could be valuable.}, author={Grinevich, Dmitry and Harden, Lyndy and Thakur, Siddhartha and Callahan, Benjamin J}, year={2023}, month={Jun} } @article{manvell_berman_callahan_breitschwerdt_swain_ferris_maggi_lashnits_2022, title={Identification of microbial taxa present in Ctenocephalides felis (cat flea) reveals widespread co-infection and associations with vector phylogeny}, volume={15}, ISSN={["1756-3305"]}, DOI={10.1186/s13071-022-05487-1}, abstractNote={Abstract Background Ctenocephalides felis, the cat flea, is the most common ectoparasite of cats and dogs worldwide. 
As a cause of flea allergy dermatitis and a vector for two genera of zoonotic pathogens (Bartonella and Rickettsia spp.), the effect of the C. felis microbiome on pathogen transmission and vector survival is of substantial medical importance to both human and veterinary medicine. The aim of this study was to assay the pathogenic and commensal eubacterial microbial communities of individual C. felis from multiple geographic locations and analyze these findings by location, qPCR pathogen prevalence, and flea genetic diversity. Methods 16S Next Generation Sequencing (NGS) was utilized to sequence the microbiome of fleas collected from free-roaming cats, and the cox1 gene was used for flea phylogenetic analysis. NGS data were analyzed for 168 individual fleas from seven locations within the US and UK. Given inconsistency in the genera historically reported to constitute the C. felis microbiome, we utilized the decontam prevalence method followed by literature review to separate contaminants from true microbiome members. Results NGS identified a single dominant and cosmopolitan amplicon sequence variant (ASV) from Rickettsia and Wolbachia while identifying one dominant Bartonella clarridgeiae and one dominant Bartonella henselae/Bartonella koehlerae ASV. Multiple less common ASVs from these genera were detected within restricted geographical ranges. Co-detection of two or more genera (Bartonella, Rickettsia, and/or Wolbachia) or multiple ASVs from a single genus in a single flea was common. Achromobacter, Peptoniphilus, and Rhodococcus were identified as additional candidate members of the C. felis microbiome on the basis of decontam analysis and literature review. Ctenocephalides felis phylogenetic diversity as assessed by the cox1 gene fell within currently characterized clades while identifying seven novel haplotypes. NGS sensitivity and specificity for Bartonella and Rickettsia spp. DNA detection were compared to targeted qPCR. 
Conclusions Our findings confirm the widespread coinfection of fleas with multiple bacterial genera and strains, proposing three additional microbiome members. The presence of minor Bartonella, Rickettsia, and Wolbachia ASVs was found to vary by location and flea haplotype. These findings have important implications for flea-borne pathogen transmission and control. Graphical Abstract }, number={1}, journal={PARASITES \& VECTORS}, author={Manvell, Charlotte and Berman, Hanna and Callahan, Benjamin and Breitschwerdt, Edward and Swain, William and Ferris, Kelli and Maggi, Ricardo and Lashnits, Erin}, year={2022}, month={Oct} } @article{mclaren_nearing_willis_lloyd_callahan_2022, title={Implications of taxonomic bias for microbial differential-abundance analysis}, url={https://doi.org/10.1101/2022.08.19.504330}, DOI={10.1101/2022.08.19.504330}, abstractNote={Abstract Differential-abundance (DA) analyses enable microbiome researchers to assess how microbial species vary in relative or absolute abundance with specific host or environmental conditions, such as health status or pH. These analyses typically use sequencing-based community measurements that are taxonomically biased to measure some species more efficiently than others. Understanding the effects that taxonomic bias has on the results of a DA analysis is essential for achieving reliable and translatable findings; yet currently, these effects are unknown. Here, we characterized these effects for DA analyses of both relative and absolute abundances, using a combination of mathematical theory and data analysis of real and simulated case studies. We found that, for analyses based on species proportions, taxonomic bias can cause significant errors in DA results if the average measurement efficiency of the community is associated with the condition of interest. These errors can be avoided by using more robust DA methods (based on species ratios) or quantified and corrected using appropriate controls. 
Wide adoption of our recommendations can improve the reproducibility, interpretability, and translatability of microbiome DA studies.This manuscript was rendered from commit 7412a36 of https://github.com/mikemc/differential-abundance-theory. Supporting data analyses can be found in the accompanying computational research notebook. Please post comments or questions on GitHub. The manuscript is licensed under a CC BY 4.0 License. See the GitHub Releases or Zenodo record for earlier versions.}, author={McLaren, Michael R. and Nearing, Jacob T. and Willis, Amy D. and Lloyd, Karen G. and Callahan, Benjamin J.}, year={2022}, month={Aug} } @article{huang_gin_fettweis_foxman_gelaye_macintyre_subramaniam_fraser_tabatabaei_callahan_2022, title={Meta-Analysis Reveals the Vaginal Microbiome is a Better Predictor of Earlier Than Later Preterm Birth}, url={https://doi.org/10.1101/2022.09.26.22280389}, DOI={10.1101/2022.09.26.22280389}, abstractNote={High-throughput sequencing measurements of the vaginal microbiome have yielded intriguing potential relationships between the vaginal microbiome and preterm birth (PTB; live birth prior to 37 weeks of gestation). However, results across studies have been inconsistent. Here we perform an integrated analysis of previously published datasets from 12 cohorts of pregnant women whose vaginal microbiomes were measured by 16S rRNA gene sequencing. Of 1926 women included in our analysis, 568 went on to deliver prematurely. Substantial variation between these datasets existed in their definition of preterm birth, characteristics of the study populations, and sequencing methodology. Nevertheless, a small group of taxa comprised a vast majority of the measured microbiome in all cohorts. We trained machine learning (ML) models to predict PTB from the composition of the vaginal microbiome, finding low to modest predictive accuracy (0.28-0.79). 
Predictive accuracy was typically lower when ML models trained in one dataset predicted PTB in another dataset. Earlier preterm birth (<32 weeks, <34 weeks) was more predictable from the vaginal microbiome than late preterm birth (34 - 37 weeks), both within and across datasets. Integrated differential abundance analysis revealed a highly significant negative association between L. crispatus and PTB that was consistent across almost all studies. The presence of the majority (18 out of 25) of genera was associated with a higher risk of PTB, with L. iners, Prevotella, and Gardnerella showing particularly consistent and significant associations. Some example discrepancies between studies could be attributed to specific methodological differences, but not most study-to-study variations in the relationship between the vaginal microbiome and preterm birth. We believe future studies of the vaginal microbiome and PTB will benefit from a focus on earlier preterm births, and improved reporting of specific patient metadata shown to influence the vaginal microbiome and/or birth outcomes.}, author={Huang, Caizhi and Gin, Craig and Fettweis, Jennifer and Foxman, Betsy and Gelaye, Bizu and MacIntyre, David A. and Subramaniam, Akila and Fraser, William and Tabatabaei, Negar and Callahan, Benjamin}, year={2022}, month={Sep} } @article{huang_callahan_wu_holloway_brochu_lu_peng_tzeng_2022, title={Phylogeny-guided microbiome OTU-specific association test (POST)}, volume={10}, ISSN={["2049-2618"]}, DOI={10.1186/s40168-022-01266-3}, abstractNote={Abstract Background The relationship between host conditions and microbiome profiles, typically characterized by operational taxonomic units (OTUs), contains important information about the microbial role in human health. Traditional association testing frameworks are challenged by the high dimensionality and sparsity of typical microbiome profiles. 
Phylogenetic information is often incorporated to address these challenges with the assumption that evolutionarily similar taxa tend to behave similarly. However, this assumption may not always be valid due to the complex effects of microbes, and phylogenetic information should be incorporated in a data-supervised fashion. Results In this work, we propose a local collapsing test called phylogeny-guided microbiome OTU-specific association test (POST). In POST, whether or not to borrow information and how much information to borrow from the neighboring OTUs in the phylogenetic tree are supervised by phylogenetic distance and the outcome-OTU association. POST is constructed under the kernel machine framework to accommodate complex OTU effects and extends kernel machine microbiome tests from community level to OTU level. Using simulation studies, we show that when the phylogenetic tree is informative, POST has better performance than existing OTU-level association tests. When the phylogenetic tree is not informative, POST achieves similar performance as existing methods. Finally, in real data applications on bacterial vaginosis and on preterm birth, we find that POST can identify similar or more outcome-associated OTUs that are of biological relevance compared to existing methods. Conclusions Using POST, we show that adaptively leveraging the phylogenetic information can enhance the selection performance of associated microbiome features by improving the overall true-positive and false-positive detection. We developed a user friendly R package POSTm which is freely available on CRAN (https://CRAN.R-project.org/package=POSTm).}, number={1}, journal={MICROBIOME}, author={Huang, Caizhi and Callahan, Benjamin John and Wu, Michael C. and Holloway, Shannon T. 
and Brochu, Hayden and Lu, Wenbin and Peng, Xinxia and Tzeng, Jung-Ying}, year={2022}, month={Jun} } @article{halleran_callahan_jacob_sylvester_prange_papich_foster_2021, title={Effects of danofloxacin dosing regimen on gastrointestinal pharmacokinetics and fecal microbiome in steers}, volume={11}, ISSN={["2045-2322"]}, DOI={10.1038/s41598-021-90647-z}, abstractNote={AbstractFluoroquinolones are a class of antimicrobial commonly used in human medicine, and deemed critical by the World Health Organization. Nonetheless, two formulations are approved for the treatment of respiratory disease in beef cattle. The objective of this study was to determine the gastrointestinal pharmacokinetics and impact on enteric bacteria of cattle when receiving one of the two dosing regimens (high: 40 mg/kg SC once or low: 20 mg/kg IM q48hr) of danofloxacin, a commonly utilized synthetic fluoroquinolone in veterinary medicine. Danofloxacin was administered to 12 steers (age 7 months) fitted with intestinal ultrafiltration devices at two different dosing regimens to assess the gastrointestinal pharmacokinetics, the shifts in the gastrointestinal microbiome and the development of resistant bacterial isolates. Our results demonstrated high intestinal penetration of danofloxacin for both dosing groups, as well as, significant differences in MIC values for E. coli and Enterococcus between dosing groups at selected time points over a 38 day period. Danofloxacin treatment consistently resulted in the Euryarchaeota phyla decreasing over time, specifically due to a decrease in Methanobrevibacter. Although microbiome differences were minor between dosing groups, the low dose group had a higher number of isolates with MIC values high enough to cause clinically relevant resistance. This information would help guide veterinarians as to appropriate dosing schemes to minimize the spread of antimicrobial resistance.}, number={1}, journal={SCIENTIFIC REPORTS}, author={Halleran, J. L. and Callahan, B. J. 
and Jacob, M. E. and Sylvester, H. J. and Prange, T. and Papich, M. G. and Foster, D. M.}, year={2021}, month={May} } @article{callahan_grinevich_thakur_balamotis_ben_yehezkel_2021, title={Ultra-accurate microbial amplicon sequencing with synthetic long reads}, volume={9}, ISSN={["2049-2618"]}, url={https://doi.org/10.1186/s40168-021-01072-3}, DOI={10.1186/s40168-021-01072-3}, abstractNote={Abstract Background Out of the many pathogenic bacterial species that are known, only a fraction are readily identifiable directly from a complex microbial community using standard next generation DNA sequencing. Long-read sequencing offers the potential to identify a wider range of species and to differentiate between strains within a species, but attaining sufficient accuracy in complex metagenomes remains a challenge. Methods Here, we describe and analytically validate LoopSeq, a commercially available synthetic long-read (SLR) sequencing technology that generates highly accurate long reads from standard short reads. Results LoopSeq reads are sufficiently long and accurate to identify microbial genes and species directly from complex samples. LoopSeq perfectly recovered the full diversity of 16S rRNA genes from known strains in a synthetic microbial community. Full-length LoopSeq reads had a per-base error rate of 0.005%, which exceeds the accuracy reported for other long-read sequencing technologies. 18S-ITS and genomic sequencing of fungal and bacterial isolates confirmed that LoopSeq sequencing maintains that accuracy for reads up to 6 kb in length. LoopSeq full-length 16S rRNA reads could accurately classify organisms down to the species level in rinsate from retail meat samples, and could differentiate strains within species identified by the CDC as potential foodborne pathogens. 
Conclusions The order-of-magnitude improvement in length and accuracy over standard Illumina amplicon sequencing achieved with LoopSeq enables accurate species-level and strain identification from complex- to low-biomass microbiome samples. The ability to generate accurate and long microbiome sequencing reads using standard short read sequencers will accelerate the building of quality microbial sequence databases and removes a significant hurdle on the path to precision microbial genomics. }, number={1}, journal={MICROBIOME}, author={Callahan, Benjamin J. and Grinevich, Dmitry and Thakur, Siddhartha and Balamotis, Michael A. and Ben Yehezkel, Tuval}, year={2021}, month={Jun} } @article{thanissery_mclaren_rivera_reed_betrapally_burdette_winston_jacob_callahan_theriot_2020, title={Clostridioides difficile carriage in animals and the associated changes in the host fecal microbiota}, volume={66}, ISSN={["1095-8274"]}, DOI={10.1016/j.anaerobe.2020.102279}, abstractNote={The relationship between the gut microbiota and Clostridioides difficile, and its role in the severity of C. difficile infection in humans is an area of active research. Intestinal carriage of toxigenic and non-toxigenic C. difficile strains, with and without clinical signs, is reported in animals, however few studies have looked at the risk factors associated with C. difficile carriage and the role of the host gut microbiota. Here, we isolated and characterized C. difficile strains from different animal species (predominantly canines (dogs), felines (cats), and equines (horses)) that were brought in for tertiary care at North Carolina State University Veterinary Hospital. C. difficile strains were characterized by toxin gene profiling, fluorescent PCR ribotyping, and antimicrobial susceptibility testing. 16S rRNA gene sequencing was done on animal feces to investigate the relationship between the presence of C. difficile and the gut microbiota in different hosts. Here, we show that C. 
difficile was recovered from 20.9% of samples (42/201), which included 33 canines, 2 felines, and 7 equines. Over 69% (29/42) of the isolates were toxigenic and belonged to 14 different ribotypes including ones known to cause CDI in humans. The presence of C. difficile results in a shift in the fecal microbial community structure in both canines and equines. Commensal Clostridium hiranonis was negatively associated with C. difficile in canines. Further experimentation showed a clear antagonistic relationship between the two strains in vitro, suggesting that commensal Clostridia might play a role in colonization resistance against C. difficile in different hosts.}, journal={ANAEROBE}, author={Thanissery, R. and McLaren, M. R. and Rivera, A. and Reed, A. D. and Betrapally, N. S. and Burdette, T. and Winston, J. A. and Jacob, M. and Callahan, B. J. and Theriot, C. M.}, year={2020}, month={Dec} } @article{mclaren_callahan_2020, title={Pathogen resistance may be the principal evolutionary advantage provided by the microbiome}, volume={375}, url={https://doi.org/10.1098/rstb.2019.0592}, DOI={10.1098/rstb.2019.0592}, abstractNote={To survive, plants and animals must continually defend against pathogenic microbes that would invade and disrupt their tissues. Yet they do not attempt to extirpate all microbes. Instead, they tolerate and even encourage the growth of commensal microbes, which compete with pathogens for resources and via direct inhibition. We argue that hosts have evolved to cooperate with commensals in order to enhance the pathogen resistance this competition provides. We briefly describe competition between commensals and pathogens within the host, consider how natural selection might favour hosts that tilt this competition in favour of commensals, and describe examples of extant host traits that may serve this purpose. Finally, we consider ways that this cooperative immunity may have facilitated the adaptive evolution of non-pathogen-related host traits. 
On the basis of these observations, we argue that pathogen resistance vies with other commensal-provided benefits for being the principal evolutionary advantage provided by the microbiome to host lineages across the tree of life. This article is part of the theme issue ‘The role of the microbiome in host evolution’.}, number={1808}, journal={Philosophical Transactions of the Royal Society B: Biological Sciences}, publisher={The Royal Society}, author={McLaren, Michael R. and Callahan, Benjamin J.}, year={2020}, month={Sep}, pages={20190592} } @article{kolodny_callahan_douglas_2020, title={The role of the microbiome in host evolution}, volume={375}, url={https://doi.org/10.1098/rstb.2019.0588}, DOI={10.1098/rstb.2019.0588}, abstractNote={In the last decade, we have witnessed a major paradigm shift in the life sciences: the recognition that the microbiome, i.e. the set of microorganisms associated with healthy animals (including humans) and plants, plays a crucial role in the sustained health and fitness of its host. Enabled by rapid advances in sequencing technologies and analytical methods, substantial advances have been achieved in both identifying the microbial taxa and understanding the relationship between microbiome composition and host phenotype. These breakthroughs are leading to novel strategies for improved human and animal health, enhanced crop yield and nutritional quality, and the control of various pests and disease agents. This article is part of the theme issue ‘The role of the microbiome in host evolution’.}, number={1808}, journal={Philosophical Transactions of the Royal Society B: Biological Sciences}, publisher={The Royal Society}, author={Kolodny, Oren and Callahan, Benjamin J. 
and Douglas, Angela E.}, year={2020}, month={Sep}, pages={20190588} } @article{callahan_grinevich_thakur_balamotis_yehezkel_2020, title={Ultra-accurate Microbial Amplicon Sequencing Directly from Complex Samples with Synthetic Long Reads}, url={https://doi.org/10.1101/2020.07.07.192286}, DOI={10.1101/2020.07.07.192286}, abstractNote={AbstractOut of the many pathogenic bacterial species that are known, only a fraction are readily identifiable directly from a complex microbial community using standard next generation DNA sequencing technology. Long-read sequencing offers the potential to identify a wider range of species and to differentiate between strains within a species, but attaining sufficient accuracy in complex metagenomes remains a challenge. Here, we describe and analytically validate LoopSeq, a commercially-available synthetic long-read (SLR) sequencing technology that generates highly-accurate long reads from standard short reads. LoopSeq reads are sufficiently long and accurate to identify microbial genes and species directly from complex samples. LoopSeq applied to full-length 16S rRNA genes from known strains in a microbial community perfectly recovered the full diversity of full-length exact sequence variants in a known microbial community. Full-length LoopSeq reads had a per-base error rate of 0.005%, which exceeds the accuracy reported for other long-read sequencing technologies. 18S-ITS and genomic sequencing of fungal and bacterial isolates confirmed that LoopSeq sequencing maintains that accuracy for reads up to 6 kilobases in length. Analysis of rinsate from retail meat samples demonstrated that LoopSeq full-length 16S rRNA synthetic long-reads could accurately classify organisms down to the species level, and could differentiate between different strains within species identified by the CDC as potential foodborne pathogens. 
The order-of-magnitude improvement in both length and accuracy over standard Illumina amplicon sequencing achieved with LoopSeq enables accurate species-level and strain identification from complex and low-biomass microbiome samples. The ability to generate accurate and long microbiome sequencing reads using standard short read sequencers will accelerate the building of quality microbial sequence databases and removes a significant hurdle on the path to precision microbial genomics.}, author={Callahan, Benjamin J and Grinevich, Dmitry and Thakur, Siddhartha and Balamotis, Michael A and Ben Yehezkel, Tuval}, year={2020}, month={Jul} } @article{foster_jacob_farmer_callahan_theriot_kathariou_cernicchiaro_prange_papich_2019, title={Ceftiofur formulation differentially affects the intestinal drug concentration, resistance of fecal Escherichia coli, and the microbiome of steers}, volume={14}, ISSN={["1932-6203"]}, DOI={10.1371/journal.pone.0223378}, abstractNote={Antimicrobial drug concentrations in the gastrointestinal tract likely drive antimicrobial resistance in enteric bacteria. Our objective was to determine the concentration of ceftiofur and its metabolites in the gastrointestinal tract of steers treated with ceftiofur crystalline-free acid (CCFA) or ceftiofur hydrochloride (CHCL), determine the effect of these drugs on the minimum inhibitory concentration (MIC) of fecal Escherichia coli, and evaluate shifts in the microbiome. Steers were administered either a single dose (6.6 mg/kg) of CCFA or 2.2 mg/kg of CHCL every 24 hours for 3 days. Ceftiofur and its metabolites were measured in the plasma, interstitium, ileum and colon. The concentration and MIC of fecal E. coli and the fecal microbiota composition were assessed after treatment. The maximum concentration of ceftiofur was higher in all sampled locations of steers treated with CHCL. Measurable drug persisted longer in the intestine of CCFA-treated steers. There was a significant decrease in E. 
coli concentration (P = 0.002) within 24 hours that persisted for 2 weeks after CCFA treatment. In CHCL-treated steers, the mean MIC of ceftiofur in E. coli peaked at 48 hours (mean MIC = 20.45 ug/ml, 95% CI = 10.29–40.63 ug/ml), and in CCFA-treated steers, mean MIC peaked at 96 hours (mean MIC = 10.68 ug/ml, 95% CI = 5.47–20.85 ug/ml). Shifts in the microbiome of steers in both groups were due to reductions in Firmicutes and increases in Bacteroidetes. CCFA leads to prolonged, low intestinal drug concentrations, and is associated with decreased E. coli concentration, an increased MIC of ceftiofur in E. coli at specific time points, and shifts in the fecal microbiota. CHCL led to higher intestinal drug concentrations over a shorter duration. Effects on E. coli concentration and the microbiome were smaller in this group, but the increase in the MIC of ceftiofur in fecal E. coli was similar.}, number={10}, journal={PLOS ONE}, author={Foster, Derek M. and Jacob, Megan E. and Farmer, Kyle A. and Callahan, Benjamin J. and Theriot, Casey M. and Kathariou, Sophia and Cernicchiaro, Natalia and Prange, Timo and Papich, Mark G.}, year={2019}, month={Oct} } @article{mclaren_willis_callahan_2019, title={Consistent and correctable bias in metagenomic sequencing experiments}, volume={2}, url={https://doi.org/10.1101/559831}, DOI={10.1101/559831}, abstractNote={AbstractMeasurements of biological communities by marker-gene and metagenomic sequencing are biased: The measured relative abundances of taxa or their genes are systematically distorted from their true values because each step in the experimental workflow preferentially detects some taxa over others. Bias can lead to qualitatively incorrect conclusions and makes measurements from different protocols quantitatively incomparable. A rigorous understanding of bias is therefore essential. 
Here we propose, test, and apply a simple mathematical model of how bias distorts marker-gene and metagenomics measurements: Bias multiplies the true relative abundances within each sample by taxon- and protocol-specific factors that describe the different efficiencies with which taxa are detected by the workflow. Critically, these factors are consistent across samples with different compositions, allowing bias to be estimated and corrected. We validate this model in 16S rRNA gene and shotgun metagenomics data from bacterial communities with defined compositions. We use it to reason about the effects of bias on downstream statistical analyses, finding that analyses based on taxon ratios are less sensitive to bias than analyses based on taxon proportions. Finally, we demonstrate how this model can be used to quantify bias from samples of defined composition, partition bias into steps such as DNA extraction and PCR amplification, and to correct biased measurements. Our model improves on previous models by providing a better fit to experimental data and by providing a composition-independent approach to analyzing, measuring, and correcting bias.}, journal={bioRxiv}, publisher={Cold Spring Harbor Laboratory}, author={McLaren, Michael R. and Willis, Amy D. and Callahan, Benjamin J.}, year={2019}, month={Feb}, pages={559831} } @article{mclaren_willis_callahan_2019b, title={Consistent and correctable bias in metagenomic sequencing experiments}, url={https://doi.org/10.7554/eLife.46923}, DOI={10.7554/eLife.46923}, abstractNote={Marker-gene and metagenomic sequencing have profoundly expanded our ability to measure biological communities. But the measurements they provide differ from the truth, often dramatically, because these experiments are biased toward detecting some taxa over others. This experimental bias makes the taxon or gene abundances measured by different protocols quantitatively incomparable and can lead to spurious biological conclusions. 
We propose a mathematical model for how bias distorts community measurements based on the properties of real experiments. We validate this model with 16S rRNA gene and shotgun metagenomics data from defined bacterial communities. Our model better fits the experimental data despite being simpler than previous models. We illustrate how our model can be used to evaluate protocols, to understand the effect of bias on downstream statistical analyses, and to measure and correct bias given suitable calibration controls. These results illuminate new avenues toward truly quantitative and reproducible metagenomics measurements.}, journal={eLife}, author={McLaren, Michael R and Willis, Amy D and Callahan, Benjamin J}, year={2019}, month={Sep} } @article{callahan_wong_heiner_oh_theriot_gulati_mcgill_dougherty_2019, title={High-throughput amplicon sequencing of the full-length 16S rRNA gene with single-nucleotide resolution}, volume={7}, url={https://doi.org/10.1093/nar/gkz569}, DOI={10.1093/nar/gkz569}, abstractNote={AbstractTargeted PCR amplification and high-throughput sequencing (amplicon sequencing) of 16S rRNA gene fragments is widely used to profile microbial communities. New long-read sequencing technologies can sequence the entire 16S rRNA gene, but higher error rates have limited their attractiveness when accuracy is important. Here we present a high-throughput amplicon sequencing methodology based on PacBio circular consensus sequencing and the DADA2 sample inference method that measures the full-length 16S rRNA gene with single-nucleotide resolution and a near-zero error rate. In two artificial communities of known composition, our method recovered the full complement of full-length 16S sequence variants from expected community members without residual errors. The measured abundances of intra-genomic sequence variants were in the integral ratios expected from the genuine allelic variants within a genome. 
The full-length 16S gene sequences recovered by our approach allowed Escherichia coli strains to be correctly classified to the O157:H7 and K12 sub-species clades. In human fecal samples, our method showed strong technical replication and was able to recover the full complement of 16S rRNA alleles in several E. coli strains. There are likely many applications beyond microbial profiling for which high-throughput amplicon sequencing of complete genes with single-nucleotide resolution will be of use.}, journal={Nucleic Acids Research}, publisher={Oxford University Press (OUP)}, author={Callahan, Benjamin J and Wong, Joan and Heiner, Cheryl and Oh, Steve and Theriot, Casey M and Gulati, Ajay S and McGill, Sarah K and Dougherty, Michael K}, year={2019}, month={Oct} } @article{berman_mclaren_callahan_2020, title={Understanding and interpreting community sequencing measurements of the vaginal microbiome}, volume={127}, url={https://doi.org/10.1111/1471-0528.15978}, DOI={10.1111/1471-0528.15978}, abstractNote={Community‐wide high‐throughput sequencing has transformed the study of the vaginal microbiome, and clinical applications are on the horizon. Here we outline the three main community sequencing methods: (1) amplicon sequencing, (2) shotgun metagenomic sequencing, and (3) metatranscriptomic sequencing. We discuss the advantages and limitations of community sequencing generally, and the unique strengths and weaknesses of each method. We briefly review the contributions of community sequencing to vaginal microbiome research and practice. 
We develop suggestions for critically interpreting research results and potential clinical applications based on community sequencing of the vaginal microbiome. Tweetable abstract We review the advantages and limitations of amplicon sequencing, metagenomics, and metatranscriptomics methods for the study of the vaginal microbiome.}, number={2}, journal={BJOG: An International Journal of Obstetrics \& Gynaecology}, publisher={Wiley}, author={Berman, Hanna L. and McLaren, Michael R. and Callahan, Benjamin J.}, year={2020}, month=jan, pages={139--146} } @misc{callahan_wong_heiner_oh_theriot_gulati_mcgill_dougherty_2018, title={High-throughput amplicon sequencing of the full-length {16S} {rRNA} gene with single-nucleotide resolution}, note={Preprint}, volume={8}, url={https://doi.org/10.1101/392332}, DOI={10.1101/392332}, abstractNote={Abstract Targeted PCR amplification and high-throughput sequencing (amplicon sequencing) of 16S rRNA gene fragments is widely used to profile microbial communities. New long-read sequencing technologies can sequence the entire 16S rRNA gene, but higher error rates have limited their attractiveness when accuracy is important. Here we present a high-throughput amplicon sequencing methodology based on PacBio circular consensus sequencing and the DADA2 sample inference method that measures the full-length 16S rRNA gene with single-nucleotide resolution and a near-zero error rate. In two artificial communities of known composition, our method recovered the full complement of full-length 16S sequence variants from expected community members without residual errors. The measured abundances of intra-genomic sequence variants were in the integral ratios expected from the genuine allelic variants within a genome. The full-length 16S gene sequences recovered by our approach allowed E. coli strains to be correctly classified to the O157:H7 and K12 sub-species clades. 
In human fecal samples, our method showed strong technical replication and was able to recover the full complement of 16S rRNA alleles in several E. coli strains. There are likely many applications beyond microbial profiling for which high-throughput amplicon sequencing of complete genes with single-nucleotide resolution will be of use.}, publisher={Cold Spring Harbor Laboratory}, author={Callahan, Benjamin J and Wong, Joan and Heiner, Cheryl and Oh, Steve and Theriot, Casey M and Gulati, Ajay S and McGill, Sarah K and Dougherty, Michael K}, year={2018}, month=aug } @article{mclaren_callahan_2018, title={In Nature, There Is Only Diversity}, volume={9}, ISSN={2150-7511}, url={http://www.ncbi.nlm.nih.gov/pubmed/29295915}, DOI={10.1128/mbio.02149-17}, abstractNote={ABSTRACT Microbial ecology has been transformed by the advent of high-throughput marker gene and metagenomic sequencing methods. These tools provide expansive descriptions of microbial communities, but the descriptions are framed in terms of molecular objects, such as 97% ribosomal operational taxonomic units (OTUs), rather than biological objects, such as species. A recent study by A. B. Chase and colleagues (mBio 8:e01809-17, 2017, https://doi.org/10.1128/mBio.01809-17 ) explores the so-called microdiversity within the Curtobacterium OTU, the most abundant OTU in a leaf litter community. Perhaps unsurprisingly, they find that some important ecologic traits, such as drought response, are coherent within the OTU, but that others vary significantly. Here we discuss their findings in relation to the more general issue of how molecular tools can be effectively used to study microbial ecology. We specifically note the need for investigators to choose the right molecular methods for their biological problem, as nature does not respect the limitations and conventions associated with our methods. }, number={1}, journal={mBio}, publisher={American Society for Microbiology}, author={McLaren, Michael R. 
and Callahan, Benjamin J.}, year={2018} } @article{ghaemi_digiulio_contrepois_callahan_ngo_lee-mcmullen_lehallier_robaczewska_mcilwain_rosenberg-hasson_etal_2019, title={Multiomics modeling of the immunome, transcriptome, microbiome, proteome and metabolome adaptations during human pregnancy}, volume={35}, ISSN={1460-2059}, url={https://doi.org/10.1093/bioinformatics/bty537}, DOI={10.1093/bioinformatics/bty537}, abstractNote={Abstract Motivation Multiple biological clocks govern a healthy pregnancy. These biological mechanisms produce immunologic, metabolomic, proteomic, genomic and microbiomic adaptations during the course of pregnancy. Modeling the chronology of these adaptations during full-term pregnancy provides the frameworks for future studies examining deviations implicated in pregnancy-related pathologies including preterm birth and preeclampsia. Results We performed a multiomics analysis of 51 samples from 17 pregnant women, delivering at term. The datasets included measurements from the immunome, transcriptome, microbiome, proteome and metabolome of samples obtained simultaneously from the same patients. Multivariate predictive modeling using the Elastic Net (EN) algorithm was used to measure the ability of each dataset to predict gestational age. Using stacked generalization, these datasets were combined into a single model. This model not only significantly increased predictive power by combining all datasets, but also revealed novel interactions between different biological modalities. Future work includes expansion of the cohort to preterm-enriched populations and in vivo analysis of immune-modulating interventions based on the mechanisms identified. Availability and implementation Datasets and scripts for reproduction of results are available through: https://nalab.stanford.edu/multiomics-pregnancy/. Supplementary information Supplementary data are available at Bioinformatics online. 
}, number={1}, journal={Bioinformatics}, publisher={Oxford University Press (OUP)}, author={Ghaemi, Mohammad Sajjad and DiGiulio, Daniel B. and Contrepois, Kevin and Callahan, Benjamin and Ngo, Thuy T. M. and Lee-McMullen, Brittany and Lehallier, Benoit and Robaczewska, Anna and Mcilwain, David and Rosenberg-Hasson, Yael and others}, editor={Wren, Jonathan}, year={2019}, month=jan, pages={95--103} } @article{davis_proctor_holmes_relman_callahan_2018, title={Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data}, volume={6}, ISSN={2049-2618}, url={https://doi.org/10.1186/s40168-018-0605-2}, DOI={10.1186/s40168-018-0605-2}, abstractNote={The accuracy of microbial community surveys based on marker-gene and metagenomic sequencing (MGS) suffers from the presence of contaminants — DNA sequences not truly present in the sample. Contaminants come from various sources, including reagents. Appropriate laboratory practices can reduce contamination, but do not eliminate it. Here we introduce decontam ( https://github.com/benjjneb/decontam ), an open-source R package that implements a statistical classification procedure that identifies contaminants in MGS data based on two widely reproduced patterns: contaminants appear at higher frequencies in low-concentration samples and are often found in negative controls. Decontam classified amplicon sequence variants (ASVs) in a human oral dataset consistently with prior microscopic observations of the microbial taxa inhabiting that environment and previous reports of contaminant taxa. In metagenomics and marker-gene measurements of a dilution series, decontam substantially reduced technical variation arising from different sequencing protocols. 
The application of decontam to two recently published datasets corroborated and extended their conclusions that little evidence existed for an indigenous placenta microbiome and that some low-frequency taxa seemingly associated with preterm birth were contaminants. Decontam improves the quality of metagenomic and marker-gene sequencing by identifying and removing contaminant DNA sequences. Decontam integrates easily with existing MGS workflows and allows researchers to generate more accurate profiles of microbial communities at little to no additional cost.}, number={1}, journal={Microbiome}, publisher={Springer Science and Business Media LLC}, author={Davis, Nicole M. and Proctor, Diana M. and Holmes, Susan P. and Relman, David A. and Callahan, Benjamin J.}, year={2018}, month=dec } @article{callahan_mcmurdie_holmes_2017, title={Exact sequence variants should replace operational taxonomic units in marker-gene data analysis}, volume={11}, ISSN={1751-7362 1751-7370}, url={http://dx.doi.org/10.1038/ismej.2017.119}, DOI={10.1038/ismej.2017.119}, abstractNote={Abstract Recent advances have made it possible to analyze high-throughput marker-gene sequencing data without resorting to the customary construction of molecular operational taxonomic units (OTUs): clusters of sequencing reads that differ by less than a fixed dissimilarity threshold. New methods control errors sufficiently such that amplicon sequence variants (ASVs) can be resolved exactly, down to the level of single-nucleotide differences over the sequenced gene region. The benefits of finer resolution are immediately apparent, and arguments for ASV methods have focused on their improved resolution. Less obvious, but we believe more important, are the broad benefits that derive from the status of ASVs as consistent labels with intrinsic biological meaning identified independently from a reference database. 
Here we discuss how these features grant ASVs the combined advantages of closed-reference OTUs—including computational costs that scale linearly with study size, simple merging between independently processed data sets, and forward prediction—and of de novo OTUs—including accurate measurement of diversity and applicability to communities lacking deep coverage in reference databases. We argue that the improvements in reusability, reproducibility and comprehensiveness are sufficiently great that ASVs should replace OTUs as the standard unit of marker-gene analysis and reporting.}, number={12}, journal={The ISME Journal}, publisher={Springer Science and Business Media LLC}, author={Callahan, Benjamin J and McMurdie, Paul J and Holmes, Susan P}, year={2017}, month=jul, pages={2639--2643} } @article{callahan_digiulio_goltsman_sun_costello_jeganathan_biggio_wong_druzin_shaw_etal_2017, title={Replication and refinement of a vaginal microbial signature of preterm birth in two racially distinct cohorts of {US} women}, volume={114}, ISSN={0027-8424}, DOI={10.1073/pnas.1705899114}, abstractNote={Significance Premature birth (PTB) is a major global public health burden. Previous studies have suggested an association between altered vaginal microbiota composition and PTB, although findings across studies have been inconsistent. To address these inconsistencies, improve upon our previous signature, and better understand the vaginal microbiota’s role in PTB, we conducted a case-control study in two cohorts of pregnant women: one predominantly Caucasian at low risk of PTB, the second predominantly African American at high risk. With the results, we were able to replicate our signature in the first cohort and refine our signature of PTB for both cohorts. 
Our findings elucidate the ecology of the vaginal microbiota and advance our ability to predict and understand the causes of PTB.}, number={37}, journal={Proceedings of the National Academy of Sciences}, author={Callahan, Benjamin J. and DiGiulio, Daniel B. and Goltsman, Daniela S. Aliaga and Sun, Christine L. and Costello, Elizabeth K. and Jeganathan, Pratheepa and Biggio, Joseph R. and Wong, Ronald J. and Druzin, Maurice L. and Shaw, Gary M. and others}, year={2017}, month=sep, pages={9966--9971} } @misc{davis_proctor_holmes_relman_callahan_2017, title={Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data}, note={Preprint}, volume={11}, url={https://doi.org/10.1101/221499}, DOI={10.1101/221499}, abstractNote={Abstract Background The accuracy of microbial community surveys based on marker-gene and metagenomic sequencing (MGS) suffers from the presence of contaminants — DNA sequences not truly present in the sample. Contaminants come from various sources, including reagents. Appropriate laboratory practices can reduce contamination, but do not eliminate it. 
Here we introduce decontam (https://github.com/benjjneb/decontam), an open-source R package that implements a statistical classification procedure that identifies contaminants in MGS data based on two widely reproduced patterns: contaminants appear at higher frequencies in low-concentration samples, and are often found in negative controls. Results decontam classified amplicon sequence variants (ASVs) in a human oral dataset consistently with prior microscopic observations of the microbial taxa inhabiting that environment and previous reports of contaminant taxa. In metagenomics and marker-gene measurements of a dilution series, decontam substantially reduced technical variation arising from different sequencing protocols. The application of decontam to two recently published datasets corroborated and extended their conclusions that little evidence existed for an indigenous placenta microbiome, and that some low-frequency taxa seemingly associated with preterm birth were contaminants. Conclusions decontam improves the quality of metagenomic and marker-gene sequencing by identifying and removing contaminant DNA sequences. decontam integrates easily with existing MGS workflows, and allows researchers to generate more accurate profiles of microbial communities at little to no additional cost.}, publisher={Cold Spring Harbor Laboratory}, author={Davis, Nicole M. and Proctor, Diana M. and Holmes, Susan P. and Relman, David A. 
and Callahan, Benjamin J.}, year={2017}, month=nov } @article{mayer-blackwell_fincker_molenda_callahan_sewell_holmes_edwards_spormann_2016, title={1,2-Dichloroethane Exposure Alters the Population Structure, Metabolism, and Kinetics of a Trichloroethene-Dechlorinating {Dehalococcoides} mccartyi Consortium}, volume={50}, ISSN={0013-936X 1520-5851}, url={http://dx.doi.org/10.1021/acs.est.6b02957}, DOI={10.1021/acs.est.6b02957}, abstractNote={Bioremediation of groundwater contaminated with chlorinated aliphatic hydrocarbons such as perchloroethene and trichloroethene can result in the accumulation of the undesirable intermediate vinyl chloride. Such accumulation can either be due to the absence of specific vinyl chloride respiring Dehalococcoides mccartyi or to the inhibition of such strains by the metabolism of other microorganisms. The fitness of vinyl chloride respiring Dehalococcoides mccartyi subpopulations is particularly uncertain in the presence of chloroethene/chloroethane cocontaminant mixtures, which are commonly found in contaminated groundwater. Therefore, we investigated the structure of Dehalococcoides populations in a continuously fed reactor system under changing chloroethene/ethane influent conditions. We observed that increasing the influent ratio of 1,2-dichloroethane to trichloroethene was associated with ecological selection of a tceA-containing Dehalococcoides population relative to a vcrA-containing Dehalococcoides population. Although both vinyl chloride and 1,2-dichloroethane could be simultaneously transformed to ethene, prolonged exposure to 1,2-dichloroethane diminished the vinyl chloride transforming capacity of the culture. Kinetic tests revealed that dechlorination of 1,2-dichloroethane by the consortium was strongly inhibited by cis-dichloroethene but not vinyl chloride. 
Native polyacrylamide gel electrophoresis and mass spectrometry revealed that a trichloroethene reductive dehalogenase (TceA) homologue was the most consistently expressed of four detectable reductive dehalogenases during 1,2-dichloroethane exposure, suggesting that it catalyzes the reductive dihaloelimination of 1,2-dichloroethane to ethene.}, number={22}, journal={Environmental Science \& Technology}, publisher={American Chemical Society (ACS)}, author={Mayer-Blackwell, Koshlan and Fincker, Maeva and Molenda, Olivia and Callahan, Benjamin and Sewell, Holly and Holmes, Susan and Edwards, Elizabeth A. and Spormann, Alfred M.}, year={2016}, month=nov, pages={12187--12196} } @article{callahan_sankaran_fukuyama_mcmurdie_holmes_2016, title={Bioconductor Workflow for Microbiome Data Analysis: from raw reads to community analyses}, volume={5}, ISSN={2046-1402}, url={http://dx.doi.org/10.12688/f1000research.8986.2}, DOI={10.12688/f1000research.8986.2}, abstractNote={High-throughput sequencing of PCR-amplified taxonomic markers (like the 16S rRNA gene) has enabled a new level of analysis of complex bacterial communities known as microbiomes. Many tools exist to quantify and compare abundance levels or OTU composition of communities in different conditions. The sequencing reads have to be denoised and assigned to the closest taxa from a reference database. Common approaches use a notion of 97% similarity and normalize the data by subsampling to equalize library sizes. In this paper, we show that statistical models allow more accurate abundance estimates. By providing a complete workflow in R, we enable the user to do sophisticated downstream statistical analyses, whether parametric or nonparametric. We provide examples of using the R packages dada2, phyloseq, DESeq2, ggplot2 and vegan to filter, visualize and test microbiome data. 
We also provide examples of supervised analyses using random forests and nonparametric testing using community networks and the ggnetwork package.}, journal={F1000Research}, publisher={F1000 (Faculty of 1000 Ltd)}, author={Callahan, Ben J. and Sankaran, Kris and Fukuyama, Julia A. and McMurdie, Paul J. and Holmes, Susan P.}, year={2016}, month=nov, pages={1492} } @article{callahan_mcmurdie_rosen_han_johnson_holmes_2016, title={{DADA2}: High-resolution sample inference from {Illumina} amplicon data}, volume={13}, ISSN={1548-7091 1548-7105}, url={http://dx.doi.org/10.1038/nmeth.3869}, DOI={10.1038/nmeth.3869}, abstractNote={DADA2 is an open-source software package that denoises and removes sequencing errors from Illumina amplicon sequence data to distinguish microbial sample sequences differing by as little as a single nucleotide. We present the open-source software package DADA2 for modeling and correcting Illumina-sequenced amplicon errors ( https://github.com/benjjneb/dada2 ). DADA2 infers sample sequences exactly and resolves differences of as little as 1 nucleotide. In several mock communities, DADA2 identified more real variants and output fewer spurious sequences than other methods. 
We applied DADA2 to vaginal samples from a cohort of pregnant women, revealing a diversity of previously undetected Lactobacillus crispatus variants.}, number={7}, journal={Nature Methods}, publisher={Springer Nature}, author={Callahan, Benjamin J and McMurdie, Paul J and Rosen, Michael J and Han, Andrew W and Johnson, Amy Jo A and Holmes, Susan P}, year={2016}, month=may, pages={581--583} } @article{bik_costello_switzer_callahan_holmes_wells_carlin_jensen_venn-watson_relman_2016, title={Marine mammals harbor unique microbiotas shaped by and yet distinct from the sea}, volume={7}, ISSN={2041-1723}, url={http://dx.doi.org/10.1038/ncomms10516}, DOI={10.1038/ncomms10516}, abstractNote={Abstract Marine mammals play crucial ecological roles in the oceans, but little is known about their microbiotas. Here we study the bacterial communities in 337 samples from 5 body sites in 48 healthy dolphins and 18 healthy sea lions, as well as those of adjacent seawater and other hosts. The bacterial taxonomic compositions are distinct from those of other mammals, dietary fish and seawater, are highly diverse and vary according to body site and host species. Dolphins harbour 30 bacterial phyla, with 25 of them in the mouth, several abundant but poorly characterized Tenericutes species in gastric fluid and a surprisingly paucity of Bacteroidetes in distal gut. About 70% of near-full length bacterial 16S ribosomal RNA sequences from dolphins are unique. Host habitat, diet and phylogeny all contribute to variation in marine mammal distal gut microbiota composition. Our findings help elucidate the factors structuring marine mammal microbiotas and may enhance monitoring of marine mammal health.}, number={1}, journal={Nature Communications}, publisher={Springer Science and Business Media LLC}, author={Bik, Elisabeth M. and Costello, Elizabeth K. and Switzer, Alexandra D. and Callahan, Benjamin J. and Holmes, Susan P. and Wells, Randall S. and Carlin, Kevin P. and Jensen, Eric D. 
and Venn-Watson, Stephanie and Relman, David A.}, year={2016}, month=feb, pages={10516} } @inproceedings{callahan_proctor_relman_fukuyama_holmes_2016, title={Reproducible research workflow in {R} for the analysis of personalized human microbiome data}, volume={21}, booktitle={Pacific Symposium on Biocomputing}, author={Callahan, Benjamin J. and Proctor, Diana M. and Relman, David A. and Fukuyama, Julia A. and Holmes, Susan P.}, year={2016}, pages={183--194} } @article{digiulio_callahan_mcmurdie_costello_lyell_robaczewska_sun_goltsman_wong_shaw_etal_2015, title={Temporal and spatial variation of the human microbiota during pregnancy}, volume={112}, ISSN={0027-8424 1091-6490}, url={http://dx.doi.org/10.1073/pnas.1502875112}, DOI={10.1073/pnas.1502875112}, abstractNote={Significance The human indigenous microbial communities (microbiota) play critical roles in health and may be especially important for mother and fetus during pregnancy. Using a case-control cohort of 40 women, we characterized weekly variation in the vaginal, gut, and oral microbiota during and after pregnancy. Microbiota membership remained relatively stable at each body site during pregnancy. An altered vaginal microbial community was associated with preterm birth; this finding was corroborated by an analysis of samples from an additional cohort of nine women. We also discovered an abrupt change in the vaginal microbiota at delivery that persisted in some cases for at least 1 y. 
Our findings suggest that pregnancy outcomes might be predicted by features of the microbiota early in gestation.}, number={35}, journal={Proceedings of the National Academy of Sciences}, publisher={Proceedings of the National Academy of Sciences}, author={DiGiulio, Daniel B. and Callahan, Benjamin J. and McMurdie, Paul J. and Costello, Elizabeth K. and Lyell, Deirdre J. and Robaczewska, Anna and Sun, Christine L. and Goltsman, Daniela S. A. and Wong, Ronald J. and Shaw, Gary and others}, year={2015}, month=aug, pages={11060--11065} } @article{callahan_fukami_fisher_2014, title={Rapid evolution of adaptive niche construction in experimental microbial populations}, volume={68}, ISSN={0014-3820}, url={http://dx.doi.org/10.1111/evo.12512}, DOI={10.1111/evo.12512}, abstractNote={Many species engage in adaptive niche construction: modification of the local environment that increases the modifying organism's competitive fitness. Adaptive niche construction provides an alternative pathway to higher fitness, shaping the environment rather than conforming to it. Yet, experimental evidence for the evolutionary emergence of adaptive niche construction is lacking, leaving its role in evolution uncertain. Here we report a direct observation of the de novo evolution of adaptive niche construction in populations of the bacteria Pseudomonas fluorescens. 
In a laboratory experiment, we allowed several bacterial populations to adapt to a novel environment and assessed whether niche construction evolved over time. We found that adaptive niche construction emerged rapidly, within approximately 100 generations, and became ubiquitous after approximately 400 generations. The large fitness effect of this niche construction was dominated by the low fitness of evolved strains in the ancestrally modified environment: evolved niche constructors were highly dependent on their specific environmental modifications. Populations were subjected to frequent resetting of environmental conditions and severe reduction of spatial habitat structure, both of which are thought to make adaptive niche construction difficult to evolve. Our finding that adaptive niche construction nevertheless evolved repeatably suggests that it may play a more important role in evolution than generally thought.}, number={11}, journal={Evolution}, publisher={Wiley}, author={Callahan, Benjamin J. and Fukami, Tadashi and Fisher, Daniel S.}, year={2014}, month=sep, pages={3307--3316} } @article{walker_callahan_arya_barry_bhattacharya_grigoryev_pellegrini_rippe_rosenberg_2013, title={Evolutionary dynamics and information hierarchies in biological systems}, volume={1305}, ISSN={0077-8923}, url={http://dx.doi.org/10.1111/nyas.12140}, DOI={10.1111/nyas.12140}, abstractNote={The study of evolution has entered a revolutionary new era, where quantitative and predictive methods are transforming the traditionally qualitative and retrospective approaches of the past. Genomic sequencing and modern computational techniques are permitting quantitative comparisons between variation in the natural world and predictions rooted in neo‐Darwinian theory, revealing the shortcomings of current evolutionary theory, particularly with regard to large‐scale phenomena like macroevolution. 
Current research spanning and uniting diverse fields and exploring the physical and chemical nature of organisms across temporal, spatial, and organizational scales is replacing the model of evolution as a passive filter selecting for random changes at the nucleotide level with a paradigm in which evolution is a dynamic process both constrained and driven by the informational architecture of organisms across scales, from DNA and chromatin regulation to interactions within and between species and the environment.}, number={1}, journal={Annals of the New York Academy of Sciences}, publisher={Wiley}, author={Walker, Sara Imari and Callahan, Benjamin J. and Arya, Gaurav and Barry, J. David and Bhattacharya, Tanmoy and Grigoryev, Sergei and Pellegrini, Matteo and Rippe, Karsten and Rosenberg, Susan M.}, year={2013}, month=may, pages={1--17} } @article{rosen_callahan_fisher_holmes_2012, title={Denoising {PCR}-amplified metagenome data}, volume={13}, number={1}, journal={BMC Bioinformatics}, publisher={BioMed Central}, author={Rosen, Michael J. and Callahan, Benjamin J. and Fisher, Daniel S. and Holmes, Susan P.}, year={2012}, pages={283} } @article{callahan_2012, title={The length scale of selection in protein evolution}, volume={6}, ISSN={1933-6934 1933-6942}, url={http://dx.doi.org/10.4161/fly.18305}, DOI={10.4161/fly.18305}, abstractNote={Central to the study of molecular evolution, and an area of long-standing debate, is the appropriate model for the fitness landscape of proteins. Much of this debate has focused on the strength and frequency of positive and purifying selection, but the form and frequency of selective correlations is also a vital element. The constituent amino acids within a protein generically interact and share selective pressures in predictable ways, which conflicts with the selective independence assumed by common caricatures of the fitness landscape. 
Here, I discuss a recent study by myself and coauthors1 that used whole-genome comparisons of orthologous molecular sequences from closely related Drosophilids to explore the form of the selective correlations and selective interactions (epistasis) between the amino acids within a protein. I outline our results and highlight our finding of a selective length scale of ten amino acids within which individual amino acids are substantially and generically more likely to share selective pressures and interact epistatically. I then focus on the evidence presented in our study supporting a substantial role for epistasis in the process of molecular evolution, and discuss further the implications of this widespread epistasis on the overdispersion of the molecular clock and the efficacy of common tests for positive selection.}, number={1}, journal={Fly}, publisher={Informa UK Limited}, author={Callahan, Benjamin J.}, year={2012}, month=jan, pages={16--20} } @article{callahan_neher_bachtrog_andolfatto_shraiman_2011, title={Correlated Evolution of Nearby Residues in Drosophilid Proteins}, volume={7}, ISSN={1553-7404}, url={http://dx.doi.org/10.1371/journal.pgen.1001315}, DOI={10.1371/journal.pgen.1001315}, abstractNote={Here we investigate the correlations between coding sequence substitutions as a function of their separation along the protein sequence. We consider both substitutions between the reference genomes of several Drosophilids as well as polymorphisms in a population sample of Zimbabwean Drosophila melanogaster. We find that amino acid substitutions are “clustered” along the protein sequence, that is, the frequency of additional substitutions is strongly enhanced within ≈10 residues of a first such substitution. No such clustering is observed for synonymous substitutions, supporting a “correlation length” associated with selection on proteins as the causative mechanism. 
Clustering is stronger between substitutions that arose in the same lineage than it is between substitutions that arose in different lineages. We consider several possible origins of clustering, concluding that epistasis (interactions between amino acids within a protein that affect function) and positional heterogeneity in the strength of purifying selection are primarily responsible. The role of epistasis is directly supported by the tendency of nearby substitutions that arose on the same lineage to preserve the total charge of the residues within the correlation length and by the preferential cosegregation of neighboring derived alleles in our population sample. We interpret the observed length scale of clustering as a statistical reflection of the functional locality (or modularity) of proteins: amino acids that are near each other on the protein backbone are more likely to contribute to, and collaborate toward, a common subfunction.}, number={2}, journal={PLoS Genetics}, publisher={Public Library of Science (PLoS)}, author={Callahan, Benjamin and Neher, Richard A. and Bachtrog, Doris and Andolfatto, Peter and Shraiman, Boris I.}, editor={McVean, Gil}, year={2011}, month=feb, pages={e1001315} } @article{sellis_callahan_petrov_messer_2011, title={Heterozygote advantage as a natural consequence of adaptation in diploids}, volume={108}, ISSN={0027-8424 1091-6490}, url={http://dx.doi.org/10.1073/pnas.1114573108}, DOI={10.1073/pnas.1114573108}, abstractNote={Molecular adaptation is typically assumed to proceed by sequential fixation of beneficial mutations. In diploids, this picture presupposes that for most adaptive mutations, the homozygotes have a higher fitness than the heterozygotes. Here, we show that contrary to this expectation, a substantial proportion of adaptive mutations should display heterozygote advantage. 
This feature of adaptation in diploids emerges naturally from the primary importance of the fitness of heterozygotes for the invasion of new adaptive mutations. We formalize this result in the framework of Fisher's influential geometric model of adaptation. We find that in diploids, adaptation should often proceed through a succession of short-lived balanced states that maintain substantially higher levels of phenotypic and fitness variation in the population compared with classic adaptive walks. In fast-changing environments, this variation produces a diversity advantage that allows diploids to remain better adapted compared with haploids despite the disadvantage associated with the presence of unfit homozygotes. The short-lived balanced states arising during adaptive walks should be mostly invisible to current scans for long-term balancing selection. Instead, they should leave signatures of incomplete selective sweeps, which do appear to be common in many species. Our results also raise the possibility that balancing selection, as a natural consequence of frequent adaptation, might play a more prominent role among the forces maintaining genetic variation than is commonly recognized.}, number={51}, journal={Proceedings of the National Academy of Sciences}, publisher={Proceedings of the National Academy of Sciences}, author={Sellis, D. and Callahan, B. J. and Petrov, D. A. and Messer, P. W.}, year={2011}, month={Dec}, pages={20666–20671} } @article{callahan_thattai_shraiman_2009, title={Emergent gene order in a model of modular polyketide synthases}, volume={106}, ISSN={0027-8424 1091-6490}, url={http://dx.doi.org/10.1073/pnas.0902364106}, DOI={10.1073/pnas.0902364106}, abstractNote={Polyketides are a class of biologically active heteropolymers produced by assembly line-like multiprotein complexes of modular polyketide synthases (PKS). 
The polyketide product is encoded in the order of the PKS proteins in the assembly line, suggesting that polyketide diversity derives from combinatorial rearrangement of these PKS complexes. Remarkably, the order of PKS genes on the chromosome follows the order of PKS proteins in the assembly line: This fact is commonly referred to as “collinearity”. Here we propose an evolutionary origin for collinearity and demonstrate the mechanism by using a computational model of PKS evolution in a population. Assuming continuous evolutionary pressure for novel polyketides, and that new polyketide pathways are formed by horizontal transfer/recombination of PKS-encoding DNA, we demonstrate the existence of a broad range of parameters for which collinearity emerges spontaneously. Collinearity confers no fitness advantage in our model; it is established and maintained through a “secondary selection” mechanism, as a trait which increases the probability of forming long, novel PKS complexes through recombination. Consequently, collinearity hitchhikes on the successful genotypes which periodically sweep through the evolving population. In addition to computer simulation of a simplified model of PKS evolution, we provide a mathematical framework describing the secondary selection mechanism, which generalizes beyond the context of the present model.}, number={46}, journal={Proceedings of the National Academy of Sciences}, publisher={Proceedings of the National Academy of Sciences}, author={Callahan, B. and Thattai, M. and Shraiman, B. 
I.}, year={2009}, month={Oct}, pages={19410–19415} } @phdthesis{callahan_2009, title={Evolution on an interacting fitness landscape: The effects of the interplay of epistasis and recombination on genetic structure}, school={University of California, Santa Barbara}, author={Callahan, Benjamin J}, year={2009} } @article{cheung_finke_callahan_onuchic_2003, title={Exploring the Interplay between Topology and Secondary Structural Formation in the Protein Folding Problem}, volume={107}, ISSN={1520-6106 1520-5207}, url={http://dx.doi.org/10.1021/jp034441r}, DOI={10.1021/jp034441r}, abstractNote={models) have been successful in providing a qualitativeunderstanding of the folding mechanism of small globular proteins. Can we go beyond this qualitativeunderstanding and make more detailed quantitative connections to experiments? To achieve this goal, a tractableframework of protein representations whose complexity falls between C}, number={40}, journal={The Journal of Physical Chemistry B}, publisher={American Chemical Society (ACS)}, author={Cheung, Margaret S. and Finke, John M. and Callahan, Benjamin and Onuchic, José N.}, year={2003}, month={Oct}, pages={11193–11200} }