% Journal article (Ecological Monographs, 2025). The export lists only the first ten authors;
% "and others" marks the truncation so the style can render "et al." itself.
% NOTE(review): the exported citation key contained a space ("et al."), which BibTeX cannot parse;
% it is now "..._et_al_2025" -- update any citation that used the old spelling.
@article{phelps_razanatsoa_davis_hackel_rasolondrainy_tiley_burney_cox_godfrey_hempson_et_al_2025,
  author       = {Phelps, Leanne N. and Razanatsoa, Estelle and Davis, Dylan S. and Hackel, Jan and Rasolondrainy, Tanambelo and Tiley, George P. and Burney, David and Cox, Ronadh and Godfrey, Laurie and Hempson, Gareth P. and others},
  title        = {Advancing transdisciplinary research on {Madagascar's} grassy biomes to support resilience in ecosystems and livelihoods},
  journal      = {Ecological Monographs},
  year         = {2025},
  month        = may,
  doi          = {10.1002/ecm.70011},
  url          = {https://doi.org/10.1002/ecm.70011},
  abstractNote = {Abstract Grassy biomes (savanna and grasslands) are globally extensive and host a unique biodiversity that is of central importance to human livelihoods. We focus here on the island of Madagascar—a microcosm of the global tropics, covered in 80% grassy biomes—to illustrate how transdisciplinary approaches to research can clarify ecosystem dynamics, from evolutionary history to human land use. Research on Madagascar's human‐environment interactions has sparked debates about the role of past and current land use in shaping grassy biomes (e.g., pastoralism, cultivation, fire use). These debates echo those in other regions globally, and highlight obstacles to understanding and supporting both ecosystem and livelihood resilience. Like many tropical biodiversity hotspots, Madagascar faces converging challenges that can be aided by transdisciplinary research, including food and health insecurity, economic inequities, biodiversity loss, climate change, land conversion, and limited resource access. We present a framework to guide transdisciplinary research centered on improved understanding and management of grassy biomes on Madagascar by: (1) establishing a globally common terminology; (2) summarizing data contributions and scientific knowledge gaps relating to Madagascar's grassy biomes; (3) identifying priority research questions for Madagascar with applicability in other regions; and (4) highlighting transdisciplinary, inclusive approaches to research that can co‐benefit people and the ecosystems with which they interact.},
}
% Journal article (Evolutionary Journal of the Linnean Society, 2025).
@article{tiley_liu_solís-lemus_2025,
  author       = {Tiley, George P. and Liu, Nan and Solís-Lemus, Claudia},
  title        = {Extracting diamonds: identifiability of 4-node cycles in level-1 phylogenetic networks},
  journal      = {Evolutionary Journal of the Linnean Society},
  year         = {2025},
  month        = jan,
  doi          = {10.1093/evolinnean/kzaf019},
  url          = {https://doi.org/10.1093/evolinnean/kzaf019},
  abstractNote = {Abstract Phylogenetic networks encode a broader picture of evolution by the inclusion of reticulate processes such as hybridization, introgression, or horizontal gene transfer. Each hybridization event is represented by a ‘hybridization cycle’. Here, we investigate the statistical identifiability of the position of the hybrid node in a 4-node hybridization cycle in a semi-directed level-1 phylogenetic network. That is, we investigate if our model is able to detect the correct placement of the hybrid node in the hybridization cycle using quartet concordance factors as data. In the current study, we prove that the correct placement of the hybrid node in 4-node hybridization cycles, included in level-1 phylogenetic networks, is generically identifiable if the assumptions are non-restrictive such as t∈(0,∞) for all branch (or edge) lengths and γ∈(0,1) for the inheritance probability of the hybrid edges. However, simulations show that accurate detection of these cycles can be complicated by inadequate sampling, small sample size, or gene tree estimation error. We identify practical advice for evolutionary biologists on best sampling strategies to improve the detection of this type of hybridization cycle.},
}
% Journal article (Plants People Planet, 2025). Proper nouns in the title are brace-protected
% so sentence-casing styles do not lowercase them.
@article{hackel_davis_rasolondrainy_razanatsoa_tiley_phelps_2025,
  author  = {Hackel, Jan and Davis, Dylan S. and Rasolondrainy, Tanambelo and Razanatsoa, Estelle and Tiley, George P. and Phelps, Leanne N.},
  title   = {{Madagascar's} grassy biomes, from {Holocene} to {Anthropocene}},
  journal = {Plants People Planet},
  year    = {2025},
  month   = jun,
  doi     = {10.1002/ppp3.70062},
  url     = {https://doi.org/10.1002/ppp3.70062},
}
% Journal article (Proceedings of the National Academy of Sciences, 2025).
@article{kong_solís-lemus_tiley_2025,
  author       = {Kong, Sungsik and Solís-Lemus, Claudia and Tiley, George P.},
  title        = {Phylogenetic networks empower biodiversity research},
  journal      = {Proceedings of the National Academy of Sciences},
  year         = {2025},
  month        = jul,
  doi          = {10.1073/pnas.2410934122},
  url          = {https://doi.org/10.1073/pnas.2410934122},
  abstractNote = {Reticulate evolution has long been recognized as a key mechanism that contributes to genetic and trait diversity. With the widespread availability of genomic data, investigating historical reticulate evolution across taxa has gained significant attention, driven by the rapid development of statistical methods for detecting nontreelike patterns. Phylogenetic networks provide a biologically intuitive approach to depicting evolutionary processes such as hybrid speciation and introgressive hybridization, which result in signatures of historical gene flow. Interpreting phylogenetic networks is especially critical for groups of conservation concern that lack reference genome resources and explicit hypotheses from prior investigation, such as those based on molecular data, morphology, or species distributions. Here, we highlight recent advances in computational methods for inferring networks from genome-scale data and offer guidelines for deriving biological insights from phylogenetic networks. Particular emphasis is placed on modeling hybridization and whole-genome duplication in the context of allopolyploidization. Practical recommendations for empirical studies and the limitations of commonly used methods are discussed throughout. We anticipate that phylogenetic networks will influence conservation biology and biodiversity research, emphasizing the need for careful consideration of reticulate evolution inferred from these networks in the near future. Networks will accelerate other pressing avenues of biodiversity research, especially investigations of orphan crops and climate change resilience in natural systems. The promise of phylogenetic networks connects with broader themes in the special feature Monitoring and restoring gene flow in the increasingly fragmented ecosystems of the Anthropocene by providing an emerging probabilistic framework for inferring historical connectivity between species and populations.},
}
% Journal article (Systematic Biology, 2024). The exported editor value had the role label
% "Editor" fused onto the given name ("StaceyEditor"); it is removed here.
@article{tiley_crowl_manos_sessa_solís-lemus_yoder_burleigh_2024,
  author       = {Tiley, George P. and Crowl, Andrew A. and Manos, Paul S. and Sessa, Emily B. and Solís-Lemus, Claudia and Yoder, Anne D. and Burleigh, J. Gordon},
  title        = {Benefits and Limits of Phasing Alleles for Network Inference of Allopolyploid Complexes},
  journal      = {Systematic Biology},
  year         = {2024},
  month        = may,
  editor       = {Smith, Stacey},
  doi          = {10.1093/sysbio/syae024},
  url          = {https://doi.org/10.1093/sysbio/syae024},
  abstractNote = {Abstract Accurately reconstructing the reticulate histories of polyploids remains a central challenge for understanding plant evolution. Although phylogenetic networks can provide insights into relationships among polyploid lineages, inferring networks may be hindered by the complexities of homology determination in polyploid taxa. We use simulations to show that phasing alleles from allopolyploid individuals can improve phylogenetic network inference under the multispecies coalescent by obtaining the true network with fewer loci compared with haplotype consensus sequences or sequences with heterozygous bases represented as ambiguity codes. Phased allelic data can also improve divergence time estimates for networks, which is helpful for evaluating allopolyploid speciation hypotheses and proposing mechanisms of speciation. To achieve these outcomes in empirical data, we present a novel pipeline that leverages a recently developed phasing algorithm to reliably phase alleles from polyploids. This pipeline is especially appropriate for target enrichment data, where the depth of coverage is typically high enough to phase entire loci. We provide an empirical example in the North American Dryopteris fern complex that demonstrates insights from phased data as well as the challenges of network inference. We establish that our pipeline (PATÉ: Phased Alleles from Target Enrichment data) is capable of recovering a high proportion of phased loci from both diploids and polyploids. These data may improve network estimates compared with using haplotype consensus assemblies by accurately inferring the direction of gene flow, but statistical nonidentifiability of phylogenetic networks poses a barrier to inferring the evolutionary history of reticulate complexes.},
}
% Journal article (Nature Ecology and Evolution, 2024). The export lists only the first ten authors;
% "and others" marks the truncation. The ampersand in the journal name is escaped for LaTeX.
% NOTE(review): the exported citation key contained a space ("et al."), which BibTeX cannot parse;
% it is now "..._et_al_2024" -- update any citation that used the old spelling.
@article{elst_sgarlata_schüßler_tiley_poelstra_scheumann_blanco_aleixo-pais_evasoa_ganzhorn_et_al_2024,
  author       = {Elst, Tobias and Sgarlata, Gabriele M. and Schüßler, Dominik and Tiley, George P. and Poelstra, Jelmer W. and Scheumann, Marina and Blanco, Marina B. and Aleixo-Pais, Isa G. and Evasoa, Mamy Rina and Ganzhorn, Jörg U. and others},
  title        = {Integrative taxonomy clarifies the evolution of a cryptic primate clade},
  journal      = {Nature Ecology \& Evolution},
  year         = {2024},
  month        = sep,
  doi          = {10.1038/s41559-024-02547-w},
  url          = {https://doi.org/10.1038/s41559-024-02547-w},
  abstractNote = {Global biodiversity is under accelerating threats, and species are succumbing to extinction before being described. Madagascar's biota represents an extreme example of this scenario, with the added complication that much of its endemic biodiversity is cryptic. Here we illustrate best practices for clarifying cryptic diversification processes by presenting an integrative framework that leverages multiple lines of evidence and taxon-informed cut-offs for species delimitation, while placing special emphasis on identifying patterns of isolation by distance. We systematically apply this framework to an entire taxonomically controversial primate clade, the mouse lemurs (genus Microcebus, family Cheirogaleidae). We demonstrate that species diversity has been overestimated primarily due to the interpretation of geographic variation as speciation, potentially biasing inference of the underlying processes of evolutionary diversification. Following a revised classification, we find that crypsis within the genus is best explained by a model of morphological stasis imposed by stabilizing selection and a neutral process of niche diversification. Finally, by clarifying species limits and defining evolutionarily significant units, we provide new conservation priorities, bridging fundamental and applied objectives in a generalizable framework.},
}
% Preprint. The export carried no journal, which the article type requires.
% NOTE(review): journal filled in as bioRxiv because the DOI has the same 10.1101/YYYY.MM.DD.NNNNNN
% form as the other bioRxiv entries in this file -- confirm against the DOI landing page.
@article{sutherland_tiley_li_mckibben_barker_2024,
  author       = {Sutherland, Brittany L. and Tiley, George P. and Li, Zheng and McKibben, Michael T. W. and Barker, Michael S.},
  title        = {{SLEDGe}: Inference of ancient whole genome duplications using machine learning},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  year         = {2024},
  month        = jan,
  doi          = {10.1101/2024.01.17.574559},
  url          = {https://doi.org/10.1101/2024.01.17.574559},
  abstractNote = {Abstract Ancient whole-genome duplication--previous genome duplication events that have since been eroded via diploidization, are increasingly identified throughout eukaryotes. One of the constraints against large-scale studies of ancient eukaryotic WGD is the relatively large, high-quality datasets often needed to definitively establish ancient WGD events; alternatively, the more low-input method interpretation of genome-wide synonymous substitution rates (Ks plots) is prone to bias and inconsistency. We improve upon the shortcomings of the current Ks plot method by building a Ks plot simulator. This data-agnostic approach simulates common distributions found in Ks plots in the presence or absence of ancient WGD signatures. In conjunction with a machine-learning classifier, this approach can quickly assess the likelihood that transcriptomic and genomic data bear WGD signatures. On independently-generated synthetic data and real plant transcriptomic data, SLEDGE is capable of correctly identifying ancient WGD in 93-100% of samples. This approach can serve as a quick classification step in large-scale genomic analyses, identifying putative ancient polyploids for further study.},
}
% Journal article (Systematic Biology, 2023). The exported editor value had the role label
% "Editor" fused onto the given name ("BryanEditor"); it is removed here.
@article{tiley_flouri_jiao_poelstra_xu_zhu_rannala_yoder_yang_2023,
  author       = {Tiley, George P. and Flouri, Tomáš and Jiao, Xiyun and Poelstra, Jelmer W. and Xu, Bo and Zhu, Tianqi and Rannala, Bruce and Yoder, Anne D. and Yang, Ziheng},
  title        = {Estimation of species divergence times in presence of cross-species gene flow},
  journal      = {Systematic Biology},
  year         = {2023},
  month        = mar,
  editor       = {Carstens, Bryan},
  doi          = {10.1093/sysbio/syad015},
  url          = {https://doi.org/10.1093/sysbio/syad015},
  abstractNote = {Abstract Cross-species introgression can have significant impacts on phylogenomic reconstruction of species divergence events. Here, we used simulations to show how the presence of even a small amount of introgression can bias divergence time estimates when gene flow is ignored in the analysis. Using advances in analytical methods under the multispecies coalescent (MSC) model, we demonstrate that by accounting for incomplete lineage sorting and introgression using large phylogenomic data sets this problem can be avoided. The multispecies-coalescent-with-introgression (MSci) model is capable of accurately estimating both divergence times and ancestral effective population sizes, even when only a single diploid individual per species is sampled. We characterize some general expectations for biases in divergence time estimation under three different scenarios: 1) introgression between sister species, 2) introgression between non-sister species, and 3) introgression from an unsampled (i.e., ghost) outgroup lineage. We also conducted simulations under the isolation-with-migration (IM) model and found that the MSci model assuming episodic gene flow was able to accurately estimate species divergence times despite high levels of continuous gene flow. We estimated divergence times under the MSC and MSci models from two published empirical datasets with previous evidence of introgression, one of 372 target-enrichment loci from baobabs (Adansonia), and another of 1000 transcriptome loci from 14 species of the tomato relative, Jaltomata. The empirical analyses not only confirm our findings from simulations, demonstrating that the MSci model can reliably estimate divergence times but also show that divergence time estimation under the MSC can be robust to the presence of small amounts of introgression in empirical datasets with extensive taxon sampling. [divergence time; gene flow; hybridization; introgression; MSci model; multispecies coalescent]},
}
% bioRxiv preprint (2023). The exported url pointed at a publons.com record; it now uses the
% DOI resolver, matching the other entries in this file.
@article{tiley_liu_solís-lemus_2023,
  author       = {Tiley, George and Liu, Nan and Solís-Lemus, Claudia},
  title        = {Extracting diamonds: Identifiability of 4-node cycles in level-1 phylogenetic networks under a pseudolikelihood coalescent model},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  year         = {2023},
  month        = oct,
  doi          = {10.1101/2023.10.25.564087},
  url          = {https://doi.org/10.1101/2023.10.25.564087},
  abstractNote = {Abstract Phylogenetic networks encode a broader picture of evolution by the inclusion of reticulate processes such as hybridization, introgression or horizontal gene transfer. Each reticulation event is represented by a “hybridization cycle”. Here, we investigate the statistical identifiability of the position of the hybrid node in a 4-node hybridization cycle in a semi-directed level-1 phylogenetic network. That is, we investigate if our model is able to detect the correct placement of the hybrid node in the hybridization cycle from concordance factors as data. While generic identifiability is easily attained under non-restrictive assumptions such as t ∈ (0, ∞) for all branches and γ ∈ (0, 1) for the inheritance probability of the hybrid edges, simulations show that accurate detection of these cycles can be complicated by inadequate sampling, small sample size or gene tree estimation error. We identify practical advice for evolutionary biologists on best sampling strategies to improve the detection of this type of hybridization cycle.},
}
% Journal article (Plants People Planet, 2023). Genus and country names in the title are
% brace-protected so sentence-casing styles do not lowercase them.
@article{tiley_crowl_almary_luke_solofondranohatra_besnard_lehmann_yoder_vorontsova_2023,
  author       = {Tiley, George P. and Crowl, Andrew A. and Almary, Tchana O. M. and Luke, W. R. Quentin and Solofondranohatra, Cédrique L. and Besnard, Guillaume and Lehmann, Caroline E. R. and Yoder, Anne D. and Vorontsova, Maria S.},
  title        = {Genetic variation in {Loudetia} simplex supports the presence of ancient grasslands in {Madagascar}},
  journal      = {Plants People Planet},
  year         = {2023},
  month        = sep,
  doi          = {10.1002/ppp3.10437},
  url          = {https://doi.org/10.1002/ppp3.10437},
  abstractNote = {Societal Impact Statement Recognizing Loudetia ‐dominated grasslands were widespread prior to human colonization highlights that open ecosystems were and continue to be an important component of Madagascar's biodiversity. A better understanding of the plant species that form grassland ecosystems is necessary for effective land management strategies that support livelihoods, but substantial financial and logistical barriers exist to implementing conservation genetic studies using contemporary genomic tools. Some challenges for population genetic analyses of non‐model polyploids lacking reference genomes can be ameliorated by developing computational resources that leverage a cost‐effective data generation strategy that requires no prior genetic knowledge of the target species. This may benefit conservation programs with small operating budgets while reducing uncertainty compared to status quo microsatellite assays. Summary The extent of Madagascar's grasslands prior to human colonization is unresolved. We used population genetic analyses of a broadly dominant C 4 fire‐adapted grass, Loudetia simplex , as a proxy for estimating grassland change through time. We carefully examined the utility of target‐enrichment data for population genetics to make recommendations for conservation genetics. We explored the potential of estimating individual ploidy levels from target‐enrichment data and how assumptions about ploidy could affect analyses. We developed a novel bioinformatic pipeline to estimate ploidy and genotypes from target‐enrichment data. We estimated standard population genetic summary statistics in addition to species trees and population structure. Extended Bayesian skyline plots provided estimates of population size through time for empirical and simulated data. All Malagasy L. simplex individuals sampled in this study formed a clade and possibly indicated an ancestral Central Highland distribution of 800 m in altitude and above. Demographic models suggested grassland expansions occurred prior to the Last Interglacial Period and supported extensive grasslands prior to human colonization. Though there are limitations to target‐enrichment data for population genetic studies, we find that analyses of population structure are reliable. Genetic variation in L. simplex supports widespread grasslands in Madagascar prior to the more recent periods of notable paleoclimatic change. However, the methods explored here could not differentiate between paleoclimatic change near the Last Glacial Maximum and anthropogenic effects. Target‐enrichment data can be a valuable tool for analyses of population structure in the absence a reference genome.},
}
% bioRxiv preprint (April 2023) with the same authors and title as the Plants People Planet
% article in this file. The export gave both entries the same citation key, so BibTeX rejected
% this one as a repeated entry; the key now carries a "b" suffix.
% The title had lost the spaces around the species name ("inLoudetia simplexsupports").
% NOTE(review): abstractNote is cut off mid-word in the export ("... ny zavam"); it is kept as exported.
@article{tiley_crowl_almary_luke_solofondranohatra_besnard_lehmann_yoder_vorontsova_2023b,
  author       = {Tiley, George P. and Crowl, Andrew A. and Almary, Tchana O. M. and Luke, W. R. Quentin and Solofondranohatra, Cédrique L. and Besnard, Guillaume and Lehmann, Caroline E. R. and Yoder, Anne D. and Vorontsova, Maria S.},
  title        = {Genetic variation in {Loudetia} simplex supports the presence of ancient grasslands in {Madagascar}},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  year         = {2023},
  month        = apr,
  doi          = {10.1101/2023.04.07.536094},
  url          = {https://doi.org/10.1101/2023.04.07.536094},
  abstractNote = {Summary Research Aims — The extent of Madagascar’s grasslands prior to human colonization is unresolved. We used population genetic analyses of a broadly dominant C 4 fire-adapted grass, Loudetia simplex , as a proxy for estimating grassland change through time. We carefully examined the utility of target-enrichment data for population genetics to make recommendations for conservation genetics. We explored the potential of estimating individual ploidy levels from target-enrichment data and how assumptions about ploidy could affect analyses. Methods — We developed a novel bioinformatic pipeline to estimate ploidy and genotypes from target-enrichment data. We estimated standard population genetic summary statistics in addition to species trees and population structure. Extended Bayesian skyline plots provided estimates of population size through time for empirical and simulated data. Key Result — All Malagasy Loudetia simplex individuals sampled in this study formed a clade and possibly indicated an ancestral Central Highland distribution of 800m in altitude and above. Demographic models suggested grassland expansions occurred prior to the Last Interglacial Period and supported extensive grasslands prior to human colonization. Though there are limitations to target-enrichment data for population genetic studies, we find that analyses of population structure are reliable. Key Point —Genetic variation in Loudetia simplex supports widespread grasslands in Madagascar prior to the more recent periods of notable paleoclimatic change. However, the methods explored here could not differentiate between paleoclimatic change near the Last Glacial Maximum and anthropogenic effects. Target-enrichment data can be a valuable tool for analyses of population structure in the absence a reference genome. Societal Impact Statement Recognizing Loudetia dominated grasslands were widespread prior to human colonization highlights that open ecosystems were and continue to be an important component of Madagascar’s biodiversity. Urgently required are biodiversity inventories and integrative taxonomic treatments of grassland flora and fauna to asses risks to understudied ecosystems historically regarded as wastelands. Substantial financial and logistical barriers exist to implementing conservation studies using contemporary genomic tools. We ameliorated some of the challenges for population genetic analyses of non-model polyploids lacking reference genomes by developing computational resources to leverage a cost-effective data generation strategy that requires no prior genetic knowledge of the target species. Résumé Les objectifs de la recherche — L’étendue des écosystèmes ouverts de Madagascar avant la colonisation humaine reste à éclaircir. Nous avons utilisé une analyse de la population génétique d’une graminée C 4 adaptée au feu, largement dominante, Loudetia simplex, comme référence pour estimer les changements au niveau de ces biomes au fil du temps. Nous avons examiné attentivement l’utilité des données d’enrichissement ciblé pour la génétique de population afin de formuler des recommandations pour la conservation génétique. Nous avons exploré le potentiel de l’estimation du niveau des ploidies individuelles à partir des données d’enrichissement ciblé et comment les hypothèses à propos de ces ploidies pourraient affecter les analyses. Les méthodes — Nous avons développé un nouveau canal bioinformatique pour estimer les ploidies et les génotypes à partir des données d’enrichissement ciblé. Nous avons estimé les statistiques standard de la population génétique, en plus des arbres des espèces et de la structure de la population. L’utilisation des tracés étendus du ciel bayésien a fourni une estimation de la taille de la population au fil du temps pour des données empiriques et simulées. Résultat clé — Tous les individus Malagasy de Loudetia simplex échantillonnés dans cette étude ont formé un clade, indiquant une éventuelle ancienne distribution dans les hauts plateaux. Les modèles démographiques suggèrent une expansion des prairies bien avant la dernière période interglaciaire et soutiennent l’existence d’une vaste distribution avant la colonisation humaine. Bien qu’il y ait des limites à l’enrichissement des données cibles pour l’étude de la génétique des populations, nous constatons que l’analyse des structures des populations est fiable. Les points clés — La variation génétique de Loudetia simplex soutient l’existence de vastes prairies à Madagascar avant les périodes plus récentes de changements paléoclimatiques notables. Cependant, les méthodes explorées ici n’ont pas permis de faire la différence entre les changements paléoclimatiques près du dernier maximum glaciaire et les effets anthropogènes. Les données d’enrichissement ciblé peuvent être un outil précieux pour les analyses de la structure des populations en l’absence d’un génome de référence. Déclaration d’impact societal Reconnaître que les prairies dominées par Loudetia étaient répandues avant la colonisation humaine souligne que les écosystèmes ouverts étaient et continuent d’être un composant important de la biodiversité de Madagascar. Il est urgent de réaliser des inventaires de la biodiversité et une taxonomie intégrée pour le traitement de la flore et de la faune des écosystèmes ouverts afin d’évaluer les risques pour les écosystèmes sous-étudiés considérés historiquement comme des terres en friches. Des barrières financières et logistiques existent pour mettre en œuvre l’étude de la conservation en utilisant les outils génomiques contemporains. Nous avons amélioré certains des défis liés aux analyses génétiques de populations de polyploïdes non modèles, sans génomes de référence, en développant des ressources informatiques pour exploiter une stratégie pouvant générer des données rentables ne nécessitant aucune connaissance génétique préalable de l’espèce cible. Famintinana Ny tanjon’ny fikarohana — Mbola tsy fantatra mazava tsara ny fivelaran’ny hivoka teto Madagasikara talohan’ny fahatongava’ny olombelona. Mba ahafantarana ny fihovana nitranga nandritra ny fotoana naharitra teo amin’ireo hivoka ireo dia nanao famakafakahana ara-genetika amin’ny ahitra C 4 miompana amin’ny afo iray antsoina Loudetia simplex ara-tsiantifika na Berambo na Hara amin’ny teny malagasy izahay. Nandinika tsara ny maha-zava-dehibe ny fampitomboana ny antotan-kevitra mba ahafahana manolo-kevitra momba ny fiarovana ny fototarazo genetika. Nandinika ny mety mampiavaka ny fanombanana an’ny ploidy tsirairay amin’ny fampitomboana antotan-kevitra sy ny mety ho fiantraikan’ny fiheverana momba ireo ploidy ireo amin’ny fikarohana. Fomba Fiasa — Namorona fantsona bioinformatika vaovao mba ahafahana manombana ny ploidy sy ny « genotypes » avy amin’ny antotan-kevitra nokendrena izahay. Notombanana ny antontan’isa famintinana ny fototarazo ara-genetikan’ireo vondron’ahitra ireo, miampy ny karazana hazo sy ny firafitry ny vondrona na koa hoe mponina. Nanome tombantombana ny haben’ny mponina amin’ny alàlan’ny fotoana ny antontan-kevitra voavinavina azo tamin’ny fikarohana. Fikarohana izay azo tamin’ny alalan’ny « Bayesina Skuline Plots ». Vokam-pikarohana fototra — Ny vondrona Loudetia simplex eto Madagasikara izay niasana dia namorona « clade » na fikambanana iray, izay manondro ny mety maha ela netezana sy tranainy an’io ahitra io eny amin’ny faritra avo. Ny modely demografika dia manoro hevitra amin’ny naha be velarana ny hivoka izay efa ela talohan’ny vanim-potoana « interglacial » farany ary manohana ny fivelarana midadasika an’ireo kijana ireo alohan’ny fonenan’ny olombelona. Na dia misy fetrany aza ny fampitomboana ny antotan-kevitra kendrena amin’ny fandalinana ny fototarazo genetika momban’ny mponina, dia hita fa azo itokisana ny fikarohana natao momban’ny firafitry ny mponina. Hevi-dehibe — Ny fahasamihafana ara-genetika ao amin’ny Loudetia simplex dia manohana ny fisian’ny hivoka na kijana midadasika eto Madagasikara talohan’ny vanim-potoanan’ny fiovana paleoclimatika nisongadina. Na izany aza, ny fombam-pikarohana nampiasana teto dia tsy nahavita nanavaka ny fiovan’ny paleoclimatika akaikin’ny vanim-potoana lehibe nangatsiaka farany sy ny vokatry ny fitrandrahana nataon’ny olombelona. Mety ho fitaovana manan-danja amin’ny famakafakana ny firafitry ny mponina ny antotan-kevitra nampitombona na dia tsy misy fitaovana genomika iangaina aza. Fanambarana fiantraika ara-tsosialy Ny fanekena fa niely patrana ny hivoka itoeran’ny Loudetia talohan’ny fanjanahan’ny olombelona dia manamarika fa ireo hivoka ireo dia singa manan-danja amin’ny zavamananaina eto Madagasikara. Ilaina maika ny fahafantarana ara biolojika sy taxononomique ny zavam},
}
% Journal article (Science, 2023). The export lists only the first ten authors; "and others"
% marks the truncation. The DOI is lower-cased and the url uses the DOI resolver instead of
% the exported publons.com record, matching the other entries in this file.
% NOTE(review): the exported citation key contained a space ("et al."), which BibTeX cannot parse;
% it is now "..._et_al_2023" -- update any citation that used the old spelling.
@article{shao_zhou_li_zhao_zhang_shao_chen_chen_bi_zhuang_et_al_2023,
  author       = {Shao, Yong and Zhou, Long and Li, Fang and Zhao, Lan and Zhang, Bao-Lin and Shao, Feng and Chen, Jia-Wei and Chen, Chun-Yan and Bi, Xupeng and Zhuang, Xiao-Lin and others},
  title        = {Phylogenomic analyses provide insights into primate evolution},
  journal      = {Science},
  year         = {2023},
  month        = jun,
  doi          = {10.1126/science.abn6919},
  url          = {https://doi.org/10.1126/science.abn6919},
  abstractNote = {Comparative analysis of primate genomes within a phylogenetic context is essential for understanding the evolution of human genetic architecture and primate diversity. We present such a study of 50 primate species spanning 38 genera and 14 families, including 27 genomes first reported here, with many from previously less well represented groups, the New World monkeys and the Strepsirrhini. Our analyses reveal heterogeneous rates of genomic rearrangement and gene evolution across primate lineages. Thousands of genes under positive selection in different lineages play roles in the nervous, skeletal, and digestive systems and may have contributed to primate innovations and adaptations. Our study reveals that many key genomic innovations occurred in the Simiiformes ancestral node and may have had an impact on the adaptive radiation of the Simiiformes and human evolution.},
}
% bioRxiv preprint (September 2023). The exported title had lost the space before the species
% name ("ofLoudetia simplex"); proper nouns are brace-protected against sentence-casing styles.
@article{almary_white_rasaminirina_razanatsoa_lehmann_rakotoarinivo_ralimanana_vorontsova_tiley_2023,
  author       = {Almary, Tchana O. M. and White, Joseph and Rasaminirina, Fitiavana and Razanatsoa, Jacqueline and Lehmann, Caroline and Rakotoarinivo, Mijoro and Ralimanana, Hélène and Vorontsova, Maria S. and Tiley, George P.},
  title        = {The grass that built the {Central Highland} of {Madagascar}: environmental niches and morphological diversity of {Loudetia} simplex},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  year         = {2023},
  month        = sep,
  doi          = {10.1101/2023.09.25.559324},
  url          = {https://doi.org/10.1101/2023.09.25.559324},
  abstractNote = {SUMMARY 1) Research Aims — Loudetia simplex is a common and dominant species throughout grassland ecosystems in mainland Africa and Madagascar. It is highly polymorphic, often classified as two taxa endemic to Madagascar: L. simplex subsp. stipoides and L. madagascariensis . A better understanding of the inter- and intra-specific variation between these taxa and its contributing environmental factors could improve our understanding of the history of Madagascar’s grasslands. 2) Methods — The taxonomic status of L. simplex subsp. stipoides and L. madagascariensis was evaluated by morphometric analyses of 119 herbarium specimens. Species distribution modelling was used to determine the most important environmental factors underlying the L. simplex distribution in Madagascar versus other African grasslands. We investigated if L. simplex in Madagascar could be predicted by distributions across mainland Africa with niche overlap analyses. 3) Key Result — African and Malagasy species exhibited variation potentially associated with environment. Specimens from northern and western Madagascar were taller with smaller spikelets than those from Southern Africa and central Madagascar. Loudetia simplex typically occurred in cooler temperatures with high precipitation and pronounced seasonality, but taller populations were found in warmer conditions. Projecting ecological niches of Southern Africa and East Tropical Africa onto Madagascar demonstrates much of the present distribution in the Central Highlands is expected from other natural African grasslands. 4) Key Point — Malagasy and African individuals represent a single species, and the Malagasy species can be considered as a synonym of L. simplex. Distribution models are congruent with pre-human presence of grasslands in Madagascar. SOCIETAL IMPACT STATEMENT Understanding dominant species like Loudetia simplex is necessary to understand the fire-driven grassy ecosystems they create. In Madagascar, grasslands are considered a low-value ecosystem despite their unique biodiversity and crucial importance as zebu rangeland. Only 1.8% of Madagascar’s grasslands are protected despite facing similar threats to forests and biodiversity loss. This study of common grasses will support the management of protected areas by providing information on resource management of vulnerable open ecosystems in Madagascar. Distribution models of common grass species and clear taxonomic classification can help land management stakeholders identify natural grasslands versus degraded forests.},
}
% American Journal of Botany 109(10). Page range uses the BibTeX double hyphen; month given as macro; taxon names brace-protected.
@article{crowl_fritsch_tiley_lynch_ranney_ashrafi_manos_2022,
  author       = {Crowl, Andrew A. and Fritsch, Peter W. and Tiley, George P. and Lynch, Nathan P. and Ranney, Thomas G. and Ashrafi, Hamid and Manos, Paul S.},
  title        = {A first complete phylogenomic hypothesis for diploid blueberries ({Vaccinium} section {Cyanococcus})},
  journal      = {American Journal of Botany},
  volume       = {109},
  number       = {10},
  pages        = {1596--1606},
  year         = {2022},
  month        = oct,
  publisher    = {Wiley},
  issn         = {0002-9122 1537-2197},
  doi          = {10.1002/ajb2.16065},
  url          = {http://dx.doi.org/10.1002/ajb2.16065},
  abstractNote = {Abstract Premise The true blueberries ( Vaccinium sect. Cyanococcus ; Ericaceae), endemic to North America, have been intensively studied for over a century. However, with species estimates ranging from nine to 24 and much confusion regarding species boundaries, this ecologically and economically valuable group remains inadequately understood at a basic evolutionary and taxonomic level. As a first step toward understanding the evolutionary history and taxonomy of this species complex, we present the first phylogenomic hypothesis of the known diploid blueberries. Methods We used flow cytometry to verify the ploidy of putative diploid taxa and a target‐enrichment approach to obtain a genomic data set for phylogenetic analyses. Results Despite evidence of gene flow, we found that a primary phylogenetic signal is present. Monophyly for all morphospecies was recovered, with two notable exceptions: one sample of V. boreale was consistently nested in the V. myrtilloides clade and V. caesariense was nested in the V. fuscatum clade. One diploid taxon, Vaccinium pallidum , is implicated as having a homoploid hybrid origin. Conclusions This foundational study represents the first attempt to elucidate evolutionary relationships of the true blueberries of North America with a phylogenomic approach and sets the stage for multiple avenues of future study such as a taxonomic revision of the group, the verification of a homoploid hybrid taxon, and the study of polyploid lineages within the context of a diploid phylogeny.},
}
% eLife "Author response" record (DOI suffix .SA2) for the Mutationathon paper.
% The generated key was identical to the key of the article entry elsewhere in this file, so this
% record carries an "_author_response" suffix (and no whitespace) to make both entries addressable.
% abstractNote previously held a scrape of the whole article web page (navigation labels, table dump,
% body text with missing words); it is reduced to the abstract paragraph that the scrape began with.
% NOTE(review): confirm nothing downstream relied on the scraped full text.
@article{bergeron_besenbacher_turner_versoza_wang_price_armstrong_riera_carlson_chen_etal_2022_author_response,
  author       = {Bergeron, Lucie A and Besenbacher, Søren and Turner, Tychele and Versoza, Cyril J and Wang, Richard J and Price, Alivia Lee and Armstrong, Ellie and Riera, Meritxell and Carlson, Jedidiah and Chen, Hwei-yen and others},
  title        = {Author response: The {Mutationathon} highlights the importance of reaching standardization in estimates of pedigree-based germline mutation rates},
  journal      = {eLife},
  year         = {2022},
  month        = jan,
  doi          = {10.7554/ELIFE.73577.SA2},
  url          = {https://publons.com/wos-op/publon/53882992/},
  abstractNote = {In the past decade, several studies have estimated the human per-generation germline mutation rate using large pedigrees. More recently, estimates for various nonhuman species have been published. However, methodological differences among studies in detecting germline mutations and estimating mutation rates make direct comparisons difficult. Here, we describe the many different steps involved in estimating pedigree-based mutation rates, including sampling, sequencing, mapping, variant calling, filtering, and appropriately accounting for false-positive and false-negative rates. For each step, we review the different methods and parameter choices that have been used in the recent literature. Additionally, we present the results from a 'Mutationathon,' a competition organized among five research labs to compare germline mutation rate estimates for a single pedigree of rhesus macaques. We report almost a twofold variation in the final estimated rate among groups using different post-alignment processing, calling, and filtering criteria, and provide details into the sources of variation across studies. Though the difference among estimates is not statistically significant, this discrepancy emphasizes the need for standardized methods in mutation rate estimations and the difficulty in comparing rates from different studies. Finally, this work aims to provide guidelines for computational and statistical benchmarks for future studies interested in identifying germline mutations from pedigrees.},
}
% Molecular Ecology article. Truncated author list uses BibTeX's "and others" token (the style renders "et al.");
% month given as macro; title hyphen normalised to ASCII and proper nouns brace-protected. Key left as exported.
@article{tiley_elst_teixeira_schüßler_salmona_blanco_ralison_randrianambinina_rasoloarison_stahlke_et al._2022,
  author       = {Tiley, George P. and Elst, Tobias and Teixeira, Helena and Schüßler, Dominik and Salmona, Jordi and Blanco, Marina B. and Ralison, José M. and Randrianambinina, Blanchard and Rasoloarison, Rodin M. and Stahlke, Amanda R. and others},
  title        = {Population genomic structure in {Goodman's} mouse lemur reveals long-standing separation of {Madagascar's} {Central Highlands} and eastern rainforests},
  journal      = {Molecular Ecology},
  year         = {2022},
  month        = jul,
  doi          = {10.1111/mec.16632},
  url          = {https://doi.org/10.1111/mec.16632},
  abstractNote = {Abstract Madagascar's Central Highlands are largely composed of grasslands, interspersed with patches of forest. The historical perspective was that Madagascar's grasslands had anthropogenic origins, but emerging evidence suggests that grasslands were a component of the pre‐human Central Highlands vegetation. Consequently, there is now vigorous debate regarding the extent to which these grasslands have expanded due to anthropogenic pressures. Here, we shed light on the temporal dynamics of Madagascar's vegetative composition by conducting a population genomic investigation of Goodman's mouse lemur ( Microcebus lehilahytsara ; Cheirogaleidae). These small‐bodied primates occur both in Madagascar's eastern rainforests and in the Central Highlands, making them a valuable indicator species. Population divergences among forest‐dwelling mammals will reflect changes to their habitat, including fragmentation, whereas patterns of post‐divergence gene flow can reveal formerly wooded migration corridors. To explore these patterns, we used RADseq data to infer population genetic structure, demographic models of post‐divergence gene flow, and population size change through time. The results offer evidence that open habitats are an ancient component of the Central Highlands, and that widespread forest fragmentation occurred naturally during a period of decreased precipitation near the last glacial maximum. Models of gene flow suggest that migration across the Central Highlands has been possible from the Pleistocene through the recent Holocene via riparian corridors.
Though our findings support the hypothesis that Central Highland grasslands predate human arrival, we also find evidence for human‐mediated population declines. This highlights the extent to which species imminently threatened by human‐mediated deforestation may already be vulnerable from paleoclimatic conditions.},
}
@article{bergeron_besenbacher_turner_versoza_wang_price_armstrong_riera_carlson_chen_et al._2022,
  title        = {The {Mutationathon} highlights the importance of reaching standardization in estimates of pedigree-based germline mutation rates},
  DOI          = {10.7554/elife.73577},
  abstractNote = {In the past decade, several studies have estimated the human per-generation germline mutation rate using large pedigrees. More recently, estimates for various nonhuman species have been published. However, methodological differences among studies in detecting germline mutations and estimating mutation rates make direct comparisons difficult. Here, we describe the many different steps involved in estimating pedigree-based mutation rates, including sampling, sequencing, mapping, variant calling, filtering, and appropriately accounting for false-positive and false-negative rates. For each step, we review the different methods and parameter choices that have been used in the recent literature. Additionally, we present the results from a ‘Mutationathon,’ a competition organized among five research labs to compare germline mutation rate estimates for a single pedigree of rhesus macaques. We report almost a twofold variation in the final estimated rate among groups using different post-alignment processing, calling, and filtering criteria, and provide details into the sources of variation across studies. Though the difference among estimates is not statistically significant, this discrepancy emphasizes the need for standardized methods in mutation rate estimations and the difficulty in comparing rates from different studies. Finally, this work aims to provide guidelines for computational and statistical benchmarks for future studies interested in identifying germline mutations from pedigrees.},
  journal      = {eLife},
  author       = {Bergeron, Lucie A and Besenbacher, Søren and Turner, Tychele and Versoza, Cyril J and Wang, Richard J and Price, Alivia Lee and Armstrong, Ellie and Riera, Meritxell and Carlson, Jedidiah and Chen, Hwei-yen and others},
  year         = {2022},
  month        = jan,
}
@article{breinholt_carey_tiley_davis_endara_mcdaniel_neves_sessa_konrat_chantanaorrapint_et al._2021,
  title        = {A target enrichment probe set for resolving the flagellate land plant tree of life},
  url          = {https://doi.org/10.1002/aps3.11406},
  DOI          = {10.1002/aps3.11406},
  abstractNote = {PREMISE New sequencing technologies facilitate the generation of large‐scale molecular data sets for constructing the plant tree of life. We describe a new probe set for target enrichment sequencing to generate nuclear sequence data to build phylogenetic trees with any flagellate land plants, including hornworts, liverworts, mosses, lycophytes, ferns, and all gymnosperms. METHODS We leveraged existing transcriptome and genome sequence data to design the GoFlag 451 probes, a set of 56,989 probes for target enrichment sequencing of 451 exons that are found in 248 single‐copy or low‐copy nuclear genes across flagellate plant lineages. RESULTS Our results indicate that target enrichment using the GoFlag451 probe set can provide large nuclear data sets that can be used to resolve relationships among both distantly and closely related taxa across the flagellate land plants. We also describe the GoFlag 408 probes, an optimized probe set covering 408 of the 451 exons from the GoFlag 451 probe set that is commercialized by RAPiD Genomics. CONCLUSIONS A target enrichment approach using the new probe set provides a relatively low‐cost solution to obtain large‐scale nuclear sequence data for inferring phylogenetic relationships across flagellate land plants.},
  journal      = {Applications in Plant Sciences},
  author       = {Breinholt, Jesse W. and Carey, Sarah B. and Tiley, George P. and Davis, E. Christine and Endara, Lorena and McDaniel, Stuart F. and Neves, Leandro G. and Sessa, Emily B. and von Konrat, Matt and Chantanaorrapint, Sahut and others},
  year         = {2021},
  month        = jan,
}
@article{tiley_crowl_manos_sessa_solís-lemus_yoder_burleigh_2021,
  title        = {Benefits and Limits of Phasing Alleles for Network Inference of Allopolyploid Complexes},
  url          = {https://doi.org/10.1101/2021.05.04.442457},
  DOI          = {10.1101/2021.05.04.442457},
  abstractNote = {Abstract Accurately reconstructing the reticulate histories of polyploids remains a central challenge for understanding plant evolution. Although phylogenetic networks can provide insights into relationships among polyploid lineages, inferring networks may be hindered by the complexities of homology determination in polyploid taxa. We use simulations to show that phasing alleles from allopolyploid individuals can improve phylogenetic network inference under the multispecies coalescent by obtaining the true network with fewer loci compared to haplotype consensus sequences or sequences with heterozygous bases represented as ambiguity codes. Phased allelic data can also improve divergence time estimates for networks, which is helpful for evaluating allopolyploid speciation hypotheses and proposing mechanisms of speciation. To achieve these outcomes in empirical data, we present a novel pipeline that leverages a recently developed phasing algorithm to reliably phase alleles from polyploids. This pipeline is especially appropriate for target enrichment data, where depth of coverage is typically high enough to phase entire loci. We provide an empirical example in the North American Dryopteris fern complex that demonstrates insights from phased data as well as the challenges of network inference. We establish that our pipeline (PATÉ: Phased Alleles from Target Enrichment data) is capable of recovering a high proportion of phased loci from both diploids and polyploids. These data may improve network estimates compared to using haplotype consensus assemblies by accurately inferring the direction of gene flow, but statistical non-identifiability of phylogenetic networks poses a barrier to inferring the evolutionary history of reticulate complexes.},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  author       = {Tiley, George P. and Crowl, Andrew A. and Manos, Paul S. and Sessa, Emily B. and Solís-Lemus, Claudia and Yoder, Anne D. and Burleigh, J. Gordon},
  year         = {2021},
  month        = may,
}
% Published version; the corresponding bioRxiv preprint appears to be DOI 10.1101/2020.07.03.163634.
@article{carey_jenkins_lovell_maumus_sreedasyam_payton_shu_tiley_fernandez-pozo_healey_et al._2021,
  title        = {Gene-rich {UV} sex chromosomes harbor conserved regulators of sexual development},
  volume       = {7},
  url          = {https://doi.org/10.1126/sciadv.abh2488},
  DOI          = {10.1126/sciadv.abh2488},
  abstractNote = {Moss sex chromosomes retain thousands of broadly expressed genes despite millions of years of suppressed recombination.},
  number       = {27},
  journal      = {Science Advances},
  publisher    = {American Association for the Advancement of Science (AAAS)},
  author       = {Carey, Sarah B. and Jenkins, Jerry and Lovell, John T. and Maumus, Florian and Sreedasyam, Avinash and Payton, Adam C. and Shu, Shengqiang and Tiley, George P. and Fernandez-Pozo, Noe and Healey, Adam and others},
  year         = {2021},
  month        = jun,
}
@article{campbell_tiley_poelstra_hunnicutt_larsen_lee_thorne_reis_yoder_2021,
  title        = {Pedigree-based and phylogenetic methods support surprising patterns of mutation rate and spectrum in the gray mouse lemur},
  url          = {https://doi.org/10.1038/s41437-021-00446-5},
  DOI          = {10.1038/s41437-021-00446-5},
  abstractNote = {Mutations are the raw material on which evolution acts, and knowledge of their frequency and genomic distribution is crucial for understanding how evolution operates at both long and short timescales. At present, the rate and spectrum of de novo mutations have been directly characterized in relatively few lineages. Our study provides the first direct mutation-rate estimate for a strepsirrhine (i.e., the lemurs and lorises), which comprises nearly half of the primate clade. Using high-coverage linked-read sequencing for a focal quartet of gray mouse lemurs (Microcebus murinus), we estimated the mutation rate to be among the highest calculated for a mammal at 1.52 × 10–8 (95% credible interval: 1.28 × 10−8–1.78 × 10−8) mutations/site/generation. Further, we found an unexpectedly low count of paternal mutations, and only a modest overrepresentation of mutations at CpG sites. Despite the surprising nature of these results, we found both the rate and spectrum to be robust to the manipulation of a wide range of computational filtering criteria. We also sequenced a technical replicate to estimate a false-negative and false-positive rate for our data and show that any point estimate of a de novo mutation rate should be considered with a large degree of uncertainty. For validation, we conducted an independent analysis of context-dependent substitution types for gray mouse lemur and five additional primate species for which de novo mutation rates have also been estimated. These comparisons revealed general consistency of the mutation spectrum between the pedigree-based and the substitution-rate analyses for all species compared.},
  journal      = {Heredity},
  author       = {Campbell, C. Ryan and Tiley, George P. and Poelstra, Jelmer W. and Hunnicutt, Kelsie E. and Larsen, Peter A. and Lee, Hui-Jie and Thorne, Jeffrey L. and dos Reis, Mario and Yoder, Anne D.},
  year         = {2021},
  month        = jul,
}
% Preprint (DOI prefix 10.22541/au, Authorea) of the Molecular Ecology article DOI 10.1111/mec.16632.
@article{tiley_elst_teixeira_schüßler_salmona_blanco_ralison_randrianambinina_rasoloarison_stahlke_et al._2021,
  title        = {Population genomic structure in {Goodman's} mouse lemur reveals long-standing separation of {Madagascar's} {Central Highlands} and eastern rainforests},
  url          = {https://doi.org/10.22541/au.163958777.74253126/v1},
  DOI          = {10.22541/au.163958777.74253126/v1},
  abstractNote = {Madagascar’s Central Highlands are largely composed of grasslands, interspersed with patches of forest. The pre-human extent of these grasslands is a topic of vigorous debate, with conventional wisdom holding that they are anthropogenic in nature and emerging evidence supporting that grasslands were a component of the pre-human Central Highlands vegetation. Here, we shed light on the temporal dynamics of Madagascar’s vegetative composition by conducting a population genomic investigation of Goodman’s mouse lemur (Microcebus lehilahytsara; Cheirogaleidae). These small-bodied primates occur both in Madagascar’s eastern rainforests and in the Central Highlands, which makes them a valuable indicator species. Population divergences among forest-dwelling mammals can serve as a proxy for habitat fragmentation and patterns of post-divergence gene flow can reveal potential migration corridors consistent with a wooded grassland mosaic. We used RADseq data to infer phylogenetic relationships, population structure, demographic models of post-divergence gene flow, and population size change through time. These analyses offer evidence that open habitats are an ancient component of the Central Highlands, and that wide-spread forest fragmentation occurred naturally during a period of decreased precipitation near the last glacial maximum. Models of gene flow suggest that migration across the Central Highlands has been possible from the Pleistocene through the recent Holocene via riparian corridors. Notably, though our findings support the hypothesis that Central Highland grasslands predate human arrival, we also find evidence for human-mediated population declines. This highlights the extent to which species imminently threatened by human-mediated deforestation may be more vulnerable from paleoclimatic changes.},
  journal      = {Authorea},
  author       = {Tiley, George and Elst, Tobias and Teixeira, Helena and Schüßler, Dominik and Salmona, Jordi and Blanco, Marina and Ralison, José and Randrianambinina, Blanchard and Rasoloarison, Rodin and Stahlke, Amanda and others},
  year         = {2021},
  month        = dec,
}
@article{yoder_tiley_2021,
  title        = {The challenge and promise of estimating the de novo mutation rate from whole‐genome comparisons among closely related individuals},
  url          = {https://doi.org/10.1111/mec.16007},
  DOI          = {10.1111/mec.16007},
  abstractNote = {Abstract Germline mutations are the raw material for natural selection, driving species evolution and the generation of earth's biodiversity. Without this driver of genetic diversity, life on earth would stagnate. Yet, it is a double‐edged sword. An excess of mutations can have devastating effects on fitness and population viability. It is therefore one of the great challenges of molecular ecology to determine the rate and mechanisms by which these mutations accrue across the tree of life. Advances in high‐throughput sequencing technologies are providing new opportunities for characterizing the rates and mutational spectra within species and populations thus informing essential evolutionary parameters such as the timing of speciation events, the intricacies of historical demography, and the degree to which lineages are subject to the burdens of mutational load. Here, we will focus on both the challenge and promise of whole‐genome comparisons among parents and their offspring from known pedigrees for the detection of germline mutations as they arise in a single generation. The potential of these studies is high, but the field is still in its infancy and much uncertainty remains. Namely, the technical challenges are daunting given that pedigree‐based genome comparisons are essentially searching for needles in a haystack given the very low signal to noise ratio. Despite the challenges, we predict that rapidly developing methods for whole‐genome comparisons hold great promise for integrating empirically derived estimates of de novo mutation rates and mutation spectra across many molecular ecological applications.},
  journal      = {Molecular Ecology},
  author       = {Yoder, Anne D. and Tiley, George P.},
  year         = {2021},
  month        = jun,
}
% Published journal version; the bioRxiv preprint of this work is recorded under DOI 10.1101/742361.
@article{poelstra_salmona_tiley_schüßler_blanco_andriambeloson_bouchez_campbell_etter_hohenlohe_et al._2020,
  title        = {Cryptic Patterns of Speciation in Cryptic Primates: Microendemic Mouse Lemurs and the Multispecies Coalescent},
  volume       = {7},
  url          = {https://doi.org/10.1093/sysbio/syaa053},
  DOI          = {10.1093/sysbio/syaa053},
  abstractNote = {Abstract Mouse lemurs (Microcebus) are a radiation of morphologically cryptic primates distributed throughout Madagascar for which the number of recognized species has exploded in the past two decades. This taxonomic revision has prompted understandable concern that there has been substantial oversplitting in the mouse lemur clade. Here, we investigate mouse lemur diversity in a region in northeastern Madagascar with high levels of microendemism and predicted habitat loss. We analyzed RADseq data with multispecies coalescent (MSC) species delimitation methods for two pairs of sister lineages that include three named species and an undescribed lineage previously identified to have divergent mtDNA. Marked differences in effective population sizes, levels of gene flow, patterns of isolation-by-distance, and species delimitation results were found among the two pairs of lineages. Whereas all tests support the recognition of the presently undescribed lineage as a separate species, the species-level distinction of two previously described species, M. mittermeieri and M. lehilahytsara is not supported—a result that is particularly striking when using the genealogical discordance index (gdi). Nonsister lineages occur sympatrically in two of the localities sampled for this study, despite an estimated divergence time of less than 1 Ma. This suggests rapid evolution of reproductive isolation in the focal lineages and in the mouse lemur clade generally. The divergence time estimates reported here are based on the MSC calibrated with pedigree-based mutation rates and are considerably more recent than previously published fossil-calibrated relaxed-clock estimates. We discuss the possible explanations for this discrepancy, noting that there are theoretical justifications for preferring the MSC estimates in this case. [Cryptic species; effective population size; microendemism; multispecies coalescent; speciation; species delimitation.]},
  journal      = {Systematic Biology},
  publisher    = {Oxford University Press (OUP)},
  author       = {Poelstra, Jelmer W and Salmona, Jordi and Tiley, George P and Schüßler, Dominik and Blanco, Marina B and Andriambeloson, Jean B and Bouchez, Olivier and Campbell, C Ryan and Etter, Paul D and Hohenlohe, Paul A and others},
  editor       = {Esselstyn, Jacob},
  year         = {2020},
  month        = jun,
}
@article{tiley_poelstra_reis_yang_yoder_2020,
  title        = {Molecular Clocks without Rocks: New Solutions for Old Problems},
  volume       = {36},
  url          = {https://doi.org/10.1016/j.tig.2020.06.002},
  DOI          = {10.1016/j.tig.2020.06.002},
  abstractNote = {Molecular data have been used to date species divergences ever since they were described as documents of evolutionary history in the 1960s. Yet, an inadequate fossil record and discordance between gene trees and species trees are persistently problematic. We examine how, by accommodating gene tree discordance and by scaling branch lengths to absolute time using mutation rate and generation time, multispecies coalescent (MSC) methods can potentially overcome these challenges. We find that time estimates can differ - in some cases, substantially - depending on whether MSC methods or traditional phylogenetic methods that apply concatenation are used, and whether the tree is calibrated with pedigree-based mutation rates or with fossils. We discuss the advantages and shortcomings of both approaches and provide practical guidance for data analysis when using these methods.},
  number       = {11},
  journal      = {Trends in Genetics},
  publisher    = {Elsevier BV},
  author       = {Tiley, George P. and Poelstra, Jelmer W. and dos Reis, Mario and Yang, Ziheng and Yoder, Anne D.},
  year         = {2020},
  month        = jul,
  pages        = {845--856},
}
% bioRxiv preprint of the Molecular Ecology article DOI 10.1111/mec.16632.
% The exported author list repeated its first three names after the seventh author; only the seven unique authors are kept.
@article{tiley_blanco_ralison_rasoloarison_stahlke_hohenlohe_yoder_tiley_blanco_ralison_et al._2020,
  title        = {Population genomic structure in {Goodman’s} mouse lemur reveals long-standing separation of {Madagascar’s} {Central Highlands} and eastern rainforests},
  volume       = {1},
  url          = {https://doi.org/10.1101/2020.01.30.923300},
  DOI          = {10.1101/2020.01.30.923300},
  abstractNote = {Abstract The Central Highland Plateau of Madagascar is largely composed of grassland savanna, interspersed with patches of closed-canopy forest. Conventional wisdom has it that these grasslands are anthropogenic in nature, having been created very recently via human agricultural practices. Yet, the ancient origins of the endemic grasses suggest that the extensive savannas are natural biomes, similar to others found around the globe. We use a phylogeographic approach to compare these two competing scenarios. By sampling multiple populations of Goodman’s mouse lemur ( Microcebus lehilahytsara ), a small-bodied nocturnal primate, we reconstruct the phylogeographic and demographic history of these “environmental metronomes” to estimate the time at which their populations diverged, and thus proximally, when their habitats would have become fragmented. We applied coalescent methods to RADseq data to infer phylogenetic relationships, population structure, and migration corridors among sampling sites. These analyses indicate that forest fragmentation occurred rapidly during a period of decreased precipitation near the last glacial maximum and would have affected both the Central Highlands and eastern forests. Though there is clear genomic structure separating the populations of the Central Highland from those of the eastern rainforests, there is also evidence of historical migration between them. Findings support the hypothesis that the Central Highland savanna predates human arrival, indicating that it is a natural landscape that has long impacted the population dynamics of Goodman’s mouse lemur, and by extension, other forest-dwelling organisms in Madagascar.},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  publisher    = {Cold Spring Harbor Laboratory},
  author       = {Tiley, George P. and Blanco, Marina B. and Ralison, José M. and Rasoloarison, Rodin M. and Stahlke, Amanda R. and Hohenlohe, Paul A. and Yoder, Anne D.},
  year         = {2020},
  month        = jan,
}
% bioRxiv preprint; appears to be the preprint of the Science Advances article DOI 10.1126/sciadv.abh2488 (same lead authors and findings).
@article{carey_jenkins_lovell_maumus_sreedasyam_payton_shu_tiley_fernandez-pozo_barry_et al._2020,
  title        = {The {Ceratodon} purpureus genome uncovers structurally complex, gene rich sex chromosomes},
  url          = {https://doi.org/10.1101/2020.07.03.163634},
  DOI          = {10.1101/2020.07.03.163634},
  abstractNote = {Abstract Non-recombining sex chromosomes, like the mammalian Y, often lose genes and accumulate transposable elements, a process termed degeneration 1,2 . The correlation between suppressed recombination and degeneration is clear in animal XY systems 1,2 , but the absence of recombination is confounded with other asymmetries between the X and Y. In contrast, UV sex chromosomes, like those found in bryophytes, experience symmetrical population genetic conditions 3,4 . Here we test for degeneration in the bryophyte UV sex chromosome system through genomic comparisons with new female and male chromosome-scale reference genomes of the moss Ceratodon purpureus . We show that the moss sex chromosomes evolved over 300 million years ago and expanded via two chromosomal fusions. Although the sex chromosomes show signs of weaker purifying selection than autosomes, we find suppressed recombination alone is insufficient to drive gene loss on sex-specific chromosomes. Instead, the U and V sex chromosomes harbor thousands of broadly-expressed genes, including numerous key regulators of sexual development across land plants.},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  author       = {Carey, Sarah B. and Jenkins, Jerry and Lovell, John T. and Maumus, Florian and Sreedasyam, Avinash and Payton, Adam C. and Shu, Shengqiang and Tiley, George P. and Fernandez-Pozo, Noe and Barry, Kerrie and others},
  year         = {2020},
  month        = jul,
}
@article{tiley_pandey_kimball_braun_burleigh_2020,
  title        = {Whole genome phylogeny of {Gallus}: introgression and data-type effects},
  volume       = {11},
  url          = {https://doi.org/10.1186/s40657-020-00194-w},
  DOI          = {10.1186/s40657-020-00194-w},
  abstractNote = {Abstract Background Previous phylogenetic studies that include the four recognized species of Gallus have resulted in a number of distinct topologies, with little agreement. Several factors could lead to the failure to converge on a consistent topology, including introgression, incomplete lineage sorting, different data types, or insufficient data. Methods We generated three novel whole genome assemblies for Gallus species, which we combined with data from the published genomes of Gallus gallus and Bambusicola thoracicus (a member of the sister genus to Gallus ). To determine why previous studies have failed to converge on a single topology, we extracted large numbers of orthologous exons, introns, ultra-conserved elements, and conserved non-exonic elements from the genome assemblies. This provided more than 32 million base pairs of data that we used for concatenated maximum likelihood and multispecies coalescent analyses of Gallus . Results All of our analyses, regardless of data type, yielded a single, well-supported topology. We found some evidence for ancient introgression involving specific Gallus lineages as well as modest data type effects that had an impact on support and branch length estimates in specific analyses. However, the estimated gene tree spectra for all data types had a relatively good fit to their expectation given the multispecies coalescent. Conclusions Overall, our data suggest that conflicts among previous studies probably reflect the use of smaller datasets (both in terms of number of sites and of loci) in those analyses. Our results demonstrate the importance of sampling large numbers of loci, each of which has a sufficient number of sites to provide robust estimates of gene trees. Low-coverage whole genome sequencing, as we did here, represents a cost-effective means to generate the very large data sets that include multiple data types that enabled us to obtain a robust estimate of Gallus phylogeny.},
  number       = {1},
  journal      = {Avian Research},
  publisher    = {Springer Science and Business Media LLC},
  author       = {Tiley, George P. and Pandey, Akanksha and Kimball, Rebecca T. and Braun, Edward L. and Burleigh, J. Gordon},
  year         = {2020},
  month        = mar,
}
% Published journal version; the bioRxiv preprint of this work is recorded under DOI 10.1101/637348.
@article{hunnicutt_tiley_williams_larsen_blanco_rasoloarison_campbell_zhu_weisrock_matsunami_et al._2019,
  title        = {Comparative Genomic Analysis of the Pheromone Receptor Class 1 Family ({V1R}) Reveals Extreme Complexity in Mouse Lemurs (Genus, {Microcebus}) and a Chromosomal Hotspot across Mammals},
  url          = {https://doi.org/10.1093/gbe/evz200},
  DOI          = {10.1093/gbe/evz200},
  abstractNote = {Abstract Sensory gene families are of special interest for both what they can tell us about molecular evolution and what they imply as mediators of social communication. The vomeronasal type-1 receptors (V1Rs) have often been hypothesized as playing a fundamental role in driving or maintaining species boundaries given their likely function as mediators of intraspecific mate choice, particularly in nocturnal mammals. Here, we employ a comparative genomic approach for revealing patterns of V1R evolution within primates, with a special focus on the small-bodied nocturnal mouse and dwarf lemurs of Madagascar (genera Microcebus and Cheirogaleus, respectively). By doubling the existing genomic resources for strepsirrhine primates (i.e. the lemurs and lorises), we find that the highly speciose and morphologically cryptic mouse lemurs have experienced an elaborate proliferation of V1Rs that we argue is functionally related to their capacity for rapid lineage diversification. Contrary to a previous study that found equivalent degrees of V1R diversity in diurnal and nocturnal lemurs, our study finds a strong correlation between nocturnality and V1R elaboration, with nocturnal lemurs showing elaborate V1R repertoires and diurnal lemurs showing less diverse repertoires. Recognized subfamilies among V1Rs show unique signatures of diversifying positive selection, as might be expected if they have each evolved to respond to specific stimuli. Furthermore, a detailed syntenic comparison of mouse lemurs with mouse (genus Mus) and other mammalian outgroups shows that orthologous mammalian subfamilies, predicted to be of ancient origin, tend to cluster in a densely populated region across syntenic chromosomes that we refer to as a V1R “hotspot.”},
  journal      = {Genome Biology and Evolution},
  author       = {Hunnicutt, Kelsie E and Tiley, George P and Williams, Rachel C and Larsen, Peter A and Blanco, Marina B and Rasoloarison, Rodin M and Campbell, C Ryan and Zhu, Kevin and Weisrock, David W and Matsunami, Hiroaki and others},
  year         = {2019},
  month        = sep,
}
% bioRxiv preprint of the Systematic Biology article DOI 10.1093/sysbio/syaa053.
@article{schußler_salmona_blanco_poelstra_tiley_andriambeloson_bouchez_campbell_etter_iribar_et al._2019,
  title        = {Cryptic Patterns of Speciation in Cryptic Primates: Microendemic Mouse Lemurs and the Multispecies Coalescent},
  url          = {https://doi.org/10.1101/742361},
  DOI          = {10.1101/742361},
  abstractNote = {Abstract Mouse lemurs ( Microcebus ) are a radiation of morphologically cryptic primates distributed throughout Madagascar for which the number of recognized species has exploded in the past two decades. This taxonomic explosion has prompted understandable concern that there has been substantial oversplitting in the mouse lemur clade. Here, we take an integrative approach to investigate species diversity in two pairs of sister lineages that occur in a region in northeastern Madagascar with high levels of microendemism and predicted habitat loss. We analyzed RADseq data with multispecies coalescent (MSC) species delimitation methods for three named species and an undescribed lineage previously identified to have divergent mtDNA. Marked differences in effective population sizes, levels of gene flow, patterns of isolation-by-distance, and species delimitation results were found among them. Whereas all tests support the recognition of the presently undescribed lineage as a separate species, the species-level distinction of two previously described species, M. mittermeieri and M. lehilahytsara is not supported – a result that is particularly striking when using the genealogical discordance index ( gdi ). Non-sister lineages occur sympatrically in two of the localities sampled for this study, despite an estimated divergence time of less than 1 Ma. This suggests rapid evolution of reproductive isolation in the focal lineages, and in the mouse lemur clade generally. The divergence time estimates reported here are based on the MSC and calibrated with pedigree-based mutation rates and are considerably more recent than previously published fossil-calibrated concatenated likelihood estimates, however. We discuss the possible explanations for this discrepancy, noting that there are theoretical justifications for preferring the MSC estimates in this case.},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  author       = {Schüßler, Dominik and Salmona, Jordi and Blanco, Marina B and Poelstra, Jelmer and Tiley, George P and Andriambeloson, Jean B and Bouchez, Olivier and Campbell, C. Ryan and Etter, Paul D and Iribar, Amaia and others},
  year         = {2019},
  month        = aug,
}
% bioRxiv preprint of the Genome Biology and Evolution article DOI 10.1093/gbe/evz200.
% The key carries a "b" suffix: the exported key was identical to that article's key, so BibTeX/Biber
% rejected this entry as a repeated entry. Citations of the unsuffixed key still resolve to the article.
@article{hunnicutt_tiley_williams_larsen_blanco_rasoloarison_campbell_zhu_weisrock_matsunami_et al._2019b,
  title        = {Elaborate expansion of syntenic {V1R} hotspots correlates with high species diversity in nocturnal mouse and dwarf lemurs},
  url          = {https://doi.org/10.1101/637348},
  DOI          = {10.1101/637348},
  abstractNote = {Abstract Sensory gene families are of special interest, both for what they can tell us about molecular evolution, and for what they imply as mediators of social communication. The vomeronasal type-1 receptors (V1Rs) have often been hypothesized as playing a fundamental role in driving or maintaining species boundaries given their likely function as mediators of intraspecific mate choice, particularly in nocturnal mammals. Here, we employ a comparative genomic approach for revealing patterns of V1R evolution within primates, with a special focus on the small-bodied nocturnal mouse and dwarf lemurs of Madagascar (genera Microcebus and Cheirogaleus , respectively). By doubling the existing genomic resources for strepsirrhine primates (i.e., the lemurs and lorises), we find that the highly-speciose and morphologically-cryptic mouse lemurs have experienced an elaborate proliferation of V1Rs that we argue is functionally related to their capacity for rapid lineage diversification. Contrary to a previous study that found equivalent degrees of V1R diversity in diurnal and nocturnal lemurs, our study finds a strong correlation between nocturnality and V1R elaboration, with nocturnal lemurs showing elaborate V1R repertoires and diurnal lemurs showing less diverse repertoires. Recognized subfamilies among V1Rs show unique signatures of diversifying positive selection, as might be expected if they have each evolved to respond to specific stimuli. Further, a detailed syntenic comparison of mouse lemurs with mouse (genus Mus ) and other mammalian outgroups shows that orthologous mammalian subfamilies, predicted to be of ancient origin, tend to cluster in a densely populated region across syntenic chromosomes that we refer to as V1R “hotspots.”},
  journal      = {bioRxiv (Cold Spring Harbor Laboratory)},
  author       = {Hunnicutt, Kelsie E. and Tiley, George P. and Williams, Rachel C. and Larsen, Peter A. and Blanco, Marina B. and Rasoloarison, Rodin M. and Campbell, C. Ryan and Zhu, Kevin and Weisrock, David W. and Matsunami, Hiroaki and others},
  year         = {2019},
  month        = may,
}
% Citation key uses "etal" (no space): whitespace inside a key terminates the
% key early and makes BibTeX reject the whole entry. The truncated author list
% is marked with "and others". The page range replaces the indexer placeholder
% "679-+" with the published range.
@article{leebens-mack_barker_carpenter_deyholos_gitzendanner_graham_grosse_li_melkonian_mirarab_etal_2019,
  author       = {Leebens-Mack, James H. and Barker, Michael S. and Carpenter, Eric J. and Deyholos, Michael K. and Gitzendanner, Matthew A. and Graham, Sean W. and Grosse, Ivo and Li, Zheng and Melkonian, Michael and Mirarab, Siavash and others},
  title        = {One thousand plant transcriptomes and the phylogenomics of green plants},
  journal      = {Nature},
  volume       = {574},
  number       = {7780},
  pages        = {679--685},
  year         = {2019},
  month        = oct,
  doi          = {10.1038/s41586-019-1693-2},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85074238403&partnerID=MN8TOARS},
  abstractNote = {Green plants (Viridiplantae) include around 450,000–500,000 species of great diversity and have important roles in terrestrial and aquatic ecosystems. Here, as part of the One Thousand Plant Transcriptomes Initiative, we sequenced the vegetative transcriptomes of 1,124 species that span the diversity of plants in a broad sense (Archaeplastida), including green plants (Viridiplantae), glaucophytes (Glaucophyta) and red algae (Rhodophyta). Our analysis provides a robust phylogenomic framework for examining the evolution of green plants. Most inferred species relationships are well supported across multiple species tree and supermatrix analyses, but discordance among plastid and nuclear gene trees at a few important nodes highlights the complexity of plant genome evolution, including polyploidy, periods of rapid speciation, and extinction. Incomplete sorting of ancestral variation, polyploidization and massive expansions of gene families punctuate the evolutionary history of green plants. Notably, we find that large expansions of gene families preceded the origins of green plants, land plants and vascular plants, whereas whole-genome duplications are inferred to have occurred repeatedly throughout the evolution of flowering plants and ferns. The increasing availability of high-quality plant genome sequences and advances in functional genomics are enabling research on genome evolution across the green tree of life.},
}
% Preprint record. The surname particle of "dos Reis" is written in the
% "von Last, First" form so it is parsed as part of the family name.
@article{campbell_tiley_poelstra_hunnicutt_larsen_lee_thorne_reis_yoder_2019,
  author       = {Campbell, C. Ryan and Tiley, George P. and Poelstra, Jelmer W. and Hunnicutt, Kelsie E. and Larsen, Peter A. and Lee, Hui-Jie and Thorne, Jeffrey L. and dos Reis, Mario and Yoder, Anne D.},
  title        = {Pedigree-based measurement of the de novo mutation rate in the gray mouse lemur reveals a high mutation rate, few mutations in {CpG} sites, and a weak sex bias},
  journal      = {BioRxiv},
  year         = {2019},
  month        = aug,
  doi          = {10.1101/724880},
  url          = {https://publons.com/wos-op/publon/68496721/},
  abstractNote = {Spontaneous germline mutations are the raw material on which evolution acts, and knowledge of their frequency and genomic distribution is crucial for understanding how evolution operates at both long and short timescales. At present, the rate and spectrum of de novo mutations have been directly characterized in only a few lineages. It is therefore critical to expand the phylogenetic scope of these studies to gain a more general understanding of observed mutation rate patterns. Our study provides the first direct mutation rate estimate for a strepsirrhine (i.e., the lemurs and lorises), which comprise nearly half of the primate clade. Using high-coverage linked-read sequencing for a focal quartet of gray mouse lemurs (Microcebus murinus), we estimated the mutation rate to be 1.64 × 10⁻⁸ (95% credible interval: 1.41 × 10⁻⁸ to 1.98 × 10⁻⁸) mutations/site/generation. This estimate is higher than those measured for most previously characterized mammals. Further, we found an unexpectedly low count of paternal mutations, and only a modest overrepresentation of mutations at CpG-sites. Given the surprising nature of these observations, we conducted an independent analysis of context-dependent substitution types for gray mouse lemur and five additional primate species. This analysis yielded patterns consistent with the mutation spectrum from the pedigree mutation-rate analysis, which provides confidence in our ability to accurately identify de novo mutations with our data and bioinformatic filters.},
}
% The abstract is the publisher's truncated excerpt; the scraped
% correspondence footnote and link-definition residue have been removed.
@article{li_tiley_rundell_barker_2019,
  author       = {Li, Zheng and Tiley, George P. and Rundell, Rebecca J. and Barker, Michael S.},
  title        = {Reply to {Nakatani} and {McLysaght}: Analyzing deep duplication events},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {116},
  number       = {6},
  pages        = {1819--1820},
  year         = {2019},
  month        = jan,
  publisher    = {Proceedings of the National Academy of Sciences},
  doi          = {10.1073/pnas.1819227116},
  url          = {https://doi.org/10.1073/pnas.1819227116},
  abstractNote = {As we wrote in our original report of these events (1), readers should not rush to interpret the six ancient large-scale duplications in hexapods as whole-genome duplications (WGDs). Analyses of syntenic relationships are needed to understand the evolutionary processes underlying these large-scale duplications that we inferred with phylogenomic methods (1). Our analysis of the Bombyx genome was conducted in service of phylogenomic analyses and used standard BLASTP parameters to identify synteny (2). We found that paralogs associated with an inferred large-scale duplication in the ancestry of the Lepidoptera were more likely than chance to be on collinear chains (1). Like Nakatani and McLysaght (3), we observed …},
}
% NOTE(review): volume corrected from 9 to 10 and issue/pages added from the
% published record; the month is the online publication month. Confirm
% against the DOI metadata.
@article{tiley_barker_burleigh_2018,
  author       = {Tiley, George P. and Barker, Michael S. and Burleigh, J. Gordon},
  title        = {Assessing the performance of {Ks} plots for detecting ancient whole genome duplications},
  journal      = {Genome Biology and Evolution},
  volume       = {10},
  number       = {11},
  pages        = {2882--2898},
  year         = {2018},
  month        = sep,
  publisher    = {Oxford University Press (OUP)},
  doi          = {10.1093/gbe/evy200},
  url          = {https://publons.com/wos-op/publon/27714477/},
  abstractNote = {Genomic data have provided evidence of previously unknown ancient whole genome duplications (WGDs) and highlighted the role of WGDs in the evolution of many eukaryotic lineages. Ancient WGDs often are detected by examining distributions of synonymous substitutions per site (Ks) within a genome, or "Ks plots." For example, WGDs can be detected from Ks plots by using univariate mixture models to identify peaks in Ks distributions. We performed gene family simulation experiments to evaluate the effects of different Ks estimation methods and mixture models on our ability to detect ancient WGDs from Ks plots. The simulation experiments, which accounted for variation in substitution rates and gene duplication and loss rates across gene families, tested the effects of WGD age and gene retention rates following WGD on inferring WGDs from Ks plots. Our simulations reveal limitations of Ks plot analyses. Strict interpretations of mixture model analyses often overestimate the number of WGD events, and Ks plot analyses typically fail to detect WGDs when ≤10% of the duplicated genes are retained following the WGD. However, WGDs can accurately be characterized over an intermediate range of Ks. The simulation results are supported by empirical analyses of transcriptomic data, which also suggest that biases in gene retention likely affect our ability to detect ancient WGDs. Although our results indicate mixture model results should be interpreted with great caution, using node-averaged Ks estimates and applying more appropriate mixture models can improve the accuracy of detecting WGDs.},
}
% Author given names are stored in full (the style abbreviates if required).
% No page or article number is recorded for this entry.
@article{tiley_kimball_braun_burleigh_2018,
  author       = {Tiley, George P. and Kimball, Rebecca T. and Braun, Edward L. and Burleigh, J. Gordon},
  title        = {Comparison of the {Chinese} bamboo partridge and red Junglefowl genome sequences highlights the importance of demography in genome evolution},
  journal      = {BMC Genomics},
  volume       = {19},
  number       = {1},
  year         = {2018},
  month        = may,
  doi          = {10.1186/s12864-018-4711-0},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85046680042&partnerID=MN8TOARS},
  abstractNote = {Our results emphasized the importance of demographic processes in generating the patterns of variation between Bambusicola and Gallus. We also demonstrated that genome assemblies generated using a single library can provide valuable insights into avian evolutionary history and found that it is important to account for alignment uncertainty in evolutionary inferences from draft genomes.},
}
% Page range uses the BibTeX double-hyphen form; month is the standard macro.
@article{li_tiley_galuska_reardon_kidder_rundell_barker_2018,
  author       = {Li, Zheng and Tiley, George P. and Galuska, Sally R. and Reardon, Chris R. and Kidder, Thomas I. and Rundell, Rebecca J. and Barker, Michael S.},
  title        = {Multiple large-scale gene and genome duplications during the evolution of hexapods},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {115},
  number       = {18},
  pages        = {4713--4718},
  year         = {2018},
  month        = apr,
  publisher    = {Proceedings of the National Academy of Sciences},
  doi          = {10.1073/pnas.1710791115},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85046284472&partnerID=MN8TOARS},
  abstractNote = {Polyploidy or whole genome duplication (WGD) is a major contributor to genome evolution and diversity. Although polyploidy is recognized as an important component of plant evolution, it is generally considered to play a relatively minor role in animal evolution. Ancient polyploidy is found in the ancestry of some animals, especially fishes, but there is little evidence for ancient WGDs in other metazoan lineages. Here we use recently published transcriptomes and genomes from more than 150 species across the insect phylogeny to investigate whether ancient WGDs occurred during the evolution of Hexapoda, the most diverse clade of animals. Using gene age distributions and phylogenomics, we found evidence for 18 ancient WGDs and six other large-scale bursts of gene duplication during insect evolution. These bursts of gene duplication occurred in the history of lineages such as the Lepidoptera, Trichoptera, and Odonata. To further corroborate the nature of these duplications, we evaluated the pattern of gene retention from putative WGDs observed in the gene age distributions. We found a relatively strong signal of convergent gene retention across many of the putative insect WGDs. Considering the phylogenetic breadth and depth of the insect phylogeny, this observation is consistent with polyploidy as we expect dosage balance to drive the parallel retention of genes. Together with recent research on plant evolution, our hexapod results suggest that genome duplications contributed to the evolution of two of the most diverse lineages of eukaryotes on Earth.},
}
% Author given names are stored in full rather than as glued initials
% ("A.D."); the bibliography style abbreviates them if required.
@article{yoder_poelstra_tiley_williams_kumar_2018,
  author       = {Yoder, Anne D. and Poelstra, Jelmer W. and Tiley, George P. and Williams, Rachel C. and Kumar, Sudhir},
  title        = {Neutral Theory Is the Foundation of Conservation Genetics},
  journal      = {Molecular Biology and Evolution},
  volume       = {35},
  number       = {6},
  pages        = {1322--1326},
  year         = {2018},
  month        = apr,
  doi          = {10.1093/molbev/msy076},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85048017799&partnerID=MN8TOARS},
  abstractNote = {Kimura's neutral theory of molecular evolution has been essential to virtually every advance in evolutionary genetics, and by extension, is foundational to the field of conservation genetics. Conservation genetics utilizes the key concepts of neutral theory to identify species and populations at risk of losing evolutionary potential by detecting patterns of inbreeding depression and low effective population size. In turn, this information can inform the management of organisms and their habitat providing hope for the long-term preservation of both. We expand upon Avise's "inventorial" and "functional" categories of conservation genetics by proposing a third category that is linked to the coalescent and that we refer to as "process-driven." It is here that connections between Kimura's theory and conservation genetics are strongest. Process-driven conservation genetics can be especially applied to large genomic data sets to identify patterns of historical risk, such as population bottlenecks, and accordingly, yield informed intuitions for future outcomes. By examining inventorial, functional, and process-driven conservation genetics in sequence, we assess the progression from theory, to data collection and analysis, and ultimately, to the production of hypotheses that can inform conservation policies.},
}
% NOTE(review): the citation key contains a non-ASCII character; it is kept
% unchanged so existing citations still resolve, but it needs a UTF-8 aware
% toolchain (e.g. Biber).
@article{tiley_ané_burleigh_2016,
  author       = {Tiley, George P. and Ané, Cécile and Burleigh, J. Gordon},
  title        = {Evaluating and Characterizing Ancient Whole-Genome Duplications in Plants with Gene Count Data},
  journal      = {Genome Biology and Evolution},
  volume       = {8},
  number       = {4},
  pages        = {1023--1037},
  year         = {2016},
  month        = mar,
  doi          = {10.1093/gbe/evw058},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85015355426&partnerID=MN8TOARS},
  abstractNote = {Whole-genome duplications (WGDs) have helped shape the genomes of land plants, and recent evidence suggests that the genomes of all angiosperms have experienced at least two ancient WGDs. In plants, WGDs often are followed by rapid fractionation, in which many homeologous gene copies are lost. Thus, it can be extremely difficult to identify, let alone characterize, ancient WGDs. In this study, we use a new maximum likelihood estimator to test for evidence of ancient WGDs in land plants and estimate the fraction of new genes copies that are retained following a WGD using gene count data, the number of gene copies in gene families. We identified evidence of many putative ancient WGDs in land plants and found that the genome fractionation rates vary tremendously among ancient WGDs. Analyses of WGDs within Brassicales also indicate that background gene duplication and loss rates vary across land plants, and different gene families have different probabilities of being retained following a WGD. Although our analyses are largely robust to errors in duplication and loss rates and the choice of priors, simulations indicate that this method can have trouble detecting multiple WGDs that occur on the same branch, especially when the gene retention rates for ancient WGDs are very low. They also suggest that we should carefully evaluate evidence for some ancient plant WGD hypotheses.},
}
% Erratum notice for the 2015 recombination-rate article; no abstract and no
% page or article number is recorded for it.
@article{tiley_burleigh_2015,
  author  = {Tiley, George P. and Burleigh, J. Gordon},
  title   = {Erratum to: The relationship of recombination rate, genome structure, and patterns of molecular evolution across angiosperms},
  journal = {BMC Evolutionary Biology},
  volume  = {15},
  number  = {1},
  year    = {2015},
  month   = nov,
  doi     = {10.1186/s12862-015-0525-8},
  url     = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84946561229&partnerID=MN8TOARS},
}
% Key carries a "b" suffix: the unsuffixed key tiley_burleigh_2015 is already
% used by the erratum entry, and a repeated key makes BibTeX discard this
% record. The structured-abstract section markers are written as plain labels.
@article{tiley_burleigh_2015b,
  author       = {Tiley, George P. and Burleigh, J. Gordon},
  title        = {The relationship of recombination rate, genome structure, and patterns of molecular evolution across angiosperms},
  journal      = {BMC Evolutionary Biology},
  volume       = {15},
  number       = {1},
  year         = {2015},
  month        = sep,
  doi          = {10.1186/s12862-015-0473-3},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84941645639&partnerID=MN8TOARS},
  abstractNote = {Background: Although homologous recombination affects the efficacy of selection in populations, the pattern of recombination rate evolution and its effects on genome evolution across plants are largely unknown. Recombination can reduce genome size by enabling the removal of LTR retrotransposons, alter codon usage by GC biased gene conversion, contribute to complex histories of gene duplication and loss through tandem duplication, and enhance purifying selection on genes. Therefore, variation in recombination rate across species may explain some of the variation in genomic architecture as well as rates of molecular evolution. We used phylogenetic comparative methods to investigate the evolution of global meiotic recombination rate in angiosperms and its effects on genome architecture and selection at the molecular level using genetic maps and genome sequences from thirty angiosperm species. Results: Recombination rate is negatively correlated with genome size, which is likely caused by the removal of LTR retrotransposons. After correcting recombination rates for euchromatin content, we also found an association between global recombination rate and average gene family size. This suggests a role for recombination in the preservation of duplicate genes or expansion of gene families. An analysis of the correlation between the ratio of nonsynonymous to synonymous substitution rates (dN/dS) and recombination rate in 3748 genes indicates that higher recombination rates are associated with an increased efficacy of purifying selection, suggesting that global recombination rates affect variation in rates of molecular evolution across distantly related angiosperm species, not just between populations. We also identified shifts in dN/dS for recombination proteins that are associated with shifts in global recombination rate across our sample of angiosperms. Conclusions: Although our analyses only reveal correlations, not mechanisms, and do not include potential covariates of recombination rate, like effective population size, they suggest that global recombination rates may play an important role in shaping the macroevolutionary patterns of gene and genome evolution in plants. Interspecific recombination rate variation is tightly correlated with genome size as well as variation in overall LTR retrotransposon abundances. Recombination may shape gene-to-gene variation in dN/dS between species, which might impact the overall gene duplication and loss rates.},
}
% Key rewritten with underscores: the exported key contained spaces, which
% BibTeX cannot parse. The title is reconstructed from that key.
% TODO(review): the source record has no author, volume or pages; supply them
% from the publisher record before citing this entry.
@article{evolutionary_relationships_among_ipomoea_batatas_and_closely_related_species_2013,
  title   = {Evolutionary relationships among {Ipomoea} batatas and closely related species},
  journal = {HortScience},
  year    = {2013},
  url     = {https://publons.com/wos-op/publon/68496722/},
}
% NOTE(review): the citation key contains a non-ASCII hyphen (U+2010); it is
% kept unchanged so existing citations still resolve. The author surname is
% written with an ordinary ASCII hyphen so it sorts and matches consistently.
@article{eserman_tiley_jarret_leebens‐mack_miller_2013,
  author       = {Eserman, Lauren A. and Tiley, George P. and Jarret, Robert L. and Leebens-Mack, Jim H. and Miller, Richard E.},
  title        = {Phylogenetics and diversification of morning glories (tribe {Ipomoeeae}, {Convolvulaceae}) based on whole plastome sequences},
  journal      = {American Journal of Botany},
  volume       = {101},
  number       = {1},
  pages        = {92--103},
  year         = {2013},
  month        = dec,
  doi          = {10.3732/ajb.1300207},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84892568483&partnerID=MN8TOARS},
  abstractNote = {Phylogenetic results provide confidence in relationships among Ipomoeeae lineages. Divergence time estimation results provide a temporal context for diversification of morning glories. Ancestral character reconstructions support previous findings that morning glory morphology is evolutionarily labile. Taken together, our study provides strong resolution of the morning glory phylogeny, which is broadly applicable to the evolution and ecology of these fascinating species.},
}