% Bibliography of nine journal articles, one field per line.
% Conventions used below: bare three-letter month macros, "--" page ranges,
% plain ISSN/DOI values, "and others" for truncated author lists, and
% brace-protected proper nouns / taxon names in titles.
% Two citation keys originally contained a space ("..._et al._YYYY"), which
% BibTeX cannot parse; the space is replaced by "_" in those keys.

% NOTE(review): volume = {2} with no number/pages looks like an issue number -- verify against the DOI record.
@article{du_xiang_cheng_zhou_wang_soltis_soltis_2023,
  author       = {Du, Zhi-Yuan and Xiang, Qiu-Yun and Cheng, Jin and Zhou, Wenbin and Wang, Qing-Feng and Soltis, Douglas E. and Soltis, Pamela S.},
  title        = {An updated phylogeny, biogeography, and {PhyloCode}-based classification of {Cornaceae} based on three sets of genomic data},
  journal      = {American Journal of Botany},
  volume       = {2},
  year         = {2023},
  month        = feb,
  issn         = {1537-2197},
  doi          = {10.1002/ajb2.16116},
  url          = {https://doi.org/10.1002/ajb2.16116},
  abstractNote = {Premise: A major goal of systematic biology is to uncover the evolutionary history of organisms and translate that knowledge into stable classification systems. Here, we integrate three sets of genome‐wide data to resolve phylogenetic relationships in Cornaceae (containing only Cornus s.l.), reconstruct the biogeographic history of the clade, and provide a revised classification using the PhyloCode to stabilize names for this taxonomically controversial group. Methods: We conducted phylogenetic analyses using 312 single‐copy nuclear genes and 70 plastid genes from Angiosperms353 Hyb‐Seq, plus numerous loci from RAD‐Seq. We integrated fossils using morphological data and produced a dated phylogeny for biogeographical analysis. Results: A well‐resolved, strongly supported, comprehensive phylogeny was obtained. Biogeographic analyses support an origin and rapid diversification of Cornus into four morphologically distinct major clades in the Northern Hemisphere (with an eastern Asian ancestor) during the late Cretaceous. Dispersal into Africa from eastern Asia likely occurred along the Tethys Seaway during the Paleogene, whereas dispersal into South America likely occurred during the Neogene. Diversification within the northern hemisphere likely involved repeated independent colonization of new areas during the Paleogene and Neogene along the Bering Land Bridge, the North Atlantic Land Bridge, and the Tethys Seaway. Thirteen strongly supported clades were named following rules of the PhyloCode. Conclusions: Our study provides an example of integrating genomic and morphological data to produce a robust, explicit species phylogeny that includes fossil taxa, which we translate into an updated classification scheme using the PhyloCode to stabilize names.},
}

@article{liu_ren_kwak_hodel_xu_he_zhou_huang_ma_qian_et_al._2022,
  author       = {Liu, Bin-Bin and Ren, Chen and Kwak, Myounghai and Hodel, Richard G. J. and Xu, Chao and He, Jian and Zhou, Wen-Bin and Huang, Chien-Hsun and Ma, Hong and Qian, Guan-Ze and others},
  title        = {Phylogenomic conflict analyses in the apple genus {Malus} s.l. reveal widespread hybridization and allopolyploidy driving diversification, with insights into the complex biogeographic history in the {Northern Hemisphere}},
  journal      = {Journal of Integrative Plant Biology},
  volume       = {64},
  number       = {5},
  pages        = {1020--1043},
  year         = {2022},
  month        = may,
  issn         = {1744-7909},
  doi          = {10.1111/jipb.13246},
  abstractNote = {Phylogenomic evidence from an increasing number of studies has demonstrated that different data sets and analytical approaches often reconstruct strongly supported but conflicting relationships. In this study, 785 single‐copy nuclear genes and 75 complete plastomes were used to infer the phylogenetic relationships and estimate the historical biogeography of the apple genus Malus sensu lato, an economically important lineage disjunctly distributed in the Northern Hemisphere and involved in known and suspected hybridization and allopolyploidy events. The nuclear phylogeny recovered the monophyly of Malus s.l. (including Docynia); however, the genus was supported to be biphyletic in the plastid phylogeny. An ancient chloroplast capture event in the Eocene in western North America best explains the cytonuclear discordance. Our conflict analysis demonstrated that ILS, hybridization, and allopolyploidy could explain the widespread nuclear gene tree discordance. One deep hybridization event (Malus doumeri) and one recent event (Malus coronaria) were detected in Malus s.l. Furthermore, our historical biogeographic analysis integrating living and fossil data supported a widespread East Asian‐western North American origin of Malus s.l. in the Eocene, followed by several extinction and dispersal events in the Northern Hemisphere. We also propose a general workflow for assessing phylogenomic discordance and biogeographic analysis using deep genome skimming data sets.},
}

@article{zhou_xiang_2022,
  author       = {Zhou, Wenbin and Xiang, Qiu-Yun},
  title        = {Phylogenomics and biogeography of {Castanea} (chestnut) and {Hamamelis} (witch-hazel) - Choosing between {RAD-seq} and {Hyb-Seq} approaches},
  journal      = {Molecular Phylogenetics and Evolution},
  volume       = {176},
  year         = {2022},
  month        = nov,
  issn         = {1095-9513},
  doi          = {10.1016/j.ympev.2022.107592},
  abstractNote = {Hyb-Seq and RAD-seq are well-established high throughput sequencing technologies that have been increasingly used for plant phylogenomic studies. Each method has its own pros and cons. The choice between them is a practical issue for plant systematists studying the evolutionary histories of biodiversity of relatively recent origins. However, few studies have compared the congruence and conflict between results from the two methods within the same group of organisms in plants. In this study, we employed RAD-seq and Hyb-Seq of Angiosperms353 genes in phylogenomic and biogeographic studies of Hamamelis (the witch-hazels) and Castanea (chestnuts), two classic examples exhibiting the well-known eastern Asian (EA) -eastern North American (ENA) disjunct distribution, and compared them side by side. Our results showed congruences in phylogenetic inference and divergence time dating between the two data sets obtained through our customized procedures of library preparation and sequence trimming, although they differed in the number of loci and informative sites, the amount of missing data, and sampling within species. We provide recommendations regarding the selection of the two methods for phylogenomic study at generic level based on fund availability and sampling scale. If funds and time are not constrained, we recommend Hyb-Seq. If funds and time are somewhat limited and sampling is large, we recommend RAD-seq. However, we found greater conflict among gene trees from the RAD-seq data due to the short sequences per locus. Therefore, species tree building and network detecting with the RAD-seq data with short RAD-seq loci (e.g., <150 bp) should avoid using analytical methods relying on gene trees of individual locus, but using site-based methods such as SVDQuartets and D-statistic method. Our phylogenetic analyses of RAD-seq and Hyb-Seq data resulted in well-resolved species relationships. Analyses of the data using the D-statistic test and PhyloNet revealed ancient introgressions in both genera. Biogeographic analyses including fossil data using total evidence-based dated tree and DEC model applying specific inter-area dispersal probabilities revealed a complicated history for each genus, indicating multiple interareal dispersals and local extinctions within and outside areas of the taxa's modern ranges in both the Paleogene and Neogene. The study demonstrates the importance of including fossil taxa for a more accurate reconstruction of biogeographic histories of taxa to understand the EA and ENA floristic disjunction. Our results support a widespread ancestral range in EA-western North America (WNA) followed by early diversification in EA and expansion to North America (NA) and Europe for Castanea and a more widespread ancestral range in EA-ENA-WNA for Hamamelis. The origins of the modern EA-ENA disjunction in both genera were suggested to be the result of vicariance from widespread ancestors in Eurasia-ENA of the mid-Miocene and in EA-NA of the late Oligocene, respectively.},
}

% NOTE(review): volume = {2} with no number/pages looks like an issue number -- verify against the DOI record.
@article{zhou_harris_xiang_2022,
  author       = {Zhou, Wenbin and Harris, A. J. and Xiang, Qiu-Yun},
  title        = {Phylogenomics and biogeography of {Torreya} ({Taxaceae})-Integrating data from three organelle genomes, morphology, and fossils and a practical method for reducing missing data from {RAD-seq}},
  journal      = {Journal of Systematics and Evolution},
  volume       = {2},
  year         = {2022},
  month        = may,
  publisher    = {Wiley},
  issn         = {1759-6831},
  doi          = {10.1111/jse.12838},
  url          = {https://doi.org/10.1111/jse.12838},
  abstractNote = {Restriction site‐associated DNA sequencing (RAD‐seq) enables obtaining thousands of genetic markers for phylogenomic studies. However, RAD‐seq data are subject to allele dropout (ADO) due to polymorphisms at enzyme cutting sites. We developed a new pipeline, RAD‐seq Allele Dropout Remedy in our study of the gymnosperm genus, Torreya, to mitigate ADO in outgroups by recovering missing loci from previously published transcriptomes. By using RADADOR to supplement Rad‐seq data in combination with plastome and mitochondrial gene sequences, morphology, and fossil records, we reconstructed the phylogenetic and biogeographic histories of the genus and tested hypotheses on anomalies of biodiversity of the eastern Asian‐North American floristic disjunction. Our results showed that our pipeline recovered many loci missing from the outgroup, and the improved data yielded a more robust phylogeny for Torreya. Using the fossilized birth–death model and divergence–extinction–cladogenesis method, we resolved a detailed biogeographic history of Torreya that suggested a Jurassic origin spanning Laurasia and differential speciation and extinction among continents accounting for modern diversity, which is biased toward eastern Asia (EA). The biogeographic results also supported a vicariance origin of modern Torreya from a widespread ancestor in EA and North America (NA) in the mid‐Eocene, and cross Beringian exchange in the early Paleogene before the vicariant isolation, in contrast to the “out of NA” pattern common to gymnosperms and to the “out of EA” hypothesis previously proposed for the genus. Furthermore, we observed phylogenetic discordance between the nuclear and plastid phylogenies for Torreya jackii, suggesting differential lineage sorting of plastid genomes among species of Torreya or plastid genome capture in T. jackii.},
}

@article{zhou_soghigian_xiang_2022,
  author       = {Zhou, Wenbin and Soghigian, John and Xiang, Qiu-Yun Jenny},
  title        = {A New Pipeline for Removing Paralogs in Target Enrichment Data},
  journal      = {Systematic Biology},
  volume       = {71},
  number       = {2},
  pages        = {410--425},
  year         = {2022},
  month        = feb,
  issn         = {1076-836X},
  doi          = {10.1093/sysbio/syab044},
  abstractNote = {Target enrichment (such as Hyb-Seq) is a well-established high throughput sequencing method that has been increasingly used for phylogenomic studies. Unfortunately, current widely used pipelines for analysis of target enrichment data do not have a vigorous procedure to remove paralogs in target enrichment data. In this study, we develop a pipeline we call Putative Paralogs Detection (PPD) to better address putative paralogs from enrichment data. The new pipeline is an add-on to the existing HybPiper pipeline, and the entire pipeline applies criteria in both sequence similarity and heterozygous sites at each locus in the identification of paralogs. Users may adjust the thresholds of sequence identity and heterozygous sites to identify and remove paralogs according to the level of phylogenetic divergence of their group of interest. The new pipeline also removes highly polymorphic sites attributed to errors in sequence assembly and gappy regions in the alignment. We demonstrated the value of the new pipeline using empirical data generated from Hyb-Seq and the Angiosperms353 kit for two woody genera Castanea (Fagaceae, Fagales) and Hamamelis (Hamamelidaceae, Saxifragales). Comparisons of data sets showed that the PPD identified many more putative paralogs than the popular method HybPiper. Comparisons of tree topologies and divergence times showed evident differences between data from HybPiper and data from our new PPD pipeline. We further evaluated the accuracy and error rates of PPD by BLAST mapping of putative paralogous and orthologous sequences to a reference genome sequence of Castanea mollissima. Compared to HybPiper alone, PPD identified substantially more paralogous gene sequences that mapped to multiple regions of the reference genome (31 genes for PPD compared with 4 genes for HybPiper alone). In conjunction with HybPiper, paralogous genes identified by both pipelines can be removed resulting in the construction of more robust orthologous gene data sets for phylogenomic and divergence time analyses. Our study demonstrates the value of Hyb-Seq with data derived from the Angiosperms353 probe set for elucidating species relationships within a genus, and argues for the importance of additional steps to filter paralogous genes and poorly aligned regions (e.g., as occur through assembly errors), such as our new PPD pipeline described in this study. [Angiosperms353; Castanea; divergence time; Hamamelis; Hyb-Seq, paralogs, phylogenomics.]},
}

@article{zhou_xiang_wen_2020,
  author       = {Zhou, Wenbin and Xiang, Qiu-Yun and Wen, Jun},
  title        = {Phylogenomics, biogeography, and evolution of morphology and ecological niche of the eastern {Asian}-eastern {North American} {Nyssa} ({Nyssaceae})},
  journal      = {Journal of Systematics and Evolution},
  volume       = {58},
  number       = {5},
  pages        = {571--603},
  year         = {2020},
  month        = sep,
  issn         = {1759-6831},
  doi          = {10.1111/jse.12599},
  abstractNote = {Nyssa (Nyssaceae, Cornales) represents a classical example of the well‐known eastern Asian–eastern North American floristic disjunction. The genus consists of three species in eastern Asia, four species in eastern North America, and one species in Central America. Species of the genus are ecologically important trees in eastern North American and eastern Asian forests. The distribution of living species and a rich fossil record of the genus make it an excellent model for understanding the origin and evolution of the eastern Asian–eastern North American floristic disjunction. However, despite the small number of species, relationships within the genus have remained unclear and have not been elucidated using a molecular approach. Here, we integrate data from 48 nuclear genes, fossils, morphology, and ecological niche to resolve species relationships, elucidate its biogeographical history, and investigate the evolution of morphology and ecological niches, aiming at a better understanding of the well‐known EA–ENA floristic disjunction. Results showed that the Central American (CAM) Nyssa talamancana was sister to the remaining species, which were divided among three, rapidly diversified subclades. Estimated divergence times and biogeographical history suggested that Nyssa had an ancestral range in Eurasia and western North America in the late Paleocene. The rapid diversification occurred in the early Eocene, followed by multiple dispersals between and within the Eurasian and North American continents. The genus experienced two major episodes of extinction in the early Oligocene and end of Neogene, respectively. The Central American N. talamancana represents a relic lineage of the boreotropical flora in the Paleocene/Eocene boundary that once diversified in western North America. The results supported the importance of both the North Atlantic land bridge and the Bering land bridge (BLB) for the Paleogene dispersals of Nyssa and the Neogene dispersals, respectively, as well as the role of Central America as refugia of the Paleogene flora. The total‐evidence‐based dated phylogeny suggested that the pattern of macroevolution of Nyssa coincided with paleoclimatic changes. We found a number of evolutionary changes in morphology (including wood anatomy and leaf traits) and ecological niches (precipitation and temperature) between the EA–ENA disjunct, supporting the ecological selection driving trait evolutions after geographic isolation. We also demonstrated challenges in phylogenomic studies of lineages with rapid diversification histories. The concatenation of gene data can lead to inference of strongly supported relationships incongruent with the species tree. However, conflicts in gene genealogies did not seem to impose a strong effect on divergence time dating in our case. Furthermore, we demonstrated that rapid diversification events may not be recovered in the divergence time dating analysis using BEAST if critical fossil constraints of the relevant nodes are not available. Our study provides an example of complex bidirectional exchanges of plants between Eurasia and North America in the Paleogene, but “out of Asia” migrations in the Neogene, to explain the present disjunct distribution of Nyssa in EA and ENA.},
}

@article{wahlsteen_zhou_xiang_rushforth_2021,
  author       = {Wahlsteen, Eric and Zhou, Wenbin and Xiang, Qiuyun and Rushforth, Keith},
  title        = {Rediscovery of the lost little dogwood {Cornus wardiana} ({Cornaceae})-Its phylogenetic and morphological distinction and implication in the origin of the {Arctic-Sino-Himalayan} disjunction},
  journal      = {Journal of Systematics and Evolution},
  volume       = {59},
  number       = {2},
  pages        = {405--416},
  year         = {2021},
  month        = mar,
  issn         = {1759-6831},
  doi          = {10.1111/jse.12576},
  abstractNote = {The dwarf dogwoods (subgenus Arctocrania) have been widely known to consist of three circumboreal species Cornus suecica, Cornus canadensis, and Cornus unalaschkensis. A fourth putative species was discovered from the northern Myanmar in 1937, but it had never been formally reported on. Here, we formally report the species on the basis of phylogenetic and morphological evidence and name it Cornus wardiana Rushforth & Wahlsteen (sp. nov.). We conducted phylogenetic and morphometric analyses to determine its evolutionary relationship and differentiation from the existing relatives. We dated the phylogeny using molecular data and conducted a biogeographic analysis to gain insights into the evolution and biogeography of the Arctic‐Sino‐Himalayan disjunction. The phylogenetic analysis used sequences of the nrITS and plastid matK and rbcL genes and included all four dwarf dogwoods and 20 other species representing the three other major lineages of Cornus and the outgroup. The morphometric analyses included 60 populations and 102 specimens of dwarf dogwood, representing the entire range of the subgenus. The results showed that C. wardiana diverged first within subgenus Arctocrania in the Miocene, from a wide‐spread ancestor. Results from principal component analysis and discriminant analysis also showed that the Myanmar samples are well separated from the others. Taken together, these results suggest that the dwarf dogwood lineage split from the big‐bracted dogwoods in Asia or Asia‐western North America during the late Paleocene and spread widely to form a Eurasia‐North America distribution; the Arctic‐Sino‐Himalayan disjunction was the result of southward migration in the Miocene followed by extinction in the intervening highland areas.},
}

@article{dong_chen_cheng_zhou_ma_chen_fu_liu_zhao_soltis_et_al._2019,
  author       = {Dong, Yibo and Chen, Shichao and Cheng, Shifeng and Zhou, Wenbin and Ma, Qing and Chen, Zhiduan and Fu, Cheng-Xin and Liu, Xin and Zhao, Yun-peng and Soltis, Pamela S. and others},
  title        = {Natural selection and repeated patterns of molecular evolution following allopatric divergence},
  journal      = {eLife},
  volume       = {8},
  year         = {2019},
  month        = aug,
  publisher    = {eLife Sciences Publications, Ltd},
  issn         = {2050-084X},
  doi          = {10.7554/eLife.45199},
  url          = {https://doi.org/10.7554/eLife.45199},
  abstractNote = {Although geographic isolation is a leading driver of speciation, the tempo and pattern of divergence at the genomic level remain unclear. We examine genome-wide divergence of putatively single-copy orthologous genes (POGs) in 20 allopatric species/variety pairs from diverse angiosperm clades, with 16 pairs reflecting the classic eastern Asia-eastern North America floristic disjunction. In each pair, >90% of POGs are under purifying selection, and <10% are under positive selection. A set of POGs are under strong positive selection, 14 of which are shared by 10–15 pairs, and one shared by all pairs; 15 POGs are annotated to biological processes responding to various stimuli. The relative abundance of POGs under different selective forces exhibits a repeated pattern among pairs despite an ~10 million-year difference in divergence time. Species divergence times are positively correlated with abundance of POGs under moderate purifying selection, but negatively correlated with abundance of POGs under strong purifying selection.},
}

@article{zhou_ji_obata_pais_dong_peet_xiang_2018,
  author       = {Zhou, Wenbin and Ji, Xiang and Obata, Shihori and Pais, Andrew and Dong, Yibo and Peet, Robert and Xiang, Qiu-Yun (Jenny)},
  title        = {Resolving relationships and phylogeographic history of the {Nyssa sylvatica} complex using data from {RAD-seq} and species distribution modeling},
  journal      = {Molecular Phylogenetics and Evolution},
  volume       = {126},
  pages        = {1--16},
  year         = {2018},
  month        = sep,
  publisher    = {Elsevier BV},
  issn         = {1055-7903},
  doi          = {10.1016/j.ympev.2018.04.001},
  url          = {https://doi.org/10.1016/j.ympev.2018.04.001},
  abstractNote = {Nyssa sylvatica complex consists of several woody taxa occurring in eastern North America. These taxa were recognized as two or three species including three or four varieties by different authors. Due to high morphological similarities and complexity of morphological variation, classification and delineation of taxa in the group have been difficult and controversial. Here we employ data from RAD-seq to elucidate the genetic structure and phylogenetic relationships within the group. Using the genetic evidence, we evaluate previous classifications and delineate species. We also employ Species Distribution Modeling (SDM) to evaluate impacts of climatic changes on the ranges of the taxa and to gain insights into the relevant refugia in eastern North America. Results from Molecular Variance Analysis (AMOVA), STRUCTURE, phylogenetic analyses using Maximum likelihood, Bayesian Inference, and Splittree methods of RAD-seq data strongly support a two-clade pattern, largely separating samples of N. sylvatica from those of N. biflora-N. ursina mix. Divergence time analysis with BEAST suggests the two clades diverged in the mid Miocene. The ancestor of the present trees of N. sylvatica was suggested to be in the Pliocene and that of N. biflora-N. ursina mix in the end of the Miocene. Results from SDM predicted a smaller range in the southern part of the species present range of each clade during the Last Glacial Maximum (LGM). A northward expansion of the ranges during interglacial period and a northward shift of the ranges in the future under a model of global warming were also predicted. Our results support the recognition of two species in the complex, N. sylvatica and N. biflora, following the phylogenetic species concept. We found no genetic evidence supporting recognitions of intraspecific taxa. However, we propose subsp. ursina and subsp. biflora within N. biflora due to their distinction in habits, distributions, and habitats. Our results further support movements of trees in eastern North America in response to climatic changes. Finally, our study demonstrates that RAD-seq data and a combination of population genomics and SDM are valuable in resolving relationship and biogeographic history of closely related species that are taxonomically difficult.},
}