% Publication list (BibTeX database), ordered most recent first.
%
% Conventions used in this file:
%   - One field per line, lowercase field names. "abstractNote" is a
%     non-standard, exporter-specific field that standard styles ignore.
%   - Recurring journal names come from the string macros defined just below,
%     so each journal has exactly one spelling.
%   - Page and issue ranges use "--"; months use the built-in three-letter
%     macros (unquoted) so the style decides how to print them.
%   - Braces inside titles protect acronyms and proper nouns from the
%     sentence-casing that many styles apply.
%   - The compound surname "Kosakovsky Pond" is always written in
%     "Last, First" form so it is not split into given name + "Pond".
%   - Citation keys contain no whitespace. Two keys that originally contained
%     a space (and therefore could not be parsed) had it replaced by "_".
%   - Abstract fields that held only the placeholder word "Abstract" were
%     dropped.

@string{jMBE     = {Molecular Biology and Evolution}}
@string{jJME     = {Journal of Molecular Evolution}}
@string{jBioinf  = {Bioinformatics}}
@string{jPLOSONE = {PLOS ONE}}

@article{mannino_wisotsky_pond_muse_2020,
  author       = {Mannino, Frank and Wisotsky, Sadie and Kosakovsky Pond, Sergei L. and Muse, Spencer V.},
  title        = {Equiprobable discrete models of site-specific substitution rates underestimate the extent of rate variability},
  journal      = jPLOSONE,
  volume       = {15},
  number       = {3},
  year         = {2020},
  month        = mar,
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0229493},
  abstractNote = {It is standard practice to model site-to-site variability of substitution rates by discretizing a continuous distribution into a small number, K, of equiprobable rate categories. We demonstrate that the variance of this discretized distribution has an upper bound determined solely by the choice of K and the mean of the distribution. This bound can introduce biases into statistical inference, especially when estimating parameters governing site-to-site variability of substitution rates. Applications to two large collections of sequence alignments demonstrate that this upper bound is often reached in analyses of real data. When parameter estimation is of primary interest, additional rate categories or more flexible modeling methods should be considered.},
}

@article{pond_poon_velazquez_weaver_hepler_murrell_shank_magalis_bouvier_nekrutenko_et_al._2020,
  author       = {Kosakovsky Pond, Sergei L. and Poon, Art F. Y. and Velazquez, Ryan and Weaver, Steven and Hepler, N. Lance and Murrell, Ben and Shank, Stephen D. and Magalis, Brittany Rife and Bouvier, Dave and Nekrutenko, Anton and others},
  title        = {{HyPhy} 2.5---{A} Customizable Platform for Evolutionary Hypothesis Testing Using Phylogenies},
  journal      = jMBE,
  volume       = {37},
  number       = {1},
  pages        = {295--299},
  year         = {2020},
  month        = jan,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msz197},
}

@article{wisotsky_pond_shank_muse_2020,
  author       = {Wisotsky, Sadie R. and Kosakovsky Pond, Sergei L. and Shank, Stephen D. and Muse, Spencer V.},
  title        = {Synonymous Site-to-Site Substitution Rate Variation Dramatically Inflates False Positive Rates of Selection Analyses: Ignore at Your Own Peril},
  journal      = jMBE,
  volume       = {37},
  number       = {8},
  pages        = {2430--2439},
  year         = {2020},
  month        = aug,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msaa037},
}

@article{weaver_shank_spielman_li_muse_pond_2018,
  author       = {Weaver, Steven and Shank, Stephen D. and Spielman, Stephanie J. and Li, Michael and Muse, Spencer V. and Kosakovsky Pond, Sergei L.},
  title        = {{Datamonkey} 2.0: A Modern Web Application for Characterizing Selective and Other Evolutionary Processes},
  journal      = jMBE,
  volume       = {35},
  number       = {3},
  pages        = {773--777},
  year         = {2018},
  month        = mar,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msx335},
  abstractNote = {Inference of how evolutionary forces have shaped extant genetic diversity is a cornerstone of modern comparative sequence analysis. Advances in sequence generation and increased statistical sophistication of relevant methods now allow researchers to extract ever more evolutionary signal from the data, albeit at an increased computational cost. Here, we announce the release of Datamonkey 2.0, a completely re-engineered version of the Datamonkey web-server for analyzing evolutionary signatures in sequence data. For this endeavor, we leveraged recent developments in open-source libraries that facilitate interactive, robust, and scalable web application development. Datamonkey 2.0 provides a carefully curated collection of methods for interrogating coding-sequence alignments for imprints of natural selection, packaged as a responsive (i.e. can be viewed on tablet and mobile devices), fully interactive, and API-enabled web application. To complement Datamonkey 2.0, we additionally release HyPhy Vision, an accompanying JavaScript application for visualizing analysis results. HyPhy Vision can also be used separately from Datamonkey 2.0 to visualize locally-executed HyPhy analyses. Together, Datamonkey 2.0 and HyPhy Vision showcase how scientific software development can benefit from general-purpose open-source frameworks. Datamonkey 2.0 is freely and publicly available at http://www.datamonkey.org, and the underlying codebase is available from https://github.com/veg/datamonkey-js.},
}

@article{katz_deberardinis_hall_kovner_dunthorn_muse_2011,
  author       = {Katz, Laura A. and DeBerardinis, Jennifer and Hall, Meaghan S. and Kovner, Alexandra M. and Dunthorn, Micah and Muse, Spencer V.},
  title        = {Heterogeneous Rates of Molecular Evolution Among Cryptic Species of the Ciliate Morphospecies {Chilodonella} uncinata},
  journal      = jJME,
  volume       = {73},
  number       = {5--6},
  pages        = {266--272},
  year         = {2011},
  month        = dec,
  issn         = {1432-1432},
  doi          = {10.1007/s00239-011-9468-x},
  abstractNote = {While molecular analyses have provided insight into the phylogeny of ciliates, the few studies assessing intraspecific variation have largely relied on just a single locus [e.g., nuclear small subunit rDNA (nSSU-rDNA) or mitochondrial cytochrome oxidase I]. In this study, we characterize the diversity of several nuclear protein-coding genes plus both nSSU-rDNA and mitochondrial small subunit rDNA (mtSSU-rDNA) of five isolates of the ciliate morphospecies Chilodonella uncinata. Although these isolates have nearly identical nSSU-rDNA sequences, they differ by up to 8.0% in mtSSU-rDNA. Comparative analyses of all loci, including β-tubulin paralogs, indicate a lack of recombination between strains, demonstrating that the morphospecies C. uncinata consists of multiple cryptic species. Further, there is considerable variation in substitution rates among loci as some protein-coding domains are nearly identical between isolates, while others differ by up to 13.2% at the amino acid level. Combining insights on macronuclear variation among isolates, the focus of this study, with published data from the micronucleus of two of these isolates, indicates that C. uncinata lineages are able to maintain both highly divergent and highly conserved genes within a rapidly evolving germline genome.},
}

@article{delport_scheffler_gravenor_muse_pond_2010,
  author       = {Delport, W. and Scheffler, K. and Gravenor, M. B. and Muse, S. V. and Kosakovsky Pond, S. L.},
  title        = {Benchmarking multi-rate codon models},
  journal      = jPLOSONE,
  volume       = {5},
  number       = {7},
  year         = {2010},
}

@article{delport_scheffler_botha_gravenor_muse_pond_2010,
  author       = {Delport, W. and Scheffler, K. and Botha, G. and Gravenor, M. B. and Muse, S. V. and Kosakovsky Pond, S. L.},
  title        = {Codontest: modeling amino acid substitution preferences in coding sequences},
  journal      = {PLOS Computational Biology},
  volume       = {6},
  number       = {8},
  year         = {2010},
}

@article{pond_delport_muse_scheffler_2010,
  author       = {Kosakovsky Pond, S. L. and Delport, W. and Muse, S. V. and Scheffler, K.},
  title        = {Correcting the bias of empirical frequency parameter estimators in codon models},
  journal      = jPLOSONE,
  volume       = {5},
  number       = {7},
  year         = {2010},
}

@article{strain_kelley_schultz-cherry_muse_koci_2008,
  author       = {Strain, Errol and Kelley, Laura A. and Schultz-Cherry, Stacey and Muse, Spencer V. and Koci, Matthew D.},
  title        = {Genomic analysis of closely related astroviruses},
  journal      = {Journal of Virology},
  volume       = {82},
  number       = {10},
  pages        = {5099--5103},
  year         = {2008},
  month        = may,
  issn         = {0022-538X},
  doi          = {10.1128/JVI.01993-07},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-43249085073&partnerID=MN8TOARS},
}

@article{kosakovsky_pond_mannino_gravenor_muse_frost_2007,
  author       = {Kosakovsky Pond, Sergei L. and Mannino, Frank V. and Gravenor, Michael B. and Muse, Spencer V. and Frost, Simon D. W.},
  title        = {Evolutionary model selection with a genetic algorithm: A case study using stem {RNA}},
  journal      = jMBE,
  volume       = {24},
  number       = {1},
  pages        = {159--170},
  year         = {2007},
  month        = jan,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msl144},
  abstractNote = {The choice of a probabilistic model to describe sequence evolution can and should be justified. Underfitting the data through the use of overly simplistic models may miss out on interesting phenomena and lead to incorrect inferences. Overfitting the data with models that are too complex may ascribe biological meaning to statistical artifacts and result in falsely significant findings. We describe a likelihood-based approach for evolutionary model selection. The procedure employs a genetic algorithm (GA) to quickly explore a combinatorially large set of all possible time-reversible Markov models with a fixed number of substitution rates. When applied to stem RNA data subject to well-understood evolutionary forces, the models found by the GA 1) capture the expected overall rate patterns a priori; 2) fit the data better than the best available models based on a priori assumptions, suggesting subtle substitution patterns not previously recognized; 3) cannot be rejected in favor of the general reversible model, implying that the evolution of stem RNA sequences can be explained well with only a few substitution rate parameters; and 4) perform well on simulated data, both in terms of goodness of fit and the ability to estimate evolutionary rates. We also investigate the utility of several distance measures for comparing and contrasting inferred evolutionary models. Using widely available small computer clusters, our approach allows, for the first time, to evaluate the performance of existing RNA evolutionary models by comparing them with a large pool of candidate models and to validate common modeling assumptions. In addition, the new method provides the foundation for rigorous selection and comparison of substitution models for other types of sequence data.},
}

@article{zufall_mcgrath_muse_katz_2006,
  author       = {Zufall, Rebecca A. and McGrath, Casey L. and Muse, Spencer V. and Katz, Laura A.},
  title        = {Genome architecture drives protein evolution in ciliates},
  journal      = jMBE,
  volume       = {23},
  number       = {9},
  pages        = {1681--1687},
  year         = {2006},
  month        = sep,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msl032},
  abstractNote = {Studies of microbial eukaryotes have been pivotal in the discovery of biological phenomena, including RNA editing, self-splicing RNA, and telomere addition. Here we extend this list by demonstrating that genome architecture, namely the extensive processing of somatic (macronuclear) genomes in some ciliate lineages, is associated with elevated rates of protein evolution. Using newly developed likelihood-based procedures for studying molecular evolution, we investigate 6 genes to compare 1) ciliate protein evolution to that of 3 other clades of eukaryotes (plants, animals, and fungi) and 2) protein evolution in ciliates with extensively processed macronuclear genomes to that of other ciliate lineages. In 5 of the 6 genes, ciliates are estimated to have a higher ratio of nonsynonymous/synonymous substitution rates, consistent with an increase in the rate of protein diversification in ciliates relative to other eukaryotes. Even more striking, there is a significant effect of genome architecture within ciliates as the most divergent proteins are consistently found in those lineages with the most highly processed macronuclear genomes. We propose a model whereby genome architecture-specifically chromosomal processing, amitosis within macronuclei, and epigenetics-allows ciliates to explore protein space in a novel manner. Further, we predict that examination of diverse eukaryotes will reveal additional evidence of the impact of genome architecture on molecular evolution.},
}

@article{pond_frost_muse_2005,
  author       = {Kosakovsky Pond, S. L. and Frost, S. D. W. and Muse, S. V.},
  title        = {{HyPhy}: hypothesis testing using phylogenies},
  journal      = jBioinf,
  volume       = {21},
  number       = {5},
  pages        = {676--679},
  year         = {2005},
  month        = mar,
  issn         = {1460-2059},
  doi          = {10.1093/bioinformatics/bti079},
}

@article{strain_muse_2005,
  author       = {Strain, E. and Muse, S. V.},
  title        = {Positively selected sites in the {Arabidopsis} receptor-like kinase gene family},
  journal      = jJME,
  volume       = {61},
  number       = {3},
  pages        = {325--332},
  year         = {2005},
  month        = sep,
  issn         = {1432-1432},
  doi          = {10.1007/s00239-004-0308-0},
  abstractNote = {We analyze members of the receptor-like kinase (RLK) gene family in Arabidopsis thaliana for positive selection. Likelihood analyses find evidence for positive selection in 12 of the 52 RLK family sequences groups. These 12 groups represent 97 of the 403 sequences analyzed. The majority of genes in groups subject to positive selection have not been functionally characterized, but sites under selection are predominantly located in the extracellular region. The pattern of selection in the extracellular leucine-rich repeat (LRR) motif of groups 14 and 51 is similar to previous studies where positively selected positions are located in a solvent exposed beta-strand that may determine disease specificity, raising the possibility that some RLK genes function in a similar role.},
}

@article{liu_muse_2005,
  author       = {Liu, K. J. and Muse, S. V.},
  title        = {{PowerMarker}: an integrated analysis environment for genetic marker analysis},
  journal      = jBioinf,
  volume       = {21},
  number       = {9},
  pages        = {2128--2129},
  year         = {2005},
  month        = may,
  issn         = {1460-2059},
  doi          = {10.1093/bioinformatics/bti282},
  abstractNote = {SUMMARY PowerMarker delivers a data-driven, integrated analysis environment (IAE) for genetic data. The IAE integrates data management, analysis and visualization in a user-friendly graphical user interface. It accelerates the analysis lifecycle and enables users to maintain data integrity throughout the process. An ever-growing list of more than 50 different statistical analyses for genetic markers has been implemented in PowerMarker. AVAILABILITY www.powermarker.net},
}

@article{pond_muse_2005,
  author       = {Kosakovsky Pond, S. L. and Muse, S. V.},
  title        = {Site-to-site variation of synonymous substitution rates},
  journal      = jMBE,
  volume       = {22},
  number       = {12},
  pages        = {2375--2385},
  year         = {2005},
  month        = dec,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msi232},
  abstractNote = {We develop a new model for studying the molecular evolution of protein-coding DNA sequences. In contrast to existing models, we incorporate the potential for site-to-site heterogeneity of both synonymous and nonsynonymous substitution rates. We demonstrate that within-gene heterogeneity of synonymous substitution rates appears to be common. Using the new family of models, we investigate the utility of a variety of new statistical inference procedures, and we pay particular attention to issues surrounding the detection of sites undergoing positive selection. We discuss how failure to model synonymous rate variation in the model can lead to misidentification of sites as positively selected.},
}

@book{gibson_muse_2004,
  author       = {Gibson, G. and Muse, S. V.},
  title        = {A primer of genome science},
  edition      = {Second},
  publisher    = {Sinauer Associates},
  address      = {Sunderland, MA},
  year         = {2004},
  isbn         = {0878932321},
}

@article{pond_muse_2004,
  author       = {Kosakovsky Pond, S. L. and Muse, S. V.},
  title        = {Column sorting: Rapid calculation of the phylogenetic likelihood function},
  journal      = {Systematic Biology},
  volume       = {53},
  number       = {5},
  pages        = {685--692},
  year         = {2004},
  month        = oct,
  issn         = {1076-836X},
  doi          = {10.1080/10635150490522269},
  abstractNote = {Likelihood applications have become a central approach for molecular evolutionary analyses since the first computationally tractable treatment two decades ago. Although Felsenstein's original pruning algorithm makes likelihood calculations feasible, it is usually possible to take advantage of repetitive structure present in the data to arrive at even greater computational reductions. In particular, alignment columns with certain similarities have components of the likelihood calculation that are identical and need not be recomputed if columns are evaluated in an optimal order. We develop an algorithm for exploiting this speed improvement via an application of graph theory. The reductions provided by the method depend on both the tree and the data, but typical savings range between 15% and 50%. Real-data examples with time reductions of 80% have been identified. The overhead costs associated with implementing the algorithm are minimal, and they are recovered in all but the smallest data sets. The modifications will provide faster likelihood algorithms, which will allow likelihood methods to be applied to larger sets of taxa and to include more thorough searches of the tree topology space.},
}

@article{katz_bornstein_lasek-nesselquist_muse_2004,
  author       = {Katz, L. A. and Bornstein, J. G. and Lasek-Nesselquist, E. and Muse, S. V.},
  title        = {Dramatic diversity of ciliate histone {H4} genes revealed by comparisons of patterns of substitutions and paralog divergences among eukaryotes},
  journal      = jMBE,
  volume       = {21},
  number       = {3},
  pages        = {555--562},
  year         = {2004},
  doi          = {10.1093/molbev/msh048},
  abstractNote = {The accumulation of divergent histone H4 amino acid sequences within and between ciliate lineages challenges traditional views of the evolution of this essential eukaryotic protein. We analyzed histone H4 sequences from 13 species of ciliates and compared these data with sequences from well-sampled eukaryotic clades. Ciliate histone H4s differ from one another at as many as 46% of their amino acids, in contrast with the highly conserved character of this protein in most other eukaryotes. Equally striking, we find paralogs of histone H4 within ciliate genomes that differ by up to 25% of their amino acids, whereas paralogs in other eukaryotes share identical or nearly identical amino acid sequences. Moreover, the most divergent H4 proteins within ciliates are found in the lineages with highly processed macronuclear genomes. Our analyses demonstrate that the dual nature of ciliate genomes-the presence of a "germline" micronucleus and a "somatic" macronucleus within each cell-allowed the dramatic variation in ciliate histone genes by altering functional constraints or enabling adaptive evolution of the histone H4 protein, or both.},
}

@article{du_buckler_muse_2003,
  author       = {Du, C. G. and Buckler, E. and Muse, S.},
  title        = {Development of a maize molecular evolutionary genomic database},
  journal      = {Comparative and Functional Genomics},
  volume       = {4},
  number       = {2},
  pages        = {246--249},
  year         = {2003},
  month        = apr,
  issn         = {1531-6912},
  doi          = {10.1002/cfg.282},
  abstractNote = {PANZEA is the first public database for studying maize genomic diversity. It was initiated as a repository of genomic diversity for an NSF Plant Genome project on ‘Maize Evolutionary Genomics’. PANZEA is hosted at the Bioinformatics Research Center, North Carolina State University, and is open to the public (http://statgen.ncsu.edu/panzea). PANZEA is designed to capture the interrelationships between germplasm, molecular diversity, phenotypic diversity and genome structure. It has the ability to store, integrate and visualize DNA sequence, enzymatic, SSR (simple sequence repeat) marker, germplasm and phenotypic data. The relational data model is selected and implemented in Oracle. An automated DNA sequence data submission tool has been created that allows project researchers to remotely submit their DNA sequence data directly to PANZEA. On-line database search forms and reports have been created to allow users to search or download germplasm, DNA sequence, gene/locus data and much more, directly from the web.},
}

@article{liu_goodman_muse_smith_buckler_doebley_2003,
  author       = {Liu, K. J. and Goodman, M. and Muse, S. and Smith, J. S. and Buckler, E. and Doebley, J.},
  title        = {Genetic structure and diversity among maize inbred lines as inferred from {DNA} microsatellites},
  journal      = {Genetics},
  volume       = {165},
  number       = {4},
  pages        = {2117--2128},
  year         = {2003},
}

@book{gibson_muse_2002,
  author       = {Gibson, G. and Muse, S. V.},
  title        = {A primer of genome science},
  publisher    = {Sinauer Associates},
  address      = {Sunderland, MA},
  year         = {2002},
  isbn         = {0878932348},
}

@article{israel_pond_muse_katz_2002,
  author       = {Israel, R. L. and Kosakovsky Pond, S. L. and Muse, S. V. and Katz, L. A.},
  title        = {Evolution of duplicated alpha-tubulin genes in ciliates},
  journal      = {Evolution},
  volume       = {56},
  number       = {6},
  pages        = {1110--1122},
  year         = {2002},
  doi          = {10.1111/j.0014-3820.2002.tb01425.x},
  abstractNote = {Ciliates provide a powerful system to analyze the evolution of duplicated α-tubulin genes in the context of single-celled organisms. Genealogical analyses of ciliate α-tubulin sequences reveal five apparently recent gene duplications. Comparisons of paralogs in different ciliates implicate differing patterns of substitutions (e.g., ratios of replacement/synonymous nucleotides and radical/conservative amino acids) following duplication. Most substitutions between paralogs in Euplotes crassus, Halteria grandinella and Paramecium tetraurelia are synonymous. In contrast, α-tubulin paralogs within Stylonychia lemnae and Chilodonella uncinata are evolving at significantly different rates and have higher ratios of both replacement substitutions to synonymous substitutions and radical amino acid changes to conservative amino acid changes. Moreover, the amino acid substitutions in C. uncinata and S. lemnae paralogs are limited to short stretches that correspond to functionally important regions of the α-tubulin protein. The topology of ciliate α-tubulin genealogies are inconsistent with taxonomy based on morphology and other molecular markers, which may be due to taxonomic sampling, gene conversion, unequal rates of evolution, or asymmetric patterns of gene duplication and loss.},
}

% NOTE(review): the entry below records an issue "number" but no "volume";
% 76 may actually be the volume of the newsletter -- confirm against the source.
@article{buckler_doebley_gaut_goodman_kresovich_muse_weir_2002,
  author       = {Buckler, E. and Doebley, J. and Gaut, B. and Goodman, M. and Kresovich, S. and Muse, S. and Weir, B.},
  title        = {Evolutionary genomics of maize},
  journal      = {Maize Genetics Cooperation Newsletter},
  number       = {76},
  pages        = {86},
  year         = {2002},
}

@article{muse_2000,
  author       = {Muse, S. V.},
  title        = {Examining rates and patterns of nucleotide substitution in plants},
  journal      = {Plant Molecular Biology},
  volume       = {42},
  number       = {1},
  pages        = {25--43},
  year         = {2000},
  month        = jan,
  issn         = {1573-5028},
  doi          = {10.1023/A:1006319803002},
}

% NOTE(review): only a start page is recorded for the entry below; the full
% page range is presumably missing -- confirm against the source.
@article{muse_gaut_1994,
  author       = {Muse, S. V. and Gaut, B. S.},
  title        = {A likelihood approach for comparing synonymous and nonsynonymous nucleotide substitution rates, with application to the chloroplast genome},
  journal      = jMBE,
  volume       = {11},
  number       = {5},
  pages        = {715},
  year         = {1994},
}