% Conventions used for the entries below:
%   - values are ASCII-only; numeric ranges use the double hyphen (--),
%   - month is given as the bare three-letter macro so the style can format it,
%   - issn and doi hold the bare identifier with no wrapper syntax,
%   - journal holds the full name in mixed case (styles do not recase it),
%   - words that must keep their capitals in a title are brace-protected,
%   - abstractNote is not a standard BibTeX field; unknown fields are ignored
%     by styles, so it acts purely as an annotation.

% Gene 441(1-2), 2009.
% NOTE(review): the given names for Stone do not appear anywhere else in this
% file, so the initials are kept as exported -- confirm against the publisher
% record before expanding them.
@article{choi_stone_kishino_thorne_2009,
  author       = {Choi, Sang Chul and Stone, E. A. and Kishino, Hirohisa and Thorne, Jeffrey L.},
  title        = {Estimates of natural selection due to protein tertiary structure inform the ancestry of biallelic loci},
  journal      = {Gene},
  volume       = {441},
  number       = {1--2},
  pages        = {45--52},
  year         = {2009},
  doi          = {10.1016/j.gene.2008.07.020},
  abstractNote = {We consider the inference of which of two alleles is ancestral when the alleles have a single nonsynonymous difference and when natural selection acts via protein tertiary structure. Whereas the probability that an allele is ancestral under neutrality is equal to its frequency, under selection this probability depends on allele frequency and on the magnitude and direction of selection pressure. Although allele frequencies can be well estimated from intraspecific data, small fitness differences have a large evolutionary impact but can be difficult to estimate with only intraspecific data. Methods for predicting aspects of phenotype from genotype can supplement intraspecific sequence data. Recently developed statistical techniques can assess effects of phenotypes, such as protein tertiary structure on molecular evolution. While these techniques were initially designed for comparing protein-coding genes from different species, the resulting interspecific inferences can be assigned population genetic interpretations to assess the effect of selection pressure, and we use them here along with intraspecific allele frequency data to estimate the probability that an allele is ancestral. We focus on 140 nonsynonymous single nucleotide polymorphisms of humans that are in proteins with known tertiary structures. We find that our technique for employing protein tertiary structure information yields some biologically plausible results but that it does not substantially improve the inference of ancestral human allele types.},
}

% Philosophical Transactions of the Royal Society B 363(1512), December 2008.
@article{choi_redelings_thorne_2008,
  author       = {Choi, Sang Chul and Redelings, Benjamin D. and Thorne, Jeffrey L.},
  title        = {Basing population genetic inferences and models of molecular evolution upon desired stationary distributions of {DNA} or protein sequences},
  journal      = {Philosophical Transactions of the Royal Society B: Biological Sciences},
  volume       = {363},
  number       = {1512},
  pages        = {3931--3939},
  year         = {2008},
  month        = dec,
  issn         = {0962-8436},
  doi          = {10.1098/rstb.2008.0167},
  abstractNote = {Models of molecular evolution tend to be overly simplistic caricatures of biology that are prone to assigning high probabilities to biologically implausible DNA or protein sequences. Here, we explore how to construct time-reversible evolutionary models that yield stationary distributions of sequences that match given target distributions. By adopting comparatively realistic target distributions, evolutionary models can be improved. Instead of focusing on estimating parameters, we concentrate on the population genetic implications of these models. Specifically, we obtain estimates of the product of effective population size and relative fitness difference of alleles. The approach is illustrated with two applications to protein-coding DNA. In the first, a codon-based evolutionary model yields a stationary distribution of sequences, which, when the sequences are translated, matches a variable-length Markov model trained on human proteins. In the second, we introduce an insertion--deletion model that describes selectively neutral evolutionary changes to DNA. We then show how to modify the neutral model so that its stationary distribution at the amino acid level can match a profile hidden Markov model, such as the one associated with the Pfam database.},
}

% Molecular Biology and Evolution 24(8), August 2007.
@article{thorne_choi_yu_higgs_kishino_2007,
  author       = {Thorne, Jeffrey L. and Choi, Sang Chul and Yu, Jiaye and Higgs, Paul G. and Kishino, Hirohisa},
  title        = {Population genetics without intraspecific data},
  journal      = {Molecular Biology and Evolution},
  volume       = {24},
  number       = {8},
  pages        = {1667--1677},
  year         = {2007},
  month        = aug,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msm085},
  abstractNote = {A central goal of computational biology is the prediction of phenotype from DNA and protein sequence data. Recent models of sequence change use in silico prediction systems to incorporate the effects of phenotype on evolutionary rates. These models have been designed for analyzing sequence data from different species and have been accompanied by statistical techniques for estimating model parameters when the incorporation of phenotype induces dependent change among sequence positions. A difficulty with these efforts to link phenotype and interspecific evolution is that evolution occurs within populations, and parameters of interspecific models should have population genetic interpretations. We show, with two examples, how population genetic interpretations can be assigned to evolutionary models. The first example considers the impact of RNA secondary structure on sequence change, and the second reflects the tendency for protein tertiary structure to influence nonsynonymous substitution rates. We argue that statistical fit to data should not be the sole criterion for assessing models of sequence change. A good interspecific model should also yield a clear and biologically plausible population genetic interpretation.},
}

% Molecular Biology and Evolution 24(8), August 2007.
@article{choi_hobolth_robinson_kishino_thorne_2007,
  author       = {Choi, Sang Chul and Hobolth, Asger and Robinson, Douglas M. and Kishino, Hirohisa and Thorne, Jeffrey L.},
  title        = {Quantifying the impact of protein tertiary structure on molecular evolution},
  journal      = {Molecular Biology and Evolution},
  volume       = {24},
  number       = {8},
  pages        = {1769--1782},
  year         = {2007},
  month        = aug,
  issn         = {0737-4038},
  doi          = {10.1093/molbev/msm097},
  abstractNote = {To investigate the evolutionary impact of protein structure, the experimentally determined tertiary structure and the protein-coding DNA sequence were collected for each of 1,195 genes. These genes were studied via a model of sequence change that explicitly incorporates effects on evolutionary rates due to protein tertiary structure. In the model, these effects act via the solvent accessibility environments and pairwise amino acid interactions that are induced by tertiary structure. To compare the hypotheses that structure does and does not have a strong influence on evolution, Bayes factors were estimated for each of the 1,195 sequences. Most of the Bayes factors strongly support the hypothesis that protein structure affects protein evolution. Furthermore, both solvent accessibility and pairwise interactions among amino acids are inferred to have important roles in protein evolution. Our results also indicate that the strength of the relationship between tertiary structure and evolution has a weak but real correlation to the annotation information in the Gene Ontology database. Although their influences on rates of evolution vary among protein families, we find that the mean impacts of solvent accessibility and pairwise interactions are about the same.},
}