% Journal articles on interlocus gene conversion and codon-substitution modelling.
% Conventions: one field per line, bare ISSN/DOI values, month as an unquoted
% macro, page ranges written with a double hyphen.

% Yang et al. 2023, Molecular Biology and Evolution 40(9).
@article{yang_xu_conant_kishino_thorne_ji_2023,
  author  = {Yang, Yixuan and Xu, Tanchumin and Conant, Gavin and Kishino, Hirohisa and Thorne, Jeffrey L. and Ji, Xiang},
  title   = {Interlocus Gene Conversion, Natural Selection, and Paralog Homogenization},
  journal = {Molecular Biology and Evolution},
  volume  = {40},
  number  = {9},
  year    = {2023},
  month   = sep,
  issn    = {1537-1719},
  doi     = {10.1093/molbev/msad198},
}

% Ji, Griffing and Thorne 2016, Molecular Biology and Evolution 33(9).
@article{ji_griffing_thorne_2016,
  author       = {Ji, Xiang and Griffing, Alexander and Thorne, Jeffrey L.},
  title        = {A Phylogenetic Approach Finds Abundant Interlocus Gene Conversion in Yeast},
  journal      = {Molecular Biology and Evolution},
  volume       = {33},
  number       = {9},
  pages        = {2469--2476},
  year         = {2016},
  month        = sep,
  issn         = {1537-1719},
  doi          = {10.1093/molbev/msw114},
  abstractNote = {Interlocus gene conversion (IGC) homogenizes repeats. While genomes can be repeat-rich, the evolutionary importance of IGC is poorly understood. Additional statistical tools for characterizing it are needed. We propose a composite likelihood strategy for incorporating IGC into widely-used probabilistic models for sequence changes that originate with point mutation. We estimated the percentage of nucleotide substitutions that originate with an IGC event rather than a point mutation in 14 groups of yeast ribosomal protein-coding genes, and found values ranging from 20\% to 38\%. We designed and applied a procedure to determine whether these percentages are inflated due to artifacts arising from model misspecification. The results of this procedure are consistent with IGC having had an important role in the evolution of each of these 14 gene families. We further investigate the properties of our IGC approach via simulation. In contrast to usual practice, our findings suggest that the IGC should and can be considered when multigene family evolution is investigated.},
}

% Wang et al. 2015, Evolutionary Bioinformatics 11.
@article{wang_yu_ji_lakner_griffing_thorne_2015,
  author       = {Wang, Kuangyu and Yu, Shuhui and Ji, Xiang and Lakner, Clemens and Griffing, Alexander and Thorne, Jeffrey L.},
  title        = {Roles of Solvent Accessibility and Gene Expression in Modeling Protein Sequence Evolution},
  journal      = {Evolutionary Bioinformatics},
  volume       = {11},
  year         = {2015},
  issn         = {1176-9343},
  doi          = {10.4137/ebo.s22911},
  abstractNote = {Models of protein evolution tend to ignore functional constraints, although structural constraints are sometimes incorporated. Here we propose a probabilistic framework for codon substitution that evaluates joint effects of relative solvent accessibility (RSA), a structural constraint; and gene expression, a functional constraint. First, we explore the relationship between RSA and codon usage at the genomic scale as well as at the individual gene scale. Motivated by these results, we construct our framework by determining how probable is an amino acid, given RSA and gene expression, and then evaluating the relative probability of observing a codon compared to other synonymous codons. We come to the biologically plausible conclusion that both RSA and gene expression are related to amino acid frequencies, but, among synonymous codons, the relative probability of a particular codon is more closely related to gene expression than RSA. To illustrate the potential applications of our framework, we propose a new codon substitution model. Using this model, we obtain estimates of 2 N s, the product of effective population size N, and relative fitness difference of allele s. For a training data set consisting of human proteins with known structures and expression data, 2 N s is estimated separately for synonymous and nonsynonymous substitutions in each protein. We then contrast the patterns of synonymous and nonsynonymous 2 N s estimates across proteins while also taking gene expression levels of the proteins into account. We conclude that our 2 N s estimates are too concentrated around 0, and we discuss potential explanations for this lack of variability.},
}