% Journal articles co-authored by Fernandes and Atchley, 2005-2008.
%
% Key note: the two 2008 articles previously shared one citation key, which
% BibTeX rejects as a repeated entry (only the first one was usable). The
% Journal of Molecular Evolution article keeps the original unsuffixed key so
% existing citations are unaffected; the Bioinformatics article now carries
% the same key with a "b" suffix.
%
% The Bioinformatics entry used to carry an abstract field whose entire value
% was the placeholder word "Abstract"; it has been dropped as empty of content.
%
% NOTE(review): given names for Atchley and Fernandes in the 2005 entries were
% expanded from the 2008 entries in this file; Zhao and Druke are left as
% initials because no fuller form is available here - confirm against the DOIs.

@article{fernandes_atchley_2008,
  author       = {Fernandes, Andrew D. and Atchley, William R.},
  title        = {Biochemical and functional evidence of p53 homology is inconsistent with molecular phylogenetics for distant sequences},
  journal      = {Journal of Molecular Evolution},
  volume       = {67},
  number       = {1},
  pages        = {51--67},
  year         = {2008},
  month        = jul,
  issn         = {0022-2844},
  doi          = {10.1007/s00239-008-9124-2},
  abstractNote = {The tumor suppressor p53 is mutated in approximately 50% of all human cancer cases worldwide. It is commonly assumed that the phylogenetic history of this important tumor suppressor has been thoroughly studied; however, few detailed studies of the entire extended p53 protein family have been reported, and none comprehensively and simultaneously consider functional, molecular, and phylogenetic data. Herein we examine a diverse collection of reported p53-like protein sequences, including representatives from the arthropods, nematodes, and protists, with the goal of answering several important questions. First, what evidence supports these highly divergent proteins being true homologues to the p53 family? Second, is the inferred overall family phylogeny concordant with known structures and functions? Third, does the extended p53 family possess recognizable conserved sites outside of the within-chordate, highly-conserved DNA-binding domain? Our study shows that the biochemical and functional evidence of p53 homology for nematodes, arthropods, and protists is inconsistent with their implied phylogenetic relationship within the overall family. Although these divergent sequences are always reported as functionally similar to human p53, our results confirm and extend the hypothesis that p63 is a far more appropriate protein for comparison. Within these divergent sequences, we find minimal conservation within the DNA-binding domain, and no conservation elsewhere. Taken together, our findings suggest that these sequences are not bona fide homologues of the extended p53 family and provide baseline criteria for the future identification and characterization of distant p53-family homologues.},
}

@article{fernandes_atchley_2008b,
  author  = {Fernandes, Andrew D. and Atchley, William R.},
  title   = {Site-specific evolutionary rates in proteins are better modeled as non-independent and strictly relative},
  journal = {Bioinformatics},
  volume  = {24},
  number  = {19},
  pages   = {2177--2183},
  year    = {2008},
  month   = oct,
  issn    = {1460-2059},
  doi     = {10.1093/bioinformatics/btn395},
}

@article{atchley_fernandes_2005,
  author       = {Atchley, William R. and Fernandes, Andrew D.},
  title        = {Sequence signatures and the probabilistic identification of proteins in the {Myc-Max-Mad} network},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  volume       = {102},
  number       = {18},
  pages        = {6401--6406},
  year         = {2005},
  month        = may,
  issn         = {0027-8424},
  doi          = {10.1073/pnas.0408964102},
  abstractNote = {Accurate identification of specific groups of proteins by their amino acid sequence is an important goal in genome research. Here we combine information theory with fuzzy logic search procedures to identify sequence signatures or predictive motifs for members of the Myc-Max-Mad transcription factor network. Myc is a well known oncoprotein, and this family is involved in cell proliferation, apoptosis, and differentiation. We describe a small set of amino acid sites from the N-terminal portion of the basic helix-loop-helix (bHLH) domain that provide very accurate sequence signatures for the Myc-Max-Mad transcription factor network and three of its member proteins. A predictive motif involving 28 contiguous bHLH sequence elements found 337 network proteins in the GenBank NR database with no mismatches or misidentifications. This motif also identifies at least one previously unknown fungal protein with strong affinity to the Myc-Max-Mad network. Another motif found 96% of known Myc protein sequences with only a single mismatch, including sequences from genomes previously not thought to contain Myc proteins. The predictive motif for Myc is very similar to the ancestral sequence for the Myc group estimated from phylogenetic analyses. Based on available crystal structure studies, this motif is discussed in terms of its functional consequences. Our results provide insight into evolutionary diversification of DNA binding and dimerization in a well characterized family of regulatory proteins and provide a method of identifying signature motifs in protein families.},
}

@article{atchley_zhao_fernandes_druke_2005,
  author       = {Atchley, William R. and Zhao, J. P. and Fernandes, Andrew D. and Druke, T.},
  title        = {Solving the protein sequence metric problem},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  volume       = {102},
  number       = {18},
  pages        = {6395--6400},
  year         = {2005},
  month        = may,
  issn         = {1091-6490},
  doi          = {10.1073/pnas.0408677102},
  abstractNote = {Biological sequences are composed of long strings of alphabetic letters rather than arrays of numerical values. Lack of a natural underlying metric for comparing such alphabetic data significantly inhibits sophisticated statistical analyses of sequences, modeling structural and functional aspects of proteins, and related problems. Herein, we use multivariate statistical analyses on almost 500 amino acid attributes to produce a small set of highly interpretable numeric patterns of amino acid variability. These high-dimensional attribute data are summarized by five multidimensional patterns of attribute covariation that reflect polarity, secondary structure, molecular volume, codon diversity, and electrostatic charge. Numerical scores for each amino acid then transform amino acid sequences for statistical analyses. Relationships between transformed data and amino acid substitution matrices show significant associations for polarity and codon diversity scores. Transformed alphabetic data are used in analysis of variance and discriminant analysis to study DNA binding in the basic helix-loop-helix proteins. The transformed scores offer a general solution for analyzing a wide variety of sequence analysis problems.},
}