@comment{
  Publication list, cleaned up from a reference-manager export.

  Conventions used in this file:
  - One field per line; field order is author, title, venue, volume/number,
    year, month, publisher, issn, doi, url, abstractNote, internal-note.
  - month uses the standard three-letter BibTeX macros (jan ... dec), unquoted.
  - doi holds the bare DOI; url holds the https://doi.org/ resolver link.
  - abstractNote and internal-note are not standard BibTeX fields, so standard
    styles ignore them; internal-note carries reviewer remarks only.
  - Journals that occur more than once are defined once as string macros below.
  - Entries whose exported citation key was a lowercased title containing
    spaces and punctuation have been given a whitespace-free key built from the
    same words (non-alphanumeric runs replaced by an underscore). Their title
    field was recovered from that key and their author field is still missing;
    each such entry carries an internal-note saying so.
}

@string{bmcbioinf     = {BMC Bioinformatics}}
@string{bmcgenomics   = {BMC Genomics}}
@string{frontplantsci = {Frontiers in Plant Science}}
@string{plosone       = {PLOS ONE}}

@article{cardenas-alvarez_restrepo-montoya_bergholz_2022,
  author       = {Cardenas-Alvarez, Maria X. and Restrepo-Montoya, Daniel and Bergholz, Teresa M.},
  title        = {Genome-Wide Association Study of {Listeria} monocytogenes Isolates Causing Three Different Clinical Outcomes},
  journal      = {Microorganisms},
  year         = {2022},
  month        = sep,
  doi          = {10.3390/microorganisms10101934},
  url          = {https://doi.org/10.3390/microorganisms10101934},
  abstractNote = {Heterogeneity in virulence potential of L. monocytogenes subgroups have been associated with genetic elements that could provide advantages in certain environments to invade, multiply, and survive within a host. The presence of gene mutations has been found to be related to attenuated phenotypes, while the presence of groups of genes, such as pathogenicity islands (PI), has been associated with hypervirulent or stress-resistant clones. We evaluated 232 whole genome sequences from invasive listeriosis cases in human and ruminants from the US and Europe to identify genomic elements associated with strains causing three clinical outcomes: central nervous system (CNS) infections, maternal-neonatal (MN) infections, and systemic infections (SI). Phylogenetic relationships and virulence-associated genes were evaluated, and a gene-based and single nucleotide polymorphism (SNP)-based genome-wide association study (GWAS) were conducted in order to identify loci associated with the different clinical outcomes. The orthologous results indicated that genes of phage phiX174, transfer RNAs, and type I restriction-modification (RM) system genes along with SNPs in loci involved in environmental adaptation such as rpoB and a phosphotransferase system (PTS) were associated with one or more clinical outcomes. Detection of phenotype-specific candidate loci represents an approach that could narrow the group of genetic elements to be evaluated in future studies.},
}

@article{restrepo-montoya_hulse-kemp_scheffler_haigler_hinze_love_percy_jones_frelichowski_2022,
  author       = {Restrepo-Montoya, Daniel and Hulse-Kemp, Amanda M. and Scheffler, Jodi A. and Haigler, Candace H. and Hinze, Lori L. and Love, Janna and Percy, Richard G. and Jones, Don C. and Frelichowski, James},
  title        = {Leveraging National Germplasm Collections to Determine Significantly Associated Categorical Traits in Crops: {Upland} and {Pima} Cotton as a Case Study},
  journal      = frontplantsci,
  volume       = {13},
  year         = {2022},
  month        = apr,
  publisher    = {Frontiers Media SA},
  issn         = {1664-462X},
  doi          = {10.3389/fpls.2022.837038},
  url          = {https://doi.org/10.3389/fpls.2022.837038},
  abstractNote = {Observable qualitative traits are relatively stable across environments and are commonly used to evaluate crop genetic diversity. Recently, molecular markers have largely superseded describing phenotypes in diversity surveys. However, qualitative descriptors are useful in cataloging germplasm collections and for describing new germplasm in patents, publications, and/or the Plant Variety Protection (PVP) system. This research focused on the comparative analysis of standardized cotton traits as represented within the National Cotton Germplasm Collection (NCGC). The cotton traits are named by ‘descriptors’ that have non-numerical sub-categories (descriptor states) reflecting the details of how each trait manifests or is absent in the plant. We statistically assessed selected accessions from three major groups of Gossypium as defined by the NCGC curator: (1) “Stoneville accessions (SA),” containing mainly Upland cotton (Gossypium hirsutum) cultivars; (2) “Texas accessions (TEX),” containing mainly G. hirsutum landraces; and (3) Gossypium barbadense (Gb), containing cultivars or landraces of Pima cotton (Gossypium barbadense). For 33 cotton descriptors we: (a) revealed distributions of character states for each descriptor within each group; (b) analyzed bivariate associations between paired descriptors; and (c) clustered accessions based on their descriptors. The fewest significant associations between descriptors occurred in the SA dataset, likely reflecting extensive breeding for cultivar development. In contrast, the TEX and Gb datasets showed a higher number of significant associations between descriptors, likely correlating with less impact from breeding efforts. Three significant bivariate associations were identified for all three groups, bract nectaries:boll nectaries, leaf hair:stem hair, and lint color:seed fuzz color. Unsupervised clustering analysis recapitulated the species labels for about 97% of the accessions. Unexpected clustering results indicated accessions that may benefit from potential further investigation. In the future, the significant associations between standardized descriptors can be used by curators to determine whether new exotic/unusual accessions most closely resemble Upland or Pima cotton. In addition, the study shows how existing descriptors for large germplasm datasets can be useful to inform downstream goals in breeding and research, such as identifying rare individuals with specific trait combinations and targeting breakdown of remaining trait associations through breeding, thus demonstrating the utility of the analytical methods employed in categorizing germplasm diversity within the collection.},
}

@article{restrepo-montoya_mcclean_osorno_2021,
  author  = {Restrepo-Montoya, Daniel and McClean, Phillip E. and Osorno, Juan M.},
  title   = {Orthology and synteny analysis of receptor-like kinases {``RLK''} and receptor-like proteins {``RLP''} in legumes},
  journal = bmcgenomics,
  year    = {2021},
  month   = feb,
  doi     = {10.1186/s12864-021-07384-w},
  url     = {https://doi.org/10.1186/s12864-021-07384-w},
}

@article{restrepo-montoya_brueggeman_mcclean_osorno_2020,
  author    = {Restrepo-Montoya, Daniel and Brueggeman, Robert and McClean, Phillip E. and Osorno, Juan M.},
  title     = {Computational identification of receptor-like kinases {``RLK''} and receptor-like proteins {``RLP''} in legumes},
  journal   = bmcgenomics,
  volume    = {21},
  number    = {1},
  year      = {2020},
  month     = jul,
  publisher = {Springer Science and Business Media LLC},
  doi       = {10.1186/s12864-020-06844-z},
  url       = {https://doi.org/10.1186/s12864-020-06844-z},
}

@article{comparative_genomics_identifies_potential_virulence_factors_in_clostridium_tertium_and_c_paraputrificum_2019,
  title         = {Comparative genomics identifies potential virulence factors in {Clostridium} tertium and {C.} paraputrificum},
  journal       = {Virulence},
  year          = {2019},
  month         = jul,
  doi           = {10.1080/21505594.2019.1637699},
  url           = {https://doi.org/10.1080/21505594.2019.1637699},
  abstractNote  = {Some well-known Clostridiales species such as Clostridium difficile and C. perfringens are agents of high impact diseases worldwide. Nevertheless, other foreseen Clostridiales species have recently emerged such as Clostridium tertium and C. paraputrificum. Three fecal isolates were identified as Clostridium tertium (Gcol.A2 and Gcol.A43) and C. paraputrificum (Gcol.A11) during public health screening for C. difficile infections in Colombia. C. paraputrificum genomes were highly diverse and contained large numbers of accessory genes. Genetic diversity and accessory gene percentage were lower among the C. tertium genomes than in the C. paraputrificum genomes. C. difficile tcdA and tcdB toxins encoding homologous sequences and other potential virulence factors were also identified. EndoA interferase, a toxic component of the type II toxin-antitoxin system, was found among the C. tertium genomes. toxA was the only toxin encoding gene detected in Gcol.A43, the Colombian isolate with an experimentally-determined high cytotoxic effect. Gcol.A2 and Gcol.A43 had higher sporulation efficiencies than Gcol.A11 (84.5%, 83.8% and 57.0%, respectively), as supported by the greater number of proteins associated with sporulation pathways in the C. tertium genomes compared with the C. paraputrificum genomes (33.3 and 28.4 on average, respectively). This work allowed complete genome description of two clostridiales species revealing high levels of intra-taxa diversity, accessory genomes containing virulence-factors encoding genes (especially in C. paraputrificum), with proteins involved in sporulation processes more highly represented in C. tertium. These finding suggest the need to advance in the study of those species with potential importance at public health level.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{integrated_genomic_epidemiology_and_phenotypic_profiling_of_clostridium_difficile_across_intra-hospital_and_community_populations_in_colombia_2019,
  title         = {Integrated genomic epidemiology and phenotypic profiling of {Clostridium} difficile across intra-hospital and community populations in {Colombia}},
  journal       = {Scientific Reports},
  year          = {2019},
  month         = aug,
  doi           = {10.1038/s41598-019-47688-2},
  url           = {https://doi.org/10.1038/s41598-019-47688-2},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{genetic_architecture_of_flooding_tolerance_in_the_dry_bean_middle-american_diversity_panel_2017,
  title         = {Genetic architecture of flooding tolerance in the dry bean {Middle-American} diversity panel},
  journal       = frontplantsci,
  year          = {2017},
  month         = jul,
  doi           = {10.3389/fpls.2017.01183},
  url           = {https://doi.org/10.3389/fpls.2017.01183},
  abstractNote  = {Flooding is a devastating abiotic stress that endangers crop production in the twenty-first century. Because of the severe susceptibility of common bean (Phaseolus vulgaris L.) to flooding, an understanding of the genetic architecture and physiological responses of this crop will set the stage for further improvement. However, challenging phenotyping methods hinder a large-scale genetic study of flooding tolerance in common bean and other economically important crops. A greenhouse phenotyping protocol was developed to evaluate the flooding conditions at early stages. The Middle-American diversity panel (n = 272) of common bean was developed to capture most of the diversity exits in North American germplasm. This panel was evaluated for seven traits under both flooded and non-flooded conditions at two early developmental stages. A subset of contrasting genotypes was further evaluated in the field to assess the relationship between greenhouse and field data under flooding condition. A genome-wide association study using ~150 K SNPs was performed to discover genomic regions associated with multiple physiological responses. The results indicate a significant strong correlation (r > 0.77) between greenhouse and field data, highlighting the reliability of greenhouse phenotyping method. Black and small red beans were the least affected by excess water at germination stage. At the seedling stage, pinto and great northern genotypes were the most tolerant. Root weight reduction due to flooding was greatest in pink and small red cultivars. Flooding reduced the chlorophyll content to the greatest extent in the navy bean cultivars compared with other market classes. Races of Durango/Jalisco and Mesoamerica were separated by both genotypic and phenotypic data indicating the potential effect of eco-geographical variations. Furthermore, several loci were identified that potentially represent the antagonistic pleiotropy. The GWAS analysis revealed peaks at Pv08/1.6 Mb and Pv02/41 Mb that are associated with root weight and germination rate, respectively. These regions are syntenic with two QTL reported in soybean (Glycine max L.) that contribute to flooding tolerance, suggesting a conserved evolutionary pathway involved in flooding tolerance for these related legumes.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{changes_in_macrophage_gene_expression_associated_with_leishmania_viannia_braziliensis_infection_2015,
  title         = {Changes in macrophage gene expression associated with {Leishmania} ({Viannia}) braziliensis infection},
  journal       = plosone,
  year          = {2015},
  month         = jun,
  doi           = {10.1371/journal.pone.0128934},
  url           = {https://doi.org/10.1371/journal.pone.0128934},
  abstractNote  = {Different Leishmania species cause distinct clinical manifestations of the infectious disease leishmaniasis. It is fundamentally important to understand the mechanisms governing the interaction between Leishmania and its host cell. Little is known about this interaction between Leishmania (Viannia) braziliensis and human macrophages. In this study, we aimed to identify differential gene expression between non-infected and L. (V) braziliensis-infected U937-derived macrophages. We deployed a whole human transcriptome microarray analysis using 72 hours post-infection samples and compared those samples with their non-infected counterparts. We found that 218 genes were differentially expressed between infected and non-infected macrophages. A total of 71.6% of these genes were down-regulated in the infected macrophages. Functional enrichment analyses identified the steroid and sterol/cholesterol biosynthetic processes between regulatory networks down-regulated in infected macrophages. RT-qPCR further confirmed this down-regulation in genes belonging to these pathways. These findings contrast with those from studies involving other Leishmania species at earlier infection stages, where gene up-regulation for this metabolic pathway has been reported. Sterol biosynthesis could be an important biological process associated with the expression profile of macrophages infected by L. (V.) braziliensis. Differential transcriptional results suggest a negative regulation of the genetic regulatory network involved in cholesterol biosynthesis.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@inproceedings{a_multi-objective_optimization_energy_approach_to_predict_the_ligand_conformation_in_a_docking_process_2013,
  title         = {A multi-objective optimization energy approach to predict the ligand conformation in a docking process},
  booktitle     = {Lecture Notes in Computer Science},
  year          = {2013},
  doi           = {10.1007/978-3-642-37207-0_16},
  url           = {https://doi.org/10.1007/978-3-642-37207-0_16},
  abstractNote  = {This work proposes a multi-objective algorithmic method for modelling the prediction of the conformation and configuration of ligands in receptor-ligand complexes by considering energy contributions of molecular interactions. The proposed approach is an improvement over others in the field, where the principle insight is that a Pareto front helps to understand the tradeoffs in the actual problem. The method is based on three main features: (i) Representation of molecular data using a trigonometric model; (ii) Modelling of molecular interactions with all-atoms force field energy functions and (iii) Exploration of the conformational space through a multi-objective evolutionary algorithm. The performance of the proposed model was evaluated and validated over a set of well known complexes. The method showed a promising performance when predicting ligands with high number of rotatable bonds.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record. The booktitle value looks like a series name rather than a proceedings title; presumably it belongs in the series field once the real proceedings title is looked up.},
}

@article{the_autoimmune_tautology_an_in_silico_approach_2012,
  title         = {The autoimmune tautology: An in silico approach},
  journal       = {Autoimmune Diseases},
  year          = {2012},
  month         = mar,
  doi           = {10.1155/2012/792106},
  url           = {https://doi.org/10.1155/2012/792106},
  abstractNote  = {There is genetic evidence of similarities and differences among autoimmune diseases (AIDs) that warrants looking at a general panorama of what has been published. Thus, our aim was to determine the main shared genes and to what extent they contribute to building clusters of AIDs. We combined a text-mining approach to build clusters of genetic concept profiles (GCPs) from the literature in MedLine with knowledge of protein-protein interactions to confirm if genes in GCP encode proteins that truly interact. We found three clusters in which the genes with the highest contribution encoded proteins that showed strong and specific interactions. After projecting the AIDs on a plane, two clusters could be discerned: Sjögren’s syndrome—systemic lupus erythematosus, and autoimmune thyroid disease—type1 diabetes—rheumatoid arthritis. Our results support the common origin of AIDs and the role of genes involved in apoptosis such as CTLA4, FASLG, and IL10.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{identification_of_plasmodium_vivax_proteins_with_potential_role_in_invasion_using_sequence_redundancy_reduction_and_profile_hidden_markov_models_2011,
  title         = {Identification of {Plasmodium} vivax proteins with potential role in invasion using sequence redundancy reduction and profile hidden {Markov} models},
  journal       = plosone,
  year          = {2011},
  month         = oct,
  doi           = {10.1371/journal.pone.0025189},
  url           = {https://doi.org/10.1371/journal.pone.0025189},
  abstractNote  = {Background This study describes a bioinformatics approach designed to identify Plasmodium vivax proteins potentially involved in reticulocyte invasion. Specifically, different protein training sets were built and tuned based on different biological parameters, such as experimental evidence of secretion and/or involvement in invasion-related processes. A profile-based sequence method supported by hidden Markov models (HMMs) was then used to build classifiers to search for biologically-related proteins. The transcriptional profile of the P. vivax intra-erythrocyte developmental cycle was then screened using these classifiers. Results A bioinformatics methodology for identifying potentially secreted P. vivax proteins was designed using sequence redundancy reduction and probabilistic profiles. This methodology led to identifying a set of 45 proteins that are potentially secreted during the P. vivax intra-erythrocyte development cycle and could be involved in cell invasion. Thirteen of the 45 proteins have already been described as vaccine candidates; there is experimental evidence of protein expression for 7 of the 32 remaining ones, while no previous studies of expression, function or immunology have been carried out for the additional 25. Conclusions The results support the idea that probabilistic techniques like profile HMMs improve similarity searches. Also, different adjustments such as sequence redundancy reduction using Pisces or Cd-Hit allowed data clustering based on rational reproducible measurements. This kind of approach for selecting proteins with specific functions is highly important for supporting large-scale analyses that could aid in the identification of genes encoding potential new target antigens for vaccine development and drug design. The present study has led to targeting 32 proteins for further testing regarding their ability to induce protective immune responses against P. vivax malaria.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{nclassg_a_classifier_for_non-classically_secreted_gram-positive_bacterial_proteins_2011,
  title         = {{NClassG+}: A classifier for non-classically secreted {Gram-positive} bacterial proteins},
  journal       = bmcbioinf,
  year          = {2011},
  month         = jan,
  doi           = {10.1186/1471-2105-12-21},
  url           = {https://doi.org/10.1186/1471-2105-12-21},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization (the tool name in particular) and add the missing author field from the DOI record.},
}

@inproceedings{a_parallel_multi-objective_ab_initio_approach_for_protein_structure_prediction_2010,
  title         = {A parallel multi-objective ab initio approach for protein structure prediction},
  booktitle     = {2010 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)},
  year          = {2010},
  month         = dec,
  doi           = {10.1109/bibm.2010.5706552},
  url           = {https://doi.org/10.1109/bibm.2010.5706552},
  abstractNote  = {Protein structure prediction is one of the most important problems in bioinformatics and structural biology. This work proposes a novel and suitable methodology to model protein structure prediction with atomic-level detail by using a parallel multi-objective ab initio approach. In the proposed model, i) A trigonometric representation is used to compute backbone and side-chain torsion angles of protein atoms; ii) The Chemistry at HARvard Macromolecular Mechanics (CHARMm) function optimizes and evaluates the structures of the protein conformations; iii) The evolution of protein conformations is directed by optimization of protein energy contributions using the multi-objective genetic algorithm NSGA-II; and iv) The computation process is sped up and its effectiveness improved through the implementation of an island model of the evolutionary algorithm. The proposed model was validated on a set of benchmark proteins obtaining very promising results.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{computational_prediction_and_experimental_assessment_of_secreted_surface_proteins_from_mycobacterium_tuberculosis_h37rv_2010,
  title         = {Computational prediction and experimental assessment of secreted/surface proteins from {Mycobacterium} tuberculosis {H37Rv}},
  journal       = {PLOS Computational Biology},
  year          = {2010},
  month         = jun,
  doi           = {10.1371/journal.pcbi.1000824},
  url           = {https://doi.org/10.1371/journal.pcbi.1000824},
  abstractNote  = {The mycobacterial cell envelope has been implicated in the pathogenicity of tuberculosis and therefore has been a prime target for the identification and characterization of surface proteins with potential application in drug and vaccine development. In this study, the genome of Mycobacterium tuberculosis H37Rv was screened using Machine Learning tools that included feature-based predictors, general localizers and transmembrane topology predictors to identify proteins that are potentially secreted to the surface of M. tuberculosis, or to the extracellular milieu through different secretory pathways. The subcellular localization of a set of 8 hypothetically secreted/surface candidate proteins was experimentally assessed by cellular fractionation and immunoelectron microscopy (IEM) to determine the reliability of the computational methodology proposed here, using 4 secreted/surface proteins with experimental confirmation as positive controls and 2 cytoplasmic proteins as negative controls. Subcellular fractionation and IEM studies provided evidence that the candidate proteins Rv0403c, Rv3630, Rv1022, Rv0835, Rv0361 and Rv0178 are secreted either to the mycobacterial surface or to the extracellular milieu. Surface localization was also confirmed for the positive controls, whereas negative controls were located on the cytoplasm. Based on statistical learning methods, we obtained computational subcellular localization predictions that were experimentally assessed and allowed us to construct a computational protocol with experimental support that allowed us to identify a new set of secreted/surface proteins as potential vaccine candidates.},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}

@article{validating_subcellular_localization_prediction_tools_with_mycobacterial_proteins_2009,
  title         = {Validating subcellular localization prediction tools with mycobacterial proteins},
  journal       = bmcbioinf,
  year          = {2009},
  month         = may,
  doi           = {10.1186/1471-2105-10-134},
  url           = {https://doi.org/10.1186/1471-2105-10-134},
  internal-note = {NOTE(review): key and title were recovered from the exported title-derived citation key; confirm title capitalization and add the missing author field from the DOI record.},
}