@comment{
  Publication list, one entry per work, newest first.
  Conventions: lowercase entry types and field names, one field per line,
  names in "Last, Jr, First" form, "and others" for truncated author lists,
  double-hyphen page ranges, unquoted three-letter month macros.
  Citation keys must not contain whitespace: the two keys that used to
  contain "et al." now use "etal" instead.
  Abstracts are kept in the non-standard abstractNote field, which
  bibliography styles ignore.
}

@article{gonzalez_angus_tetteh_bello_padmanabhan_pendse_srinivas_yu_semazzi_kumar_etal_2015,
  author       = {Gonzalez, II, D. L. and Angus, M. P. and Tetteh, I. K. and Bello, G. A. and Padmanabhan, K. and Pendse, S. V. and Srinivas, S. and Yu, J. and Semazzi, F. and Kumar, V. and others},
  title        = {On the data-driven inference of modulatory networks in climate science: an application to {West} {African} rainfall},
  journal      = {Nonlinear Processes in Geophysics},
  volume       = {22},
  number       = {1},
  pages        = {33--46},
  year         = {2015},
  issn         = {1607-7946},
  doi          = {10.5194/npg-22-33-2015},
  abstractNote = {Decades of hypothesis-driven and/or first-principles research have been applied towards the discovery and explanation of the mechanisms that drive climate phenomena, such as western African Sahel summer rainfall variability. Although connections between various climate factors have been theorized, not all of the key relationships are fully understood. We propose a data-driven approach to identify candidate players in this climate system, which can help explain underlying mechanisms and/or even suggest new relationships, to facilitate building a more comprehensive and predictive model of the modulatory relationships influencing a climate phenomenon of interest. We applied coupled heterogeneous association rule mining (CHARM), Lasso multivariate regression, and dynamic Bayesian networks to find relationships within a complex system, and explored means with which to obtain a consensus result from the application of such varied methodologies. Using this fusion of approaches, we identified relationships among climate factors that modulate Sahel rainfall. These relationships fall into two categories: well-known associations from prior climate knowledge, such as the relationship with the El Niño–Southern Oscillation (ENSO) and putative links, such as North Atlantic Oscillation, that invite further research.},
}

@inproceedings{gonzalez_pendse_padmanabhan_angus_tetteh_srinivas_villanes_semazzi_kumar_samatova_2013,
  author       = {Gonzalez, II, Doel L. and Pendse, Saurabh V. and Padmanabhan, Kanchana and Angus, Michael P. and Tetteh, Isaac K. and Srinivas, Shashank and Villanes, Andrea and Semazzi, Fredrick and Kumar, Vipin and Samatova, Nagiza F.},
  title        = {Coupled Heterogeneous Association Rule Mining ({CHARM}): Application toward Inference of Modulatory Climate Relationships},
  booktitle    = {2013 {IEEE} 13th International Conference on Data Mining ({ICDM})},
  pages        = {1055--1060},
  year         = {2013},
  issn         = {1550-4786},
  doi          = {10.1109/icdm.2013.142},
  abstractNote = {The complex dynamic climate system often exhibits hierarchical modularity of its organization and function. Scientists have spent decades trying to discover and understand the driving mechanisms behind western African Sahel summer rainfall variability, mostly via hypothesis-driven and/or first-principles based research. Their work has furthered theory regarding the connections between various climate patterns, but the key relationships are still not fully understood. We present Coupled Heterogeneous Association Rule Mining (CHARM), a computationally efficient methodology that mines higher-order relationships between these subsystems' anomalous temporal phases with respect to their effect on the system's response. We apply this to climate science data, aiming to infer putative pathways/cascades of modulating events and the modulating signs that collectively define the network of pathways for the rainfall anomaly in the Sahel. Experimental results are consistent with fundamental theories of phenomena in climate science, especially physical processes that best describe sub-regional climate.},
}

@article{padmanabhan_wang_samatova_2012,
  author       = {Padmanabhan, Kanchana and Wang, Kuangyu and Samatova, Nagiza F.},
  title        = {Functional Annotation of Hierarchical Modularity},
  journal      = {PLOS ONE},
  volume       = {7},
  number       = {4},
  year         = {2012},
  month        = apr,
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0033744},
  abstractNote = {In biological networks of molecular interactions in a cell, network motifs that are biologically relevant are also functionally coherent, or form functional modules. These functionally coherent modules combine in a hierarchical manner into larger, less cohesive subsystems, thus revealing one of the essential design principles of system-level cellular organization and function–hierarchical modularity. Arguably, hierarchical modularity has not been explicitly taken into consideration by most, if not all, functional annotation systems. As a result, the existing methods would often fail to assign a statistically significant functional coherence score to biologically relevant molecular machines. We developed a methodology for hierarchical functional annotation. Given the hierarchical taxonomy of functional concepts (e.g., Gene Ontology) and the association of individual genes or proteins with these concepts (e.g., GO terms), our method will assign a Hierarchical Modularity Score (HMS) to each node in the hierarchy of functional modules; the HMS score and its value measure functional coherence of each module in the hierarchy. While existing methods annotate each module with a set of “enriched” functional terms in a bag of genes, our complementary method provides the hierarchical functional annotation of the modules and their hierarchically organized components. A hierarchical organization of functional modules often comes as a bi-product of cluster analysis of gene expression data or protein interaction data. Otherwise, our method will automatically build such a hierarchy by directly incorporating the functional taxonomy information into the hierarchy search process and by allowing multi-functional genes to be part of more than one component in the hierarchy. In addition, its underlying HMS scoring metric ensures that functional specificity of the terms across different levels of the hierarchical taxonomy is properly treated. We have evaluated our method using Saccharomyces cerevisiae data from KEGG and MIPS databases and several other computationally derived and curated datasets. The code and additional supplemental files can be obtained from http://code.google.com/p/functional-annotation-of-hierarchical-modularity/ (Accessed 2012 March 13).},
}

@article{padmanabhan_wilson_rocha_wang_mihelcic_samatova_2012,
  author       = {Padmanabhan, Kanchana and Wilson, Kevin and Rocha, Andrea M. and Wang, Kuangyu and Mihelcic, James R. and Samatova, Nagiza F.},
  title        = {In-silico identification of phenotype-biased functional modules},
  journal      = {Proteome Science},
  volume       = {10},
  year         = {2012},
  month        = jun,
  issn         = {1477-5956},
  doi          = {10.1186/1477-5956-10-s1-s2},
  abstractNote = {Background: Phenotypes exhibited by microorganisms can be useful for several purposes, e.g., ethanol as an alternate fuel. Sometimes, the target phenotype maybe required in combination with other phenotypes, in order to be useful, for e.g., an industrial process may require that the organism survive in an anaerobic, alcohol rich environment and be able to feed on both hexose and pentose sugars to produce ethanol. This combination of traits may not be available in any existing organism or if they do exist, the mechanisms involved in the phenotype-expression may not be efficient enough to be useful. Thus, it may be required to genetically modify microorganisms. However, before any genetic modification can take place, it is important to identify the underlying cellular subsystems responsible for the expression of the target phenotype. Results: In this paper, we develop a method to identify statistically significant and phenotypically-biased functional modules. The method can compare the organismal network information from hundreds of phenotype expressing and phenotype non-expressing organisms to identify cellular subsystems that are more prone to occur in phenotype-expressing organisms than in phenotype non-expressing organisms. We have provided literature evidence that the phenotype-biased modules identified for phenotypes such as hydrogen production (dark and light fermentation), respiration, gram-positive, gram-negative and motility, are indeed phenotype-related. Conclusion: Thus we have proposed a methodology to identify phenotype-biased cellular subsystems. We have shown the effectiveness of our methodology by applying it to several target phenotypes. The code and all supplemental files can be downloaded from (http://freescience.org/cs/phenotype-biased-biclusters/).},
}

@article{schmidt_rocha_padmanabhan_shpanskaya_banfield_scott_mihelcic_samatova_2012,
  author       = {Schmidt, Matthew C. and Rocha, Andrea M. and Padmanabhan, Kanchana and Shpanskaya, Yekaterina and Banfield, Jill and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.},
  title        = {{NIBBS-Search} for Fast and Accurate Prediction of Phenotype-Biased Metabolic Systems},
  journal      = {PLOS Computational Biology},
  volume       = {8},
  number       = {5},
  year         = {2012},
  month        = may,
  issn         = {1553-7358},
  doi          = {10.1371/journal.pcbi.1002490},
  abstractNote = {Understanding of genotype-phenotype associations is important not only for furthering our knowledge on internal cellular processes, but also essential for providing the foundation necessary for genetic engineering of microorganisms for industrial use (e.g., production of bioenergy or biofuels). However, genotype-phenotype associations alone do not provide enough information to alter an organism's genome to either suppress or exhibit a phenotype. It is important to look at the phenotype-related genes in the context of the genome-scale network to understand how the genes interact with other genes in the organism. Identification of metabolic subsystems involved in the expression of the phenotype is one way of placing the phenotype-related genes in the context of the entire network. A metabolic system refers to a metabolic network subgraph; nodes are compounds and edges labels are the enzymes that catalyze the reaction. The metabolic subsystem could be part of a single metabolic pathway or span parts of multiple pathways. Arguably, comparative genome-scale metabolic network analysis is a promising strategy to identify these phenotype-related metabolic subsystems. Network Instance-Based Biased Subgraph Search (NIBBS) is a graph-theoretic method for genome-scale metabolic network comparative analysis that can identify metabolic systems that are statistically biased toward phenotype-expressing organismal networks. We set up experiments with target phenotypes like hydrogen production, TCA expression, and acid-tolerance. We show via extensive literature search that some of the resulting metabolic subsystems are indeed phenotype-related and formulate hypotheses for other systems in terms of their role in phenotype expression. NIBBS is also orders of magnitude faster than MULE, one of the most efficient maximal frequent subgraph mining algorithms that could be adjusted for this problem. Also, the set of phenotype-biased metabolic systems output by NIBBS comes very close to the set of phenotype-biased subgraphs output by an exact maximally-biased subgraph enumeration algorithm (MBS-Enum). The code (NIBBS and the module to visualize the identified subsystems) is available at http://freescience.org/cs/NIBBS.},
}

@article{hendrix_rocha_padmanabhan_choudhary_scott_mihelcic_samatova_2011,
  author       = {Hendrix, Willam and Rocha, Andrea M. and Padmanabhan, Kanchana and Choudhary, Alok and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.},
  title        = {{DENSE}: efficient and prior knowledge-driven discovery of phenotype-associated protein functional modules},
  journal      = {BMC Systems Biology},
  volume       = {5},
  year         = {2011},
  month        = oct,
  issn         = {1752-0509},
  doi          = {10.1186/1752-0509-5-172},
  abstractNote = {Background: Identifying cellular subsystems that are involved in the expression of a target phenotype has been a very active research area for the past several years. In this paper, cellular subsystem refers to a group of genes (or proteins) that interact and carry out a common function in the cell. Most studies identify genes associated with a phenotype on the basis of some statistical bias, others have extended these statistical methods to analyze functional modules and biological pathways for phenotype-relatedness. However, a biologist might often have a specific question in mind while performing such analysis and most of the resulting subsystems obtained by the existing methods might be largely irrelevant to the question in hand. Arguably, it would be valuable to incorporate biologist's knowledge about the phenotype into the algorithm. This way, it is anticipated that the resulting subsytems would not only be related to the target phenotype but also contain information that the biologist is likely to be interested in. Results: In this paper we introduce a fast and theoretically guranteed method called DENSE (Dense and ENriched Subgraph Enumeration) that can take in as input a biologist's prior knowledge as a set of query proteins and identify all the dense functional modules in a biological network that contain some part of the query vertices. The density (in terms of the number of network egdes) and the enrichment (the number of query proteins in the resulting functional module) can be manipulated via two parameters γ and μ, respectively. Conclusion: This algorithm has been applied to the protein functional association network of Clostridium acetobutylicum ATCC 824, a hydrogen producing, acid-tolerant organism. The algorithm was able to verify relationships known to exist in literature and also some previously unknown relationships including those with regulatory and signaling functions. Additionally, we were also able to hypothesize that some uncharacterized proteins are likely associated with the target phenotype. The DENSE code can be downloaded from http://www.freescience.org/cs/DENSE/},
}

@article{schmidt_rocha_padmanabhan_chen_scott_mihelcic_samatova_2011,
  author       = {Schmidt, Matthew C. and Rocha, Andrea M. and Padmanabhan, Kanchana and Chen, Zhengzhang and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.},
  title        = {Efficient alpha, beta-motif finder for identification of phenotype-related functional modules},
  journal      = {BMC Bioinformatics},
  volume       = {12},
  year         = {2011},
  month        = nov,
  issn         = {1471-2105},
  doi          = {10.1186/1471-2105-12-440},
  abstractNote = {Background: Microbial communities in their natural environments exhibit phenotypes that can directly cause particular diseases, convert biomass or wastewater to energy, or degrade various environmental contaminants. Understanding how these communities realize specific phenotypic traits (e.g., carbon fixation, hydrogen production) is critical for addressing health, bioremediation, or bioenergy problems. Results: In this paper, we describe a graph-theoretical method for in silico prediction of the cellular subsystems that are related to the expression of a target phenotype. The proposed (α, β)-motif finder approach allows for identification of these phenotype-related subsystems that, in addition to metabolic subsystems, could include their regulators, sensors, transporters, and even uncharacterized proteins. By comparing dozens of genome-scale networks of functionally associated proteins, our method efficiently identifies those statistically significant functional modules that are in at least α networks of phenotype-expressing organisms but appear in no more than β networks of organisms that do not exhibit the target phenotype. It has been shown via various experiments that the enumerated modules are indeed related to phenotype-expression when tested with different target phenotypes like hydrogen production, motility, aerobic respiration, and acid-tolerance. Conclusion: Thus, we have proposed a methodology that can identify potential statistically significant phenotype-related functional modules. The functional module is modeled as an (α, β)-clique, where α and β are two criteria introduced in this work. We also propose a novel network model, called the two-typed, divided network. The new network model and the criteria make the problem tractable even while very large networks are being compared. The code can be downloaded from http://www.freescience.org/cs/ABClique/},
}

@comment{
  NOTE(review): the source record for the next entry carried no year.
  2017 is inferred from the volume number of the journal and is not taken
  from the record itself -- confirm against the publisher page.
}

@article{padmanabhan_nudelman_harenberg_bello_sohn_shpanskaya_dikshit_yerramsetty_tanzi_saykin_etal,
  author  = {Padmanabhan, K. and Nudelman, K. and Harenberg, S. and Bello, G. and Sohn, D. and Shpanskaya, K. and Dikshit, P. T. and Yerramsetty, P. S. and Tanzi, R. E. and Saykin, A. J. and others},
  title   = {Characterizing gene and protein crosstalks in subjects at risk of developing {Alzheimer's} disease: A new computational approach},
  journal = {Processes},
  volume  = {5},
  number  = {3},
  year    = {2017},
}

@comment{
  NOTE(review): the source record for the next entry was typed as a book,
  stored the value 2014 in its pages field and a library-holdings remark in
  its journal field. 2014 is presumed to be the publication year and the
  work is presumed to be a technical report -- confirm. No issuing
  institution was recorded, so the generic misc type is used.
}

@misc{harenberg_bello_gjeltema_ranshous_harlalka_seay_padmanabhan_samatova,
  author       = {Harenberg, S. and Bello, G. A. and Gjeltema, L. and Ranshous, S. and Harlalka, J. and Seay, R. and Padmanabhan, K. and Samatova, N.},
  title        = {Community detection in large-scale networks: A Survey and empirical evaluation},
  howpublished = {Technical report},
  year         = {2014},
  note         = {Not held in TRLN member libraries},
}