@article{schmidt_vogel_2021, title={Algorithms that Empower? Platformization in US Intelligence Analysis}, ISSN={["2158-3404"]}, DOI={10.1109/ISTAS50296.2020.9555838}, abstractNote={This paper discusses a computational architecture called the Analytic Component System (ACS), which aims to provide intelligence analysts with a service-oriented computational platform. This platform is designed to empower intelligence analysts by improving the integration of people, algorithms, software, tools, and manual work in the production of time-pressured intelligence assessments. Combining the perspectives of the ACS computer science design team and an embedded social scientist, this paper will use ACS to discuss the “platformization” of intelligence analysis and what this means for how we might think about and plan for reflexive design in future computational intelligence analytic systems.}, journal={PROCEEDINGS OF THE 2020 IEEE INTERNATIONAL SYMPOSIUM ON TECHNOLOGY AND SOCIETY (ISTAS)}, author={Schmidt, Matthew and Vogel, Kathleen M.}, year={2021} }
@article{schmidt_rocha_padmanabhan_shpanskaya_banfield_scott_mihelcic_samatova_2012, title={NIBBS-Search for Fast and Accurate Prediction of Phenotype-Biased Metabolic Systems}, volume={8}, ISSN={["1553-7358"]}, DOI={10.1371/journal.pcbi.1002490}, abstractNote={Understanding of genotype-phenotype associations is important not only for furthering our knowledge on internal cellular processes, but also essential for providing the foundation necessary for genetic engineering of microorganisms for industrial use (e.g., production of bioenergy or biofuels). However, genotype-phenotype associations alone do not provide enough information to alter an organism's genome to either suppress or exhibit a phenotype. It is important to look at the phenotype-related genes in the context of the genome-scale network to understand how the genes interact with other genes in the organism. Identification of metabolic subsystems involved in the expression of the phenotype is one way of placing the phenotype-related genes in the context of the entire network. A metabolic system refers to a metabolic network subgraph; nodes are compounds and edges labels are the enzymes that catalyze the reaction. The metabolic subsystem could be part of a single metabolic pathway or span parts of multiple pathways. Arguably, comparative genome-scale metabolic network analysis is a promising strategy to identify these phenotype-related metabolic subsystems. Network Instance-Based Biased Subgraph Search (NIBBS) is a graph-theoretic method for genome-scale metabolic network comparative analysis that can identify metabolic systems that are statistically biased toward phenotype-expressing organismal networks. We set up experiments with target phenotypes like hydrogen production, TCA expression, and acid-tolerance. We show via extensive literature search that some of the resulting metabolic subsystems are indeed phenotype-related and formulate hypotheses for other systems in terms of their role in phenotype expression. NIBBS is also orders of magnitude faster than MULE, one of the most efficient maximal frequent subgraph mining algorithms that could be adjusted for this problem. Also, the set of phenotype-biased metabolic systems output by NIBBS comes very close to the set of phenotype-biased subgraphs output by an exact maximally-biased subgraph enumeration algorithm ( MBS-Enum ). The code (NIBBS and the module to visualize the identified subsystems) is available at http://freescience.org/cs/NIBBS.}, number={5}, journal={PLOS COMPUTATIONAL BIOLOGY}, author={Schmidt, Matthew C. and Rocha, Andrea M. and Padmanabhan, Kanchana and Shpanskaya, Yekaterina and Banfield, Jill and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.}, year={2012}, month={May} }
@article{schmidt_rocha_padmanabhan_chen_scott_mihelcic_samatova_2011, title={Efficient alpha, beta-motif finder for identification of phenotype-related functional modules}, volume={12}, ISSN={["1471-2105"]}, DOI={10.1186/1471-2105-12-440}, abstractNote={Abstract}, journal={BMC BIOINFORMATICS}, author={Schmidt, Matthew C. and Rocha, Andrea M. and Padmanabhan, Kanchana and Chen, Zhengzhang and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.}, year={2011}, month={Nov} }
@article{chen_schmidt_samatova_2011, title={On the parameterized complexity of the Multi-MCT and Multi-MCST problems}, volume={21}, ISSN={["1573-2886"]}, DOI={10.1007/s10878-009-9220-2}, number={2}, journal={JOURNAL OF COMBINATORIAL OPTIMIZATION}, author={Chen, Wenbin and Schmidt, Matthew C. and Samatova, Nagiza F.}, year={2011}, month={Feb}, pages={151–158} }
@article{hendrix_schmidt_breimyer_samatova_2010, title={Theoretical underpinnings for maximal clique enumeration on perturbed graphs}, volume={411}, ISSN={["1879-2294"]}, DOI={10.1016/j.tcs.2010.03.011}, abstractNote={The problem of enumerating the maximal cliques of a graph is a computationally expensive problem with applications in a number of different domains. Sometimes the benefit of knowing the maximal clique enumeration (MCE) of a single graph is worth investing the initial computation time. However, when graphs are abstractions of noisy or uncertain data, the MCE of several closely related graphs may need to be found, and the computational cost of doing so becomes prohibitively expensive. Here, we present a method by which the cost of enumerating the set of maximal cliques for related graphs can be reduced. By using the MCE for some baseline graph, the MCE for a modified, or perturbed, graph may be obtained by enumerating only the maximal cliques that are created or destroyed by the perturbation. When the baseline and perturbed graphs are relatively similar, the difference set between the two MCEs can be overshadowed by the maximal cliques common to both. Thus, by enumerating only the difference set between the baseline and perturbed graphs’ MCEs, the computational cost of enumerating the maximal cliques of the perturbed graph can be reduced. We present necessary and sufficient conditions for enumerating difference sets when the perturbed graph is formed by several different types of perturbations. We also present results of an algorithm based on these conditions that demonstrate a speedup over traditional calculations of the MCE of perturbed, real biological networks.}, number={26-28}, journal={THEORETICAL COMPUTER SCIENCE}, author={Hendrix, William and Schmidt, Matthew C. and Breimyer, Paul and Samatova, Nagiza F.}, year={2010}, month={Jun}, pages={2520–2536} }
@article{schmidt_samatova_thomas_park_2009, title={A scalable, parallel algorithm for maximal clique enumeration}, volume={69}, ISSN={["1096-0848"]}, DOI={10.1016/j.jpdc.2009.01.003}, abstractNote={The problem of maximal clique enumeration (MCE) is to enumerate all of the maximal cliques in a graph. Once enumerated, maximal cliques are widely used to solve problems in areas such as 3-D protein structure alignment, genome mapping, gene expression analysis, and detection of social hierarchies. Even the most efficient serial MCE algorithms require large amounts of time to enumerate the maximal cliques in networks arising from these problems that contain hundreds, thousands, or larger numbers of vertices. The previous attempts to provide practical solutions to the MCE problem through parallel implementation have had limited success, largely due to a number of challenges inherent to the nature of the MCE combinatorial search space. On the one hand, MCE algorithms often create a backtracking search tree that has a highly irregular and hard-or-impossible to predict structure; therefore, almost any static decomposition of the search tree by parallel processors results in highly unbalanced processor execution times. On the other hand, the data-intensive nature of the MCE problem often makes naive dynamic load distribution strategies that require extensive data movement prohibitively expensive. As a result, good scaling of the overall execution time of parallel MCE algorithms has been reported for only up to a couple hundred processors. In this paper, we propose a parallel, scalable, and memory-efficient MCE algorithm for distributed and/or shared memory high performance computing architectures, whose runtime scales linearly for thousands of processors on real-world application graphs with hundreds and thousands of nodes. Its scalability and efficiency are attributed to the proposed: (a) representation of the search tree decomposition to enable parallelization; (b) parallel depth-first backtracking search to both constrain the search space and minimize memory requirement; (c) least stringent synchronization to minimize data movement; and (d) on-demand work stealing intelligently coupled with work stack splitting to minimize computing elements’ idle time. To the best of our knowledge, the proposed parallel MCE algorithm is the first to achieve a linear scaling runtime using up to 2048 processors on Cray XT machines for a number of real-world biological networks.}, number={4}, journal={JOURNAL OF PARALLEL AND DISTRIBUTED COMPUTING}, author={Schmidt, Matthew C. and Samatova, Nagiza F. and Thomas, Kevin and Park, Byung-Hoon}, year={2009}, month={Apr}, pages={417–428} }
@article{chen_schmidt_samatova_2009, title={On parameterized complexity of the Multi-MCS problem}, volume={410}, ISSN={["1879-2294"]}, DOI={10.1016/j.tcs.2008.12.060}, abstractNote={We introduce the maximum common subgraph problem for multiple graphs (Multi-MCS) inspired by various biological applications such as multiple alignments of gene sequences, protein structures, metabolic pathways, or protein–protein interaction networks. Multi-MCS is a generalization of the two-graph Maximum Common Subgraph problem (MCS). On the basis of the framework of parameterized complexity theory, we derive the parameterized complexity of Multi-MCS for various parameters for different classes of graphs. For example, for directed graphs with labeled vertices, we prove that the parameterized m-Multi-MCS problem is W[2]-hard, while the parameterized k-Multi-MCS problem is W[t]-hard (∀t≥1), where m and k are the size of the maximum common subgraph and the number of multiple graphs, respectively. We show similar results for other parameterized versions of the Multi-MCS problem for directed graphs with vertex labels and undirected graphs with vertex and edge labels by giving linear FPT reductions of the problems from parameterized versions of the longest common subsequence problem. Likewise, for unlabeled undirected graphs, we show that a parameterized version of the Multi-MCS problem with a fixed number of input graphs is W[1]-complete by showing a linear FPT reduction to and from a parameterized version of the maximum clique problem.}, number={21-23}, journal={THEORETICAL COMPUTER SCIENCE}, author={Chen, Wenbin and Schmidt, Matthew C. and Samatova, Nagiza F.}, year={2009}, month={May}, pages={2024–2032} }
@inproceedings{samatova_schmidt_hendrix_breimyer_thomas_park, title={Coupling graph perturbation theory with scalable parallel algorithms for large-scale enumeration of maximal cliques in biological graphs - art. no. 012053}, volume={125}, booktitle={Scidac 2008: Scientific discovery through advanced computing}, author={Samatova, N. F. and Schmidt, M. C. and Hendrix, W. and Breimyer, P. and Thomas, K. and Park, B. H.}, pages={12053–12053} }