@article{chen_samatova_stallmann_hendrix_ying_2016, title={On size-constrained minimum s-t cut problems and size-constrained dense subgraph problems}, volume={609}, ISSN={["1879-2294"]}, DOI={10.1016/j.tcs.2015.10.031}, abstractNote={In some application cases, the solutions of combinatorial optimization problems on graphs should satisfy an additional vertex size constraint. In this paper, we consider size-constrained minimum s–t cut problems and size-constrained dense subgraph problems. We introduce the minimum s–t cut with at-least-k vertices problem, the minimum s–t cut with at-most-k vertices problem, and the minimum s–t cut with exactly k vertices problem. We prove that they are NP-complete. Thus, they are not polynomially solvable unless P=NP. On the other hand, we also study the densest at-least-k-subgraph problem (DalkS) and the densest at-most-k-subgraph problem (DamkS) introduced by Andersen and Chellapilla [1]. We present a polynomial time algorithm for DalkS when k is bounded by some constant c. We also present two approximation algorithms for DamkS. The first approximation algorithm for DamkS has an approximation ratio of n−1k−1, where n is the number of vertices in the input graph. The second approximation algorithm for DamkS has an approximation ratio of O(nδ), for some δ<1/3.}, journal={THEORETICAL COMPUTER SCIENCE}, author={Chen, Wenbin and Samatova, Nagiza F. and Stallmann, Matthias F. and Hendrix, William and Ying, Weiqin}, year={2016}, month={Jan}, pages={434–442} } @article{chen_hendrix_guan_tetteh_choudhary_semazzi_samatova_2013, title={Discovery of extreme events-related communities in contrasting groups of physical system networks}, volume={27}, ISSN={["1573-756X"]}, DOI={10.1007/s10618-012-0289-3}, abstractNote={The latent behavior of a physical system that can exhibit extreme events such as hurricanes or rainfalls, is complex. Recently, a very promising means for studying complex systems has emerged through the concept of complex networks. Networks representing relationships between individual objects usually exhibit community dynamics. Conventional community detection methods mainly focus on either mining frequent subgraphs in a network or detecting stable communities in time-varying networks. In this paper, we formulate a novel problem—detection of predictive and phase-biased communities in contrasting groups of networks, and propose an efficient and effective machine learning solution for finding such anomalous communities. We build different groups of networks corresponding to different system’s phases, such as higher or low hurricane activity, discover phase-related system components as seeds to help bound the search space of community generation in each network, and use the proposed contrast-based technique to identify the changing communities across different groups. The detected anomalous communities are hypothesized (1) to play an important role in defining the target system’s state(s) and (2) to improve the predictive skill of the system’s states when used collectively in the ensemble of predictive models. When tested on the two important extreme event problems—identification of tropical cyclone-related and of African Sahel rainfall-related climate indices—our algorithm demonstrated the superior performance in terms of various skill and robustness metrics, including 8–16 % accuracy increase, as well as physical interpretability of detected communities. The experimental results also show the efficiency of our algorithm on synthetic datasets.}, number={2}, journal={DATA MINING AND KNOWLEDGE DISCOVERY}, author={Chen, Zhengzhang and Hendrix, William and Guan, Hang and Tetteh, Isaac K. and Choudhary, Alok and Semazzi, Fredrick and Samatova, Nagiza F.}, year={2013}, month={Sep}, pages={225–258} } @article{chen_hendrix_samatova_2012, title={Community-based anomaly detection in evolutionary networks}, volume={39}, DOI={10.1007/s10844-011-0183-2}, abstractNote={Networks of dynamic systems, including social networks, the World Wide Web, climate networks, and biological networks, can be highly clustered. Detecting clusters, or communities, in such dynamic networks is an emerging area of research; however, less work has been done in terms of detecting community-based anomalies. While there has been some previous work on detecting anomalies in graph-based data, none of these anomaly detection approaches have considered an important property of evolutionary networks—their community structure. In this work, we present an approach to uncover community-based anomalies in evolutionary networks characterized by overlapping communities. We develop a parameter-free and scalable algorithm using a proposed representative-based technique to detect all six possible types of community-based anomalies: grown, shrunken, merged, split, born, and vanished communities. We detail the underlying theory required to guarantee the correctness of the algorithm. We measure the performance of the community-based anomaly detection algorithm by comparison to a non–representative-based algorithm on synthetic networks, and our experiments on synthetic datasets show that our algorithm achieves a runtime speedup of 11–46 over the baseline algorithm. We have also applied our algorithm to two real-world evolutionary networks, Food Web and Enron Email. Significant and informative community-based anomaly dynamics have been detected in both cases.}, number={1}, journal={Journal of Intelligent Information Systems}, author={Chen, Z. Z. and Hendrix, W. and Samatova, N. F.}, year={2012}, pages={59–85} } @article{hendrix_rocha_padmanabhan_choudhary_scott_mihelcic_samatova_2011, title={DENSE: efficient and prior knowledge-driven discovery of phenotype-associated protein functional modules}, volume={5}, ISSN={["1752-0509"]}, DOI={10.1186/1752-0509-5-172}, abstractNote={Abstract}, journal={BMC SYSTEMS BIOLOGY}, author={Hendrix, Willam and Rocha, Andrea M. and Padmanabhan, Kanchana and Choudhary, Alok and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.}, year={2011}, month={Oct} } @article{hendrix_schmidt_breimyer_samatova_2010, title={Theoretical underpinnings for maximal clique enumeration on perturbed graphs}, volume={411}, ISSN={["1879-2294"]}, DOI={10.1016/j.tcs.2010.03.011}, abstractNote={The problem of enumerating the maximal cliques of a graph is a computationally expensive problem with applications in a number of different domains. Sometimes the benefit of knowing the maximal clique enumeration (MCE) of a single graph is worth investing the initial computation time. However, when graphs are abstractions of noisy or uncertain data, the MCE of several closely related graphs may need to be found, and the computational cost of doing so becomes prohibitively expensive. Here, we present a method by which the cost of enumerating the set of maximal cliques for related graphs can be reduced. By using the MCE for some baseline graph, the MCE for a modified, or perturbed, graph may be obtained by enumerating only the maximal cliques that are created or destroyed by the perturbation. When the baseline and perturbed graphs are relatively similar, the difference set between the two MCEs can be overshadowed by the maximal cliques common to both. Thus, by enumerating only the difference set between the baseline and perturbed graphs’ MCEs, the computational cost of enumerating the maximal cliques of the perturbed graph can be reduced. We present necessary and sufficient conditions for enumerating difference sets when the perturbed graph is formed by several different types of perturbations. We also present results of an algorithm based on these conditions that demonstrate a speedup over traditional calculations of the MCE of perturbed, real biological networks.}, number={26-28}, journal={THEORETICAL COMPUTER SCIENCE}, author={Hendrix, William and Schmidt, Matthew C. and Breimyer, Paul and Samatova, Nagiza F.}, year={2010}, month={Jun}, pages={2520–2536} }