@article{chen_chen_samatova_peng_wang_tang_2014, title={Solving the maximum duo-preservation string mapping problem with linear programming}, volume={530}, ISSN={["1879-2294"]}, DOI={10.1016/j.tcs.2014.02.017}, abstractNote={In this paper, we introduce the maximum duo-preservation string mapping problem (MPSM), which is complementary to the minimum common string partition problem (MCSP). When each letter occurs at most k times in any input string, the version of MPSM is called k-MPSM. In order to design approximation algorithms for MPSM, we also introduce the constrained maximum induced subgraph problem (CMIS) and the constrained minimum induced subgraph (CNIS) problem. We show that both CMIS and CNIS are NP-complete. We also study the approximation algorithms for the restricted version of CMIS, which is called k-CMIS (k⩾2). Using Linear Programming method, we propose an approximation algorithm for 2-CMIS with approximation ratio 2 and an approximation algorithm for k-CMIS (k⩾3) with approximation ratio k2. Based on approximation algorithms for k-CMIS, we get approximation algorithms for k-MPSM with the same approximation ratio.}, journal={THEORETICAL COMPUTER SCIENCE}, author={Chen, Wenbin and Chen, Zhengzhang and Samatova, Nagiza F. and Peng, Lingxi and Wang, Jianxiong and Tang, Maobin}, year={2014}, month={Apr}, pages={1–11} } @article{chen_hendrix_guan_tetteh_choudhary_semazzi_samatova_2013, title={Discovery of extreme events-related communities in contrasting groups of physical system networks}, volume={27}, ISSN={["1573-756X"]}, DOI={10.1007/s10618-012-0289-3}, abstractNote={The latent behavior of a physical system that can exhibit extreme events such as hurricanes or rainfalls, is complex. Recently, a very promising means for studying complex systems has emerged through the concept of complex networks. Networks representing relationships between individual objects usually exhibit community dynamics. Conventional community detection methods mainly focus on either mining frequent subgraphs in a network or detecting stable communities in time-varying networks. In this paper, we formulate a novel problem—detection of predictive and phase-biased communities in contrasting groups of networks, and propose an efficient and effective machine learning solution for finding such anomalous communities. We build different groups of networks corresponding to different system’s phases, such as higher or low hurricane activity, discover phase-related system components as seeds to help bound the search space of community generation in each network, and use the proposed contrast-based technique to identify the changing communities across different groups. The detected anomalous communities are hypothesized (1) to play an important role in defining the target system’s state(s) and (2) to improve the predictive skill of the system’s states when used collectively in the ensemble of predictive models. When tested on the two important extreme event problems—identification of tropical cyclone-related and of African Sahel rainfall-related climate indices—our algorithm demonstrated the superior performance in terms of various skill and robustness metrics, including 8–16 % accuracy increase, as well as physical interpretability of detected communities. The experimental results also show the efficiency of our algorithm on synthetic datasets.}, number={2}, journal={DATA MINING AND KNOWLEDGE DISCOVERY}, author={Chen, Zhengzhang and Hendrix, William and Guan, Hang and Tetteh, Isaac K. and Choudhary, Alok and Semazzi, Fredrick and Samatova, Nagiza F.}, year={2013}, month={Sep}, pages={225–258} } @article{chen_hendrix_samatova_2012, title={Community-based anomaly detection in evolutionary networks}, volume={39}, DOI={10.1007/s10844-011-0183-2}, abstractNote={Networks of dynamic systems, including social networks, the World Wide Web, climate networks, and biological networks, can be highly clustered. Detecting clusters, or communities, in such dynamic networks is an emerging area of research; however, less work has been done in terms of detecting community-based anomalies. While there has been some previous work on detecting anomalies in graph-based data, none of these anomaly detection approaches have considered an important property of evolutionary networks—their community structure. In this work, we present an approach to uncover community-based anomalies in evolutionary networks characterized by overlapping communities. We develop a parameter-free and scalable algorithm using a proposed representative-based technique to detect all six possible types of community-based anomalies: grown, shrunken, merged, split, born, and vanished communities. We detail the underlying theory required to guarantee the correctness of the algorithm. We measure the performance of the community-based anomaly detection algorithm by comparison to a non–representative-based algorithm on synthetic networks, and our experiments on synthetic datasets show that our algorithm achieves a runtime speedup of 11–46 over the baseline algorithm. We have also applied our algorithm to two real-world evolutionary networks, Food Web and Enron Email. Significant and informative community-based anomaly dynamics have been detected in both cases.}, number={1}, journal={Journal of Intelligent Information Systems}, author={Chen, Z. Z. and Hendrix, W. and Samatova, N. F.}, year={2012}, pages={59–85} } @article{gonzalez_chen_tetteh_pansombut_semazzi_kumar_melechko_samatova_2012, title={Hierarchical Classifier-Regression Ensemble for Multi-Phase Non-Linear Dynamic System Response Prediction: Application to Climate Analysis}, ISBN={["978-1-4673-5164-5"]}, ISSN={["2375-9232"]}, DOI={10.1109/icdmw.2012.133}, abstractNote={A dynamic physical system often undergoes phase transitions in response to fluctuations induced on system parameters. For example, hurricane activity is the climate system's response initiated by a liquid-vapor phase transition associated with non-linearly coupled fluctuations in the ocean and the atmosphere. Because our quantitative knowledge about highly non-linear dynamic systems is very meager, scientists often resort to linear regression techniques such as Least Absolute Deviation (LAD) to learn the non-linear system's response (e.g., hurricane activity) from observed or simulated system's parameters (e.g., temperature, precipitable water, pressure). While insightful, such models still offer limited predictability, and alternatives intended to capture non-linear behaviors such as Stepwise Regression are often controversial in nature. In this paper, we hypothesize that one of the primary reasons for lack of predictability is the treatment of an inherently multi-phase system as being phase less. To bridge this gap, we propose a hybrid approach that first predicts the phase the system is in, and then estimates the magnitude of the system's response using the regression model optimized for this phase. Our approach is designed for systems that could be characterized by multi-variate spatio-temporal data from observations, simulations, or both.}, journal={12TH IEEE INTERNATIONAL CONFERENCE ON DATA MINING WORKSHOPS (ICDMW 2012)}, author={Gonzalez, Doel L., II and Chen, Zhengzhang and Tetteh, Isaac K. and Pansombut, Tatdow and Semazzi, Fredrick and Kumar, Vipin and Melechko, Anatoli and Samatova, Nagiza F.}, year={2012}, pages={781–788} } @article{schmidt_rocha_padmanabhan_chen_scott_mihelcic_samatova_2011, title={Efficient alpha, beta-motif finder for identification of phenotype-related functional modules}, volume={12}, ISSN={["1471-2105"]}, DOI={10.1186/1471-2105-12-440}, abstractNote={Abstract}, journal={BMC BIOINFORMATICS}, author={Schmidt, Matthew C. and Rocha, Andrea M. and Padmanabhan, Kanchana and Chen, Zhengzhang and Scott, Kathleen and Mihelcic, James R. and Samatova, Nagiza F.}, year={2011}, month={Nov} } @inproceedings{chen_wilson_jin_samatova_2010, title={Detecting and Tracking Community Dynamics in Evolutionary Networks}, DOI={10.1109/icdmw.2010.32}, abstractNote={Community structure or clustering is ubiquitous in many evolutionary networks including social networks, biological networks and financial market networks. Detecting and tracking community deviations in evolutionary networks can uncover important and interesting behaviors that are latent if we ignore the dynamic information. In biological networks, for example, a small variation in a gene community may indicate an event, such as gene fusion, gene fission, or gene decay. In contrast to the previous work on detecting communities in static graphs or tracking conserved communities in time-varying graphs, this paper first introduces the concept of community dynamics, and then shows that the baseline approach by enumerating all communities in each graph and comparing all pairs of communities between consecutive graphs is infeasible and impractical. We propose an efficient method for detecting and tracking community dynamics in evolutionary networks by introducing graph representatives and community representatives to avoid generating redundant communities and limit the search space. We measure the performance of the representative-based algorithm by comparison to the baseline algorithm on synthetic networks, and our experiments show that our algorithm achieves a runtime speedup of 11–46. The method has also been applied to two real-world evolutionary networks including Food Web and Enron Email. Significant and informative community dynamics have been detected in both cases.}, booktitle={The 10th IEEE International Conference on Data Mining Workshops}, publisher={Los Alamitos, Calif. : IEEE Computer Society}, author={Chen, Z and Wilson, K.A. and Jin, Y. and Samatova, N.F.}, year={2010}, pages={318–327} } @article{chen_yin_chen_2010, title={Inapproximability results for equations over infinite groups}, volume={411}, number={26-28}, journal={Theoretical Computer Science}, author={Chen, W. B. and Yin, D. P. and Chen, Z. Z.}, year={2010}, pages={2513–2519} } @book{chen_pansombut_hendrix_gonzalez_semazzi_choudhary_kumar_melechko_samatova, title={Forecaster: Forecast Oriented Feature Elimination-based Classification of Adverse Spatio-Temporal Extremes}, author={Chen, Z. and Pansombut, T. and Hendrix, W. and Gonzalez, D. and Semazzi, F. and Choudhary, A. and Kumar, V. and Melechko, A.V. and Samatova, N.F.} }