% Conference paper (ICDCS 2012 proceedings). Stored as an inproceedings entry
% so styles format the venue as a proceedings title rather than a journal.
@inproceedings{tan_nguyen_shen_gu_venkatramani_rajan_2012,
  author    = {Tan, Yongmin and Nguyen, Hiep and Shen, Zhiming and Gu, Xiaohui and Venkatramani, Chitra and Rajan, Deepak},
  title     = {{PREPARE}: Predictive Performance Anomaly Prevention for Virtualized Cloud Systems},
  booktitle = {2012 {IEEE} 32nd International Conference on Distributed Computing Systems ({ICDCS})},
  year      = {2012},
  pages     = {285--294},
  doi       = {10.1109/icdcs.2012.65},
  issn      = {1063-6927},
  abstract  = {Virtualized cloud systems are prone to performance anomalies due to various reasons such as resource contentions, software bugs, and hardware failures. In this paper, we present a novel Predictive Performance Anomaly Prevention (PREPARE) system that provides automatic performance anomaly prevention for virtualized cloud computing infrastructures. PREPARE integrates online anomaly prediction, learning-based cause inference, and predictive prevention actuation to minimize the performance anomaly penalty without human intervention. We have implemented PREPARE on top of the Xen platform and tested it on the NCSU's Virtual Computing Lab using a commercial data stream processing system (IBM System S) and an online auction benchmark (RUBiS). The experimental results show that PREPARE can effectively prevent performance anomalies while imposing low overhead to the cloud infrastructure.},
}

% Conference paper (PODC 2010 proceedings). Stored as an inproceedings entry
% for the same reason as above; the ISBN identifies the proceedings volume.
@inproceedings{tan_gu_wang_2010,
  author    = {Tan, Yongmin and Gu, Xiaohui and Wang, Haixun},
  title     = {Adaptive System Anomaly Prediction for Large-Scale Hosting Infrastructures},
  booktitle = {{PODC} 2010: Proceedings of the 2010 {ACM} Symposium on Principles of Distributed Computing},
  year      = {2010},
  pages     = {173--182},
  doi       = {10.1145/1835698.1835741},
  isbn      = {978-1-60558-888-9},
  abstract  = {Large-scale hosting infrastructures require automatic system anomaly management to achieve continuous system operation. In this paper, we present a novel adaptive runtime anomaly prediction system, called ALERT, to achieve robust hosting infrastructures. In contrast to traditional anomaly detection schemes, ALERT aims at raising advance anomaly alerts to achieve just-in-time anomaly prevention. We propose a novel context-aware anomaly prediction scheme to improve prediction accuracy in dynamic hosting infrastructures. We have implemented the ALERT system and deployed it on several production hosting infrastructures such as IBM System S stream processing cluster and PlanetLab. Our experiments show that ALERT can achieve high prediction accuracy for a range of system anomalies and impose low overhead to the hosting infrastructure.},
}