@inproceedings{hsu_nair_menzies_freeh_2018,
  title={Micky: A Cheaper Alternative for Selecting Cloud Instances},
  DOI={10.1109/CLOUD.2018.00058},
  abstractNote={Most cloud computing optimizers explore and improve one workload at a time. When optimizing many workloads, the single-optimizer approach can be prohibitively expensive. Accordingly, we examine a "collective optimizer" that concurrently explores and improves a set of workloads, significantly reducing measurement costs. Our large-scale empirical study shows that there is often a single cloud configuration which is surprisingly near-optimal for most workloads. Consequently, we create a collective optimizer, MICKY, that reformulates the task of finding the near-optimal cloud configuration as a multi-armed bandit problem. MICKY efficiently balances exploration (of new cloud configurations) and exploitation (of known good cloud configurations). Our experiments show that MICKY can achieve on average an 8.6 times reduction in measurement cost compared to the state-of-the-art method while finding near-optimal solutions. Hence we propose MICKY as the basis of a practical collective optimization method for finding good cloud configurations (based on various constraints such as budget and tolerance to near-optimal configurations).},
  booktitle={Proceedings of the 2018 IEEE 11th International Conference on Cloud Computing (CLOUD)},
  author={Hsu, Chin-Jung and Nair, Vivek and Menzies, Tim and Freeh, Vincent},
  year={2018},
  pages={409–416}
}

@inproceedings{hsu_freeh_villanustre_2017,
  title={Trilogy: data placement to improve performance and robustness of cloud computing},
  DOI={10.1109/bigdata.2017.8258202},
  abstractNote={Infrastructure as a Service, one of the most disruptive aspects of cloud computing, enables configuring a cluster for each application for each workload. When the workload changes, a cluster will be either underutilized (wasting resources) or unable to meet demand (incurring opportunity costs). Consequently, efficient cluster resizing requires proper data replication and placement. Our work reveals that coarse-grain, workload-aware replication addresses over-utilization but cannot resolve under-utilization. With fine-grain partitioning of the dataset, data replication can reduce both under- and over-utilization. In our empirical studies, compared to a naïve uniform data replication, a coarse-grain workload-aware replication increases throughput by 81% on a highly skewed workload. A fine-grain scheme further reaches a 166% increase. Furthermore, a surprisingly small increase in granularity is sufficient to obtain most of the benefits. Evaluations also show that maximizing the number of unique partitions per node increases robustness to tolerate workload deviation, while minimizing this number reduces the storage footprint.},
  booktitle={2017 IEEE International Conference on Big Data (Big Data)},
  author={Hsu, C. J. and Freeh, V. W. and Villanustre, F.},
  year={2017},
  pages={2442–2451}
}

@inproceedings{hsu_panta_ra_freeh_2016,
  title={Inside-out: Reliable performance prediction for distributed storage systems in the cloud},
  DOI={10.1109/srds.2016.025},
  abstractNote={Many storage systems are undergoing a significant shift from the dedicated appliance-based model to software-defined storage (SDS) because the latter is flexible, scalable, and cost-effective for modern workloads. However, it is challenging to provide a reliable guarantee of end-to-end performance in SDS due to the complex software stack, time-varying workloads, and performance interference among tenants. Therefore, modeling and monitoring the performance of storage systems is critical for ensuring reliable QoS guarantees. Existing approaches such as performance benchmarking and analytical modeling are inadequate because they are not efficient in exploring the large configuration space and cannot support elastic operations and diverse storage services in SDS. This paper presents Inside-Out, an automatic model-building tool that creates accurate performance models for distributed storage services. Inside-Out is a black-box approach. It builds high-level performance models by applying machine learning techniques to low-level system performance metrics collected from individual components of the distributed SDS system. Inside-Out uses a two-level learning method that combines two machine learning models to automatically filter irrelevant features, boost prediction accuracy, and yield consistent predictions. Our in-depth evaluation shows that Inside-Out is a robust solution that enables SDS to predict end-to-end performance even in challenging conditions, e.g., changes in workload, storage configuration, available cloud resources, size of the distributed storage service, and amount of interference due to multiple tenants. Our experiments show that Inside-Out can predict end-to-end performance with 91.1% accuracy on average. Its prediction accuracy is consistent across diverse storage environments.},
  booktitle={Proceedings of the 2016 IEEE 35th Symposium on Reliable Distributed Systems (SRDS)},
  author={Hsu, C. J. and Panta, R. K. and Ra, M. R. and Freeh, V. W.},
  year={2016},
  pages={127–136}
}

@inproceedings{kc_hsu_freeh_2015,
  title={Evaluation of MapReduce in a large cluster},
  DOI={10.1109/cloud.2015.68},
  abstractNote={MapReduce is a widely used framework that runs large-scale data processing applications. However, there are very few systematic studies of MapReduce on large clusters, and thus there is a lack of reference for expected behavior or issues while running applications in a large cluster. This paper describes our findings from running applications on Pivotal's Analytics Workbench, which consists of a 540-node Hadoop cluster. Our experience sheds light on how applications behave in a large-scale cluster. This paper discusses our experiences in three areas. The first describes the scaling behavior of applications as the dataset size increases. The second discusses the appropriate settings for parallelism and overlap of map and reduce tasks. The third discusses general observations. These areas have not been reported or studied previously. Our findings show that IO-intensive applications do not scale as data size increases and that MapReduce applications require different amounts of parallelism and overlap to minimize completion time. Additionally, our observations highlight the need for appropriate memory allocation for a MapReduce component and the importance of decreasing log file size.},
  booktitle={2015 IEEE 8th International Conference on Cloud Computing},
  author={Kc, Kamal and Hsu, Chin-Jung and Freeh, Vincent W.},
  year={2015},
  pages={461–468}
}

@article{chung_hsu_lai_li_chung_2014,
  title={Maintenance of cooperative overlays in multi-overlay networks},
  volume={8},
  number={15},
  journal={IET Communications},
  author={Chung, W. C. and Hsu, C. J. and Lai, K. C. and Li, K. C. and Chung, Y. C.},
  year={2014},
  pages={2676–2683}
}