% Conference papers: CCGRID 2014, CCGRID 2013, IPDPS 2012, IWQoS 2009.
%
% Conventions used in the entries below:
%   - Proceedings papers are typed as inproceedings, with the venue name in
%     booktitle (not journal).
%   - Names with a generational suffix use the "Last, Jr, First" form so the
%     suffix is parsed as the Jr part rather than as a given name.
%   - Page ranges use the ASCII double hyphen "--".
%   - Acronyms and deliberately mixed-case words in titles are brace-protected
%     so that sentence-casing styles do not lower-case them.
%   - abstractNote is a non-standard field that standard styles ignore; its
%     text is kept verbatim from the export.

@inproceedings{boyuka_lakshminarasimhan_zou_gong_jenkins_schendel_podhorszki_liu_klasky_samatova_2014,
  author       = {Boyuka, II, David A. and Lakshminarasimhan, Sriram and Zou, Xiaocheng and Gong, Zhenhuan and Jenkins, John and Schendel, Eric R. and Podhorszki, Norbert and Liu, Qing and Klasky, Scott and Samatova, Nagiza F.},
  title        = {Transparent In Situ Data Transformations in {ADIOS}},
  booktitle    = {2014 14th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing ({CCGRID})},
  year         = {2014},
  pages        = {256--266},
  issn         = {2376-4414},
  doi          = {10.1109/ccgrid.2014.73},
  abstractNote = {Though an abundance of novel "data transformation" technologies have been developed (such as compression, level-of-detail, layout optimization, and indexing), there remains a notable gap in the adoption of such services by scientific applications. In response, we develop an in situ data transformation framework in the ADIOS I/O middleware with a "plug in" interface, thus greatly simplifying both the deployment and use of data transform services in scientific applications. Our approach ensures user-transparency, runtime-configurability, compatibility with existing I/O optimizations, and the potential for exploiting read-optimizing transforms (such as level-of-detail) to achieve I/O reduction. We demonstrate use of our framework with the QLG simulation at up to 8,192 cores on the leadership-class Titan supercomputer, showing negligible overhead. We also explore the read performance implications of data transforms with respect to parameters such as chunk size, access pattern, and the "opacity" of different transform methods including compression and level-of-detail.},
}

@inproceedings{gong_boyuka_zou_liu_podhorszki_klasky_ma_samatova_2013,
  author       = {Gong, Zhenhuan and Boyuka, II, David A. and Zou, Xiaocheng and Liu, Qing and Podhorszki, Norbert and Klasky, Scott and Ma, Xiaosong and Samatova, Nagiza F.},
  title        = {{PARLO}: {PArallel} Run-time Layout Optimization for Scientific Data Explorations with Heterogeneous Access Patterns},
  booktitle    = {Proceedings of the 2013 13th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing ({CCGRID} 2013)},
  year         = {2013},
  pages        = {343--351},
  isbn         = {978-1-4673-6465-2},
  issn         = {2376-4414},
  doi          = {10.1109/ccgrid.2013.58},
  abstractNote = {The size and scope of cutting-edge scientific simulations are growing much faster than the I/O and storage capabilities of their run-time environments. The growing gap is exacerbated by exploratory, data-intensive analytics, such as querying simulation data with multivariate, spatio-temporal constraints, which induces heterogeneous access patterns that stress the performance of the underlying storage system. Previous work addresses data layout and indexing techniques to improve query performance for a single access pattern, which is not sufficient for complex analytics jobs. We present PARLO a parallel run-time layout optimization framework, to achieve multi-level data layout optimization for scientific applications at run-time before data is written to storage. The layout schemes optimize for heterogeneous access patterns with user-specified priorities. PARLO is integrated with ADIOS, a high-performance parallel I/O middleware for large-scale HPC applications, to achieve user-transparent, light-weight layout optimization for scientific datasets. It offers simple XML-based configuration for users to achieve flexible layout optimization without the need to modify or recompile application codes. Experiments show that PARLO improves performance by 2 to 26 times for queries with heterogeneous access patterns compared to state-of-the-art scientific database management systems. Compared to traditional post-processing approaches, its underlying run-time layout optimization achieves a 56% savings in processing time and a reduction in storage overhead of up to 50%. PARLO also exhibits a low run-time resource requirement, while also limiting the performance impact on running applications to a reasonable level.},
}

@inproceedings{gong_lakshminarasimhan_jenkins_kolla_ethier_chen_ross_klasky_samatova_2012,
  author       = {Gong, Zhenhuan and Lakshminarasimhan, Sriram and Jenkins, John and Kolla, Hemanth and Ethier, Stephane and Chen, Jackie and Ross, Robert and Klasky, Scott and Samatova, Nagiza F.},
  title        = {Multi-level Layout Optimization for Efficient Spatio-temporal Queries on {ISABELA}-compressed Data},
  booktitle    = {2012 {IEEE} 26th International Parallel and Distributed Processing Symposium ({IPDPS})},
  year         = {2012},
  pages        = {873--884},
  issn         = {1530-2075},
  doi          = {10.1109/ipdps.2012.83},
  abstractNote = {The size and scope of cutting-edge scientific simulations are growing much faster than the I/O subsystems of their runtime environments, not only making I/O the primary bottleneck, but also consuming space that pushes the storage capacities of many computing facilities. These problems are exacerbated by the need to perform data-intensive analytics applications, such as querying the dataset by variable and spatio-temporal constraints, for what current database technologies commonly build query indices of size greater than that of the raw data. To help solve these problems, we present a parallel query-processing engine that can handle both range queries and queries with spatio-temporal constraints, on B-spline compressed data with user-controlled accuracy. Our method adapts to widening gaps between computation and I/O performance by querying on compressed metadata separated into bins by variable values, utilizing Hilbert space-filling curves to optimize for spatial constraints and aggregating data access to improve locality of per-bin stored data, reducing the false positive rate and latency bound I/O operations (such as seek) substantially. We show our method to be efficient with respect to storage, computation, and I/O compared to existing database technologies optimized for query processing on scientific data.},
}

@inproceedings{gong_ramaswamy_gu_ma_2009,
  author    = {Gong, Z. H. and Ramaswamy, P. and Gu, X. H. and Ma, X. S.},
  title     = {{SigLM}: Signature-Driven Load Management for Cloud Computing Infrastructures},
  booktitle = {{IWQoS}: 2009 {IEEE} 17th International Workshop on Quality of Service},
  year      = {2009},
  pages     = {226--234},
}