@inproceedings{zhang_tang_harenberg_byna_zou_devendran_martin_wu_dong_klasky_et_al_2016, title={AMRZone: A Runtime AMR Data Sharing Framework For Scientific Applications}, ISSN={2376-4414}, DOI={10.1109/ccgrid.2016.62}, abstractNote={Frameworks that facilitate runtime data sharing across multiple applications are of great importance for scientific data analytics. Although existing frameworks work well over uniform mesh data, they cannot effectively handle adaptive mesh refinement (AMR) data. The challenges in constructing an AMR-capable framework include: (1) designing an architecture that facilitates online AMR data management, (2) achieving a load-balanced AMR data distribution for the data staging space at runtime, and (3) building an effective online index to support the unique spatial data retrieval requirements of AMR data. Towards addressing these challenges to support runtime AMR data sharing across scientific applications, we present the AMRZone framework. Experiments over real-world AMR datasets demonstrate AMRZone's effectiveness at achieving a balanced workload distribution, reading/writing large-scale datasets with thousands of parallel processes, and satisfying queries with spatial constraints. Moreover, AMRZone's performance and scalability are even comparable with existing state-of-the-art work when tested over uniform mesh data with up to 16384 cores; in the best case, our framework achieves a 46% performance improvement.}, booktitle={2016 16th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)}, author={Zhang, Wenzhao and Tang, Houjun and Harenberg, Steve and Byna, Surendra and Zou, Xiaocheng and Devendran, Dharshi and Martin, Daniel F. and Wu, Kesheng and Dong, Bin and Klasky, Scott and others}, year={2016}, pages={116–125} } @inproceedings{zhang_tang_ranshous_byna_martin_wu_dong_klasky_samatova_2016, title={Exploring memory hierarchy and network topology for runtime AMR data sharing across scientific applications}, DOI={10.1109/bigdata.2016.7840743}, abstractNote={Runtime data sharing across applications is of great importance for avoiding high I/O overhead in scientific data analytics. Sharing data on a staging space running on a set of dedicated compute nodes is faster than writing data to a slow disk-based parallel file system (PFS) and then reading it back for post-processing. Originally, the staging space was based purely on main memory (DRAM), and was thus several orders of magnitude faster than the PFS approach. However, storing all the data produced by large-scale simulations in DRAM is impractical. Moving data from memory to SSD-based burst buffers is a potential approach to address this issue. However, SSDs are about one order of magnitude slower than DRAM. To optimize data access performance over the staging space, methods such as prefetching data from SSDs according to detected spatial access patterns and distributing data across the network topology have been explored. Although these methods work well for the uniform mesh data they were designed for, they are not well suited for adaptive mesh refinement (AMR) data. Two major issues must be addressed before constructing such a memory-hierarchy- and topology-aware runtime AMR data sharing framework: (1) spatial access pattern detection and prefetching for AMR data; (2) AMR data distribution across the network topology at runtime. We propose a framework that addresses these challenges and demonstrate its effectiveness with extensive experiments on AMR data.
Our results show that the framework's spatial access pattern detection and prefetching methods achieve about a 26% performance improvement for client analytical processes. Moreover, the framework's topology-aware data placement can improve overall data access performance by up to 18%.}, booktitle={2016 IEEE International Conference on Big Data (Big Data)}, author={Zhang, W. Z. and Tang, H. J. and Ranshous, S. and Byna, S. and Martin, D. F. and Wu, K. S. and Dong, B. and Klasky, S. and Samatova, N. F.}, year={2016}, pages={1359–1366} } @inproceedings{tang_byna_harenberg_zhang_zou_martin_dong_devendran_wu_trebotich_et_al_2016, title={In situ Storage Layout Optimization for AMR Spatio-temporal Read Accesses}, ISSN={0190-3918}, DOI={10.1109/icpp.2016.53}, abstractNote={Analyses of large simulation data often concentrate on regions in space and in time that contain important information. As simulations adopt Adaptive Mesh Refinement (AMR), the data records from a region of interest can be widely scattered on storage devices, and accessing interesting regions results in significantly reduced I/O performance. In this work, we study the organization of block-structured AMR data on storage to improve the performance of spatio-temporal data accesses. AMR has a complex hierarchical multi-resolution data structure that does not fit easily with existing approaches that focus on uniform mesh data. To enable efficient AMR read accesses, we develop an in situ data layout optimization framework. Our framework automatically selects from a set of candidate layouts based on a performance model, and reorganizes the data before writing it to storage. We evaluate this framework with three AMR datasets and access patterns derived from scientific applications. Our performance model is able to identify the best layout scheme and yields up to a 3X read performance improvement compared to the original layout. Though it is not possible to turn all read accesses into contiguous reads, we achieve 90% of contiguous read throughput with the optimized layouts on average.}, booktitle={Proceedings of the 45th International Conference on Parallel Processing - ICPP 2016}, author={Tang, Houjun and Byna, Suren and Harenberg, Steve and Zhang, Wenzhao and Zou, Xiaocheng and Martin, Daniel F. and Dong, Bin and Devendran, Dharshi and Wu, Kesheng and Trebotich, David and others}, year={2016}, pages={406–415} } @inproceedings{tang_byna_harenberg_zou_zhang_wu_dong_rubel_bouchard_klasky_et_al_2016, title={Usage Pattern-Driven Dynamic Data Layout Reorganization}, ISSN={2376-4414}, DOI={10.1109/ccgrid.2016.15}, abstractNote={As scientific simulations and experiments move toward extremely large scales and generate massive amounts of data, the data access performance of analytic applications becomes crucial. A mismatch often occurs between the write and read patterns of data accesses, typically resulting in poor read performance. Data layout reorganization has been used to improve the locality of data accesses. However, current data reorganizations are static and focus on generating a single optimized layout (or set of layouts) that relies on prior knowledge of exact future access patterns. We propose a framework that dynamically recognizes data usage patterns, replicates the data of interest in multiple reorganized layouts that benefit common read patterns, and makes runtime decisions on selecting a favorable layout for a given read pattern. This framework supports reading individual elements and chunks of a multi-dimensional array of variables.
Our pattern-driven layout selection strategy achieves multi-fold speedups compared to reading from the original dataset.}, booktitle={2016 16th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)}, author={Tang, Houjun and Byna, Suren and Harenberg, Steve and Zou, Xiaocheng and Zhang, Wenzhao and Wu, Kesheng and Dong, Bin and Rubel, Oliver and Bouchard, Kristofer and Klasky, Scott and others}, year={2016}, pages={356–365} } @inproceedings{zhang_tang_zou_harenberg_liu_klasky_samatova_2015, title={Exploring Memory Hierarchy to Improve Scientific Data Read Performance}, ISSN={1552-5244}, DOI={10.1109/cluster.2015.18}, abstractNote={Improving read performance is one of the major challenges in speeding up scientific data analytic applications. Utilizing the memory hierarchy is one major line of research for addressing the read performance bottleneck. Related methods usually combine solid-state drives (SSDs) with dynamic random-access memory (DRAM) and/or a parallel file system (PFS) to mitigate the speed and space gap between DRAM and the PFS. However, these methods are unable to handle a key performance issue plaguing SSDs, namely read contention, which may cause up to a 50% performance reduction. In this paper, we propose a framework that exploits memory hierarchy resources to address the read contention issues of SSDs. The framework employs a general-purpose online read algorithm that is able to detect and utilize memory hierarchy resources to relieve the problem. To maintain a near-optimal operating environment for SSDs, the framework orchestrates data chunks across different memory layers to facilitate the read algorithm. Compared to existing tools, our framework achieves up to a 50% read performance improvement when tested on datasets from real-world scientific simulations.}, booktitle={2015 IEEE International Conference on Cluster Computing - CLUSTER 2015}, author={Zhang, Wenzhao and Tang, Houjun and Zou, Xiaocheng and Harenberg, Steven and Liu, Qing and Klasky, Scott and Samatova, Nagiza F.}, year={2015}, pages={66–69} }