% Bibliography: in-situ indexing, compression and query processing of
% scientific data (DIRAQ, APLOD, ISABELA, ALACRITY and related work).
%
% Conventions used below:
%   - one field per line, lowercase field names, brace-delimited values;
%   - names in "Last, First" / "Last, Jr, First" form, separated by " and ";
%   - page ranges use "--"; month uses the unquoted three-letter macro;
%   - acronyms in titles are brace-protected so styles cannot downcase them;
%   - text is ASCII/LaTeX only so the file works with classic BibTeX and Biber.

% Journal article. The suffix "II" sits in the Jr slot ("Last, Jr, First").
@article{lakshminarasimhan_zou_boyuka_pendse_jenkins_vishwanath_papka_klasky_samatova_2014,
  author  = {Lakshminarasimhan, Sriram and Zou, Xiaocheng and Boyuka, II, David A. and Pendse, Saurabh V. and Jenkins, John and Vishwanath, Venkatram and Papka, Michael E. and Klasky, Scott and Samatova, Nagiza F.},
  title   = {{DIRAQ}: Scalable In Situ Data- and Resource-Aware Indexing for Optimized Query Performance},
  journal = {Cluster Computing: The Journal of Networks, Software Tools and Applications},
  volume  = {17},
  number  = {4},
  pages   = {1101--1119},
  year    = {2014},
  month   = dec,
  issn    = {1573-7543},
  doi     = {10.1007/s10586-014-0358-z},
}

% Conference paper.
% NOTE(review): the booktitle looks truncated in the export - confirm the
% full proceedings title against the DOI.
@inproceedings{jenkins_schendel_lakshminarasimhan_boyuka_rogers_ethier_ross_klasky_samatova_2012,
  author    = {Jenkins, J. and Schendel, E. R. and Lakshminarasimhan, S. and Boyuka, D. A. and Rogers, T. and Ethier, S. and Ross, R. and Klasky, S. and Samatova, N. F.},
  title     = {Byte-Precision Level of Detail Processing for Variable Precision Analytics},
  booktitle = {International Conference for High Performance Computing Networking},
  year      = {2012},
  doi       = {10.1109/sc.2012.26},
  abstract  = {I/O bottlenecks in HPC applications are becoming a more
    pressing problem as compute capabilities continue to outpace I/O
    capabilities. While double-precision simulation data often must be stored
    losslessly, the loss of some of the fractional component may introduce
    acceptably small errors to many types of scientific analyses. Given this
    observation, we develop a precision level of detail (APLOD) library, which
    partitions double-precision datasets along user-defined byte boundaries.
    APLOD parameterizes the analysis accuracy-I/O performance tradeoff, bounds
    maximum relative error, maintains I/O access patterns compared to full
    precision, and operates with low overhead. Using ADIOS as an I/O use-case,
    we show proportional reduction in disk access time to the degree of
    precision. Finally, we show the effects of partial precision analysis on
    accuracy for operations such as k-means and Fourier analysis, finding a
    strong applicability for the use of varying degrees of precision to reduce
    the cost of analyzing extreme-scale data.},
}

% Journal article. Abstract cleaned of export artefacts (fused "SUMMARY"
% heading, missing spaces after sentence ends, non-ASCII symbols).
@article{lakshminarasimhan_shah_ethier_ku_chang_klasky_latham_ross_samatova_2013,
  author   = {Lakshminarasimhan, Sriram and Shah, Neil and Ethier, Stephane and Ku, Seung-Hoe and Chang, C. S. and Klasky, Scott and Latham, Rob and Ross, Rob and Samatova, Nagiza F.},
  title    = {{ISABELA} for Effective In Situ Compression of Scientific Data},
  journal  = {Concurrency and Computation: Practice and Experience},
  volume   = {25},
  number   = {4},
  pages    = {524--540},
  year     = {2013},
  issn     = {1532-0634},
  doi      = {10.1002/cpe.2887},
  abstract = {Exploding dataset sizes from extreme-scale scientific
    simulations necessitates efficient data management and reduction schemes
    to mitigate I/O costs. With the discrepancy between I/O bandwidth and
    computational power, scientists are forced to capture data infrequently,
    thereby making data collection an inherently lossy process. Although data
    compression can be an effective solution, the random nature of real-valued
    scientific datasets renders lossless compression routines ineffective.
    These techniques also impose significant overhead during decompression,
    making them unsuitable for data analysis and visualization, which require
    repeated data access. To address this problem, we propose an effective
    method for In situ Sort-And-B-spline Error-bounded Lossy Abatement
    (ISABELA) of scientific data that is widely regarded as effectively
    incompressible. With ISABELA, we apply a pre-conditioner to seemingly
    random and noisy data along spatial resolution to achieve an accurate
    fitting model that guarantees a $\geq$0.99 correlation with the original
    data. We further take advantage of temporal patterns in scientific data to
    compress data by $\approx$85\%, while introducing only a negligible
    overhead on simulations in terms of runtime. ISABELA significantly
    outperforms existing lossy compression methods, such as wavelet
    compression, in terms of data reduction and accuracy. We extend upon our
    previous paper by additionally building a communication-free, scalable
    parallel storage framework on top of ISABELA-compressed data that is
    ideally suited for extreme-scale analytical processing. The basis for our
    storage framework is an inherently local decompression method (it need not
    decode the entire data), which allows for random access decompression and
    low-overhead task division that can be exploited over heterogeneous
    architectures. Furthermore, analytical operations such as correlation and
    query processing run quickly and accurately over data in the compressed
    space. Copyright {\copyright} 2012 John Wiley \& Sons, Ltd.},
}

% Conference paper (IPDPS 2012): the venue belongs in booktitle, not journal.
@inproceedings{gong_lakshminarasimhan_jenkins_kolla_ethier_chen_ross_klasky_samatova_2012,
  author    = {Gong, Zhenhuan and Lakshminarasimhan, Sriram and Jenkins, John and Kolla, Hemanth and Ethier, Stephane and Chen, Jackie and Ross, Robert and Klasky, Scott and Samatova, Nagiza F.},
  title     = {Multi-level Layout Optimization for Efficient Spatio-temporal Queries on {ISABELA}-compressed Data},
  booktitle = {2012 {IEEE} 26th International Parallel and Distributed Processing Symposium ({IPDPS})},
  pages     = {873--884},
  year      = {2012},
  issn      = {1530-2075},
  doi       = {10.1109/ipdps.2012.83},
  abstract  = {The size and scope of cutting-edge scientific simulations are
    growing much faster than the I/O subsystems of their runtime environments,
    not only making I/O the primary bottleneck, but also consuming space that
    pushes the storage capacities of many computing facilities. These problems
    are exacerbated by the need to perform data-intensive analytics
    applications, such as querying the dataset by variable and spatio-temporal
    constraints, for what current database technologies commonly build query
    indices of size greater than that of the raw data. To help solve these
    problems, we present a parallel query-processing engine that can handle
    both range queries and queries with spatio-temporal constraints, on
    B-spline compressed data with user-controlled accuracy. Our method adapts
    to widening gaps between computation and I/O performance by querying on
    compressed metadata separated into bins by variable values, utilizing
    Hilbert space-filling curves to optimize for spatial constraints and
    aggregating data access to improve locality of per-bin stored data,
    reducing the false positive rate and latency bound I/O operations (such as
    seek) substantially. We show our method to be efficient with respect to
    storage, computation, and I/O compared to existing database technologies
    optimized for query processing on scientific data.},
}

% Conference paper (IGCC 2012).
@inproceedings{lakshminarasimhan_kumar_liao_choudhary_kumar_samatova_2012,
  author    = {Lakshminarasimhan, S. and Kumar, P. and Liao, W. K. and Choudhary, A. and Kumar, V. and Samatova, N. F.},
  title     = {On the Path to Sustainable, Scalable, and Energy-Efficient Data Analytics: Challenges, Promises, and Future Directions},
  booktitle = {2012 International Green Computing Conference ({IGCC})},
  year      = {2012},
  doi       = {10.1109/igcc.2012.6322265},
  abstract  = {As scientific data is reaching exascale, scalable and energy
    efficient data analytics is quickly becoming a top notch priority. Yet, a
    sustainable solution to this problem is hampered by a number of technical
    challenges that get exacerbated with the emerging hardware and software
    technology trends. In this paper, we present a number of recently created
    ``secret sauces'' that promise to address some of these challenges. We
    discuss transformative approaches to efficient data reduction,
    analytics-driven query processing, scalable analytical kernels,
    approximate analytics, among others. We propose a number of future
    directions that could be pursued on the path to sustainable data analytics
    at scale.},
}

% Paper in a collected volume. The exported key contained a space ("et al."),
% which classic BibTeX cannot parse; the space is now an underscore.
% The truncated author list uses the "and others" token so the style can
% render "et al." itself.
% NOTE(review): the required year field is absent from the export - confirm
% and add it.
@inproceedings{jenkins_arkatkar_lakshminarasimhan_boyuka_schendel_shah_ethier_chang_chen_kolla_et_al.,
  author    = {Jenkins, J. and Arkatkar, I. and Lakshminarasimhan, S. and Boyuka, D. A. and Schendel, E. R. and Shah, N. and Ethier, S. and Chang, C. S. and Chen, J. and Kolla, H. and others},
  title     = {{ALACRITY}: Analytics-Driven Lossless Data Compression for Rapid In-Situ Indexing, Storing, and Querying},
  booktitle = {Transactions on Large-Scale Data- and Knowledge-Centered Systems {X}: Special Issue on Database- and Expert-Systems Applications},
  volume    = {8220},
  pages     = {95--114},
}

% Technical report. The library-catalogue remark that the export had placed
% in the journal field is kept in annote, which standard styles do not print.
% NOTE(review): institution and year are required for this entry type and
% are absent from the export - confirm and add them.
@techreport{lakshminarasimhan_shah_ethier_klasky_latham_ross_n.f.,
  author = {Lakshminarasimhan, S. and Shah, N. and Ethier, S. J. and Klasky, S. and Latham, R. and Ross, R. and Samatova, N. F.},
  title  = {Compressing the Incompressible with {ISABELA}: In-situ Reduction of Spatio-Temporal Data},
  annote = {Technical report; not held in TRLN member libraries},
}