% Publication list (ten entries), cleaned up from an automated export.
%
% Conventions used in this file:
%   - one field per line, brace-delimited values, lowercase field names;
%   - page ranges use "--", months use the predefined three-letter macros;
%   - truncated author lists end with "and others" so the style prints "et al.";
%   - acronyms and proper nouns in titles and venue names are brace-protected;
%   - abstracts stay in the non-standard "abstractNote" field, which standard
%     styles ignore, so they never reach the typeset bibliography.
%
% Citation keys that previously contained the text "et al." (with a space and
% a period) could not be parsed as keys; they now end in "etal" instead.
% Documents citing these works must use the new keys.
%
% NOTE(review): in the 2021 Statistical Analysis and Data Mining entry the
% volume (8) coincides with the month (aug) and looks like an export artefact;
% verify against the publisher record.
% NOTE(review): the year of the Processes entry (2017) was absent from the
% export and is inferred from volume 5; confirm.
% NOTE(review): the export stored "2014" in the pages field of the technical
% report; it is presumably the year and is recorded as such; confirm.

@article{zhao_yang_bolnykh_harenberg_korchiev_yerramsetty_vellanki_kodumagulla_samatova_2021,
  author  = {Zhao, Yifan and Yang, Xian and Bolnykh, Carolina and Harenberg, Steve and Korchiev, Nodirbek and Yerramsetty, Saavan Raj and Vellanki, Bhanu Prasad and Kodumagulla, Ramakanth and Samatova, Nagiza F.},
  title   = {Predictive models with end user preference},
  journal = {Statistical Analysis and Data Mining},
  year    = {2021},
  month   = aug,
  volume  = {8},
  issn    = {1932-1872},
  doi     = {10.1002/sam.11545},
}

@inproceedings{xu_yang_harenberg_samatova_2017,
  author       = {Xu, Mingyang and Yang, Ruixin and Harenberg, Steve and Samatova, Nagiza F.},
  title        = {A Lifelong Learning Topic Model Structured Using Latent Embeddings},
  booktitle    = {2017 11th {IEEE} International Conference on Semantic Computing ({ICSC})},
  year         = {2017},
  pages        = {260--261},
  issn         = {2325-6516},
  doi          = {10.1109/icsc.2017.15},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85018319146&partnerID=MN8TOARS},
  abstractNote = {We propose a latent-embedding-structured lifelong learning topic model, called the LLT model, to discover coherent topics from a corpus. Specifically, we exploit latent word embeddings to structure our model and mine word correlation knowledge to assist in topic modeling. During each learning iteration, our model learns new word embeddings based on the topics generated in the previous learning iteration. Experimental results demonstrate that our LLT model is able to generate more coherent topics than state-of-the-art methods.},
}

@inproceedings{zhang_tang_harenberg_byna_zou_devendran_martin_wu_dong_klasky_etal_2016,
  author       = {Zhang, Wenzhao and Tang, Houjun and Harenberg, Steve and Byna, Surendra and Zou, Xiaocheng and Devendran, Dharshi and Martin, Daniel F. and Wu, Kesheng and Dong, Bin and Klasky, Scott and others},
  title        = {{AMRZone}: A Runtime {AMR} Data Sharing Framework For Scientific Applications},
  booktitle    = {2016 16th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing ({CCGrid})},
  year         = {2016},
  pages        = {116--125},
  issn         = {2376-4414},
  doi          = {10.1109/ccgrid.2016.62},
  abstractNote = {Frameworks that facilitate runtime data sharing across multiple applications are of great importance for scientific data analytics. Although existing frameworks work well over uniform mesh data, they can not effectively handle adaptive mesh refinement (AMR) data. Among the challenges to construct an AMR-capable framework include: (1) designing an architecture that facilitates online AMR data management, (2) achieving a load-balanced AMR data distribution for the data staging space at runtime, and (3) building an effective online index to support the unique spatial data retrieval requirements for AMR data. Towards addressing these challenges to support runtime AMR data sharing across scientific applications, we present the AMRZone framework. Experiments over real-world AMR datasets demonstrate AMRZone's effectiveness at achieving a balanced workload distribution, reading/writing large-scale datasets with thousands of parallel processes, and satisfying queries with spatial constraints. Moreover, AMRZone's performance and scalability are even comparable with existing state-of-the-art work when tested over uniform mesh data with up to 16384 cores, in the best case, our framework achieves a 46% performance improvement.},
}

@inproceedings{tang_byna_harenberg_zhang_zou_martin_dong_devendran_wu_trebotich_etal_2016,
  author       = {Tang, Houjun and Byna, Suren and Harenberg, Steve and Zhang, Wenzhao and Zou, Xiaocheng and Martin, Daniel F. and Dong, Bin and Devendran, Dharshi and Wu, Kesheng and Trebotich, David and others},
  title        = {In situ Storage Layout Optimization for {AMR} Spatio-temporal Read Accesses},
  booktitle    = {Proceedings 45th International Conference on Parallel Processing -- {ICPP} 2016},
  year         = {2016},
  pages        = {406--415},
  issn         = {0190-3918},
  doi          = {10.1109/icpp.2016.53},
  abstractNote = {Analyses of large simulation data often concentrate on regions in space and in time that contain important information. As simulations adopt Adaptive Mesh Refinement (AMR), the data records from a region of interest could be widely scattered on storage devices and accessing interesting regions results in significantly reduced I/O performance. In this work, we study the organization of block-structured AMR data on storage to improve performance of spatio-temporal data accesses. AMR has a complex hierarchical multi-resolution data structure that does not fit easily with the existing approaches that focus on uniform mesh data. To enable efficient AMR read accesses, we develop an in situ data layout optimization framework. Our framework automatically selects from a set of candidate layouts based on a performance model, and reorganizes the data before writing to storage. We evaluate this framework with three AMR datasets and access patterns derived from scientific applications. Our performance model is able to identify the best layout scheme and yields up to a 3X read performance improvement compared to the original layout. Though it is not possible to turn all read accesses into contiguous reads, we are able to achieve 90% of contiguous read throughput with the optimized layouts on average.},
}

@inproceedings{tang_byna_harenberg_zou_zhang_wu_dong_rubel_bouchard_klasky_etal_2016,
  author       = {Tang, Houjun and Byna, Suren and Harenberg, Steve and Zou, Xiaocheng and Zhang, Wenzhao and Wu, Kesheng and Dong, Bin and Rubel, Oliver and Bouchard, Kristofer and Klasky, Scott and others},
  title        = {Usage Pattern-Driven Dynamic Data Layout Reorganization},
  booktitle    = {2016 16th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing ({CCGrid})},
  year         = {2016},
  pages        = {356--365},
  issn         = {2376-4414},
  doi          = {10.1109/ccgrid.2016.15},
  abstractNote = {As scientific simulations and experiments move toward extremely large scales and generate massive amounts of data, the data access performance of analytic applications becomes crucial. A mismatch often happens between write and read patterns of data accesses, typically resulting in poor read performance. Data layout reorganization has been used to improve the locality of data accesses. However, current data reorganizations are static and focus on generating a single (or set of) optimized layouts that rely on prior knowledge of exact future access patterns. We propose a framework that dynamically recognizes the data usage patterns, replicates the data of interest in multiple reorganized layouts that would benefit common read patterns, and makes runtime decisions on selecting a favorable layout for a given read pattern. This framework supports reading individual elements and chunks of a multi-dimensional array of variables. Our pattern-driven layout selection strategy achieves multi-fold speedups compared to reading from the original dataset.},
}

@article{ranshous_shen_koutra_harenberg_faloutsos_samatova_2015,
  author       = {Ranshous, Stephen and Shen, Shitian and Koutra, Danai and Harenberg, Steve and Faloutsos, Christos and Samatova, Nagiza F.},
  title        = {Anomaly detection in dynamic networks: a survey},
  journal      = {Wiley Interdisciplinary Reviews: Computational Statistics},
  year         = {2015},
  volume       = {7},
  number       = {3},
  pages        = {223--247},
  issn         = {1939-0068},
  doi          = {10.1002/wics.1347},
  abstractNote = {Anomaly detection is an important problem with multiple applications, and thus has been studied for decades in various research domains. In the past decade there has been a growing interest in anomaly detection in data represented as networks, or graphs, largely because of their robust expressiveness and their natural ability to represent complex relationships. Originally, techniques focused on anomaly detection in static graphs, which do not change and are capable of representing only a single snapshot of data. As real-world networks are constantly changing, there has been a shift in focus to dynamic graphs, which evolve over time.},
}

@inproceedings{zhang_tang_zou_harenberg_liu_klasky_samatova_2015,
  author       = {Zhang, Wenzhao and Tang, Houjun and Zou, Xiaocheng and Harenberg, Steven and Liu, Qing and Klasky, Scott and Samatova, Nagiza F.},
  title        = {Exploring Memory Hierarchy to Improve Scientific Data Read Performance},
  booktitle    = {2015 {IEEE} International Conference on Cluster Computing -- {CLUSTER} 2015},
  year         = {2015},
  pages        = {66--69},
  issn         = {1552-5244},
  doi          = {10.1109/cluster.2015.18},
  abstractNote = {Improving read performance is one of the major challenges with speeding up scientific data analytic applications. Utilizing the memory hierarchy is one major line of researches to address the read performance bottleneck. Related methods usually combine solide-state-drives(SSDs) with dynamic random-access memory(DRAM) and/or parallel file system(PFS) to mitigate the speed and space gap between DRAM and PFS. However, these methods are unable to handle key performance issues plaguing SSDs, namely read contention that may cause up to 50% performance reduction. In this paper, we propose a framework that exploits the memory hierarchy resource to address the read contention issues involved with SSDs. The framework employs a general purpose online read algorithm that able to detect and utilize memory hierarchy resource to relieve the problem. To maintain a near optimal operating environment for SSDs, the framework is able to orchastrate data chunks across different memory layers to facilitate the read algorithm. Compared to existing tools, our framework achieves up to 50% read performance improvement when tested on datasets from real-world scientific simulations.},
}

@inproceedings{schendel_harenberg_tang_vishwanath_papka_samatova_2013,
  author       = {Schendel, E. R. and Harenberg, S. and Tang, H. J. and Vishwanath, V. and Papka, M. E. and Samatova, N. F.},
  title        = {A generic high-performance method for deinterleaving scientific data},
  booktitle    = {{Euro-Par} 2013 Parallel Processing},
  series       = {Lecture Notes in Computer Science},
  year         = {2013},
  volume       = {8097},
  pages        = {571--582},
  doi          = {10.1007/978-3-642-40047-6_58},
  abstractNote = {High-performance and energy-efficient data management applications are a necessity for HPC systems due to the extreme scale of data produced by high fidelity scientific simulations that these systems support. Data layout in memory hugely impacts the performance. For better performance, most simulations interleave variables in memory during their calculation phase, but deinterleave the data for subsequent storage and analysis. As a result, efficient data deinterleaving is critical; yet, common deinterleaving methods provide inefficient throughput and energy performance. To address this problem, we propose a deinterleaving method that is high performance, energy efficient, and generic to any data type. To the best of our knowledge, this is the first deinterleaving method that 1) exploits data cache prefetching, 2) reduces memory accesses, and 3) optimizes the use of complete cache line writes. When evaluated against conventional deinterleaving methods on 105 STREAM standard micro-benchmarks, our method always improved throughput and throughput/watt on multi-core systems. In the best case, our deinterleaving method improved throughput up to 26.2x and throughput/watt up to 7.8x.},
}

@article{padmanabhan_nudelman_harenberg_bello_sohn_shpanskaya_dikshit_yerramsetty_tanzi_saykin_etal,
  author  = {Padmanabhan, K. and Nudelman, K. and Harenberg, S. and Bello, G. and Sohn, D. and Shpanskaya, K. and Dikshit, P. T. and Yerramsetty, P. S. and Tanzi, R. E. and Saykin, A. J. and others},
  title   = {Characterizing gene and protein crosstalks in subjects at risk of developing {Alzheimer's} disease: A new computational approach},
  journal = {Processes},
  year    = {2017},
  volume  = {5},
  number  = {3},
}

@misc{harenberg_bello_gjeltema_ranshous_harlalka_seay_padmanabhan_samatova,
  author        = {Harenberg, S. and Bello, G. A. and Gjeltema, L. and Ranshous, S. and Harlalka, J. and Seay, R. and Padmanabhan, K. and Samatova, N.},
  title         = {Community detection in large-scale networks: A Survey and empirical evaluation},
  howpublished  = {Technical report},
  year          = {2014},
  internal-note = {Catalogue remark from the export: not held in TRLN member libraries},
}