% Journal article (IEEE TPDS, vol. 26 no. 12, 2015).
% "I/O" is brace-protected so sentence-casing styles do not lowercase it.
@article{zhai_liu_jin_ma_chen_2015,
  author  = {Zhai, J. D. and Liu, M. L. and Jin, Y. and Ma, X. S. and Chen, W. G.},
  title   = {Automatic cloud {I/O} configurator for {I/O} intensive parallel applications},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {26},
  number  = {12},
  pages   = {3275--3288},
  year    = {2015},
}

% Conference paper (ICDE 2012); stored as inproceedings with the venue in
% booktitle rather than as a journal article.
% NOTE: the citation key was previously written with a space and a period
% ("..._et al._2012"), which BibTeX cannot read as a single key. It is now
% "..._etal_2012"; documents citing the old spelling must be updated.
% The truncated author list uses the "and others" token so the style
% renders "et al." itself.
@inproceedings{schendel_jin_shah_chen_chang_ku_ethier_klasky_latham_ross_etal_2012,
  author       = {Schendel, Eric R. and Jin, Ye and Shah, Neil and Chen, Jackie and Chang, C. S. and Ku, Seung-Hoe and Ethier, Stephane and Klasky, Scott and Latham, Robert and Ross, Robert and others},
  title        = {{ISOBAR} Preconditioner for Effective and High-throughput Lossless Data Compression},
  booktitle    = {2012 {IEEE} 28th International Conference on Data Engineering ({ICDE})},
  pages        = {138--149},
  year         = {2012},
  issn         = {1084-4627},
  doi          = {10.1109/icde.2012.114},
  abstractNote = {Efficient handling of large volumes of data is a necessity for exascale scientific applications and database systems. To address the growing imbalance between the amount of available storage and the amount of data being produced by high speed (FLOPS) processors on the system, data must be compressed to reduce the total amount of data placed on the file systems. General-purpose lossless compression frameworks, such as zlib and bzlib2, are commonly used on datasets requiring lossless compression. Quite often, however, many scientific data sets compress poorly, referred to as hard-to-compress datasets, due to the negative impact of highly entropic content represented within the data. An important problem in better lossless data compression is to identify the hard-to-compress information and subsequently optimize the compression techniques at the byte-level. To address this challenge, we introduce the In-Situ Orthogonal Byte Aggregate Reduction Compression (ISOBAR-compress) methodology as a preconditioner of lossless compression to identify and optimize the compression efficiency and throughput of hard-to-compress datasets.},
}

% Workshop paper (ICDM Workshops 2010).
% Publisher name and publisher city are kept in separate fields.
@inproceedings{chen_wilson_jin_samatova_2010,
  author       = {Chen, Z. and Wilson, K. A. and Jin, Y. and Samatova, N. F.},
  title        = {Detecting and Tracking Community Dynamics in Evolutionary Networks},
  booktitle    = {The 10th {IEEE} International Conference on Data Mining Workshops},
  pages        = {318--327},
  year         = {2010},
  publisher    = {IEEE Computer Society},
  address      = {Los Alamitos, Calif.},
  doi          = {10.1109/icdmw.2010.32},
  abstractNote = {Community structure or clustering is ubiquitous in many evolutionary networks including social networks, biological networks and financial market networks. Detecting and tracking community deviations in evolutionary networks can uncover important and interesting behaviors that are latent if we ignore the dynamic information. In biological networks, for example, a small variation in a gene community may indicate an event, such as gene fusion, gene fission, or gene decay. In contrast to the previous work on detecting communities in static graphs or tracking conserved communities in time-varying graphs, this paper first introduces the concept of community dynamics, and then shows that the baseline approach by enumerating all communities in each graph and comparing all pairs of communities between consecutive graphs is infeasible and impractical. We propose an efficient method for detecting and tracking community dynamics in evolutionary networks by introducing graph representatives and community representatives to avoid generating redundant communities and limit the search space. We measure the performance of the representative-based algorithm by comparison to the baseline algorithm on synthetic networks, and our experiments show that our algorithm achieves a runtime speedup of 11--46. The method has also been applied to two real-world evolutionary networks including Food Web and Enron Email. Significant and informative community dynamics have been detected in both cases.},
}