% Journal article: IEEE Transactions on Parallel and Distributed Systems, vol. 32, no. 2 (2021).
@article{guan_shen_krim_2021,
  author       = {Guan, Hui and Shen, Xipeng and Krim, Hamid},
  title        = {An Automatic Synthesizer of Advising Tools for High Performance Computing},
  journal      = {{IEEE} Transactions on Parallel and Distributed Systems},
  volume       = {32},
  number       = {2},
  pages        = {330--341},
  year         = {2021},
  month        = feb,
  issn         = {1558-2183},
  doi          = {10.1109/TPDS.2020.3018636},
  abstractNote = {This article presents Egeria, the first automatic synthesizer of advising tools for High-Performance Computing (HPC). When one provides it with some HPC programming guides as inputs, Egeria automatically constructs a text retrieval tool that can advise on what to do to improve the performance of a given program. The advising tool provides a concise list of essential rules automatically extracted from the documents and can retrieve relevant optimization knowledge for optimization questions. Egeria is built based on a distinctive multi-layered design that leverages natural language processing (NLP) techniques and extends them with HPC-specific knowledge and considerations. This article presents the design, implementation, and both quantitative and qualitative evaluation results of Egeria.},
}

% Conference paper (ICDE 2019); the venue is recorded in booktitle, not journal.
@inproceedings{ning_guan_shen_2019,
  author       = {Ning, Lin and Guan, Hui and Shen, Xipeng},
  title        = {Adaptive Deep Reuse: Accelerating {CNN} Training on the Fly},
  booktitle    = {2019 {IEEE} 35th International Conference on Data Engineering ({ICDE} 2019)},
  pages        = {1538--1549},
  year         = {2019},
  issn         = {1084-4627},
  doi          = {10.1109/ICDE.2019.00138},
  abstractNote = {This work proposes adaptive deep reuse, a method for accelerating CNN training by identifying and avoiding the unnecessary computations contained in each specific training on the fly. It makes two-fold major contributions. (1) It empirically proves the existence of a lot of similarities among neuron vectors in both forward and backward propagation of CNN. (2) It introduces the first adaptive strategy for translating the similarities into computation reuse in CNN training. The strategy adaptively adjusts the strength of reuse based on the different tolerance of precision relaxation in different CNN training stages. Experiments show that adaptive deep reuse saves 69% CNN training time with no accuracy loss.},
}

% Conference paper (ICDE 2018); the venue is recorded in booktitle, not journal.
@inproceedings{guan_ding_shen_krim_2018,
  author       = {Guan, Hui and Ding, Yufei and Shen, Xipeng and Krim, Hamid},
  title        = {Reuse-Centric K-Means Configuration},
  booktitle    = {2018 {IEEE} 34th International Conference on Data Engineering ({ICDE})},
  pages        = {1224--1227},
  year         = {2018},
  issn         = {1084-4627},
  doi          = {10.1109/ICDE.2018.00116},
  abstractNote = {K-means configuration is a time-consuming process due to the iterative nature of k-means. This paper proposes reuse-centric k-means configuration to accelerate k-means configuration. It is based on the observation that the explorations of different configurations share lots of common or similar computations. Effectively reusing the computations from prior trials of different configurations could largely shorten the configuration time. The paper presents a set of novel techniques to materialize the idea, including reuse-based filtering, center reuse, and a two-phase design to capitalize on the reuse opportunities on three levels: validation, k, and feature sets. Experiments show that our approach can accelerate some common configuration tuning methods by 5-9X.},
}

% Periodical issue: ACM SIGPLAN Notices, vol. 52, no. 6 (2017); typed as an article
% because the record carries a periodical name with both volume and number.
@article{ding_ning_guan_shen_2017,
  author       = {Ding, Yufei and Ning, Lin and Guan, Hui and Shen, Xipeng},
  title        = {Generalizations of the theory and deployment of triangular inequality for compiler-based strength reduction},
  journal      = {{ACM} {SIGPLAN} Notices},
  volume       = {52},
  number       = {6},
  pages        = {33--48},
  year         = {2017},
  doi          = {10.1145/3140587.3062377},
  abstractNote = {Triangular Inequality (TI) has been used in many manual algorithm designs to achieve good efficiency in solving some distance calculation-based problems. This paper presents our generalization of the idea into a compiler optimization technique, named TI-based strength reduction. The generalization consists of three parts. The first is the establishment of the theoretic foundation of this new optimization via the development of a new form of TI named Angular Triangular Inequality, along with several fundamental theorems. The second is the revealing of the properties of the new forms of TI and the proposal of guided TI adaptation, a systematic method to address the difficulties in effective deployments of TI optimizations. The third is an integration of the new optimization technique in an open-source compiler. Experiments on a set of data mining and machine learning algorithms show that the new technique can speed up the standard implementations by as much as 134X and 46X on average for distance-related problems, outperforming previous TI-based optimizations by 2.35X on average. It also extends the applicability of TI-based optimizations to vector related problems, producing tens of times of speedup.},
}

% Workshop paper (SPAWC 2016). NOTE(review): the volume value looks like an export
% artefact rather than a real volume number; kept as exported, confirm before relying on it.
@inproceedings{guan_tang_krim_keiser_rindos_sazdanovic_2016,
  author       = {Guan, Hui and Tang, Wen and Krim, Hamid and Keiser, James and Rindos, Andrew and Sazdanovic, Radmila},
  title        = {A topological collapse for document summarization},
  booktitle    = {2016 {IEEE} 17th International Workshop on Signal Processing Advances in Wireless Communications ({SPAWC})},
  volume       = {2016-August},
  publisher    = {IEEE},
  year         = {2016},
  month        = jul,
  doi          = {10.1109/spawc.2016.7536867},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84984653594&partnerID=MN8TOARS},
  abstractNote = {As a useful tool to summarize documents, keyphrase extraction extracts a set of single or multiple words, called keyphrases, that capture the primary topics discussed in a document. In this paper we propose DoCollapse, a topological collapse-based unsupervised keyphrase extraction method that relies on networking document by semantic relatedness of candidate keyphrases. A semantic graph is built with candidates keyphrases as vertices and then reduced to its core using topological collapse algorithm to facilitate final keyphrase selection. Iteratively collapsing dominated vertices aids in removing noisy candidates and revealing important points. We conducted experiments on two standard evaluation datasets composed of scientific papers and found that DoCollapse outperforms state-of-the-art methods. Results show that simplifying a document graph by homology-preserving topological collapse benefits keyphrase extraction.},
}