@article{yang_shen_lim_2021,
  title={Revisit the Scalability of Deep Auto-Regressive Models for Graph Generation},
  ISSN={2161-4393},
  DOI={10.1109/IJCNN52387.2021.9534206},
  abstractNote={As a new promising approach to graph generations, deep auto-regressive graph generation has drawn increasing attention. It however has been commonly deemed as hard to scale up to work with large graphs. In existing studies, it is perceived that the consideration of the full non-local graph dependences is indispensable for this approach to work, which entails the needs for keeping the entire graph's info in memory and hence the perceived “inherent” scalability limitation of the approach. This paper revisits the common perception. It proposes three ways to relax the dependences and conducts a series of empirical measurements. It concludes that the perceived “inherent” scalability limitation is a misperception; with the right design and implementation, deep auto-regressive graph generation can be applied to graphs much larger than the device memory. The rectified perception removes a fundamental barrier for this approach to meet practical needs.},
  journal={2021 INTERNATIONAL JOINT CONFERENCE ON NEURAL NETWORKS (IJCNN)},
  author={Yang, Shuai and Shen, Xipeng and Lim, Seung-Hwan},
  year={2021}
}

@article{yang_shen_2018a,
  title={FALCON: A Fast Drop-In Replacement of Citation KNN for Multiple Instance Learning},
  DOI={10.1145/3269206.3271787},
  abstractNote={Citation KNN is an important but compute-intensive algorithm for multiple instance learning (MIL). This paper presents FALCON, a fast replacement of Citation KNN. FALCON accelerates Citation KNN by removing unnecessary distance calculations through two novel optimizations, multi-level triangle inequality-based distance filtering and heap optimization. The careful design allows it to produce the same results as the original Citation KNN does while avoiding 84--99.8% distance calculations. On seven datasets of various sizes and dimensions, FALCON consistently outperforms Citation KNN by one or two orders of magnitude, making it a promising drop-in replacement of Citation KNN for multiple instance learning.},
  journal={CIKM'18: PROCEEDINGS OF THE 27TH ACM INTERNATIONAL CONFERENCE ON INFORMATION AND KNOWLEDGE MANAGEMENT},
  author={Yang, Shuai and Shen, Xipeng},
  year={2018},
  pages={67–76}
}

@article{yang_shen_2018b,
  title={LEEM: Lean Elastic EM for Gaussian Mixture Model via Bounds-Based Filtering},
  ISSN={1550-4786},
  DOI={10.1109/ICDM.2018.00083},
  abstractNote={Gaussian Mixture Model (GMM) is widely used in characterizing complicated real-world data and has played a crucial role in many pattern recognition problems. GMM is usually trained by Expectation Maximization algorithm (EM) which is computationally intensive. Previous studies have proposed a family of variants of EM. By considering only the data points that are the most important to a model in a GMM when updating that model, they help reduce some GMM training time. They are named Elastic EM in this paper. This work proposes several novel optimizations to further accelerate Elastic EM. These optimizations detect and avoid unnecessary probability calculations through novel bounds-based filtering at E-step as well as a Delta optimization to the M-step. Together, they create Lean Elastic EM (LEEM), which brings multi-fold speedups on six datasets of various sizes and dimensions.},
  journal={2018 IEEE INTERNATIONAL CONFERENCE ON DATA MINING (ICDM)},
  author={Yang, Shuai and Shen, Xipeng},
  year={2018},
  pages={677–686}
}