@inproceedings{wang_ding_liu_kang_rossi_derr_2024, title={Data Quality-aware Graph Machine Learning}, url={https://doi.org/10.1145/3627673.3679095}, DOI={10.1145/3627673.3679095}, author={Wang, Yu and Ding, Kaize and Liu, Xiaorui and Kang, Jian and Rossi, Ryan and Derr, Tyler}, year={2024}, month={Oct} }

@inproceedings{zhang_xue_fan_xu_li_pei_liu_2024, title={Linear-Time Graph Neural Networks for Scalable Recommendations}, url={https://doi.org/10.1145/3589334.3645486}, DOI={10.1145/3589334.3645486}, abstractNote={In an era of information explosion, recommender systems are vital tools for delivering personalized recommendations to users. The key task of recommender systems is to forecast users' future behaviors based on previous user-item interactions. Owing to the strong expressive power of Graph Neural Networks (GNNs) in capturing high-order connectivities in user-item interaction data, recent years have witnessed a rising interest in leveraging GNNs to boost the prediction performance of recommender systems. Nonetheless, classic Matrix Factorization (MF) and Deep Neural Network (DNN) approaches still play an important role in real-world large-scale recommender systems due to their scalability advantages. Despite the existence of GNN-acceleration solutions, it remains an open question whether GNN-based recommender systems can scale as efficiently as classic MF and DNN methods. In this paper, we propose a Linear-Time Graph Neural Network (LTGNN) that scales GNN-based recommender systems to achieve scalability comparable to that of classic MF approaches while maintaining GNNs' powerful expressiveness for superior prediction accuracy. Extensive experiments and ablation studies are presented to validate the effectiveness and scalability of the proposed algorithm. Our implementation based on PyTorch is available.}, author={Zhang, Jiahao and Xue, Rui and Fan, Wenqi and Xu, Xin and Li, Qing and Pei, Jian and Liu, Xiaorui}, year={2024}, month={May} }

@article{li_liu_starly_2024, title={Manufacturing service capability prediction with Graph Neural Networks}, volume={74}, ISSN={1878-6642}, url={https://doi.org/10.1016/j.jmsy.2024.03.010}, DOI={10.1016/j.jmsy.2024.03.010}, abstractNote={In the current landscape, the predominant methods for identifying manufacturing capabilities from manufacturers rely heavily on keyword matching and semantic matching. However, these methods often fall short by either overlooking valuable hidden information or misinterpreting critical data, resulting in an incomplete identification of manufacturers' capabilities. This underscores the pressing need for data-driven solutions to enhance the accuracy and completeness of manufacturing capability identification. To address this need, this study proposes a Graph Neural Network-based method for manufacturing service capability identification over a knowledge graph. To enhance identification performance, this work introduces a novel approach that aggregates information from the graph nodes' neighborhoods and oversamples the graph data; the approach can be effectively applied across a wide range of practical scenarios. Evaluations conducted on a Manufacturing Service Knowledge Graph and subsequent ablation studies demonstrate the efficacy and robustness of the proposed approach.
This study not only contributes an innovative method for inferring manufacturing service capabilities but also significantly enhances the quality of Manufacturing Service Knowledge Graphs.}, journal={Journal of Manufacturing Systems}, author={Li, Yunqing and Liu, Xiaorui and Starly, Binil}, year={2024}, month={Jun}, pages={291–301} }

@inproceedings{liu_han_jin_liu_liu_2023, title={Enhancing Graph Representations Learning with Decorrelated Propagation}, url={https://doi.org/10.1145/3580305.3599334}, DOI={10.1145/3580305.3599334}, abstractNote={In recent years, graph neural networks (GNNs) have been widely used in many domains due to their powerful capability in representation learning on graph-structured data. While a majority of extant studies focus on mitigating the over-smoothing problem, recent works also reveal a limitation of GNNs from a new over-correlation perspective, which states that the learned representations become highly correlated after feature transformation and propagation in GNNs. In this paper, we thoroughly re-examine the issue of over-correlation in deep GNNs, both empirically and theoretically. We demonstrate that the propagation operator in GNNs exacerbates feature correlation. In addition, our empirical study shows that existing decorrelation solutions fall short of maintaining a low feature correlation and may encode redundant information. Thus, to more effectively address the over-correlation problem, we propose a decorrelated propagation scheme (DeProp) as a fundamental component to decorrelate feature learning in GNN models, achieving feature decorrelation at the propagation step. Comprehensive experiments on multiple real-world datasets demonstrate that DeProp can be easily integrated into prevalent GNNs, leading to significant performance enhancements. Furthermore, we find that it can solve the over-smoothing and over-correlation problems simultaneously and significantly outperforms state-of-the-art methods in missing-feature settings. The code is available at https://github.com/hualiu829/DeProp.}, booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2023)}, author={Liu, Hua and Han, Haoyu and Jin, Wei and Liu, Xiaorui and Liu, Hui}, year={2023}, pages={1466–1476} }

@inproceedings{xu_liu_wang_liu_jain_tang_2023, title={How does the Memorization of Neural Networks Impact Adversarial Robust Models?}, url={https://doi.org/10.1145/3580305.3599381}, DOI={10.1145/3580305.3599381}, abstractNote={Recent studies suggest that "memorization" is one necessary factor for overparameterized deep neural networks (DNNs) to achieve optimal performance. Specifically, perfectly fitted DNNs can memorize the labels of many atypical samples, generalize this memorization to correctly classify atypical test samples, and enjoy better test performance. Meanwhile, DNNs optimized via adversarial training algorithms can also achieve perfect training performance by memorizing the labels of atypical samples, as well as the adversarially perturbed atypical samples. However, adversarially trained models always suffer from poor generalization, with relatively low clean accuracy and low robustness on the test set.
In this work, we study the effect of memorization in adversarially trained DNNs and disclose two important findings: (a) memorizing atypical samples only improves the DNN's accuracy on clean atypical samples and hardly improves its adversarial robustness, and (b) memorizing certain atypical samples can even hurt the DNN's performance on typical samples. Based on these two findings, we propose Benign Adversarial Training (BAT), which facilitates adversarial training by avoiding the fitting of "harmful" atypical samples while fitting as many "benign" atypical samples as possible. In our experiments, we validate the effectiveness of BAT and show that it achieves a better trade-off between clean accuracy and robustness than baseline methods on benchmark datasets for image classification.}, booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2023)}, author={Xu, Han and Liu, Xiaorui and Wang, Wentao and Liu, Zitao and Jain, Anil K. and Tang, Jiliang}, year={2023}, pages={2801–2812} }

@inproceedings{xue_han_zhao_shah_tang_liu_2023, title={Large-Scale Graph Neural Networks: The Past and New Frontiers}, url={https://doi.org/10.1145/3580305.3599565}, DOI={10.1145/3580305.3599565}, abstractNote={Graph Neural Networks (GNNs) have gained significant attention in recent years due to their ability to model complex relationships between entities in graph-structured data such as social networks, protein structures, and knowledge graphs. However, due to the size of real-world industrial graphs and the special architecture of GNNs, it has been a long-standing challenge for engineers and researchers to deploy GNNs on large-scale graphs, which significantly limits their use in real-world applications. In this tutorial, we will cover the fundamental scalability challenges of GNNs, the frontiers of large-scale GNNs including classic approaches and newly emerging techniques, the evaluation and comparison of scalable GNNs, and their large-scale real-world applications. Overall, this tutorial aims to provide a systematic and comprehensive understanding of the challenges and state-of-the-art techniques for scaling GNNs. The summary and discussion of future directions will inspire engineers and researchers to explore new ideas and developments in this rapidly evolving field. The website of this tutorial is available at https://sites.google.com/ncsu.edu/gnnkdd2023tutorial.}, booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2023)}, author={Xue, Rui and Han, Haoyu and Zhao, Tong and Shah, Neil and Tang, Jiliang and Liu, Xiaorui}, year={2023}, pages={5835–5836} }

@inproceedings{wang_xu_liu_li_thuraisingham_tang_2022, title={Imbalanced Adversarial Training with Reweighting}, ISSN={1550-4786}, DOI={10.1109/ICDM54844.2022.00156}, abstractNote={Adversarial training has been empirically proven to be one of the most effective and reliable defense methods against adversarial attacks. However, the majority of existing studies focus on balanced datasets, where each class has a similar number of training examples. Research on adversarial training with imbalanced training datasets is rather limited.
As an initial effort to investigate this problem, we reveal that adversarially trained models exhibit two behaviors distinct from naturally trained models on imbalanced datasets: (1) compared to natural training, adversarially trained models can suffer much worse performance on under-represented classes when the training dataset is extremely imbalanced, and (2) traditional reweighting strategies that assign large weights to under-represented classes drastically hurt the model's performance on well-represented classes. To further understand these observations, we theoretically show that poor data separability is one key reason for this strong tension between under-represented and well-represented classes. Motivated by this finding, we propose the Separable Reweighted Adversarial Training (SRAT) framework, which facilitates adversarial training under imbalanced scenarios by learning more separable features for different classes. Extensive experiments on various datasets verify the effectiveness of the proposed framework.}, booktitle={2022 IEEE International Conference on Data Mining (ICDM)}, author={Wang, Wentao and Xu, Han and Liu, Xiaorui and Li, Yaxin and Thuraisingham, Bhavani and Tang, Jiliang}, year={2022}, pages={1209–1214} }

@article{liu_wang_fan_liu_li_jain_liu_jain_tang_2023, title={Trustworthy AI: A Computational Perspective}, url={https://doi.org/10.1145/3546872}, DOI={10.1145/3546872}, abstractNote={In the past few decades, artificial intelligence (AI) technology has experienced swift development, changing everyone's daily life and profoundly altering the course of human society. The intention behind developing AI was and is to benefit humans by reducing labor, increasing everyday conveniences, and promoting social good. However, recent research and AI applications indicate that AI can cause unintentional harm to humans by, for example, making unreliable decisions in safety-critical scenarios or undermining fairness by inadvertently discriminating against a group or groups. Consequently, trustworthy AI has recently garnered increased attention as a means to avoid the adverse effects that AI could bring to people, so that people can fully trust and live in harmony with AI technologies. A tremendous amount of research on trustworthy AI has been conducted in recent years. In this survey, we present a comprehensive appraisal of trustworthy AI from a computational perspective to help readers understand the latest technologies for achieving trustworthy AI. Trustworthy AI is a large and complex subject involving various dimensions. In this work, we focus on six of the most crucial dimensions for achieving trustworthy AI: (i) Safety & Robustness, (ii) Nondiscrimination & Fairness, (iii) Explainability, (iv) Privacy, (v) Accountability & Auditability, and (vi) Environmental Well-being. For each dimension, we review the recent related technologies according to a taxonomy and summarize their applications in real-world systems. We also discuss the consistent and conflicting interactions among different dimensions and outline potential aspects for trustworthy AI to investigate in the future.}, journal={ACM Transactions on Intelligent Systems and Technology}, author={Liu, Haochen and Wang, Yiqi and Fan, Wenqi and Liu, Xiaorui and Li, Yaxin and Jain, Shaili and Liu, Yunhao and Jain, Anil and Tang, Jiliang}, year={2023}, month={Feb} }