@article{luo_xu_peng_wang_duan_mahmood_wen_ding_xu_2023, title={AQ2PNN: Enabling Two-party Privacy-Preserving Deep Neural Network Inference with Adaptive Quantization}, DOI={10.1145/3613424.3614297}, abstractNote={The growing prevalence of Machine Learning as a Service (MLaaS) enables a wide range of applications but simultaneously raises numerous security and privacy concerns. A key issue involves the potential privacy exposure of involved parties, such as the customer’s input data and the vendor’s model. Consequently, two-party computing (2PC) has emerged as a promising solution to safeguard the privacy of different parties during deep neural network (DNN) inference. However, the state-of-the-art (SOTA) 2PC-DNN techniques are tailored explicitly to traditional instruction set architecture (ISA) systems like CPUs and CPU+GPU. This reliance on ISA systems significantly constrains their energy efficiency, as these architectures typically employ 32- or 64-bit instruction sets. In contrast, the possibilities of harnessing dynamic and adaptive quantization to build high-performance 2PC-DNNs remain largely unexplored due to the lack of compatible algorithms and hardware accelerators. To mitigate the bottleneck of SOTA solutions and fill the existing research gaps, this work investigates the construction of 2PC-DNNs on field-programmable gate arrays (FPGAs). We introduce AQ2PNN, an end-to-end framework that effectively employs adaptive quantization schemes to develop high-performance 2PC-DNNs on FPGAs. From an algorithmic perspective, AQ2PNN introduces an innovative 2PC-ReLU method to replace Yao’s Garbled Circuits (GC). Regarding hardware, AQ2PNN employs an extensive set of building blocks for linear and non-linear operators, together with a specialized Oblivious Transfer (OT) module for secure data exchange. These algorithm-hardware co-designed modules fully exploit the fine-grained reconfigurability of FPGAs to adapt the data bit-width of different DNN layers in the ciphertext domain, thereby reducing communication overhead between parties without compromising DNN performance, such as accuracy. We thoroughly assess AQ2PNN using widely adopted DNN architectures, including ResNet18, ResNet50, and VGG16, all trained on ImageNet and quantized. Experimental results demonstrate that AQ2PNN outperforms SOTA solutions, reducing communication overhead by 25%, improving energy efficiency by 26.3×, and achieving comparable or even superior throughput and accuracy.}, journal={56TH IEEE/ACM INTERNATIONAL SYMPOSIUM ON MICROARCHITECTURE, MICRO 2023}, author={Luo, Yukui and Xu, Nuo and Peng, Hongwu and Wang, Chenghong and Duan, Shijin and Mahmood, Kaleel and Wen, Wujie and Ding, Caiwen and Xu, Xiaolin}, year={2023}, pages={628–640} }
@article{peng_huang_zhou_luo_wang_wang_zhao_xie_li_geng_etal_2023, title={AutoReP: Automatic ReLU Replacement for Fast Private Network Inference}, ISSN={1550-5499}, DOI={10.1109/ICCV51070.2023.00478}, abstractNote={The growth of the Machine-Learning-As-A-Service (MLaaS) market has highlighted clients’ data privacy and security issues. Private inference (PI) techniques using cryptographic primitives offer a solution but often have high computation and communication costs, particularly with non-linear operators like ReLU. Many attempts to reduce ReLU operations exist, but they may need heuristic threshold selection or cause substantial accuracy loss.
This work introduces AutoReP, a gradient-based approach to reduce the number of non-linear operators and alleviate these issues. It automates the selection of ReLU and polynomial functions to speed up PI applications and introduces distribution-aware polynomial approximation (DaPa) to maintain model expressivity while accurately approximating ReLUs. Our experimental results demonstrate significant accuracy improvements of 6.12% (94.31%, 12.9K ReLU budget, CIFAR-10), 8.39% (74.92%, 12.9K ReLU budget, CIFAR-100), and 9.45% (63.69%, 55K ReLU budget, Tiny-ImageNet) over current state-of-the-art methods, e.g., SNL. Moreover, AutoReP is applied to EfficientNet-B2 on the ImageNet dataset, achieving 75.55% accuracy with a 176.1× ReLU budget reduction. The code is shared on GitHub.}, journal={2023 IEEE/CVF INTERNATIONAL CONFERENCE ON COMPUTER VISION, ICCV}, author={Peng, Hongwu and Huang, Shaoyi and Zhou, Tong and Luo, Yukui and Wang, Chenghong and Wang, Zigeng and Zhao, Jiahui and Xie, Xi and Li, Ang and Geng, Tony and others}, year={2023}, pages={5155–5165} }
@article{yan_qin_wen_hu_shi_2023, title={Improving Realistic Worst-Case Performance of NVCiM DNN Accelerators through Training with Right-Censored Gaussian Noise}, ISSN={1933-7760}, DOI={10.1109/ICCAD57390.2023.10323830}, abstractNote={Compute-in-Memory (CiM), built upon non-volatile memory (NVM) devices, is promising for accelerating deep neural networks (DNNs) owing to its in-situ data processing capability and superior energy efficiency. To battle device variations, noise injection training is commonly used, which perturbs weights with Gaussian noise during training to make the model more robust to weight variations. Despite its prevalence, however, existing successes are mostly empirical, and very little theoretical support is available. Even the most fundamental questions, such as why Gaussian rather than other types of noise should be used, remain unanswered. In this work, through formally analyzing the effect of injecting Gaussian noise in training to improve the k-th percentile performance (KPP), a realistic worst-case performance metric, we provide for the first time a theoretical justification of the effectiveness of the approach. We further show that, surprisingly, Gaussian noise is not the best option, contrary to what has been taken for granted in the literature. Instead, a right-censored Gaussian noise significantly improves the KPP of DNNs. We further propose an automated method to determine the optimal hyperparameters for injecting this right-censored Gaussian noise during the training process. Our method achieves up to a 26% improvement in KPP compared to the state-of-the-art methods employed to enhance DNN robustness under the impact of device variations.}, journal={2023 IEEE/ACM INTERNATIONAL CONFERENCE ON COMPUTER AIDED DESIGN, ICCAD}, author={Yan, Zheyu and Qin, Yifan and Wen, Wujie and Hu, Xiaobo Sharon and Shi, Yiyu}, year={2023} }
@article{huang_fang_mahmood_lei_xu_lei_sun_xu_wen_ding_2023, title={Neurogenesis Dynamics-inspired Spiking Neural Network Training Acceleration}, DOI={10.1109/DAC56929.2023.10247810}, abstractNote={Biologically inspired Spiking Neural Networks (SNNs) have attracted significant attention for their ability to provide extremely energy-efficient machine intelligence through event-driven operation and sparse activities. As artificial intelligence (AI) becomes ever more democratized, there is an increasing need to execute SNN models on edge devices.
Existing works adopt weight pruning to reduce SNN model size and accelerate inference. However, these methods mainly focus on how to obtain a sparse model for efficient inference rather than on training efficiency. To overcome these drawbacks, in this paper we propose a Neurogenesis Dynamics-inspired Spiking Neural Network training acceleration framework, NDSNN. Our framework is computationally efficient and trains a model from scratch with dynamic sparsity without sacrificing model fidelity. Specifically, we design a new drop-and-grow strategy with a decreasing number of non-zero weights to maintain extremely high sparsity and high accuracy. We evaluate NDSNN using VGG-16 and ResNet-19 on CIFAR-10, CIFAR-100, and Tiny-ImageNet. Experimental results show that NDSNN achieves up to a 20.52% improvement in accuracy on Tiny-ImageNet using ResNet-19 (with a sparsity of 99%) compared to other SOTA methods (e.g., Lottery Ticket Hypothesis (LTH), SET-SNN, RigL-SNN). In addition, the training cost of NDSNN is only 40.89% of the LTH training cost on ResNet-19 and 31.35% of the LTH training cost on VGG-16 on CIFAR-10.}, journal={2023 60TH ACM/IEEE DESIGN AUTOMATION CONFERENCE, DAC}, author={Huang, Shaoyi and Fang, Haowen and Mahmood, Kaleel and Lei, Bowen and Xu, Nuo and Lei, Bin and Sun, Yue and Xu, Dongkuan and Wen, Wujie and Ding, Caiwen}, year={2023} }