% Publication database, one record per work, newest first.
% Conventions used throughout this file:
%   - conference papers are inproceedings records with the proceedings name in booktitle;
%     journal papers are article records with the full journal name in journal;
%   - month uses the predefined three-letter macros, page ranges use a double hyphen;
%   - doi holds the bare DOI, url holds the matching https://doi.org/ resolver link;
%   - abstractNote is an export-specific field that standard styles ignore; it is kept
%     only as reference data.

@inproceedings{kashyap_deroo_baron_wong_wu_franzon_2024,
  author       = {Kashyap, Priyank and Deroo, Andries and Baron, Dror and Wong, Chau-Wai and Wu, Tianfu and Franzon, Paul D.},
  title        = {High-Speed Receiver Transient Modeling with Generative Adversarial Networks},
  booktitle    = {2024 {IEEE} 33rd Microelectronics Design \& Test Symposium ({MDTS})},
  pages        = {1--6},
  year         = {2024},
  month        = may,
  publisher    = {IEEE},
  isbn         = {979-8-3503-6466-8},
  issn         = {2573-7589},
  doi          = {10.1109/MDTS61600.2024.10570127},
  url          = {https://doi.org/10.1109/MDTS61600.2024.10570127},
  abstractNote = {Data-intensive applications such as artificial intelligence and graph processing are becoming commonplace, requiring high-speed IO to enable the deployment of these critical applications. To accommodate the increasing data requirements Serializer/Deserializer (SerDes) receivers have become increasingly complex, with different equalization schemes to mitigate channel impairments. It has become increasingly important to model this receiver as they are performance-critical. This paper shows an approach to modeling the transient of a high-speed receiver with fixed and varying equalization through generative networks. The method considers the receiver as a black box, with its inputs and outputs as two different domains, framing the problem as a domain translation task. The proposed approach uses an intermediate representation of the time series to model the receiver successfully. We demonstrate that the proposed method is invariant to the input waveform, receiver configuration, and channel. In a fixed equalization setting, the proposed approach has a root-mean-squared error of 0.016 in a [0, 1] range and an error of 0.054 in the same range for a variable redriver. The approach can predict a batched set of results under 250ms, faster than an equivalent spice model for the same time steps.},
}

% Journal article; the export carries no page range for this record.
@article{gajjar_kashyap_aysu_franzon_choi_cheng_pedretti_ignowski_2024,
  author       = {Gajjar, Archit and Kashyap, Priyank and Aysu, Aydin and Franzon, Paul and Choi, Yongjin and Cheng, Chris and Pedretti, Giacomo and Ignowski, Jim},
  title        = {{RD-FAXID}: Ransomware Detection with {FPGA}-Accelerated {XGBoost}},
  journal      = {{ACM} Transactions on Reconfigurable Technology and Systems},
  volume       = {17},
  number       = {4},
  year         = {2024},
  month        = dec,
  issn         = {1936-7414},
  doi          = {10.1145/3688396},
  url          = {https://doi.org/10.1145/3688396},
  abstractNote = {Over the last decade, there has been a rise in cyberattacks, particularly ransomware, causing significant disruption and financial repercussions across public and private sectors. Tremendous efforts have been spent on developing techniques to detect ransomware to, ideally, protect data or have as minimum data loss as possible. Ransomware attacks are becoming more frequent and sophisticated as there is a constant tussle between attackers and cybersecurity defenders. Machine Learning (ML) approaches have proven more effective in detecting ransomware than classical signature-based detection. In particular, tree-based algorithms such as Decision Trees (DT), Random Forest (RF), and eXtreme Gradient Boosting (XGBoost) spike up interest among cybersecurity researchers. However, due to the nature of the problem, traditional CPUs and GPUs fail to keep up with the desired performance, especially for large data workloads. Thus, the problem demands a customized solution to detect the ransomware. Here, we propose an FPGA accelerated tree-based ML model for multi-dataset ransomware detection. We show the capability of the proposed prototype to address the problem from more than one set of features, reducing false positive and negative rates to have robust predictions by looking at Hardware Performance Counters (HPCs), Operating System (OS) calls, and network traffic information simultaneously. With 1,000 samples per batch, the FPGA prototype has 65.8 \({\times}\) and 4.1 \({\times}\) lower latency over the CPU and GPU, respectively. Moreover, the FPGA design is up to 11.3 \({\times}\) cost-effective and 643 \({\times}\) energy-efficient compared to the CPU and 3 \({\times}\) cost-effective and 16.8 \({\times}\) energy-efficient over the GPU.},
}

@inproceedings{kashyap_ravichandiran_baron_wong_wu_franzon_2023,
  author       = {Kashyap, Priyank and Ravichandiran, Prasanth Prabu and Baron, Dror and Wong, Chau-Wai and Wu, Tianfu and Franzon, Paul D.},
  title        = {Generative Adversarial Network Based Adaptive Transmitter Modeling},
  booktitle    = {2023 {IEEE} 73rd Electronic Components and Technology Conference ({ECTC})},
  pages        = {2183--2187},
  year         = {2023},
  month        = may,
  publisher    = {IEEE},
  doi          = {10.1109/ECTC51909.2023.00376},
  url          = {https://doi.org/10.1109/ECTC51909.2023.00376},
  abstractNote = {As transmission speed keeps increasing, Serializer/Deserializer (SerDes) complexity grows increasingly with different filters, such as feed-forward equalizers (FFEs) and finite impulse response (FIR) filters. Today, vendors share information about their intellectual property (IP) through IBIS-AMI models, which requires significant engineering time. This paper presents a data-driven approach for transmitter modeling, which, coupled with prior receiver modeling work, enables end-to-end modeling of a SerDes link. The transmitter modeling considers multiple output channel conditions and different FIR configurations and recovers a pulse response for the transmitter. The recovered pulse response strongly correlates to the simulated one and has a root-mean squared error (RMSE) of 0.0080 with the data range between [0, 1]. Moreover, the results are comparable to the setup when additional image transformations are added, which should improve performance. Lastly, the proposed approach can extend the modeling to incorporate the operation at different speeds, showing the flexibility of the approach. This approach reduces design time considerably, from running simulations to the final model deployment in under a week.},
}

@inproceedings{kashyap_ravichandiran_wang_baron_wong_wu_franzon_2023,
  author       = {Kashyap, Priyank and Ravichandiran, Prasanth P. and Wang, Lee and Baron, Dror and Wong, Chau-Wai and Wu, Tianfu and Franzon, Paul D.},
  title        = {Thermal Estimation for {3D-ICs} Through Generative Networks},
  booktitle    = {2023 {IEEE} International {3D} Systems Integration Conference ({3DIC})},
  pages        = {1--4},
  year         = {2023},
  month        = may,
  publisher    = {IEEE},
  issn         = {2164-0157},
  doi          = {10.1109/3DIC57175.2023.10154977},
  url          = {https://doi.org/10.1109/3DIC57175.2023.10154977},
  abstractNote = {Thermal limitations play a significant role in modern integrated chips (ICs) design and performance. 3D integrated chip (3DIC) makes the thermal problem even worse due to a high density of transistors and heat dissipation bottlenecks within the stack-up. These issues exacerbate the need for quick thermal solutions throughout the design flow. This paper presents a generative approach for modeling the power to heat dissipation for a 3DIC. This approach focuses on a single layer in a stack and shows that, given the power map, the model can generate the resultant heat for the bulk. It shows two approaches, one straightforward approach where the model only uses the power map and the other where it learns the additional parameters through random vectors. The first approach recovers the temperature maps with 1.2 C° or a root-mean-squared error (RMSE) of 0.31 over the images with pixel values ranging from -1 to 1. The second approach performs better, with the RMSE decreasing to 0.082 in a 0 to 1 range. For any result, the model inference takes less than 100 millisecond for any given power map. These results show that the generative approach has speed advantages over traditional solvers while enabling results with reasonable accuracy for 3DIC, opening the door for thermally aware floorplanning.},
}

@inproceedings{gajjar_kashyap_aysu_franzon_dey_cheng_2022,
  author       = {Gajjar, Archit and Kashyap, Priyank and Aysu, Aydin and Franzon, Paul and Dey, Sumon and Cheng, Chris},
  title        = {{FAXID}: {FPGA}-Accelerated {XGBoost} Inference for Data Centers using {HLS}},
  booktitle    = {2022 {IEEE} 30th Annual International Symposium on Field-Programmable Custom Computing Machines ({FCCM})},
  pages        = {1--9},
  year         = {2022},
  month        = may,
  publisher    = {IEEE},
  issn         = {2576-2621},
  doi          = {10.1109/FCCM53951.2022.9786085},
  url          = {https://doi.org/10.1109/FCCM53951.2022.9786085},
  abstractNote = {Advanced ensemble trees have proven quite effective in providing real-time predictions against ransomware detection, medical diagnosis, recommendation engines, fraud detection, failure predictions, crime risk, to name a few. Especially, XGBoost, one of the most prominent and widely used decision trees, has gained popularity due to various optimizations on gradient boosting framework that provides increased accuracy for classification and regression problems. XGBoost's ability to train relatively faster, handling missing values, flexibility and parallel processing make it a better candidate to handle data center workload. Today's data centers with enormous Input/Output Operations per Second (IOPS) demand a real-time accelerated inference with low latency and high throughput because of significant data processing due to applications such as ransomware detection or fraud detection. This paper showcases an FPGA-based XGBoost accelerator designed with High-Level Synthesis (HLS) tools and design flow accelerating binary classification inference. We employ Alveo U50 and U200 to demonstrate the performance of the proposed design and compare it with existing state-of-the-art CPU (Intel Xeon E5-2686 v4) and GPU (Nvidia Tensor Core T4) implementations with relevant datasets. We show a latency speedup of our proposed design over state-of-art CPU and GPU implementations, including energy efficiency and cost-effectiveness. The proposed accelerator is up to 65.8x and 5.3x faster, in terms of latency than CPU and GPU, respectively. The Alveo U50 is a more cost-effective device, and the Alveo U200 stands out as more energy-efficient.},
}

@inproceedings{kashyap_choi_dey_baron_wong_wu_cheng_franzon_2022,
  author       = {Kashyap, Priyank and Choi, Yongjin and Dey, Sumon and Baron, Dror and Wong, Chau-Wai and Wu, Tianfu and Cheng, Chris and Franzon, Paul D.},
  title        = {Modeling of Adaptive Receiver Performance Using Generative Adversarial Networks},
  booktitle    = {2022 {IEEE} 72nd Electronic Components and Technology Conference ({ECTC})},
  pages        = {1958--1963},
  year         = {2022},
  month        = may,
  publisher    = {IEEE},
  issn         = {2377-5726},
  doi          = {10.1109/ECTC51906.2022.00307},
  url          = {https://doi.org/10.1109/ECTC51906.2022.00307},
  abstractNote = {As the development of IBIS Algorithmic Modeling Interface (IBIS-AMI) models gets complex and requires time-consuming simulations, a data-driven and domain-independent approach can have tremendous value. This paper presents a data-driven approach to modeling a high-speed serializer/deserializer (SerDes) receiver through generative adversarial networks (GANs). In this work, the modeling considers multiple channels, random bitstreams, and varying decision feedback equalizer (DFE) tap values to predict an accurate bit error rate (BER) contour plot. We employ a discriminator structure that improves the training to generate a contour plot that makes it difficult to distinguish the ground truth. The generated plots' bathtub curves strongly correlate to the ground truth bathtub curves and have a root-mean-squared error (RMSE) of 0.014, indicating a good fit.},
}

% Workshop paper: the export stored the proceedings title in the journal field.
@inproceedings{kashyap_gajjar_choi_wong_baron_wu_cheng_franzon_2022,
  author       = {Kashyap, Priyank and Gajjar, Archit and Choi, Yongjin and Wong, Chau-Wai and Baron, Dror and Wu, Tianfu and Cheng, Chris and Franzon, Paul},
  title        = {{RxGAN}: Modeling High-Speed Receiver through Generative Adversarial Networks},
  booktitle    = {{MLCAD} '22: Proceedings of the 2022 {ACM/IEEE} 4th Workshop on Machine Learning for {CAD} ({MLCAD})},
  pages        = {167--172},
  year         = {2022},
  publisher    = {ACM},
  doi          = {10.1145/3551901.3556480},
  url          = {https://doi.org/10.1145/3551901.3556480},
  abstractNote = {Creating models for modern high-speed receivers using circuit-level simulations is costly, as it requires computationally expensive simulations and upwards of months to finalize a model. Added to this is that many models do not necessarily agree with the final hardware they are supposed to emulate. Further, these models are complex due to the presence of various filters, such as a decision feedback equalizer (DFE) and continuous-time linear equalizer (CTLE), which enable the correct operation of the receiver. Other data-driven approaches tackle receiver modeling through multiple models to account for as many configurations as possible. This work proposes a data-driven approach using generative adversarial training to model a real-world receiver with varying DFE and CTLE configurations while handling different channel conditions and bitstreams. The approach is highly accurate as the eye height and width are within 1.59% and 1.12% of the ground truth. The horizontal and vertical bathtub curves match the ground truth and correlate to the ground truth bathtub curves.},
}

@article{kashyap_aydin_potluri_franzon_aysu_2021,
  author       = {Kashyap, Priyank and Aydin, Furkan and Potluri, Seetal and Franzon, Paul D. and Aysu, Aydin},
  title        = {{2Deep}: Enhancing Side-Channel Attacks on Lattice-Based Key-Exchange via {2-D} Deep Learning},
  journal      = {{IEEE} Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  volume       = {40},
  number       = {6},
  pages        = {1217--1229},
  year         = {2021},
  month        = jun,
  publisher    = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn         = {1937-4151},
  doi          = {10.1109/TCAD.2020.3038701},
  url          = {https://doi.org/10.1109/tcad.2020.3038701},
  abstractNote = {Advancements in quantum computing present a security threat to classical cryptography algorithms. Lattice-based key exchange protocols show strong promise due to their resistance to theoretical quantum-cryptanalysis and low implementation overhead. By contrast, their physical implementations have shown vulnerability against side-channel attacks (SCAs) even with a single power measurement. The state-of-the-art SCAs are, however, limited to simple, sequentialized executions of post-quantum key-exchange (PQKE) protocols, leaving the vulnerability of complex, parallelized architectures unknown. This article proposes 2Deep-a deep-learning (DL)-based SCA-targeting parallelized implementations of PQKE protocols, namely, Frodo and NewHope with data augmentation techniques. Specifically, we explore approaches that convert 1-D time-series power measurement data into 2-D images to formulate SCA an image recognition task. The results show our attack's superiority over conventional techniques including horizontal differential power analysis (DPA), template attacks (TAs), and straightforward DL approaches. We demonstrate improvements up to 1.5× to recover a 100% success rate compared to DL with 1-D input data while using fewer data. We furthermore show that machine learning improves the results up to 1.25× compared to TAs. Furthermore, we perform cross-device attacks that obtain profiles from a single device, which has never been explored. Our 2-D approach is especially favored in this setting, improving the success rate of attacking Frodo from 20% to 99% compared to the 1-D approach. Our work thus urges countermeasures even on parallel architectures and single-trace attacks.},
}

% Conference paper: the export stored the conference name in the journal field
% and carries no page range or url for this record.
@inproceedings{kashyap_pitts_baron_wong_wu_franzon_2021,
  author       = {Kashyap, Priyank and Pitts, W. Shepherd and Baron, Dror and Wong, Chau-Wai and Wu, Tianfu and Franzon, Paul D.},
  title        = {High Speed Receiver Modeling Using Generative Adversarial Networks},
  booktitle    = {{IEEE} 30th Conference on Electrical Performance of Electronic Packaging and Systems ({EPEPS} 2021)},
  year         = {2021},
  issn         = {2165-4107},
  doi          = {10.1109/EPEPS51341.2021.9609124},
  abstractNote = {This paper presents a generative approach to modeling a high-speed receiver with a time series input. The model is not built with domain knowledge but learned from a wide range of channel conditions and input bitstreams to generate an eye diagram. The generated eye diagrams are similar to the simulated eye diagrams for the same scenario. We also developed a neural network model to evaluate the generated eye diagram's relevant characteristics, such as eye height and width. The generated eye diagrams are within 7% and 3% error to the ground-truth in eye height and eye width, respectively, based on our evaluation neural network.},
}

% NOTE(review): the export gives only the series name "Lecture Notes in Computer
% Science" as the venue, so it is kept as booktitle to make the record printable.
% Once the title of the containing volume is confirmed, put that in booktitle and
% move the series name to a series field (with the volume number).
@incollection{aydin_kashyap_potluri_franzon_aysu_2020,
  author       = {Aydin, Furkan and Kashyap, Priyank and Potluri, Seetal and Franzon, Paul and Aysu, Aydin},
  title        = {{DeePar-SCA}: Breaking Parallel Architectures of Lattice Cryptography via Learning Based Side-Channel Attacks},
  booktitle    = {Lecture Notes in Computer Science},
  pages        = {262--280},
  year         = {2020},
  publisher    = {Springer International Publishing},
  isbn         = {9783030609382 9783030609399},
  issn         = {0302-9743 1611-3349},
  doi          = {10.1007/978-3-030-60939-9_18},
  url          = {https://doi.org/10.1007/978-3-030-60939-9_18},
  abstractNote = {This paper proposes the first deep-learning based side-channel attacks on post-quantum key-exchange protocols. We target hardware implementations of two lattice-based key-exchange protocols—Frodo and NewHope—and analyze power side-channels of the security-critical arithmetic functions. The challenge in applying side-channel attacks stems from the single-trace nature of the protocols: each new execution will use a fresh and unique key, limiting the adversary to a single power measurement. Although such single-trace attacks are known, they have been so far constrained to sequentialized designs running on simple micro-controllers. By using deep-learning and data augmentation techniques, we extend those attacks to break parallelized hardware designs, and we quantify the attack’s limitations. Specifically, we demonstrate single-trace deep-learning based attacks that outperform traditional attacks such as horizontal differential power analysis and template attacks by up to 900% and 25%, respectively. The developed attacks can therefore break implementations that are otherwise secure, motivating active countermeasures even on parallel architectures for key-exchange protocols.},
}

% Key normalized: the exported key contained spaces and a period ("di natale",
% "et al."), which classic BibTeX cannot parse. Citations must use the key below.
% The truncated author list is expressed with the "and others" token so the
% bibliography style renders "et al." itself.
@inproceedings{regazzoni_bhasin_pour_alshaer_aydin_aysu_beroulle_di_natale_franzon_hely_et_al_2020,
  author       = {Regazzoni, Francesco and Bhasin, Shivam and Pour, Amir Ali and Alshaer, Ihab and Aydin, Furkan and Aysu, Aydin and Beroulle, Vincent and Di Natale, Giorgio and Franzon, Paul and Hely, David and others},
  title        = {Machine Learning and Hardware Security: Challenges and Opportunities -- Invited Talk},
  booktitle    = {2020 {IEEE/ACM} International Conference on Computer Aided-Design ({ICCAD})},
  year         = {2020},
  issn         = {1933-7760},
  doi          = {10.1145/3400302.3416260},
  abstractNote = {Machine learning techniques have significantly changed our lives. They helped improving our everyday routines, but they also demonstrated to be an extremely helpful tool for more advanced and complex applications. However, the implications of hardware security problems under a massive diffusion of machine learning techniques are still to be completely understood. This paper first highlights novel applications of machine learning for hardware security, such as evaluation of post quantum cryptography hardware and extraction of physically unclonable functions from neural networks. Later, practical model extraction attack based on electromagnetic side-channel measurements are demonstrated followed by a discussion of strategies to protect proprietary models by watermarking them.},
}

@inproceedings{kashyap_saleh_shakhbulatov_dong_2018,
  author       = {Kashyap, Priyank and Saleh, Mahmoud and Shakhbulatov, Denisolt and Dong, Ziqian},
  title        = {An Autonomous Simultaneous Localization and Mapping Walker for Indoor Navigation},
  booktitle    = {2018 {IEEE} 39th Sarnoff Symposium},
  year         = {2018},
  month        = sep,
  publisher    = {IEEE},
  doi          = {10.1109/sarnof.2018.8720504},
  url          = {https://doi.org/10.1109/sarnof.2018.8720504},
  abstractNote = {Walkers have been used to help the elderly and individuals with movement disorders as an assistive and rehabilitation tool. This study presents a smart walker, a system which guides the users to navigate in an indoor environment. The Walker can be controlled by voice commands to create location markers and navigate the user while avoiding obstacles. We evaluated three localization implementations, namely, Adaptive Monte Carlo Localization (AMCL), Gmapping and Hector_Slam for this system and compared their navigation accuracy with an ideal path. We collected the data on the paths of AMCL, Gmapping and Hector_Slam and applied statistical tests on the data. The results show that AMCL achieves the lowest mean absolute error while navigating to its goal with an error of 2.15% over the path distance, as compared to Gmapping and Hector in this implementation.},
}