@article{abdullah_lee_zhou_awad_2024, title={Salus: Efficient Security Support for CXL-Expanded GPU Memory}, ISBN={["979-8-3503-9314-9"]}, ISSN={["1530-0897"]}, DOI={10.1109/HPCA57654.2024.00027}, abstractNote={GPUs have become indispensable accelerators for many data-intensive applications such as scientific workloads, deep learning models, and graph analytics; these applications share a common demand for increasingly large memory. As the memory capacity connected through traditional memory interfaces is reaching limits, heterogeneous memory systems have gained traction in expanding the memory pool. These systems involve dynamic data movement between different memory locations for efficient utilization, which poses challenges for existing security implementations, whose metadata are tied to the physical location of data. In this work, we propose a new security model specifically designed for systems with dynamic page migration. Our model minimizes the need for security recalculations due to data movement, optimizes security structures for efficient bandwidth utilization, and reduces the overall traffic caused by security operations. Based on our evaluation, our proposed security support improves the GPU throughput by a geometric mean of 29.94% (up to 190.43%) over the conventional security model, and it reduces the security traffic in the memory subsystem to 47.79% on average (as low as 17.71% overhead).}, journal={2024 IEEE INTERNATIONAL SYMPOSIUM ON HIGH-PERFORMANCE COMPUTER ARCHITECTURE, HPCA 2024}, author={Abdullah, Rahaf and Lee, Hyokeun and Zhou, Huiyang and Awad, Amro}, year={2024}, pages={233–248} } @article{abdullah_zhou_awad_2023, title={Plutus: Bandwidth-Efficient Memory Security for GPUs}, ISSN={["1530-0897"]}, DOI={10.1109/HPCA56546.2023.10071100}, abstractNote={Graphic-Processing Units (GPUs) are increasingly used in systems where security is a critical design requirement. Such systems include cloud computing, safety-critical systems, and edge devices, where sensitive data is processed or/and generated. Thus, the ability to reduce the attack surface while achieving high performance is of utmost importance. However, adding security features to GPUs comes at the expense of high-performance overheads due to the extra memory bandwidth required to handle security metadata. In particular, memory authentication metadata (e.g., authentication tags) along with encryption counters can lead to significant performance overheads due to the memory bandwidth used to fetch the metadata. Such metadata can lead to more than 200% extra bandwidth usage for irregular access patterns.In this work, we propose a novel design, Plutus, which enables low-overhead secure GPU memory. Plutus has three key ideas. The first is to leverage value locality to reduce authentication metadata. Our observation is that a large percentage of memory accesses could be verified without the need to bring the authentication tags. Specifically, through comparing decrypted blocks against known/verified values, we can with high confidence guarantee that no tampering occurred. Our analysis shows that the probability of the decryption of a tampered (and/or replayed) block leading to a known value is extremely low, in fact, lower than the collision probability in the most secure hash functions. Second, based on the observation that many GPU workloads have limited numbers of dirty block evictions, Plutus proposes a second layer of compact counters to reduce the memory traffic due to both the encryption counters and integrity tree. Third, by exploring the interesting tradeoff between the integrity tree organization vs. metadata fetch granularity, Plutus uses smaller block sizes for security metadata caches to optimize the number of security metadata memory requests. Based on our evaluation, Plutus can improve the GPU throughput by 16.86% (up to 58.38%) and reduce the memory bandwidth usage of secure memory by 48.14% (up to 80.30%).}, journal={2023 IEEE INTERNATIONAL SYMPOSIUM ON HIGH-PERFORMANCE COMPUTER ARCHITECTURE, HPCA}, author={Abdullah, Rahaf and Zhou, Huiyang and Awad, Amro}, year={2023}, pages={543–555} } @article{abu zubair_abdullah_mohaisen_awad_2024, title={RC-NVM: Recovery-Aware Reliability-Security Co-Design for Non-Volatile Memories}, volume={21}, ISSN={["1941-0018"]}, DOI={10.1109/TDSC.2023.3279031}, abstractNote={Non-Volatile Memory (NVM) technologies are now available in the form of byte-addressable and fast main memory. Despite their benefits, such memories require secure and reliable memory management to prevent malicious and spontaneous data alteration. However, in NVM security, it is still a major challenge to maintain crash consistency and reliable system recovery. In particular, Message Authentication Codes (MAC) are rarely discussed in recent recovery-aware NVM studies since they are generally not cached. MACs have outstanding sensitivity to memory errors and hence they can be used for reliability enhancement alongside their mainstream use to detect malicious tampering. However, persisting MACs is challenging and requires 2x writes and reads in a conventional secure NVM system. It is possible to cache MACs in a MAC-assisted reliability scheme; however, this brings many challenges related to crash consistency and reliability. In this paper, we present the difficulties associated with MAC recovery if they are cached, and solutions to guarantee reliable system recovery. Finally, we propose a novel scheme, R ecoverable and C hipkill capable NVM , RC-NVM, which can effectively use a volatile write-back cache for MACs as well as recover them quickly after a system crash. Our scheme reduces 27% of the writes and allows 18.2% performance improvement compared to the state-of-the-art, while preserving the ability to recover from a system crash.}, number={4}, journal={IEEE TRANSACTIONS ON DEPENDABLE AND SECURE COMPUTING}, author={Abu Zubair, Kazi and Abdullah, Rahaf and Mohaisen, David and Awad, Amro}, year={2024}, pages={1817–1830} }