% Bibliography database: six publications on persistent-memory programming
% models and intrusion-detection benchmarking.
%
% Conventions used below:
%   - conference papers are typed inproceedings, with the venue in booktitle;
%     journal papers are typed article, with the venue in journal
%   - page ranges use a double hyphen; months use the unquoted BibTeX macros
%   - acronyms in titles and venues are brace-protected against style recasing
%   - abstractNote is a non-standard field that bibliography styles ignore;
%     it is kept so the exported abstract text is not lost

% Conference paper (HPCA 2021).
@inproceedings{alshboul_ramrakhyani_wang_tuck_solihin_2021,
  author       = {Alshboul, Mohammad and Ramrakhyani, Prakash and Wang, William and Tuck, James and Solihin, Yan},
  title        = {{BBB}: Simplifying Persistent Programming using Battery-Backed Buffers},
  booktitle    = {2021 27th {IEEE} International Symposium on High-Performance Computer Architecture ({HPCA} 2021)},
  pages        = {111--124},
  year         = {2021},
  issn         = {1530-0897},
  doi          = {10.1109/HPCA51647.2021.00019},
  abstractNote = {Non-volatile memory (NVM) is poised to augment or replace DRAM as main memory. With the right abstraction and support, non-volatile main memory (NVMM) can provide an alternative to the storage system to host long-lasting persistent data. However, keeping persistent data in memory requires programs to be written such that data is crash consistent (i.e. it can be recovered after failure). Critical to supporting crash recovery is the guarantee of ordering of when stores become durable with respect to program order. Strict persistency, which requires persist order to coincide with program order of stores, is simple and intuitive but generally thought to be too slow. More relaxed persistency models are available but demand higher programming complexity, e.g. they require the programmer to insert persist barriers correctly in their program. We identify the source of strict persistency inefficiency as the gap between the point of visibility (PoV) which is the cache, and the point of persistency (PoP) which is the memory. In this paper, we propose a new approach to close the PoV/PoP gap which we refer to as Battery-Backed Buffer (BBB). The key idea of BBB is to provide a battery-backed persist buffer (bbPB) in each core next to the L1 data cache (L1D). A store value is allocated in the bbPB as it is written to cache, becoming part of the persistence domain. If a crash occurs, battery ensures bbPB can be fully drained to NVMM. BBB simplifies persistent programming as the programmer does not need to insert persist barriers or flushes. Furthermore, our BBB design achieves nearly identical results to eADR in terms of performance and number of NVMM writes, while requiring two orders of magnitude smaller energy and time to drain.},
}

% Journal article (ACM TACO, volume 16, issue 4).
@article{elkhouly_alshboul_hayashi_solihin_kimura_2019,
  author       = {Elkhouly, Reem and Alshboul, Mohammad and Hayashi, Akihiro and Solihin, Yan and Kimura, Keiji},
  title        = {Compiler-support for Critical Data Persistence in {NVM}},
  journal      = {{ACM} Transactions on Architecture and Code Optimization},
  volume       = {16},
  number       = {4},
  year         = {2019},
  month        = dec,
  issn         = {1544-3973},
  doi          = {10.1145/3371236},
  abstractNote = {Non-volatile Main Memories (NVMs) offer a promising way to preserve data persistence and enable computation recovery in case of failure. While the use of NVMs can significantly reduce the overhead of failure recovery, which is the case with High-Performance Computing (HPC) kernels, rewriting existing programs or writing new applications for NVMs is non-trivial. In this article, we present a compiler-support that automatically inserts complex instructions into kernels to achieve NVM data-persistence based on a simple programmer directive. Unlike checkpointing techniques that store the whole system state, our technique only persists user-designated objects as well as some parameters required for safe recovery such as loop induction variables. Also, our technique can reduce the number of data transfer operations, because our compiler coalesces consecutive memory-persisting operations into a single memory transaction per cache line when possible.},
}

% Journal article (ACM TACO, volume 16, issue 2).
@article{alshboul_elnawawy_elkhouly_kimura_tuck_solihin_2019,
  author       = {Alshboul, Mohammad and Elnawawy, Hussein and Elkhouly, Reem and Kimura, Keiji and Tuck, James and Solihin, Yan},
  title        = {Efficient Checkpointing with Recompute Scheme for Non-volatile Main Memory},
  journal      = {{ACM} Transactions on Architecture and Code Optimization},
  volume       = {16},
  number       = {2},
  year         = {2019},
  month        = may,
  issn         = {1544-3973},
  doi          = {10.1145/3323091},
  abstractNote = {Future main memory will likely include Non-Volatile Memory. Non-Volatile Main Memory (NVMM) provides an opportunity to rethink checkpointing strategies for providing failure safety to applications. While there are many checkpointing and logging schemes in the literature, their use must be revisited as they incur high execution time overheads as well as a large number of additional writes to NVMM, which may significantly impact write endurance.},
}

% Conference paper (PACT 2019).
@inproceedings{lin_alshboul_solihin_zhou_2019,
  author       = {Lin, Zhen and Alshboul, Mohammad and Solihin, Yan and Zhou, Huiyang},
  title        = {Exploring Memory Persistency Models for {GPUs}},
  booktitle    = {28th International Conference on Parallel Architectures and Compilation Techniques ({PACT})},
  pages        = {310--322},
  year         = {2019},
  issn         = {1089-795X},
  doi          = {10.1109/PACT.2019.00032},
  abstractNote = {Given its high integration density, high speed, byte addressability, and low standby power, non-volatile or persistent memory is expected to supplement/replace DRAM as main memory. Through persistency programming model (which defines durability ordering of stores) and durable transaction constructs, the programmer can provide recoverable data structure (RDS) which allows programs to recover to a consistent state after a failure. While persistency models have been well studied for CPUs, they have been neglected for graphics processing units (GPUs). Considering the importance of GPUs as a dominant accelerator for high performance computing, we investigate persistency models for GPUs. GPU applications exhibit substantial differences with CPUs applications, hence in this paper we adapt, re-architect, and optimize CPU persistency models for GPUs. We design a pragma-based compiler scheme for expressing persistency model for GPUs. We identify that the thread hierarchy in GPUs offers intuitive scopes to form epochs and durable transactions. We find that undo logging produces significant performance overheads. We propose to use idempotency analysis to reduce both logging frequency and the size of logs. Through both real-system and simulation evaluations, we show low overheads of our proposed architecture support.},
}

% Conference paper (ICIT 2018).
@inproceedings{aldwairi_alshboul_seyam_2018,
  author       = {Aldwairi, Monther and Alshboul, Mohammad A. and Seyam, Asmaa},
  title        = {Characterizing Realistic Signature-based Intrusion Detection Benchmarks},
  booktitle    = {Proceedings of the 6th International Conference on Information Technology: {IoT} and Smart City ({ICIT} 2018)},
  pages        = {97--103},
  year         = {2018},
  doi          = {10.1145/3301551.3301591},
  abstractNote = {Speeding up pattern matching for intrusion detection systems has been a growing field of research. There has been a flux of new algorithms, modifications to existing algorithms and even hardware architectures aimed at improving pattern matching performance. Establishing an accurate comparison to related work is a real challenge because researchers use different datasets and metrics to evaluate their work. The purpose of this paper is to characterize and identify realistic workloads, propose standard benchmarks, and establish common metrics to better compare work in the area of pattern matching for intrusion detection. We collect traffic traces and attack signatures from popular open source platforms. The datasets are processed, cleansed and studied, to give the researchers a better understanding of their characteristics. The final datasets along with detailed information about their origins, contents, features, statistical analysis and performance evaluation using well-known pattern-matching algorithms are available to the public. In addition, we provide a generic parser capable of parsing different intrusion detection systems rule formats and extract attack signatures. Finally, a pattern-matching engine that enables researchers to plug-and-play their new pattern matching algorithms and compare to existing algorithms using the predefined metrics.},
}

% Conference paper (ISCA 2018).
@inproceedings{alshboul_tuck_solihin_2018,
  author       = {Alshboul, Mohammad and Tuck, James and Solihin, Yan},
  title        = {Lazy Persistency: a High-Performing and Write-Efficient Software Persistency Technique},
  booktitle    = {2018 {ACM}/{IEEE} 45th Annual International Symposium on Computer Architecture ({ISCA})},
  pages        = {439--451},
  year         = {2018},
  issn         = {1063-6897},
  doi          = {10.1109/ISCA.2018.00044},
  abstractNote = {Emerging Non-Volatile Memories (NVMs) are expected to be included in future main memory, providing the opportunity to host important data persistently in main memory. However, achieving persistency requires that programs be written with failure-safety in mind. Many persistency models and techniques have been proposed to help the programmer reason about failure-safety. They require that the programmer eagerly flush data out of caches to make it persistent. Eager persistency comes with a large overhead because it adds many instructions to the program for flushing cache lines and incurs costly stalls at barriers to wait for data to become durable. To reduce these overheads, we propose Lazy Persistency (LP), a software persistency technique that allows caches to slowly send dirty blocks to the NVMM through natural evictions. With LP, there are no additional writes to NVMM, no decrease in write endurance, and no performance degradation from cache line flushes and barriers. Persistency failures are discovered using software error detection (checksum), and the system recovers from them by recomputing inconsistent results. We describe the properties and design of LP and demonstrate how it can be applied to loop-based kernels popularly used in scientific computing. We evaluate LP and compare it to the state-of-the-art Eager Persistency technique from prior work. Compared to it, LP reduces the execution time and write amplification overheads from 9% and 21% to only 1% and 3%, respectively.},
}