@inproceedings{wibowo_agrawal_tuck_2017, title={Characterizing the impact of soft errors across microarchitectural structures and implications for predictability}, DOI={10.1109/iiswc.2017.8167782}, abstractNote={The trends of transistor size and system complexity scaling continue. As a result, soft errors in the system, including the processor core, are predicted to become one of the major reliability challenges. A fraction of soft errors at the device level could become unmasked errors visible to the user. Unmasked soft errors may manifest as a detectable error, which could be recoverable (DRE) or unrecoverable (DUE), or as a Silent Data Corruption (SDC). Detecting and recovering from an SDC is especially challenging since an explicit checker is needed to detect erroneous state. Predicting when SDCs are more likely could be valuable in designing resilient systems. To gain insight, we evaluate the Architectural Vulnerability Factor (AVF) of all major in-core memory structures of an out-of-order superscalar processor. In particular, we focus on the vulnerability factors for detectable and unrecoverable errors (DUEAVF) and silent data corruptions (SDCAVF) across windows of execution to study their characteristics, time-varying behavior, and their predictability using a linear regression trained offline. We perform more than 35 million microarchitectural fault injection simulations and, where necessary, run them to completion using functional simulation to determine AVF, DUEAVF, and SDCAVF. Our study shows that, similar to AVF, DUEAVF and SDCAVF vary over time and across applications. We also find significant differences in DUEAVF and SDCAVF across the processor structures we studied. Furthermore, we find that DUEAVF can be predicted using a linear regression with accuracy similar to that of AVF estimation. However, SDCAVF could not be predicted with the same level of accuracy. As a remedy, we propose adding a software vulnerability factor, in the form of SDCPVF, to the linear regression model for estimating SDCAVF. We find that the SDCPVF of the Architectural Register File explains most of the behavior of SDCAVF for the combined microarchitectural structures studied in this paper. Our evaluation shows that the addition of SDCPVF improves accuracy by 5.19×, on average, to a level similar to that of the DUEAVF and AVF estimates. We also evaluate the impact of limiting software-layer reliability information to only 5 basic blocks (a 16× cost reduction, on average), and observe that it increases error by only 18.7%, on average.}, booktitle={Proceedings of the 2017 IEEE International Symposium on Workload Characterization (IISWC)}, author={Wibowo, Bagus and Agrawal, Abhinav and Tuck, James}, year={2017}, pages={250–260} } @article{wibowo_agrawal_stanton_tuck_2016, title={An Accurate Cross-Layer Approach for Online Architectural Vulnerability Estimation}, volume={13}, ISSN={1544-3973}, DOI={10.1145/2975588}, abstractNote={Processor soft-error rates are projected to increase as feature sizes scale down, necessitating the adoption of reliability-enhancing techniques, but the power and performance overheads of such techniques remain a concern. Dynamic cross-layer techniques are a promising way to improve the cost-effectiveness of resilient systems. As a foundation for such a system, we propose a cross-layer approach for estimating the architectural vulnerability of a processor core online that works by combining information from the software, compiler, and microarchitectural layers at runtime.
The hardware layer combines the metadata from the software and compiler layers with microarchitectural measurements to estimate architectural vulnerability online. We describe our design and evaluate it in detail on a set of SPEC CPU 2006 applications. We find that our online AVF estimate is highly accurate with respect to a postmortem AVF analysis, with only 0.46% average absolute error. Our design also incurs negligible performance impact for SPEC CPU 2006 applications and about 1.2% for a Monte Carlo application, requires approximately 1.4% area overhead, and costs about 3.3% more power on average. We compare our technique against two prior online AVF estimation techniques, one using a linear regression to estimate AVF and another based on PVF-HVF; our evaluation finds that our approach is, on average, more accurate. Our case study of a Monte Carlo simulation shows that our AVF estimate can adapt to the inherent resiliency of the algorithm. Finally, we demonstrate the effectiveness of our approach using a dynamic protection scheme that limits vulnerability to soft errors while reducing energy consumption by an average of 4.8% for a target normalized SER of 10%, compared to enabling simple parity+ECC protection at all times.}, number={3}, journal={ACM Transactions on Architecture and Code Optimization}, author={Wibowo, Bagus and Agrawal, Abhinav and Stanton, Thomas and Tuck, James}, year={2016}, month={Sep} } @inproceedings{agrawal_wibowo_tuck_2015, title={SourceMark: A Source-Level Approach for Identifying Architecture and Optimization Agnostic Regions for Performance Analysis}, DOI={10.1109/iiswc.2015.27}, abstractNote={Computer architects often evaluate performance on only parts of a program, rather than the entire program, because simulations can take weeks or longer to finish. However, choosing regions of a program to evaluate in a way that is consistent and correct across different compilers and different architectures is very challenging and has not received sufficient attention. The need for such tools is growing in importance given the diversity of architectures and compilers in use today. In this work, we propose a technique that identifies regions of a desired granularity for performance evaluation. We use a source-to-source compiler that inserts software marks into the program's source code to divide the execution into regions with a desired dynamic instruction count. An evaluation framework chooses from among a set of candidate marks to find ones that are both consistent across different architectures or compilers and able to yield a low run-time instruction overhead. Evaluated on a set of SPEC applications with a region size of about 100 million instructions, our technique incurs a dynamic instruction overhead of at most 3.3%, with an average overhead of 0.47%. We also demonstrate the scalability of our technique by evaluating the dynamic instruction overhead for regions of finer granularity and show similarly small overheads; of the applications we studied, we were unable to find suitable fine-grained regions only for 462.libquantum and 444.namd. Our technique is an effective alternative to traditional binary-level approaches.
We have demonstrated that a source-level approach is robust, that it can achieve low overhead, and that it reduces the effort of bringing new architectures or compilers into an existing evaluation framework.}, booktitle={Proceedings of the 2015 IEEE International Symposium on Workload Characterization (IISWC)}, author={Agrawal, Abhinav and Wibowo, Bagus and Tuck, James}, year={2015}, pages={160–171} }