@inproceedings{gholkar_mueller_rountree_2019,
  title={Uncore Power Scavenger: A Runtime for Uncore Power Conservation on HPC Systems},
  DOI={10.1145/3295500.3356150},
  abstractNote={The US Department of Energy (DOE) has set a power target of 20-30MW on the first exascale machines. To achieve one exaflop under this power constraint, it is necessary to minimize wasteful consumption of power while striving to improve performance. Toward this end, we investigate uncore frequency scaling (UFS) as a potential knob for reducing the power footprint of HPC jobs. We propose Uncore Power Scavenger (UPSCavenger), a runtime system that dynamically detects phase changes and automatically sets the best uncore frequency for every phase to save power without significant impact on performance. Our experimental evaluations on a cluster show that UPSCavenger achieves up to 10% energy savings with under 1% slowdown. It achieves 14% energy savings with a worst-case slowdown of 5.5%. We also show that UPSCavenger achieves up to 20% speedup and proportional energy savings compared to Intel's RAPL with equivalent power usage, making it a viable solution even for power-constrained computing.},
  booktitle={Proceedings of SC19: The International Conference for High Performance Computing, Networking, Storage and Analysis},
  author={Gholkar, Neha and Mueller, Frank and Rountree, Barry},
  year={2019}
}

@inproceedings{gholkar_mueller_rountree_marathe_2018,
  title={PShifter: Feedback-based Dynamic Power Shifting within HPC Jobs for Performance},
  DOI={10.1145/3208040.3208047},
  abstractNote={The US Department of Energy (DOE) has set a power target of 20-30MW on the first exascale machines. To achieve one exaFLOPS under this power constraint, it is necessary to manage power intelligently while maximizing performance. Most production-level parallel applications suffer from computational load imbalance across distributed processes due to non-uniform work decomposition. Other factors like manufacturing variation and thermal variation in the machine room may amplify this imbalance. As a result of this imbalance, some processes of a job reach blocking calls, collectives, or barriers earlier and wait for others to reach the same point. This waiting wastes energy and CPU cycles, which degrades application efficiency and performance. We address this problem for power-limited jobs via Power Shifter (PShifter), a dual-level, feedback-based mechanism that intelligently and automatically detects such imbalance and reduces it by dynamically re-distributing a job's power budget across processors to improve the overall performance of the job compared to a naïve uniform power distribution across nodes. In contrast to prior work, PShifter ensures that a given power budget is not violated. At the bottom level of PShifter, local agents monitor and control the performance of processors by actuating different power levels. They reduce the power of processors that incur substantial wait times. At the top level, the cluster agent, which has a global view of the system, monitors the job's power consumption and provides feedback on the unused power, which is then distributed across the processors of the same job. Our evaluation on an Intel cluster shows that PShifter achieves performance improvement of up to 21% and energy savings of up to 23% compared to uniform power allocation, outperforms static approaches by up to 40% and 22% for codes with and without phase changes, respectively, and outperforms dynamic schemes by up to 19%.
To the best of our knowledge, PShifter is the first approach to transparently and automatically apply power capping non-uniformly across the processors of a job in a dynamic manner, adapting to phase changes.},
  booktitle={HPDC '18: Proceedings of the 27th International Symposium on High-Performance Parallel and Distributed Computing},
  author={Gholkar, Neha and Mueller, Frank and Rountree, Barry and Marathe, Aniruddha},
  year={2018},
  pages={106–117}
}

@inproceedings{gholkar_mueller_rountree_2016,
  title={A Power-aware Cost Model for HPC Procurement},
  ISSN={2164-7062},
  DOI={10.1109/ipdpsw.2016.35},
  abstractNote={With the supercomputing community headed toward the era of exascale computing, power has become one of the foremost concerns. Today's fastest supercomputer, Tianhe-2, already consumes 17.8MW to achieve a peak performance of 33.86PFlops [1]. At least an order of magnitude improvement in performance while maintaining the power envelope is required for exascale. Yet, manufacturing variations are increasingly creating a heterogeneous computing environment, even when identical processing components are deployed, particularly when operating under a controlled power ceiling. This work contributes a procurement model to aid in the design of a capability system that achieves maximum performance while considering manufacturing variations. It appropriately partitions a single, compound system budget into CAPEX (infrastructure cost) and OPEX (operating power cost). Early results indicate that aggressive infrastructure procurement that disregards such operational needs can lead to severe performance degradation or significant hidden operating costs incurred after procurement.},
  booktitle={2016 IEEE 30th International Parallel and Distributed Processing Symposium Workshops (IPDPSW)},
  author={Gholkar, Neha and Mueller, Frank and Rountree, Barry},
  year={2016},
  pages={1110–1113}
}

@inproceedings{leon_smith_oates_miles_2016,
  title={Sensitivity analysis for a quantum informed ferroelectric energy model},
  DOI={10.1115/smasis2016-9035},
  abstractNote={We perform global sensitivity analysis for parameters in a continuum energy model for ferroelectric materials, which is informed by density functional theory (DFT). Specifically, we use global sensitivity analysis to rank the sensitivity of phenomenological parameters governing the Landau and electrostriction energy for single-domain ferroelectric lead titanate. These techniques include Pearson correlations constructed directly from input and output relations, Sobol sensitivity indices, and Morris indices.},
  booktitle={Proceedings of the ASME Conference on Smart Materials, Adaptive Structures and Intelligent Systems (SMASIS)},
  author={Leon, L. S. and Smith, R. C. and Oates, W. S. and Miles, P.},
  year={2016}
}