% EasyView: paper in the CGO 2024 symposium proceedings.
% Tool and venue acronyms are brace-protected so sentence-casing styles keep them intact.
@inproceedings{zhao_chabbi_liu_2024,
  author    = {Zhao, Qidong and Chabbi, Milind and Liu, Xu},
  title     = {{EASYVIEW}: Bringing Performance Profiles into Integrated Development Environments},
  booktitle = {2024 {IEEE/ACM} International Symposium on Code Generation and Optimization ({CGO})},
  year      = {2024},
  pages     = {386--398},
  issn      = {2164-2397},
  doi       = {10.1109/CGO57630.2024.10444840},
}

% DroidPerf: paper in the MobiCom 2023 conference proceedings.
% Percent signs and the approximation tilde in the abstract are escaped so the
% field is safe if a style ever typesets it.
@inproceedings{li_zhao_jiao_liu_2023,
  author    = {Li, Bolun and Zhao, Qidong and Jiao, Shuyin and Liu, Xu},
  title     = {{DroidPerf}: Profiling Memory Objects on {Android} Devices},
  booktitle = {Proceedings of the 29th Annual International Conference on Mobile Computing and Networking ({MobiCom} 2023)},
  year      = {2023},
  pages     = {75--89},
  doi       = {10.1145/3570361.3592503},
  abstract  = {Optimizing performance inefficiencies in memory hierarchies is well-known for native languages, such as C and C++. There are few studies, however, on exploring memory inefficiencies in Android Runtime (ART). Running in ART, managed languages, such as Java and Kotlin, employ various abstractions, such as runtime support, ahead-of-time (AOT) compilation, and garbage collection (GC), which hide important execution details from the plain source code. In this paper, we develop DroidPerf, a lightweight, object-centric memory profiler for ART, which associates memory inefficiencies with objects created and used in Android apps. With such object-level information, DroidPerf is able to guide locality optimization on memory layouts, access patterns, and allocation patterns. Guided by DroidPerf, we optimize a number of popular Android apps and obtain significant performance gains. Many inefficiencies are confirmed by the code authors and optimization patches are under evaluation for upstreaming. As a practical tool, DroidPerf incurs $\sim$32\% runtime overhead and $\sim$14\% memory overhead on average. Furthermore, DroidPerf works in the production environment with off-the-shelf hardware, OS, Dalvik virtual machine, ART, and unmodified Android app source code.},
}

% OJXPerf: paper in the ICSE 2022 conference proceedings.
@inproceedings{li_xu_zhao_su_chabbi_jiao_liu_2022,
  author    = {Li, Bolun and Xu, Hao and Zhao, Qidong and Su, Pengfei and Chabbi, Milind and Jiao, Shuyin and Liu, Xu},
  title     = {{OJXPerf}: Featherlight Object Replica Detection for {Java} Programs},
  booktitle = {2022 {ACM/IEEE} 44th International Conference on Software Engineering ({ICSE} 2022)},
  year      = {2022},
  pages     = {1558--1570},
  issn      = {0270-5257},
  doi       = {10.1145/3510003.3510083},
  abstract  = {Memory bloat is an important source of inefficiency in complex production software, especially in software written in managed languages such as Java. Prior approaches to this problem have focused on identifying objects that outlive their life span. Few studies have, however, looked into whether and to what extent myriad objects of the same type are identical. A quantitative assessment of identical objects with code-level attribution can assist developers in refactoring code to eliminate object bloat, and favor reuse of existing object(s). The result is reduced memory pressure, reduced allocation and garbage collection, enhanced data locality, and reduced re-computation, all of which result in superior performance. We develop OJXPerf, a lightweight sampling-based profiler, which probabilistically identifies identical objects. OJXPerf employs hardware performance monitoring units (PMU) in conjunction with hardware debug registers to sample and compare field values of different objects of the same type allocated at the same calling context but potentially accessed at different program points. The result is a lightweight measurement -- a combination of object allocation contexts and usage contexts ordered by duplication frequency. This class of duplicated objects is relatively easier to optimize. OJXPerf incurs 9\% runtime and 6\% memory overheads on average. We empirically show the benefit of OJXPerf by using its profiles to instruct us to optimize a number of Java programs, including well-known benchmarks and real-world applications. The results show a noticeable reduction in memory usage (up to 11\%) and a significant speedup (up to 25\%).},
}