% Conference papers on object-centric memory profiling (CGO 2023, MobiCom 2023, ICSE 2022).
% Each is stored as an inproceedings entry with the proceedings name in booktitle.
% Tool names and proper nouns in titles are brace-protected so that styles applying
% sentence casing keep their capitalisation. Abstracts use LaTeX-safe escapes
% (\% for percent signs, $\sim$ for "approximately") so they can be typeset.

@inproceedings{li_su_chabbi_jiao_liu_2023,
  author    = {Li, Bolun and Su, Pengfei and Chabbi, Milind and Jiao, Shuyin and Liu, Xu},
  title     = {{DJXPerf}: Identifying Memory Inefficiencies via Object-Centric Profiling for {Java}},
  booktitle = {Proceedings of the 21st {ACM/IEEE} International Symposium on Code Generation and Optimization, {CGO} 2023},
  year      = {2023},
  pages     = {81--94},
  issn      = {2164-2397},
  doi       = {10.1145/3579990.3580010},
  abstract  = {Java is the ``go-to'' programming language choice for developing scalable enterprise cloud applications. In such systems, even a few percent CPU time savings can offer a significant competitive advantage and cost savings. Although performance tools abound for Java, those that focus on the data locality in the memory hierarchy are rare. In this paper, we first categorize data locality issues in Java programs. We then present DJXPerf, a lightweight, object-centric memory profiler for Java, which associates memory-hierarchy performance metrics (e.g., cache/TLB misses) with Java objects. DJXPerf uses statistical sampling of hardware performance monitoring counters to attribute metrics to not only source code locations but also Java objects. DJXPerf presents Java object allocation contexts combined with their usage contexts and presents them ordered by the poor locality behaviors. DJXPerf's performance measurement, object attribution, and presentation techniques guide optimizing object allocation, layout, and access patterns. DJXPerf incurs only $\sim$8.5\% runtime overhead and $\sim$6\% memory overhead on average, requiring no modifications to hardware, OS, Java virtual machine, or application source code, which makes it attractive to use in production. Guided by DJXPerf, we study and optimize a number of Java and Scala programs, including well-known benchmarks and real-world applications, and demonstrate significant speedups.},
}

@inproceedings{li_zhao_jiao_liu_2023,
  author    = {Li, Bolun and Zhao, Qidong and Jiao, Shuyin and Liu, Xu},
  title     = {{DroidPerf}: Profiling Memory Objects on {Android} Devices},
  booktitle = {Proceedings of the 29th Annual International Conference on Mobile Computing and Networking, {MobiCom} 2023},
  year      = {2023},
  pages     = {75--89},
  doi       = {10.1145/3570361.3592503},
  abstract  = {Optimizing performance inefficiencies in memory hierarchies is well-known for native languages, such as C and C++. There are few studies, however, on exploring memory inefficiencies in Android Runtime (ART). Running in ART, managed languages, such as Java and Kotlin, employ various abstractions, such as runtime support, ahead-of-time (AOT) compilation, and garbage collection (GC), which hide important execution details from the plain source code. In this paper, we develop DroidPerf, a lightweight, object-centric memory profiler for ART, which associates memory inefficiencies with objects created and used in Android apps. With such object-level information, DroidPerf is able to guide locality optimization on memory layouts, access patterns, and allocation patterns. Guided by DroidPerf, we optimize a number of popular Android apps and obtain significant performance gains. Many inefficiencies are confirmed by the code authors and optimization patches are under evaluation for upstreaming. As a practical tool, DroidPerf incurs $\sim$32\% runtime overhead and $\sim$14\% memory overhead on average. Furthermore, DroidPerf works in the production environment with off-the-shelf hardware, OS, Dalvik virtual machine, ART, and unmodified Android app source code.},
}

@inproceedings{li_xu_zhao_su_chabbi_jiao_liu_2022,
  author    = {Li, Bolun and Xu, Hao and Zhao, Qidong and Su, Pengfei and Chabbi, Milind and Jiao, Shuyin and Liu, Xu},
  title     = {{OJXPerf}: Featherlight Object Replica Detection for {Java} Programs},
  booktitle = {2022 {ACM/IEEE} 44th International Conference on Software Engineering ({ICSE} 2022)},
  year      = {2022},
  pages     = {1558--1570},
  issn      = {0270-5257},
  doi       = {10.1145/3510003.3510083},
  abstract  = {Memory bloat is an important source of inefficiency in complex production software, especially in software written in managed languages such as Java. Prior approaches to this problem have focused on identifying objects that outlive their life span. Few studies have, however, looked into whether and to what extent myriad objects of the same type are identical. A quantitative assessment of identical objects with code-level attribution can assist developers in refactoring code to eliminate object bloat, and favor reuse of existing object(s). The result is reduced memory pressure, reduced allocation and garbage collection, enhanced data locality, and reduced re-computation, all of which result in superior performance. We develop OJXPerf, a lightweight sampling-based profiler, which probabilistically identifies identical objects. OJXPerf employs hardware performance monitoring units (PMU) in conjunction with hardware debug registers to sample and compare field values of different objects of the same type allocated at the same calling context but potentially accessed at different program points. The result is a lightweight measurement -- a combination of object allocation contexts and usage contexts ordered by duplication frequency. This class of duplicated objects is relatively easier to optimize. OJXPerf incurs 9\% runtime and 6\% memory overheads on average. We empirically show the benefit of OJXPerf by using its profiles to instruct us to optimize a number of Java programs, including well-known benchmarks and real-world applications. The results show a noticeable reduction in memory usage (up to 11\%) and a significant speedup (up to 25\%).},
}