% Bibliography database: publications from 2013 to 2022, newest first.
%
% Conventions used in this file:
%   - One field per line; values are brace-delimited.
%   - Citation keys follow an "authors_year" pattern and contain no whitespace.
%   - Page ranges use the ASCII double hyphen (--).
%   - Months use the unquoted three-letter macros (apr, nov, ...).
%   - Conference papers are inproceedings entries with the venue in "booktitle";
%     journal papers are article entries with the venue in "journal".
%   - "abstractNote" is a non-standard field kept from the export tool; standard
%     bibliography styles ignore it.

@inproceedings{qiu_shao_zhao_khan_hui_jin_2022,
  author       = {Qiu, Zhengyi and Shao, Shudi and Zhao, Qi and Khan, Hassan Ali and Hui, Xinning and Jin, Guoliang},
  title        = {A Deep Study of the Effects and Fixes of Server-Side Request Races in Web Applications},
  booktitle    = {2022 MINING SOFTWARE REPOSITORIES CONFERENCE (MSR 2022)},
  pages        = {744--756},
  year         = {2022},
  issn         = {2160-1852},
  doi          = {10.1145/3524842.3528463},
  abstractNote = {Server-side web applications are vulnerable to request races. While some previous studies of real-world request races exist, they primarily focus on the root cause of these bugs. To better combat request races in server-side web applications, we need a deep understanding of their characteristics. In this paper, we provide a complementary focus on race effects and fixes with an enlarged set of request races from web applications developed with Object-Relational Mapping (ORM) frameworks. We revisit characterization questions used in previous studies on newly included request races, distinguish the external and internal effects of request races, and relate request-race fixes with concurrency control mechanisms in languages and frameworks for developing server-side web applications. Our study reveals that: (1) request races from ORM-based web applications share the same characteristics as those from raw-SQL web applications; (2) request races violating application semantics without explicit crashes and error messages externally are common, and latent request races, which only corrupt some shared resource internally but require extra requests to expose the misbehavior, are also common; and (3) various fix strategies other than using synchronization mechanisms are used to fix request races. We expect that our results can help developers better understand request races and guide the design and development of tools for combating request races.},
}

@inproceedings{khan_iqbal_shahzad_jin_2022,
  author       = {Khan, Hassan Ali and Iqbal, Hassan and Shahzad, Muhammad and Jin, Guoliang},
  title        = {{RMS}: Removing Barriers to Analyze the Availability and Surge Pricing of Ridesharing Services},
  booktitle    = {PROCEEDINGS OF THE 2022 CHI CONFERENCE ON HUMAN FACTORS IN COMPUTING SYSTEMS (CHI' 22)},
  year         = {2022},
  doi          = {10.1145/3491102.3517464},
  abstractNote = {Ridesharing services do not make data of their availability (supply, utilization, idle time, and idle distance) and surge pricing publicly available. It limits the opportunities to study the spatiotemporal trends of the availability and surge pricing of these services. Only a few research studies conducted in North America analyzed these features for only Uber and Lyft. Despite the interesting observations, the results of prior works are not generalizable or reproducible because: i) the datasets collected in previous publications are spatiotemporally sensitive, i.e., previous works do not represent the current availability and surge pricing of ridesharing services in different parts of the world; and ii) the analyses presented in previous works are limited in scope (in terms of countries and ridesharing services they studied). Hence, prior works are not generally applicable to ridesharing services operating in different countries. This paper addresses the issue of ridesharing-data unavailability by presenting Ridesharing Measurement Suite (RMS). RMS removes the barrier of entry for analyzing the availability and surge pricing of ridesharing services for ridesharing users, researchers from various scientific domains, and regulators. RMS continuously collects the data of the availability and surge pricing of ridesharing services. It exposes real-time data of these services through i) graphical user interfaces and ii) public APIs to assist various stakeholders of these services and simplify the data collection and analysis process for future ridesharing research studies. To signify the utility of RMS, we deployed RMS to collect and analyze the availability and surge pricing data of 10 ridesharing services operating in nine countries for eight weeks in pre and during pandemic periods. Using the data collected and analyzed by RMS, we identify that previous articles miscalculated the utilization of ridesharing services as they did not count in the vehicles driving in multiple categories of the same service. We observe that during COVID-19, the supply of ridesharing services decreased by 54%, utilization of available vehicles increased by 6%, and a 5 × increase in the surge frequency of services. We also find that surge occurs in a small geographical region, and its intensity reduces by 50% in about 0.5 miles away from the location of a surge. We present several other interesting observations on ridesharing services’ availability and surge pricing.},
}

@inproceedings{zhao_qiu_shao_hui_khan_jin_2022,
  author       = {Zhao, Qi and Qiu, Zhengyi and Shao, Shudi and Hui, Xinning and Khan, Hassan Ali and Jin, Guoliang},
  title        = {Understanding and Reaching the Performance Limit of Schedule Tuning on Stable Synchronization Determinism},
  booktitle    = {PROCEEDINGS OF THE 2022 31ST INTERNATIONAL CONFERENCE ON PARALLEL ARCHITECTURES AND COMPILATION TECHNIQUES, PACT 2022},
  pages        = {223--238},
  year         = {2022},
  doi          = {10.1145/3559009.3569669},
  abstractNote = {Deterministic MultiThreading (DMT) systems eliminate nondeterminism from the dynamic executions of multithreaded programs. They can greatly simplify multithreaded programming and ease the deployment of systems that rely on replication. We first categorize and compare existing DMT system designs along three axes, incorporating the most recent advances in DMT systems. From our study, we conclude that stable synchronization determinism is the most cost-effective design, and it is thus the focus of our work. To reduce the overhead of enforcing stable synchronization determinism, previous work has explored scheduling-based methods that tune the synchronization schedule. However, it is not clear how low the performance overhead can be through schedule tuning and how to reach the performance limit. To answer these questions, we then follow an iterative process of understanding the performance limit of schedule tuning on stable synchronization determinism and designing new scheduling policies to reach the performance limit. Through this process, we identify two types of scheduling-oblivious overheads that cannot be eliminated by schedule tuning alone. In addition, we also design a group of new policies and implement them in minSMT. Our evaluation shows that minSMT successfully reaches the performance limit of stable synchronization determinism on 107 out of 108 benchmarks after excluding the impact of scheduling-oblivious overheads, and this also results in significant performance improvements compared with state-of-the-art stable synchronization-determinism systems on 9 benchmarks. Our results also suggest that, to further improve the performance of stable synchronization determinism, future research should focus on addressing the two types of scheduling-oblivious overheads with approaches other than schedule tuning.},
}

@inproceedings{qiu_zhao_shao_jin_2021,
  author       = {Qiu, Zhengyi and Zhao, Qi and Shao, Shudi and Jin, Guoliang},
  title        = {Understanding and Detecting Server-Side Request Races in Web Applications},
  booktitle    = {PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)},
  pages        = {842--854},
  year         = {2021},
  doi          = {10.1145/3468264.3468594},
  abstractNote = {Modern web sites often run web applications on the server to handle HTTP requests from users and generate dynamic responses. Due to their concurrent nature, web applications are vulnerable to server-side request races. The problem becomes more severe with the ever-increasing popularity of web applications. We first conduct a comprehensive characteristic study of 157 real-world server-side request races collected from different, popular types of web applications. The findings of this study can provide guidance for future development support in combating server-side request races. Guided by our study results, we develop a dynamic framework, ReqRacer, for detecting and exposing server-side request races in web applications. We propose novel approaches to model happens-before relationships between HTTP requests, which are essential to web applications. Our evaluation shows that ReqRacer can effectively and efficiently detect known and unknown request races.},
}

% Journal version; a 2019 conference paper with the same title (different
% capitalisation) is listed further down under its own key.
@article{wang_yu_qiu_jin_mueller_2020,
  author  = {Wang, Tao and Yu, Xiao and Qiu, Zhengyi and Jin, Guoliang and Mueller, Frank},
  title   = {{BarrierFinder}: recognizing ad hoc barriers},
  journal = {EMPIRICAL SOFTWARE ENGINEERING},
  volume  = {25},
  number  = {6},
  pages   = {4676--4706},
  year    = {2020},
  month   = nov,
  issn    = {1573-7616},
  doi     = {10.1007/s10664-020-09862-3},
}

@inproceedings{shao_qiu_yu_yang_jin_xie_wu_2020,
  author       = {Shao, Shudi and Qiu, Zhengyi and Yu, Xiao and Yang, Wei and Jin, Guoliang and Xie, Tao and Wu, Xintao},
  title        = {Database-Access Performance Antipatterns in Database-Backed Web Applications},
  booktitle    = {2020 IEEE INTERNATIONAL CONFERENCE ON SOFTWARE MAINTENANCE AND EVOLUTION (ICSME 2020)},
  pages        = {58--69},
  year         = {2020},
  issn         = {1063-6773},
  doi          = {10.1109/ICSME46990.2020.00016},
  abstractNote = {Database-backed web applications are prone to performance bugs related to database accesses. While much work has been conducted on database-access antipatterns with some recent work focusing on performance impact, there still lacks a comprehensive view of database-access performance antipatterns in database-backed web applications. To date, no existing work systematically reports known antipatterns in the literature, and no existing work has studied database-access performance bugs in major types of web applications that access databases differently. To address this issue, we first summarize all known database-access performance antipatterns found through our literature survey, and we report all of them in this paper. We further collect database-access performance bugs from web applications that access databases through language-provided SQL interfaces, which have been largely ignored by recent work, to check how extensively the known antipatterns can cover these bugs. For bugs not covered by the known antipatterns, we extract new database-access performance antipatterns based on real-world performance bugs from such web applications. Our study in total reports 24 known and 10 new database-access performance antipatterns. Our results can guide future work to develop effective tool support for different types of web applications.},
}

@inproceedings{wang_yu_qiu_jin_mueller_2019,
  author       = {Wang, Tao and Yu, Xiao and Qiu, Zhengyi and Jin, Guoliang and Mueller, Frank},
  title        = {{BARRIERFINDER}: Recognizing Ad Hoc Barriers},
  booktitle    = {2019 IEEE INTERNATIONAL CONFERENCE ON SOFTWARE MAINTENANCE AND EVOLUTION (ICSME 2019)},
  pages        = {323--327},
  year         = {2019},
  issn         = {1063-6773},
  doi          = {10.1109/ICSME.2019.00049},
  abstractNote = {Ad hoc synchronizations are pervasive in multi-threaded programs. Due to their diversity and complexity, understanding the enforced synchronization relationships of ad hoc synchronizations is challenging but crucial to multi-threaded program development and maintenance. Existing techniques can partially detect primitive ad hoc synchronizations, but they cannot recognize complete implementations or infer the enforced synchronization relationships. In this paper, we propose a framework to automatically identify complex ad hoc synchronizations in full and infer their synchronization relationships. We instantiate the framework with a tool called BarrierFinder, which features various techniques, including program slicing and bounded symbolic execution, to efficiently explore the interleaving space of ad hoc synchronizations within multi-threaded programs and collect execution traces. BarrierFinder then uses these traces to characterize ad hoc synchronizations into different types with a focus on recognizing barriers. Our evaluation shows that BarrierFinder is both effective and efficient in doing this, and BarrierFinder is also helpful for programmers to understand the correctness of their implemented ad hoc synchronizations.},
}

@inproceedings{zhao_qiu_jin_2019,
  author       = {Zhao, Qi and Qiu, Zhengyi and Jin, Guoliang},
  title        = {Semantics-Aware Scheduling Policies for Synchronization Determinism},
  booktitle    = {PROCEEDINGS OF THE 24TH SYMPOSIUM ON PRINCIPLES AND PRACTICE OF PARALLEL PROGRAMMING (PPOPP '19)},
  pages        = {242--256},
  year         = {2019},
  doi          = {10.1145/3293883.3295731},
  url          = {http://dx.doi.org/10.1145/3293883.3295731},
  abstractNote = {A common task for all deterministic multithreading (DMT) systems is to enforce synchronization determinism. However, synchronization determinism has not been the focus of existing DMT research. Instead, most DMT systems focused on how to order data races remained after synchronization determinism is enforced. Consequently, existing scheduling policies for synchronization determinism all have limitations. They may either require performance annotations to achieve good performance or fail to provide schedule stability. In this paper, we argue that synchronization determinism is more fundamental to DMT systems than existing research suggests and propose efficient and effective scheduling policies. Our key insight is that synchronization operations actually encode programmers' intention on how inter-thread communication should be done and can be used as hints while scheduling synchronization operations. Based on this insight, we have built QiThread, a synchronization-determinism system with semantics-aware scheduling policies. Results of a diverse set of 108 programs show that QiThread is able to achieve comparable low overhead as state-of-the-art synchronization-determinism systems without the limitations associated with them.},
}

@inproceedings{yu_jin_2018,
  author       = {Yu, Xiao and Jin, Guoliang},
  title        = {Dataflow Tunneling},
  booktitle    = {PROCEEDINGS 2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE)},
  pages        = {586--597},
  year         = {2018},
  doi          = {10.1145/3180155.3180171},
  abstractNote = {Request-based applications, e.g., most server-side applications, expose services to users in a request-based paradigm, in which requests are served by request-handler methods. An important task for request-based applications is inter-request analysis, which analyzes request-handler methods that are related by inter-request data dependencies together. However, in the request-based paradigm, data dependencies between related request-handler methods are implicitly established by the underlying frameworks that execute these methods. As a result, existing analysis tools are usually limited to the scope of each single method without the knowledge of dependencies between different methods. In this paper, we design an approach called dataflow tunneling to capture inter-request data dependencies from concrete application executions and produce data-dependency specifications. Our approach answers two key questions: (1) what request-handler methods have data dependencies and (2) what these data dependencies are. Our evaluation using applications developed with two representative and popular frameworks shows that our approach is general and accurate. We also present a characteristic study and a use case of cache tuning based on the mined specifications. We envision that our approach can provide key information to enable future inter-request analysis techniques.},
}

@article{yu_joshi_xu_jin_zhang_jiang_2016,
  author       = {Yu, Xiao and Joshi, Pallavi and Xu, Jianwu and Jin, Guoliang and Zhang, Hui and Jiang, Guofei},
  title        = {{CloudSeer}: Workflow Monitoring of Cloud Infrastructures via Interleaved Logs},
  journal      = {ACM SIGPLAN NOTICES},
  volume       = {51},
  number       = {4},
  pages        = {489--502},
  year         = {2016},
  month        = apr,
  issn         = {1558-1160},
  doi          = {10.1145/2954679.2872407},
  abstractNote = {Cloud infrastructures provide a rich set of management tasks that operate computing, storage, and networking resources in the cloud. Monitoring the executions of these tasks is crucial for cloud providers to promptly find and understand problems that compromise cloud availability. However, such monitoring is challenging because there are multiple distributed service components involved in the executions. CloudSeer enables effective workflow monitoring. It takes a lightweight non-intrusive approach that purely works on interleaved logs widely existing in cloud infrastructures. CloudSeer first builds an automaton for the workflow of each management task based on normal executions, and then it checks log messages against a set of automata for workflow divergences in a streaming manner. Divergences found during the checking process indicate potential execution problems, which may or may not be accompanied by error log messages. For each potential problem, CloudSeer outputs necessary context information including the affected task automaton and related log messages hinting where the problem occurs to help further diagnosis. Our experiments on OpenStack, a popular open-source cloud infrastructure, show that CloudSeer's efficiency and problem-detection capability are suitable for online monitoring.},
}

% This entry previously reused the key of the entry above, which makes it a
% "repeated entry"; it now carries a "b" suffix so both keys are unique.
% NOTE(review): same title, authors, year and pages as the entry above, listed
% under a different venue -- confirm whether both records are needed.
@inproceedings{yu_joshi_xu_jin_zhang_jiang_2016b,
  author    = {Yu, X. and Joshi, P. and Xu, J. W. and Jin, G. L. and Zhang, H. and Jiang, G. F.},
  title     = {{CloudSeer}: Workflow monitoring of cloud infrastructures via interleaved logs},
  booktitle = {Operating Systems Review},
  volume    = {50},
  number    = {2},
  pages     = {489--502},
  year      = {2016},
}

@inproceedings{dean_wang_gu_enck_jin_2015,
  author       = {Dean, Daniel J. and Wang, Peipei and Gu, Xiaohui and Enck, William and Jin, Guoliang},
  title        = {Automatic Server Hang Bug Diagnosis: Feasible Reality or Pipe Dream?},
  booktitle    = {2015 IEEE INTERNATIONAL CONFERENCE ON AUTONOMIC COMPUTING},
  pages        = {127--132},
  year         = {2015},
  doi          = {10.1109/icac.2015.52},
  abstractNote = {It is notoriously difficult to diagnose server hang bugs as they often generate little diagnostic information and are difficult to reproduce offline. In this paper, we present a characteristic study of 177 real software hang bugs from 8 common open source server systems (i.e., Apache, Lighttpd, My SQL, Squid, HDFS, Hadoop Mapreduce, Tomcat, Cassandra). We identify three major root cause categories (i.e., Programmer errors, mishandled values, concurrency issues). We then describe two major problems (i.e., False positives and false negatives) while applying existing rule-based bug detection techniques to those bugs.},
}

% The key previously contained spaces ("de kruijf", "et al."); they are now
% underscores, because whitespace ends a citation key. The truncated author
% list uses the "and others" token so the style renders "et al." itself.
@article{deng_jin_de_kruijf_li_liblit_lu_qi_ren_sankaralingam_song_et_al._2015,
  author    = {Deng, DongDong and Jin, GuoLiang and de Kruijf, Marc and Li, Ang and Liblit, Ben and Lu, Shan and Qi, ShanXiang and Ren, JingLei and Sankaralingam, Karthikeyan and Song, LinHai and others},
  title     = {Fixing, preventing, and recovering from concurrency bugs},
  journal   = {Science China Information Sciences},
  volume    = {58},
  number    = {5},
  pages     = {1--18},
  year      = {2015},
  month     = apr,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1674-733X 1869-1919},
  doi       = {10.1007/S11432-015-5315-9},
  url       = {http://dx.doi.org/10.1007/S11432-015-5315-9},
}

@inproceedings{gu_jin_song_zhu_lu_2015,
  author       = {Gu, Rui and Jin, Guoliang and Song, Linhai and Zhu, Linjie and Lu, Shan},
  title        = {What Change History Tells Us about Thread Synchronization},
  booktitle    = {2015 10TH JOINT MEETING OF THE EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND THE ACM SIGSOFT SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE 2015) PROCEEDINGS},
  pages        = {426--438},
  year         = {2015},
  doi          = {10.1145/2786805.2786815},
  abstractNote = {Multi-threaded programs are pervasive, yet difficult to write. Missing proper synchronization leads to correctness bugs and over synchronization leads to performance problems. To improve the correctness and efficiency of multi-threaded software, we need a better understanding of synchronization challenges faced by real-world developers. This paper studies the code repositories of open-source multi-threaded software projects to obtain a broad and in-depth view of how developers handle synchronizations. We first examine how critical sections are changed when software evolves by checking over 250,000 revisions of four representative open-source software projects. The findings help us answer questions like how often synchronization is an afterthought for developers; whether it is difficult for developers to decide critical section boundaries and lock variables; and what are real-world over-synchronization problems. We then conduct case studies to better understand (1) how critical sections are changed to solve performance problems (i.e. over-synchronization issues) and (2) how software changes lead to synchronization-related correctness problems (i.e. concurrency bugs). This in-depth study shows that tool support is needed to help developers tackle over-synchronization problems; it also shows that concurrency bug avoidance, detection, and testing can be improved through better awareness of code revision history.},
}

% Typed as incollection because the record has both a chapter-level "title"
% and a "booktitle".
@incollection{harris_jin_lu_jha_2013,
  author       = {Harris, William R. and Jin, Guoliang and Lu, Shan and Jha, Somesh},
  title        = {Validating Library Usage Interactively},
  booktitle    = {Computer Aided Verification},
  pages        = {796--812},
  year         = {2013},
  publisher    = {Springer Berlin Heidelberg},
  issn         = {0302-9743 1611-3349},
  isbn         = {9783642397981 9783642397998},
  doi          = {10.1007/978-3-642-39799-8_56},
  url          = {http://dx.doi.org/10.1007/978-3-642-39799-8_56},
  abstractNote = {Programmers who develop large, mature applications often want to optimize the performance of their program without changing its semantics. They often do so by changing how their program invokes a library function or a function implemented in another module of the program. Unfortunately, once a programmer makes such an optimization, it is difficult for him to validate that the optimization does not change the semantics of the original program, because the original and optimized programs are equivalent only due to subtle, implicit assumptions about library functions called by the programs. In this work, we present an interactive program analysis that a programmer can apply to validate that his optimization does not change his program’s semantics. Our analysis casts the problem of validating an optimization as an abductive inference problem in the context of checking program equivalence. Our analysis solves the abductive equivalence problem by interacting with the programmer so that the programmer implements a solver for a logical theory that models library functions invoked by the program. We have used our analysis to validate optimizations of real-world, mature applications: the Apache software suite, the Mozilla Suite, and the MySQL database.},
}