@article{yu_carver_rothermel_menzies_2022, title={Assessing expert system-assisted literature reviews with a case study}, volume={200}, ISSN={1873-6793}, DOI={10.1016/j.eswa.2022.116958}, abstractNote={Given the large numbers of publications in software engineering, frequent literature reviews are required to keep current on work in specific areas. One tedious task in literature reviews is finding relevant studies among thousands of non-relevant search results. In theory, expert systems can assist in finding relevant work, but those systems have primarily been tested in simulations rather than in application to actual literature reviews. Hence, few researchers have faith in such expert systems. Accordingly, using a realistic case study, this paper assesses how well our state-of-the-art expert system can help with literature reviews. The assessed literature review aimed to identify test case prioritization techniques for automated UI testing, specifically from 8,349 papers on IEEE Xplore. This corpus was studied with an expert system that incorporates an incrementally updated human-in-the-loop active learning tool. Using that expert system, in three hours, we found 242 relevant papers from which we identified 12 techniques representing the state of the art in test case prioritization when source code information is not available. These results were then validated by six other graduate students manually exploring the same corpus. Without the expert system, this task would have required 53 hours and would have found 27 additional papers. That is, our expert system achieved 90% recall with 6% of the human effort required by a conventional manual method. Significantly, the same 12 state-of-the-art test case prioritization techniques were identified by both the expert system and the manual method. That is, the 27 papers missed by the expert system would not have changed the conclusion of the literature review. Hence, if this result generalizes, it endorses the use of our expert system to assist in literature reviews.}, journal={EXPERT SYSTEMS WITH APPLICATIONS}, author={Yu, Zhe and Carver, Jeffrey C. and Rothermel, Gregg and Menzies, Tim}, year={2022}, month={Aug} }

@article{bachala_tsutano_srisa-an_rothermel_dinh_2022, title={REHANA: An Efficient Program Analysis Framework to Uncover Reflective Code in Android}, volume={419}, ISBN={978-3-030-94821-4}, ISSN={1867-822X}, DOI={10.1007/978-3-030-94822-1_19}, journal={MOBILE AND UBIQUITOUS SYSTEMS: COMPUTING, NETWORKING AND SERVICES}, author={Bachala, Shakthi and Tsutano, Yutaka and Srisa-an, Witawas and Rothermel, Gregg and Dinh, Jackson and Hu, Yuanjiu}, year={2022}, pages={347–374} }

@article{hu_silva_bagheri_srisa-an_rothermel_dinh_2022, title={SEMEO: A Semantic Equivalence Analysis Framework for Obfuscated Android Applications}, volume={419}, ISBN={978-3-030-94821-4}, ISSN={1867-822X}, DOI={10.1007/978-3-030-94822-1_18}, abstractNote={Software repackaging is a common approach for creating malware. Malware authors often use software repackaging to obfuscate code containing malicious payloads. This forces analysts to spend a large amount of time filtering out benign obfuscated methods in order to locate potentially malicious methods for further analysis. If an effective mechanism for filtering out benign obfuscated methods were available, the number of methods that analysts must consider could be reduced, allowing them to be more productive.
In this paper, we present Semeo, an obfuscation-resilient approach for semantic equivalence analysis of Android apps. Semeo automatically determines, with high accuracy, whether a repackaged and obfuscated version of a method is semantically equivalent to an original version thereof. Semeo further handles widely used and complicated types of obfuscation, as well as scenarios in which multiple obfuscation types are applied in tandem. Our empirical evaluation corroborates that Semeo significantly outperforms the state of the art, achieving 100% precision in identifying semantically equivalent methods across almost all apps under analysis. Semeo consistently provides over 80% recall when one or two types of obfuscation are used and 73% recall when five different types of obfuscation are compositely applied.}, journal={MOBILE AND UBIQUITOUS SYSTEMS: COMPUTING, NETWORKING AND SERVICES}, author={Hu, Zhen and Silva, Bruno Vieira Resende E. and Bagheri, Hamid and Srisa-an, Witawas and Rothermel, Gregg and Dinh, Jackson}, year={2022}, pages={322–346} }

@article{sadri-moshkenani_bradley_rothermel_2021, title={Survey on test case generation, selection and prioritization for cyber-physical systems}, volume={32}, ISSN={0960-0833, 1099-1689}, url={http://dx.doi.org/10.1002/stvr.1794}, DOI={10.1002/stvr.1794}, abstractNote={A cyber-physical system (CPS) is a collection of computing devices that communicate with each other, operate in the target environment via actuators and interact with the physical world through sensors in a feedback loop. CPSs need to be safe and reliable and function in accordance with their requirements. Testing, focusing on a CPS model and/or its code, is the primary approach used by engineers to achieve this. Generating, selecting and prioritizing test cases that can reveal faults in CPSs, from the wide range of possible input values and stimuli that affect their operation, are of central importance in this process. To date, however, in our search of the literature, we have found no comprehensive survey of research on test case generation, selection and prioritization for CPSs. In this article, therefore, we report the results of a survey of approaches for generating, selecting and prioritizing test cases for CPSs; the results illustrate the progress that has been made on these approaches to date, the properties that characterize the approaches and the challenges that remain open in these areas of research.}, number={1}, journal={Software Testing, Verification and Reliability}, publisher={Wiley}, author={Sadri-Moshkenani, Zahra and Bradley, Justin and Rothermel, Gregg}, year={2021}, month={Sep} }

@article{zheng_bagheri_rothermel_wang_2020, title={Platinum: Reusing Constraint Solutions in Bounded Analysis of Relational Logic}, volume={12076}, ISBN={978-3-030-45233-9}, ISSN={1611-3349}, DOI={10.1007/978-3-030-45234-6_2}, abstractNote={Alloy is a lightweight specification language based on relational logic, with an analysis engine that relies on SAT solvers to automate bounded verification of specifications. In spite of its strengths, the reliance of the Alloy Analyzer on computationally heavy solvers means that it can take a significant amount of time to verify software properties, even within limited bounds. This challenge is exacerbated by the ever-evolving nature of complex software systems.
This paper presents Platinum, a technique for efficient analysis of evolving Alloy specifications that recognizes opportunities for constraint reduction and reuse of previously identified constraint solutions. The insight behind Platinum is that formula constraints recur often during the analysis of a single specification and across its revisions, and constraint solutions can be reused over sequences of analyses performed on evolving specifications. Our empirical results show that Platinum substantially reduces (by 66.4% on average) the analysis time required on specifications extracted from real-world software systems.}, journal={FUNDAMENTAL APPROACHES TO SOFTWARE ENGINEERING (FASE 2020)}, author={Zheng, Guolong and Bagheri, Hamid and Rothermel, Gregg and Wang, Jianghao}, year={2020}, pages={29–52} }

@article{kuttal_sarma_burnett_rothermel_koeppe_shepherd_2019, title={How end-user programmers debug visual web-based programs: An information foraging theory perspective}, volume={53}, ISSN={2590-1184}, url={http://dx.doi.org/10.1016/j.cola.2019.04.003}, DOI={10.1016/j.cola.2019.04.003}, abstractNote={Web-active end-user programmers squander much of their time foraging for bugs and related information in mashup programming environments as well as on the web. To analyze this foraging behavior during debugging, we adopt an Information Foraging Theory perspective. Information Foraging Theory models how humans (predators) forage for specific information (prey) in webpages or programming IDEs (patches) by following information features (cues) in the environment. We qualitatively studied the debugging behavior of 16 web-active end users. Our results show that end-user programmers spend substantial amounts (73%) of their time just foraging. Further, our study reveals new cue types and foraging strategies framed in terms of Information Foraging Theory, and it uncovers which of these helped end-user programmers succeed in their debugging efforts.}, journal={Journal of Computer Languages}, publisher={Elsevier BV}, author={Kuttal, Sandeep Kaur and Sarma, Anita and Burnett, Margaret and Rothermel, Gregg and Koeppe, Ian and Shepherd, Brooke}, year={2019}, month={Aug}, pages={22–37} }

@article{eghbali_kudva_rothermel_tahvildari_2019, title={Supervised Tie Breaking in Test Case Prioritization}, DOI={10.1109/ICSE-Companion.2019.00095}, abstractNote={Test case prioritization reorders sequences of test cases with the aim of increasing the rate at which faults can be detected. Most existing prioritization techniques employ coverage information gathered on previous test case executions to rank test cases. Existing studies in the literature, however, show that "ties" frequently occur during the prioritization procedure when coverage-based techniques are used; that is, situations often arise in which two or more candidate test cases have identical code coverage behaviors. To break such ties, most techniques resort to random re-ordering of test cases, which can degrade the rate of fault detection.
In this work, we use an ensemble of defect prediction models to guide prioritization techniques toward breaking such ties by re-ordering test cases in terms of the likelihood that they will cover fault-prone units of code.}, journal={2019 IEEE/ACM 41ST INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: COMPANION PROCEEDINGS (ICSE-COMPANION 2019)}, author={Eghbali, Sepehr and Kudva, Vinit and Rothermel, Gregg and Tahvildari, Ladan}, year={2019}, pages={242–243} }

@article{yu_fahid_menzies_rothermel_patrick_cherian_2019, title={TERMINATOR: Better Automated UI Test Case Prioritization}, DOI={10.1145/3338906.3340448}, abstractNote={Automated UI testing is an important component of the continuous integration process of software development. A modern web-based UI is an amalgam of reports from dozens of microservices written by multiple teams. Queries on a page that opens up another will fail if any of that page's microservices fails. As a result, the overall cost of automated UI testing is high, since the UI elements cannot be tested in isolation. For example, the entire automated UI testing suite at LexisNexis takes around 30 hours (3-5 hours on the cloud) to execute, which slows down the continuous integration process. To mitigate this problem and give developers faster feedback on their code, test case prioritization techniques are used to reorder the automated UI test cases so that more failures can be detected earlier. Given that much of the automated UI testing is "black box" in nature, very little information (only the test case descriptions and testing results) can be utilized to prioritize these automated UI test cases. Hence, this paper evaluates 17 "black box" test case prioritization approaches that do not rely on source code information. Among these, we propose a novel TCP approach that dynamically re-prioritizes the test cases when new failures are detected, by applying and adapting a state-of-the-art framework from the total recall problem. Experimental results on LexisNexis automated UI testing data show that our new approach (which we call TERMINATOR) outperformed prior state-of-the-art approaches in terms of failure detection rates, with negligible CPU overhead.}, journal={ESEC/FSE'2019: PROCEEDINGS OF THE 2019 27TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING}, author={Yu, Zhe and Fahid, Fahmid and Menzies, Tim and Rothermel, Gregg and Patrick, Kyle and Cherian, Snehit}, year={2019}, pages={883–894} }

@article{rothermel_2018, title={Improving Regression Testing in Continuous Integration Development Environments (Keynote)}, DOI={10.1145/3278186.3281454}, abstractNote={In continuous integration development environments, software engineers frequently integrate new or changed code with the mainline codebase. Merged code is then regression tested to help ensure that the codebase remains stable and that continuing engineering efforts can be performed more reliably. Continuous integration is advantageous because it can reduce the amount of code rework that is needed in later phases of development, and speed up overall development time. From a testing standpoint, however, continuous integration raises several challenges. Chief among these challenges are the costs, in terms of time and resources, associated with handling a constant flow of requests to execute tests.
To help with this, organizations often utilize farms of servers to run tests in parallel, or execute tests "in the cloud", but even then, test suites tend to expand to utilize all available resources, and then continue to expand beyond that. We have been investigating strategies for applying regression testing in continuous integration development environments more cost-effectively. Our strategies are based on two well-researched techniques for improving the cost-effectiveness of regression testing – regression test selection (RTS) and test case prioritization (TCP). In the continuous integration context, however, traditional RTS and TCP techniques are difficult to apply, because these techniques rely on instrumentation and analyses that cannot easily be applied to fast-arriving streams of test suites. We have thus created new forms of RTS and TCP techniques that utilize relatively lightweight analyses and can cope with the volume of test requests. To evaluate our techniques, we have conducted an empirical study on several large data sets. In this talk, I describe our techniques and the empirical results we have obtained in studying them.}, journal={PROCEEDINGS OF THE 9TH ACM SIGSOFT INTERNATIONAL WORKSHOP ON AUTOMATING TEST CASE DESIGN, SELECTION, AND EVALUATION (A-TEST '18)}, author={Rothermel, Gregg}, year={2018}, pages={1–1} }