@article{tu_yu_menzies_2022, title={Better Data Labelling With EMBLEM (and how that Impacts Defect Prediction)}, volume={48}, ISSN={["1939-3520"]}, url={https://doi.org/10.1109/TSE.2020.2986415}, DOI={10.1109/TSE.2020.2986415}, abstractNote={Standard automatic methods for recognizing problematic development commits can be greatly improved via the incremental application of human+artificial expertise. In this approach, called EMBLEM, an AI tool first explores the software development process to label the commits that are most problematic. Humans then apply their expertise to check those labels (perhaps resulting in the AI updating the support vectors within its SVM learner). We recommend this human+AI partnership for several reasons. When a new domain is encountered, EMBLEM can learn better ways to label which commits refer to real problems. Also, in studies with 9 open source software projects, labelling via EMBLEM's incremental application of human+AI is substantially cheaper than existing methods ($\approx$ eight times). Further, EMBLEM is very effective: for the data sets explored here, EMBLEM's better labelling methods significantly improved $P_{opt}20$ and G-score performance in nearly all the projects studied.}, number={1}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Tu, Huy and Yu, Zhe and Menzies, Tim}, year={2022}, month={Jan}, pages={278–294} }

@article{yu_fahid_tu_menzies_2022, title={Identifying Self-Admitted Technical Debts With Jitterbug: A Two-Step Approach}, volume={48}, ISSN={["1939-3520"]}, url={https://doi.org/10.1109/TSE.2020.3031401}, DOI={10.1109/TSE.2020.3031401}, abstractNote={Keeping track of and managing Self-Admitted Technical Debts (SATDs) is important for maintaining a healthy software project, yet identifying SATDs manually requires much time and effort from human experts. Current automated solutions do not achieve satisfactory precision and recall in identifying SATDs, and so cannot fully automate the process. To solve these problems, we propose a two-step framework called Jitterbug for identifying SATDs. Jitterbug first identifies the “easy to find” SATDs automatically, with close to 100 percent precision, using a novel pattern recognition technique. Subsequently, machine learning techniques are applied to assist human experts in manually identifying the remaining “hard to find” SATDs with reduced human effort. Our simulation studies on ten software projects show that Jitterbug can identify SATDs more efficiently (with less human effort) than the prior state-of-the-art methods.}, number={5}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Yu, Zhe and Fahid, Fahmid Morshed and Tu, Huy and Menzies, Tim}, year={2022}, month={May}, pages={1676–1691} }

@article{yu_theisen_williams_menzies_2021, title={Improving Vulnerability Inspection Efficiency Using Active Learning}, volume={47}, ISSN={["1939-3520"]}, url={https://doi.org/10.1109/TSE.2019.2949275}, DOI={10.1109/TSE.2019.2949275}, abstractNote={Software engineers can find vulnerabilities with less effort if they are directed towards code that might contain more vulnerabilities. HARMLESS is an incremental support vector machine tool that builds a vulnerability prediction model from the source code inspected to date, then suggests what source code files should be inspected next. In this way, HARMLESS can reduce the time and effort required to achieve some desired level of recall for finding vulnerabilities. The tool also provides feedback on when to stop (at that desired level of recall) while, at the same time, correcting human errors by double-checking suspicious files. This paper evaluates HARMLESS on Mozilla Firefox vulnerability data. HARMLESS found 80, 90, 95, and 99 percent of the vulnerabilities by inspecting 10, 16, 20, and 34 percent of the source code files, respectively. When targeting 90, 95, and 99 percent recall, HARMLESS could stop after inspecting 23, 30, and 47 percent of the source code files. Even when human reviewers fail to identify half of the vulnerabilities (a 50 percent false negative rate), HARMLESS could detect 96 percent of the missing vulnerabilities by double-checking half of the inspected files. Our results serve to highlight the very steep cost of protecting software from vulnerabilities (in our case study, that cost is, for example, the human effort of inspecting 28,750 × 20% = 5,750 source code files to identify 95 percent of the vulnerabilities). While this result could benefit mission-critical projects where human resources are available for inspecting thousands of source code files, the research challenge for future work is how to further reduce that cost. The conclusion of this paper discusses various ways that goal might be achieved.}, number={11}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Yu, Zhe and Theisen, Christopher and Williams, Laurie and Menzies, Tim}, year={2021}, month={Nov}, pages={2401–2420} }
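EMBLEM, Jitterbug, and HARMLESS above all share one incremental human+AI loop: train a support vector machine on the labels gathered so far, ask the human to check the unlabelled items the model scores as most suspicious, then retrain. Below is a minimal sketch of that loop, assuming scikit-learn; human_label, seed_ids, budget, and batch are hypothetical stand-ins, not the authors' actual APIs.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

def active_label(texts, human_label, seed_ids, budget=100, batch=10):
    # texts: raw items (commit messages, comments, or file contents)
    # human_label(i) -> bool; seed_ids must contain both a positive and a negative
    X = TfidfVectorizer().fit_transform(texts)
    labels = {i: human_label(i) for i in seed_ids}
    while len(labels) < min(budget, len(texts)):
        ids = sorted(labels)
        model = LinearSVC().fit(X[ids], [labels[i] for i in ids])
        pool = np.array([i for i in range(len(texts)) if i not in labels])
        scores = model.decision_function(X[pool])          # distance to the hyperplane
        for i in pool[np.argsort(scores)[::-1][:batch]]:   # most suspicious first
            labels[int(i)] = human_label(int(i))           # human confirms or corrects
    return labels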
@article{yang_yu_wang_menzies_2021, title={Understanding static code warnings: An incremental AI approach}, volume={167}, ISSN={["1873-6793"]}, url={https://doi.org/10.1016/j.eswa.2020.114134}, DOI={10.1016/j.eswa.2020.114134}, abstractNote={Knowledge-based systems reason over some knowledge base. Hence, an important issue for such systems is how to acquire the knowledge needed for their inference. This paper assesses active learning methods for acquiring knowledge about “static code warnings”. Static code analysis is a widely-used method for detecting bugs and security vulnerabilities in software systems. As software becomes more complex, analysis tools report lists of increasingly complex warnings that developers need to address on a daily basis. Such static code analysis tools are usually over-cautious; i.e., they often offer many warnings about spurious issues. Previous research shows that about 35% to 91% of the warnings reported as bugs by static analysis tools are actually unactionable (i.e., warnings that developers would not act on because they are falsely suggested as bugs). Experienced developers know which errors are important and which can be safely ignored. How can we capture that experience? This paper reports on an incremental AI tool that watches humans reading false alarm reports. Using an incremental support vector machine mechanism, this AI tool can quickly learn to distinguish spurious false alarms from more serious matters that deserve further attention. In this work, nine open-source projects are employed to evaluate our proposed model on features extracted by previous researchers, identifying the actionable warnings in a priority order given by our algorithm. We observe that our model can identify over 90% of the actionable warnings while telling humans to ignore 70 to 80% of the warnings.}, journal={EXPERT SYSTEMS WITH APPLICATIONS}, author={Yang, Xueqi and Yu, Zhe and Wang, Junjie and Menzies, Tim}, year={2021}, month={Apr} }
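The headline numbers in the two entries above are effort-versus-recall trade-offs: how much of a model-ranked list a human must inspect to reach a target recall. A small, purely illustrative helper for computing such curves (ranked_hits is a hypothetical boolean list, in the model's ranking order, marking the truly vulnerable or actionable items):

def effort_at_recall(ranked_hits, targets=(0.80, 0.90, 0.95, 0.99)):
    # ranked_hits: e.g. [True, False, ...] for files in model-ranked order
    total, found, out = sum(ranked_hits), 0, {}
    for n, hit in enumerate(ranked_hits, start=1):
        found += hit
        for t in targets:
            if t not in out and found >= t * total:
                out[t] = n / len(ranked_hits)    # fraction that had to be inspected
    return out

A result such as {0.95: 0.20} corresponds to a HARMLESS-style claim of 95 percent recall after inspecting 20 percent of the files.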
@article{agrawal_menzies_minku_wagner_yu_2020, title={Better software analytics via "DUO": Data mining algorithms using/used-by optimizers}, volume={25}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-020-09808-9}, DOI={10.1007/s10664-020-09808-9}, abstractNote={This paper claims that a new field of empirical software engineering research and practice is emerging: data mining using/used-by optimizers for empirical studies, or DUO. For example, data miners can generate models that are explored by optimizers. Also, optimizers can advise how to best adjust the control parameters of a data miner. This combined approach acts like an agent leaning over the shoulder of an analyst, advising "ask this question next" or "ignore that problem, it is not relevant to your goals". Further, those agents can help us build "better" predictive models, where "better" can mean either greater predictive accuracy or faster modeling time (which, in turn, enables the exploration of a wider range of options). We also caution that the era of papers that just use data miners is coming to an end. Results obtained from an unoptimized data miner can be quickly refuted, just by applying an optimizer to produce a different (and better performing) model. Our conclusion, hence, is that for software analytics it is possible, useful, and necessary to combine data mining and optimization using DUO.}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Agrawal, Amritanshu and Menzies, Tim and Minku, Leandro L. and Wagner, Markus and Yu, Zhe}, year={2020}, month={May}, pages={2099–2136} }

@article{nair_yu_menzies_siegmund_apel_2020, title={Finding Faster Configurations Using FLASH}, volume={46}, url={https://doi.org/10.1109/TSE.2018.2870895}, DOI={10.1109/TSE.2018.2870895}, abstractNote={Finding good configurations of a software system is often challenging since the number of configuration options can be large. Software engineers often make poor configuration choices or, even worse, use a sub-optimal configuration in production, which leads to inadequate performance. To assist engineers in finding better configurations, this article introduces Flash, a sequential model-based method that explores the configuration space by reflecting on the configurations evaluated so far to determine the next best configuration to explore. Flash scales up to software systems that defeat the prior state-of-the-art model-based methods in this area. Flash runs much faster than existing methods and can solve both single-objective and multi-objective optimization problems. The central insight of this article is to use the prior knowledge of the configuration space (gained from prior runs) to choose the next promising configuration. This strategy reduces the effort (i.e., number of measurements) required to find a better configuration. We evaluate Flash on 30 scenarios based on 7 software systems and demonstrate that Flash saves effort in 100 percent of the single-objective cases and 80 percent of the multi-objective cases, by up to several orders of magnitude, compared to state-of-the-art techniques.}, number={7}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Nair, Vivek and Yu, Zhe and Menzies, Tim and Siegmund, Norbert and Apel, Sven}, year={2020}, month={Jul}, pages={794–811} }
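A minimal sketch of the sequential model-based idea behind Flash above: fit a cheap surrogate on the configurations measured so far (the paper uses a CART regression tree), then spend the next real measurement on whichever unevaluated configuration the surrogate predicts will perform best. This sketch assumes numeric configuration options and scikit-learn; measure is a hypothetical stand-in for one expensive benchmark run.

import random
from sklearn.tree import DecisionTreeRegressor

def flash_style_search(configs, measure, init=20, budget=50):
    # configs: list of numeric option vectors; lower measured value is better
    random.shuffle(configs)
    seen = {tuple(c): measure(c) for c in configs[:init]}    # small random warm-up
    while len(seen) < budget:
        rest = [c for c in configs if tuple(c) not in seen]
        if not rest:
            break
        surrogate = DecisionTreeRegressor().fit(list(seen), list(seen.values()))
        guess = min(rest, key=lambda c: surrogate.predict([c])[0])  # acquisition step
        seen[tuple(guess)] = measure(guess)                  # one real measurement
    return min(seen, key=seen.get)                           # best configuration found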
@article{liao_shi_yu_yi_wang_xiang_2019, title={An Alternating Direction Method of Multipliers Based Approach for PMU Data Recovery}, volume={10}, ISSN={["1949-3061"]}, DOI={10.1109/TSG.2018.2864176}, abstractNote={This paper presents a novel algorithm for recovering missing phasor measurement unit (PMU) data. Due to the low-rank property of PMU data, missing measurement recovery can be formulated as a low-rank matrix-completion problem. Based on maximum-margin matrix factorization, we propose an efficient algorithm, the alternating direction method of multipliers (ADMM), for solving the matrix completion problem. Compared to existing approaches, the proposed ADMM-based algorithm does not need to estimate the rank of the target data matrix and provides better computational complexity. Since PMU data are transferred over insecure and delayed communication links, we consider the case where measurements are missing from all PMU channels at several sampling instants, and we provide strategies for reshaping the matrix composed of the received PMU data for recovery. Numerical results using real PMU measurements from State Grid Jiangsu Electric Power Company illustrate the effectiveness and efficiency of the proposed approaches.}, number={4}, journal={IEEE TRANSACTIONS ON SMART GRID}, author={Liao, Mang and Shi, Di and Yu, Zhe and Yi, Zhehan and Wang, Zhiwei and Xiang, Yingmeng}, year={2019}, month={Jul}, pages={4554–4565} }
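The entry above casts PMU data recovery as low-rank matrix completion solved with ADMM over a maximum-margin matrix factorization. As a sketch in our notation (not necessarily the paper's exact formulation), the standard objective of that form is

$$\min_{U,V}\; \big\| P_\Omega(M - UV^{\top}) \big\|_F^2 + \lambda \big( \|U\|_F^2 + \|V\|_F^2 \big),$$

where $M$ is the partially observed PMU data matrix, $P_\Omega$ zeroes out the unobserved entries, and the factor pair $(U, V)$ bounds the rank of the reconstruction $UV^{\top}$ without requiring an explicit rank estimate; ADMM then alternates between subproblems in $U$, $V$, and the dual variables.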
@article{yu_menzies_2019, title={FAST(2): An intelligent assistant for finding relevant papers}, volume={120}, ISSN={["1873-6793"]}, url={https://doi.org/10.1016/j.eswa.2018.11.021}, DOI={10.1016/j.eswa.2018.11.021}, abstractNote={Literature reviews are essential for any researcher trying to keep up to date with the burgeoning software engineering literature. FAST$^2$ is a novel tool for reducing the effort required to conduct literature reviews by assisting researchers in finding the next promising paper to read (among a set of unread papers). This paper describes FAST$^2$ and tests it on four large software engineering literature reviews conducted by Wahono (2015), Hall (2012), Radjenovi\'c (2013), and Kitchenham (2017). We find that FAST$^2$ is a fast and robust tool for assisting researchers in finding relevant SE papers, one that can compensate for the errors made by humans during the review process. The effectiveness of FAST$^2$ can be attributed to three key innovations: (1) a novel way of applying external domain knowledge (a simple two or three keyword search) to guide the initial selection of papers---which helps to find relevant research papers faster with less variance; (2) an estimator of the number of remaining relevant papers yet to be found---which in practical settings can be used to decide if the reviewing process needs to be terminated; (3) a novel self-correcting classification algorithm---which automatically corrects itself in cases where the researcher wrongly classifies a paper.}, journal={EXPERT SYSTEMS WITH APPLICATIONS}, publisher={Elsevier BV}, author={Yu, Zhe and Menzies, Tim}, year={2019}, month={Apr}, pages={57–71} }

@article{yu_fahid_menzies_rothermel_patrick_cherian_2019, title={TERMINATOR: Better Automated UI Test Case Prioritization}, DOI={10.1145/3338906.3340448}, abstractNote={Automated UI testing is an important component of the continuous integration process of software development. A modern web-based UI is an amalgam of reports from dozens of microservices written by multiple teams. Queries on a page that opens another will fail if any of that page's microservices fails. As a result, the overall cost of automated UI testing is high, since the UI elements cannot be tested in isolation. For example, the entire automated UI testing suite at LexisNexis takes around 30 hours (3-5 hours on the cloud) to execute, which slows down the continuous integration process. To mitigate this problem and give developers faster feedback on their code, test case prioritization (TCP) techniques are used to reorder the automated UI test cases so that more failures can be detected earlier. Given that much of the automated UI testing is "black box" in nature, very little information (only the test case descriptions and testing results) can be utilized to prioritize these automated UI test cases. Hence, this paper evaluates 17 "black box" test case prioritization approaches that do not rely on source code information. Among these, we propose a novel TCP approach that dynamically re-prioritizes the test cases when new failures are detected, applying and adapting a state-of-the-art framework from the total recall problem. Experimental results on LexisNexis automated UI testing data show that our new approach (which we call TERMINATOR) outperforms prior state-of-the-art approaches in terms of failure detection rates, with negligible CPU overhead.}, journal={ESEC/FSE'2019: PROCEEDINGS OF THE 2019 27TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING}, author={Yu, Zhe and Fahid, Fahmid and Menzies, Tim and Rothermel, Gregg and Patrick, Kyle and Cherian, Snehit}, year={2019}, pages={883–894} }
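A rough sketch of the dynamic re-prioritization idea in TERMINATOR above: run the currently top-ranked test and, whenever a new failure appears, refit a text model on the results so far and re-rank the remaining tests so that tests whose descriptions resemble known failures run earlier. All names here (run_test, the TF-IDF features) are illustrative assumptions, not LexisNexis internals.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

def terminator_style_tcp(descriptions, run_test):
    # descriptions: test case descriptions; run_test(i) -> True if test i fails
    X = TfidfVectorizer().fit_transform(descriptions)
    order, results = list(range(len(descriptions))), {}
    while order:
        t = order.pop(0)
        results[t] = run_test(t)                              # execute the next test
        if results[t] and len(set(results.values())) == 2:    # new failure; both classes seen
            ids = sorted(results)
            model = LinearSVC().fit(X[ids], [results[i] for i in ids])
            order.sort(key=lambda i: -model.decision_function(X[i])[0])  # failure-like first
    return results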
@article{nair_agrawal_chen_fu_mathew_menzies_minku_wagner_yu_2018, title={Data-Driven Search-based Software Engineering}, ISSN={["2160-1852"]}, DOI={10.1145/3196398.3196442}, abstractNote={This paper introduces Data-Driven Search-based Software Engineering (DSE), which combines insights from Mining Software Repositories (MSR) and Search-based Software Engineering (SBSE). While MSR formulates software engineering problems as data mining problems, SBSE reformulates Software Engineering (SE) problems as optimization problems and uses meta-heuristic algorithms to solve them. Both MSR and SBSE share the common goal of providing insights to improve software engineering. The algorithms used in these two areas also have intrinsic relationships. We, therefore, argue that combining these two fields is useful for situations (a) which require learning from a large data source or (b) where optimizers need to know the lay of the land to find better solutions, faster. This paper aims to answer the following three questions: (1) What are the various topics addressed by DSE? (2) What types of data are used by the researchers in this area? (3) What research approaches do researchers use? The paper sets out to act as a practical guide for developing new DSE techniques and also to serve as a teaching resource. This paper also presents a resource (tiny.cc/data-se) for exploring DSE. The resource contains 89 artifacts related to DSE, divided into 13 groups such as requirements engineering, software product lines, and software processes. All the materials in this repository have been used in recent software engineering papers; i.e., for all this material, there exist baseline results against which researchers can comparatively assess their new ideas.}, journal={2018 IEEE/ACM 15TH INTERNATIONAL CONFERENCE ON MINING SOFTWARE REPOSITORIES (MSR)}, author={Nair, Vivek and Agrawal, Amritanshu and Chen, Jianfeng and Fu, Wei and Mathew, George and Menzies, Tim and Minku, Leandro and Wagner, Markus and Yu, Zhe}, year={2018}, pages={341–352} }

@article{yu_kraft_menzies_2018, title={Finding better active learners for faster literature reviews}, volume={23}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-017-9587-0}, DOI={10.1007/s10664-017-9587-0}, abstractNote={Literature reviews can be time-consuming and tedious to complete. By cataloging and refactoring three state-of-the-art active learning techniques from evidence-based medicine and legal electronic discovery, this paper finds and implements FASTREAD, a faster technique for studying a large corpus of documents, combining and parametrizing the most efficient active learning algorithms. This paper assesses FASTREAD using datasets generated from existing SE literature reviews (Hall, Wahono, Radjenović, Kitchenham et al.). Compared to manual methods, FASTREAD lets researchers find 95% of the relevant studies after reviewing an order of magnitude fewer papers. Compared to other state-of-the-art automatic methods, FASTREAD reviews 20–50% fewer studies while finding the same number of relevant primary studies in a systematic literature review.}, number={6}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Nature}, author={Yu, Zhe and Kraft, Nicholas A. and Menzies, Tim}, year={2018}, month={Dec}, pages={3161–3186} }

@article{yu_menzies_2018, title={Total Recall, Language Processing, and Software Engineering}, DOI={10.1145/3283812.3283818}, abstractNote={A broad class of software engineering problems can be generalized as the "total recall problem". This short paper claims that identifying and exploring total recall problems in software engineering is an important task with wide applicability. To make that case, we show that by applying and adapting state-of-the-art active learning and natural language processing algorithms for solving the total recall problem, two important software engineering tasks can also be addressed: (a) supporting large literature reviews and (b) identifying software security vulnerabilities. Furthermore, we conjecture that (c) test case prioritization and (d) static warning identification can also be generalized as, and benefit from, the total recall problem.
The widespread applicability of "total recall" to software engineering suggests that there exists some underlying framework that encompasses not just natural language processing, but a wide range of important software engineering tasks.}, journal={PROCEEDINGS OF THE 4TH ACM SIGSOFT INTERNATIONAL WORKSHOP ON NLP FOR SOFTWARE ENGINEERING (NL4SE '18)}, author={Yu, Zhe and Menzies, Tim}, year={2018}, pages={10–13} }