@article{wang_wang_chen_menzies_cui_xie_wang_2021, title={Characterizing Crowds to Better Optimize Worker Recommendation in Crowdsourced Testing}, volume={47}, ISSN={["1939-3520"]}, url={https://doi.org/10.1109/TSE.2019.2918520}, DOI={10.1109/TSE.2019.2918520}, abstractNote={Crowdsourced testing is an emerging trend, in which test tasks are entrusted to the online crowd workers. Typically, a crowdsourced test task aims to detect as many bugs as possible within a limited budget. However not all crowd workers are equally skilled at finding bugs; Inappropriate workers may miss bugs, or report duplicate bugs, while hiring them requires nontrivial budget. Therefore, it is of great value to recommend a set of appropriate crowd workers for a test task so that more software bugs can be detected with fewer workers. This paper first presents a new characterization of crowd workers and characterizes them with testing context, capability, and domain knowledge. Based on the characterization, we then propose Multi-Objective Crowd wOrker recoMmendation approach (MOCOM), which aims at recommending a minimum number of crowd workers who could detect the maximum number of bugs for a crowdsourced testing task. Specifically, MOCOM recommends crowd workers by maximizing the bug detection probability of workers, the relevance with the test task, the diversity of workers, and minimizing the test cost. We experimentally evaluate MOCOM on 532 test tasks, and results show that MOCOM significantly outperforms five commonly-used and state-of-the-art baselines. Furthermore, MOCOM can reduce duplicate reports and recommend workers with high relevance and larger bug detection probability; because of this it can find more bugs with fewer workers.}, number={6}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Wang, Junjie and Wang, Song and Chen, Jianfeng and Menzies, Tim and Cui, Qiang and Xie, Miao and Wang, Qing}, year={2021}, month={Jun}, pages={1259–1276} } @article{shu_xia_chen_williams_menzies_2021, title={How to Better Distinguish Security Bug Reports (Using Dual Hyperparameter Optimization)}, volume={26}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-020-09906-8}, DOI={10.1007/s10664-020-09906-8}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Shu, Rui and Xia, Tianpei and Chen, Jianfeng and Williams, Laurie and Menzies, Tim}, year={2021}, month={May} } @article{yang_chen_yedida_yu_menzies_2021, title={Learning to recognize actionable static code warnings (is intrinsically easy)}, volume={26}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-021-09948-6}, DOI={10.1007/s10664-021-09948-6}, abstractNote={Static code warning tools often generate warnings that programmers ignore. Such tools can be made more useful via data mining algorithms that select the “actionable” warnings; i.e. the warnings that are usually not ignored. In this paper, we look for actionable warnings within a sample of 5,675 actionable warnings seen in 31,058 static code warnings from FindBugs. We find that data mining algorithms can find actionable warnings with remarkable ease. Specifically, a range of data mining methods (deep learners, random forests, decision tree learners, and support vector machines) all achieved very good results (recalls and AUC(TRN, TPR) measures usually over 95% and false alarms usually under 5%). Given that all these learners succeeded so easily, it is appropriate to ask if there is something about this task that is inherently easy. We report that while our data sets have up to 58 raw features, those features can be approximated by less than two underlying dimensions. For such intrinsically simple data, many different kinds of learners can generate useful models with similar performance. Based on the above, we conclude that learning to recognize actionable static code warnings is easy, using a wide range of learning algorithms, since the underlying data is intrinsically simple. If we had to pick one particular learner for this task, we would suggest linear SVMs (since, at least in our sample, that learner ran relatively quickly and achieved the best median performance) and we would not recommend deep learning (since this data is intrinsically very simple).}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Yang, Xueqi and Chen, Jianfeng and Yedida, Rahul and Yu, Zhe and Menzies, Tim}, year={2021}, month={May} } @article{chen_nair_krishna_menzies_2019, title={"Sampling" as a Baseline Optimizer for Search-Based Software Engineering}, volume={45}, ISSN={["1939-3520"]}, url={https://doi.org/10.1109/TSE.2018.2790925}, DOI={10.1109/TSE.2018.2790925}, abstractNote={Increasingly, Software Engineering (SE) researchers use search-based optimization techniques to solve SE problems with multiple conflicting objectives. These techniques often apply CPU-intensive evolutionary algorithms to explore generations of mutations to a population of candidate solutions. An alternative approach, proposed in this paper, is to start with a very large population and sample down to just the better solutions. We call this method “Sway”, short for “the sampling way”. This paper compares Sway versus state-of-the-art search-based SE tools using seven models: five software product line models; and two other software process control models (concerned with project management, effort estimation, and selection of requirements) during incremental agile development. For these models, the experiments of this paper show that Sway is competitive with corresponding state-of-the-art evolutionary algorithms while requiring orders of magnitude fewer evaluations. Considering the simplicity and effectiveness of Sway, we, therefore, propose this approach as a baseline method for search-based software engineering models, especially for models that are very slow to execute.}, number={6}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Chen, Jianfeng and Nair, Vivek and Krishna, Rahul and Menzies, Tim}, year={2019}, month={Jun}, pages={597–614} } @article{chen_chakraborty_clark_haverlock_cherian_menzies_2019, title={Predicting Breakdowns in Cloud Services (with SPIKE)}, DOI={10.1145/3338906.3340450}, abstractNote={Maintaining web-services is a mission-critical task where any down- time means loss of revenue and reputation (of being a reliable service provider). In the current competitive web services market, such a loss of reputation causes extensive loss of future revenue. To address this issue, we developed SPIKE, a data mining tool which can predict upcoming service breakdowns, half an hour into the future. Such predictions let an organization alert and assemble the tiger team to address the problem (e.g. by reconguring cloud hardware in order to reduce the likelihood of that breakdown). SPIKE utilizes (a) regression tree learning (with CART); (b) synthetic minority over-sampling (to handle how rare spikes are in our data); (c) hyperparameter optimization (to learn best settings for our local data) and (d) a technique we called “topology sampling” where training vectors are built from extensive details of an individual node plus summary details on all their neighbors. In the experiments reported here, SPIKE predicted service spikes 30 minutes into future with recalls and precision of 75% and above. Also, SPIKE performed relatively better than other widely-used learning methods (neural nets, random forests, logistic regression).}, journal={ESEC/FSE'2019: PROCEEDINGS OF THE 2019 27TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING}, author={Chen, Jianfeng and Chakraborty, Joymallya and Clark, Philip and Haverlock, Kevin and Cherian, Snehit and Menzies, Tim}, year={2019}, pages={916–924} } @article{chen_nair_menzies_2018, title={Beyond evolutionary algorithms for search-based software engineering}, volume={95}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2017.08.007}, abstractNote={Context: Evolutionary algorithms typically require a large number of evaluations (of solutions) to converge - which can be very slow and expensive to evaluate.Objective: To solve search-based software engineering (SE) problems, using fewer evaluations than evolutionary methods.Method: Instead of mutating a small population, we build a very large initial population which is then culled using a recursive bi-clustering chop approach. We evaluate this approach on multiple SE models, unconstrained as well as constrained, and compare its performance with standard evolutionary algorithms. Results: Using just a few evaluations (under 100), we can obtain comparable results to state-of-the-art evolutionary algorithms.Conclusion: Just because something works, and is widespread use, does not necessarily mean that there is no value in seeking methods to improve that method. Before undertaking search-based SE optimization tasks using traditional EAs, it is recommended to try other techniques, like those explored here, to obtain the same results with fewer evaluations.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Chen, Jianfeng and Nair, Vivek and Menzies, Tim}, year={2018}, month={Mar}, pages={281–294} } @article{nair_agrawal_chen_fu_mathew_menzies_minku_wagner_yu_2018, title={Data-Driven Search-based Software Engineering}, ISSN={["2160-1852"]}, DOI={10.1145/3196398.3196442}, abstractNote={This paper introduces Data-Driven Search-based Software Engineering (DSE), which combines insights from Mining Software Repositories (MSR) and Search-based Software Engineering (SBSE). While MSR formulates software engineering problems as data mining problems, SBSE reformulates Software Engineering (SE) problems as optimization problems and use meta-heuristic algorithms to solve them. Both MSR and SBSE share the common goal of providing insights to improve software engineering. The algorithms used in these two areas also have intrinsic relationships. We, therefore, argue that combining these two fields is useful for situations (a)~which require learning from a large data source or (b)~when optimizers need to know the lay of the land to find better solutions, faster. This paper aims to answer the following three questions: (1) What are the various topics addressed by DSE?, (2) What types of data are used by the researchers in this area?, and (3) What research approaches do researchers use? The paper briefly sets out to act as a practical guide to develop new DSE techniques and also to serve as a teaching resource. This paper also presents a resource (tiny.cc/data-se) for exploring DSE. The resource contains 89 artifacts which are related to DSE, divided into 13 groups such as requirements engineering, software product lines, software processes. All the materials in this repository have been used in recent software engineering papers; i.e., for all this material, there exist baseline results against which researchers can comparatively assess their new ideas.}, journal={2018 IEEE/ACM 15TH INTERNATIONAL CONFERENCE ON MINING SOFTWARE REPOSITORIES (MSR)}, author={Nair, Vivek and Agrawal, Amritanshu and Chen, Jianfeng and Fu, Wei and Mathew, George and Menzies, Tim and Minku, Leandro and Wagner, Markus and Yu, Zhe}, year={2018}, pages={341–352} } @article{chen_menzies_2018, title={RIOT: a Stochastic-based Method for Workflow Scheduling in the Cloud}, DOI={10.1109/CLOUD.2018.00047}, abstractNote={Cloud computing provides engineers or scientists a place to run complex computing tasks. Finding a workflows’s deployment configuration in a cloud environment is not easy. Traditional workflow scheduling algorithms were based on some heuristics, e.g. reliability greedy, cost greedy, cost-time balancing, etc., or more recently, the meta-heuristic methods, such as genetic algorithms. These methods are very slow and not suitable for rescheduling in dynamic cloud environment. This paper introduces RIOT (Randomized Instance Order Types), a stochastic based method for workflow scheduling. RIOT groups the tasks in the workflow into virtual machines via a probability model and then uses an effective surrogate based method to assess large amount of potential schedulings. Experiments in dozens of study cases showed that RIOT executes tens of times faster than traditional methods while generating comparable results to other methods.}, journal={PROCEEDINGS 2018 IEEE 11TH INTERNATIONAL CONFERENCE ON CLOUD COMPUTING (CLOUD)}, author={Chen, Jianfeng and Menzies, Tim}, year={2018}, pages={318–325} } @article{nair_menzies_chen_2016, title={An (Accidental) Exploration of Alternatives to Evolutionary Algorithms for SBSE}, volume={9962}, ISBN={["978-3-319-47105-1"]}, ISSN={["0302-9743"]}, DOI={10.1007/978-3-319-47106-8_7}, abstractNote={SBSE researchers often use an evolutionary algorithm to solve various software engineering problems. This paper explores an alternate approach of sampling. This approach is called SWAY (Samplying WAY) and finds the (near) optimal solutions to the problem by (i) creating a larger initial population and (ii) intelligently sampling the solution space to find the best subspace. Unlike evolutionary algorithms, SWAY does not use mutation or cross-over or multi-generational reasoning to find interesting subspaces but relies on the underlying dimensions of the solution space. Experiments with Software Engineering (SE) models shows that SWAY’s performance improvement is competitive with standard MOEAs while, terminating over an order of magnitude faster.}, journal={SEARCH BASED SOFTWARE ENGINEERING, SSBSE 2016}, author={Nair, Vivek and Menzies, Tim and Chen, Jianfeng}, year={2016}, pages={96–111} }