@article{krishna_nair_jamshidi_menzies_2021,
  title={Whence to Learn? Transferring Knowledge in Configurable Systems Using BEETLE},
  volume={47},
  ISSN={1939-3520},
  url={https://doi.org/10.1109/TSE.2020.2983927},
  DOI={10.1109/TSE.2020.2983927},
  abstractNote={As software systems grow in complexity and the space of possible configurations increases exponentially, finding the near-optimal configuration of a software system becomes challenging. Recent approaches address this challenge by learning performance models based on a sample set of configurations. However, collecting enough sample configurations can be very expensive since each such sample requires configuring, compiling, and executing the entire system using a complex test suite. When learning on new data is too expensive, it is possible to use Transfer Learning to “transfer” old lessons to the new context. Traditional transfer learning has a number of challenges, specifically: (a) learning from excessive data takes excessive time, and (b) the performance of the models built via transfer can deteriorate as a result of learning from a poor source. To resolve these problems, we propose a novel transfer learning framework called BEETLE, which is a “bellwether”-based transfer learner that focuses on identifying and learning from the most relevant source from amongst the old data. This paper evaluates BEETLE with 57 different software configuration problems based on five software systems (a video encoder, a SAT solver, a SQL database, a high-performance C-compiler, and a streaming data analytics tool). In each of these cases, BEETLE found configurations that are as good as or better than those found by other state-of-the-art transfer learners while requiring only a fraction ($\frac{1}{7}$th) of the measurements needed by those other methods. Based on these results, we say that BEETLE is a new high-water mark in optimally configuring software.},
  number={12},
  journal={IEEE Transactions on Software Engineering},
  publisher={Institute of Electrical and Electronics Engineers (IEEE)},
  author={Krishna, Rahul and Nair, Vivek and Jamshidi, Pooyan and Menzies, Tim},
  year={2021},
  month={Dec},
  pages={2956–2972}
}
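The core move in BEETLE, as the abstract above describes it, is to spend a few target measurements identifying the most relevant old dataset (the "bellwether") and then reuse a model learned from it. Below is a minimal sketch of that idea on synthetic data, assuming numpy, scipy, and scikit-learn are available; the system names, make_system, and rank_score are all illustrative inventions, not the authors' implementation.

# Bellwether-style transfer, sketched: pick the old (source) dataset whose
# cheap model best RANKS a small probe sample of the new (target) system,
# then use that model to recommend a target configuration.
import numpy as np
from scipy.stats import kendalltau
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(0)

def make_system(shift):
    """Fake system: 200 configurations x 5 options, with a shifted response."""
    X = rng.random((200, 5))
    y = X.dot([3.0, 2.0, 1.0, 0.5, 0.1]) + shift * X[:, 0] ** 2
    return X, y

sources = {name: make_system(s) for name, s in
           [("x264", 0.2), ("sqlite", 2.0), ("sat-solver", 5.0)]}
X_tgt, y_tgt = make_system(0.3)                     # new system to configure
probe = rng.choice(len(X_tgt), 10, replace=False)   # few costly target measurements

def rank_score(model, X, y):
    """Kendall's tau between predicted and actual ranking (higher = better)."""
    return kendalltau(model.predict(X), y)[0]

# The bellwether is the source whose model ranks the target probe best.
models = {n: DecisionTreeRegressor(random_state=0).fit(X, y)
          for n, (X, y) in sources.items()}
bellwether = max(models, key=lambda n: rank_score(models[n], X_tgt[probe], y_tgt[probe]))

# Reuse the bellwether's model to pick a target configuration (lower = better).
best = int(np.argmin(models[bellwether].predict(X_tgt)))
print(f"bellwether={bellwether}, chosen config #{best}, "
      f"actual perf={y_tgt[best]:.2f}, true optimum={y_tgt.min():.2f}")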
@article{chen_nair_krishna_menzies_2019,
  title={"Sampling" as a Baseline Optimizer for Search-Based Software Engineering},
  volume={45},
  ISSN={1939-3520},
  url={https://doi.org/10.1109/TSE.2018.2790925},
  DOI={10.1109/TSE.2018.2790925},
  abstractNote={Increasingly, Software Engineering (SE) researchers use search-based optimization techniques to solve SE problems with multiple conflicting objectives. These techniques often apply CPU-intensive evolutionary algorithms to explore generations of mutations to a population of candidate solutions. An alternative approach, proposed in this paper, is to start with a very large population and sample down to just the better solutions. We call this method “Sway”, short for “the sampling way”. This paper compares Sway versus state-of-the-art search-based SE tools using seven models: five software product line models and two software process control models concerned with project management, effort estimation, and selection of requirements during incremental agile development. For these models, the experiments of this paper show that Sway is competitive with corresponding state-of-the-art evolutionary algorithms while requiring orders of magnitude fewer evaluations. Considering the simplicity and effectiveness of Sway, we therefore propose this approach as a baseline method for search-based software engineering models, especially for models that are very slow to execute.},
  number={6},
  journal={IEEE Transactions on Software Engineering},
  publisher={Institute of Electrical and Electronics Engineers (IEEE)},
  author={Chen, Jianfeng and Nair, Vivek and Krishna, Rahul and Menzies, Tim},
  year={2019},
  month={Jun},
  pages={597–614}
}
@article{chen_nair_menzies_2018,
  title={Beyond evolutionary algorithms for search-based software engineering},
  volume={95},
  ISSN={1873-6025},
  DOI={10.1016/j.infsof.2017.08.007},
  abstractNote={Context: Evolutionary algorithms typically require a large number of evaluations (of solutions) to converge, which can be very slow and expensive. Objective: To solve search-based software engineering (SE) problems using fewer evaluations than evolutionary methods. Method: Instead of mutating a small population, we build a very large initial population which is then culled using a recursive bi-clustering chop approach. We evaluate this approach on multiple SE models, unconstrained as well as constrained, and compare its performance with standard evolutionary algorithms. Results: Using just a few evaluations (under 100), we can obtain results comparable to state-of-the-art evolutionary algorithms. Conclusion: Just because something works, and is in widespread use, does not necessarily mean that there is no value in seeking ways to improve it. Before undertaking search-based SE optimization tasks using traditional EAs, it is recommended to try other techniques, like those explored here, to obtain the same results with fewer evaluations.},
  journal={Information and Software Technology},
  author={Chen, Jianfeng and Nair, Vivek and Menzies, Tim},
  year={2018},
  month={Mar},
  pages={281–294}
}
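The two entries above describe the same underlying procedure: draw a very large random population, then recursively split it and keep the half whose representative evaluates better, so only a handful of candidates are ever evaluated. Here is a minimal single-objective sketch of that recursive bi-clustering chop, assuming numpy; it illustrates the idea under simplifying assumptions (a simple line projection rather than the paper's exact split) and is not the authors' code.

# Sway, sketched: sample a huge population once, then recursively halve it.
# Per level, only two "pole" candidates are evaluated; a cheap geometric
# projection decides which half every other candidate belongs to.
import numpy as np

rng = np.random.default_rng(1)

def objective(x):
    """Stand-in for an expensive model evaluation (lower is better)."""
    return float(np.sum((x - 0.3) ** 2))

def sway(pop, enough=8):
    if len(pop) <= enough:
        return pop
    # Find two distant poles: east = far from an anchor, west = far from east.
    east = pop[np.argmax(np.linalg.norm(pop - pop[0], axis=1))]
    west = pop[np.argmax(np.linalg.norm(pop - east, axis=1))]
    # Project every candidate onto the east-west line and split at the median.
    d = west - east
    order = np.argsort((pop - east).dot(d) / d.dot(d))
    half = len(pop) // 2
    # Evaluate only the poles; recurse into the half with the better pole.
    keep = order[:half] if objective(east) <= objective(west) else order[half:]
    return sway(pop[keep], enough)

pop = rng.random((10_000, 6))        # very large initial population
survivors = sway(pop)
best = min(survivors, key=objective)
print(f"best found: {objective(best):.4f} (survivors: {len(survivors)})")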
@article{nair_agrawal_chen_fu_mathew_menzies_minku_wagner_yu_2018,
  title={Data-Driven Search-based Software Engineering},
  ISSN={2160-1852},
  DOI={10.1145/3196398.3196442},
  abstractNote={This paper introduces Data-Driven Search-based Software Engineering (DSE), which combines insights from Mining Software Repositories (MSR) and Search-based Software Engineering (SBSE). While MSR formulates software engineering problems as data mining problems, SBSE reformulates Software Engineering (SE) problems as optimization problems and uses meta-heuristic algorithms to solve them. Both MSR and SBSE share the common goal of providing insights to improve software engineering. The algorithms used in these two areas also have intrinsic relationships. We therefore argue that combining these two fields is useful for situations (a) which require learning from a large data source or (b) when optimizers need to know the lay of the land to find better solutions, faster. This paper aims to answer the following three questions: (1) What are the various topics addressed by DSE? (2) What types of data are used by researchers in this area? (3) What research approaches do researchers use? The paper is intended to act as a practical guide for developing new DSE techniques and also to serve as a teaching resource. It also presents a resource (tiny.cc/data-se) for exploring DSE. The resource contains 89 artifacts related to DSE, divided into 13 groups such as requirements engineering, software product lines, and software processes. All the materials in this repository have been used in recent software engineering papers; i.e., for all this material, there exist baseline results against which researchers can comparatively assess their new ideas.},
  journal={2018 IEEE/ACM 15th International Conference on Mining Software Repositories (MSR)},
  author={Nair, Vivek and Agrawal, Amritanshu and Chen, Jianfeng and Fu, Wei and Mathew, George and Menzies, Tim and Minku, Leandro and Wagner, Markus and Yu, Zhe},
  year={2018},
  pages={341–352}
}
@article{tu_nair_2018,
  title={Is One Hyperparameter Optimizer Enough?},
  DOI={10.1145/3278142.3278145},
  abstractNote={Hyperparameter tuning is the black art of automatically finding a good combination of control parameters for a data miner. While such tuning is widely applied in empirical Software Engineering, there has not been much discussion of which hyperparameter tuner is best for software analytics. To address this gap in the literature, this paper applied a range of hyperparameter optimizers (grid search, random search, differential evolution, and Bayesian optimization) to a defect prediction problem. Surprisingly, no hyperparameter optimizer was observed to be “best” and, for one of the two evaluation measures studied here (F-measure), hyperparameter optimization, in 50% of cases, was no better than using default configurations. We conclude that hyperparameter optimization is more nuanced than previously believed. While such optimization can certainly lead to large improvements in the performance of classifiers used in software analytics, it remains to be seen which specific optimizers should be applied to a new dataset.},
  journal={Proceedings of the 4th ACM SIGSOFT International Workshop on Software Analytics (SWAN'18)},
  author={Tu, Huy and Nair, Vivek},
  year={2018},
  pages={19–25}
}
@article{hsu_nair_menzies_freeh_2018,
  title={Micky: A Cheaper Alternative for Selecting Cloud Instances},
  DOI={10.1109/CLOUD.2018.00058},
  abstractNote={Most cloud computing optimizers explore and improve one workload at a time. When optimizing many workloads, the single-optimizer approach can be prohibitively expensive. Accordingly, we examine “collective optimizers” that concurrently explore and improve a set of workloads, significantly reducing measurement costs. Our large-scale empirical study shows that there is often a single cloud configuration which is surprisingly near-optimal for most workloads. Consequently, we create a collective optimizer, MICKY, that reformulates the task of finding the near-optimal cloud configuration as a multi-armed bandit problem. MICKY efficiently balances exploration (of new cloud configurations) and exploitation (of known good cloud configurations). Our experiments show that MICKY can achieve, on average, an 8.6-fold reduction in measurement cost compared to the state-of-the-art method while finding near-optimal solutions. Hence we propose MICKY as the basis of a practical collective optimization method for finding good cloud configurations (subject to constraints such as budget and tolerance to near-optimal configurations).},
  journal={Proceedings 2018 IEEE 11th International Conference on Cloud Computing (CLOUD)},
  author={Hsu, Chin-Jung and Nair, Vivek and Menzies, Tim and Freeh, Vincent},
  year={2018},
  pages={409–416}
}
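MICKY's reformulation above maps naturally onto a classic bandit loop: each cloud configuration is an arm, and measuring one workload on one configuration is a pull. The sketch below uses a simple epsilon-greedy policy on synthetic numbers; the abstract does not specify this exact policy, so treat the policy choice and every constant here as an assumption, not the paper's method.

# Cloud-configuration selection as a multi-armed bandit, sketched with an
# epsilon-greedy policy. Pure standard library; lower cost would work the
# same way with the sign flipped (here, higher reward = better).
import random

random.seed(42)

N_CONFIGS = 20                       # candidate cloud configurations (arms)
true_speed = [random.uniform(0.2, 1.0) for _ in range(N_CONFIGS)]

def measure(config, workload):
    """Stand-in for running one workload on one configuration (noisy reward)."""
    return true_speed[config] + random.gauss(0, 0.05)

pulls = [0] * N_CONFIGS
mean_reward = [0.0] * N_CONFIGS
EPSILON = 0.1

for workload in range(200):          # a stream of workloads to place
    if workload < N_CONFIGS:         # seed phase: try every configuration once
        arm = workload
    elif random.random() < EPSILON:  # explore a random configuration
        arm = random.randrange(N_CONFIGS)
    else:                            # exploit the best configuration so far
        arm = max(range(N_CONFIGS), key=lambda a: mean_reward[a])
    r = measure(arm, workload)
    pulls[arm] += 1
    mean_reward[arm] += (r - mean_reward[arm]) / pulls[arm]  # running mean

best = max(range(N_CONFIGS), key=lambda a: mean_reward[a])
print(f"chosen config {best}: estimated {mean_reward[best]:.2f}, "
      f"true {true_speed[best]:.2f}, optimum {max(true_speed):.2f}")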
@article{nair_menzies_siegmund_apel_2017,
  title={Using Bad Learners to Find Good Configurations},
  DOI={10.1145/3106237.3106238},
  abstractNote={Finding the optimally performing configuration of a software system for a given setting is often challenging. Recent approaches address this challenge by learning performance models based on a sample set of configurations. However, building an accurate performance model can be very expensive (and is often infeasible in practice). The central insight of this paper is that exact performance values (e.g., the response time of a software system) are not required to rank configurations and to identify the optimal one. As shown by our experiments, performance models that are cheap to learn but inaccurate (with respect to the difference between actual and predicted performance) can still be used to rank configurations and hence find the optimal configuration. This novel rank-based approach allows us to significantly reduce the cost (in terms of the number of measurements of sample configurations) as well as the time required to build performance models. We evaluate our approach with 21 scenarios based on 9 software systems and demonstrate that our approach is beneficial in 16 scenarios; for the remaining 5 scenarios, an accurate model can be built with very few samples anyway, without the need for a rank-based approach.},
  journal={ESEC/FSE 2017: Proceedings of the 2017 11th Joint Meeting on Foundations of Software Engineering},
  author={Nair, Vivek and Menzies, Tim and Siegmund, Norbert and Apel, Sven},
  year={2017},
  pages={257–267}
}
@article{nair_menzies_chen_2016,
  title={An (Accidental) Exploration of Alternatives to Evolutionary Algorithms for SBSE},
  volume={9962},
  ISBN={978-3-319-47105-1},
  ISSN={0302-9743},
  DOI={10.1007/978-3-319-47106-8_7},
  abstractNote={SBSE researchers often use an evolutionary algorithm to solve various software engineering problems. This paper explores an alternative approach based on sampling. This approach, called SWAY (Sampling WAY), finds (near) optimal solutions to a problem by (i) creating a large initial population and (ii) intelligently sampling the solution space to find the best subspace. Unlike evolutionary algorithms, SWAY does not use mutation, cross-over, or multi-generational reasoning to find interesting subspaces; it relies instead on the underlying dimensions of the solution space. Experiments with Software Engineering (SE) models show that SWAY's performance is competitive with standard MOEAs while terminating over an order of magnitude faster.},
  journal={Search Based Software Engineering, SSBSE 2016},
  author={Nair, Vivek and Menzies, Tim and Chen, Jianfeng},
  year={2016},
  pages={96–111}
}
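The rank-based insight of the "Using Bad Learners to Find Good Configurations" entry above is easy to demonstrate: a model trained on a handful of configurations can have a large absolute error yet still rank configurations well enough to pick a near-optimal one. A small self-contained sketch on synthetic data follows, assuming numpy, scipy, and scikit-learn; the data generator and metrics here are illustrative choices, not the paper's benchmark.

# Rank-based configuration selection, sketched: train on very few measured
# configurations, then judge the model by rank correlation rather than by
# absolute prediction error, and pick the predicted-best configuration.
import numpy as np
from scipy.stats import spearmanr
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(7)
X = rng.integers(0, 2, size=(500, 10)).astype(float)      # 500 binary configurations
y = 5.0 + X.dot(rng.uniform(1, 10, 10)) + rng.normal(0, 0.5, 500)  # runtime, lower = better

train = rng.choice(len(X), 25, replace=False)             # only 25 measured configs
model = DecisionTreeRegressor(random_state=0).fit(X[train], y[train])
pred = model.predict(X)

mmre = np.mean(np.abs(pred - y) / y)   # absolute accuracy: may be poor
rho = spearmanr(pred, y)[0]            # rank correlation: what actually matters
picked = int(np.argmin(pred))
print(f"MMRE={mmre:.2f}, Spearman rho={rho:.2f}")
print(f"picked config runtime={y[picked]:.1f} vs true best={y.min():.1f}")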