@article{majumder_xia_krishna_menzies_2022,
  title={Methods for Stabilizing Models Across Large Samples of Projects (with case studies on Predicting Defect and Project Health)},
  ISSN={2160-1852},
  DOI={10.1145/3524842.3527934},
  abstractNote={Despite decades of research, Software Engineering (SE) lacks widely accepted models (that offer precise, quantitative, stable predictions) about which factors most influence software quality. This paper provides a promising result showing that such stable models can be generated using a new transfer learning framework called “STABILIZER”. Given a tree of recursively clustered projects (using project meta-data), STABILIZER promotes a model upwards if it performs best in the lower clusters (stopping when the promoted model performs worse than the models seen at a lower level). The number of models found by STABILIZER is minimal: one for defect prediction (756 projects) and fewer than a dozen for project health (1628 projects). Hence, via STABILIZER, it is possible to find a few projects that can be used for transfer learning, yielding conclusions that hold across hundreds of projects at a time. Further, the models produced in this manner offer predictions that perform as well as or better than the prior state-of-the-art. To the best of our knowledge, STABILIZER is an order of magnitude faster than prior state-of-the-art transfer learners that seek conclusion stability, and these case studies are the largest demonstration of the generalizability of quantitative predictions of project quality yet reported in the SE literature. To support open science, all our scripts and data are online at https://github.com/Anonymous633671/STABILIZER.},
  journal={2022 MINING SOFTWARE REPOSITORIES CONFERENCE (MSR 2022)},
  author={Majumder, Suvodeep and Xia, Tianpei and Krishna, Rahul and Menzies, Tim},
  year={2022},
  pages={566–578}
}
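A minimal sketch of the promotion rule that the STABILIZER abstract above describes, assuming a pre-built tree of recursively clustered projects. The node attributes, train(), and score() are hypothetical placeholders, not the authors' API; the actual implementation lives in the repository linked in the entry.

    def stabilize(node, train, score):
        """Return the list of (model, score) pairs that survive promotion.

        node.children: child clusters; node.projects: projects in this cluster.
        train(projects) -> model; score(model, projects) -> higher is better.
        """
        if not node.children:                        # leaf cluster: fit locally
            model = train(node.projects)
            return [(model, score(model, node.projects))]
        survivors = []
        for child in node.children:
            survivors.extend(stabilize(child, train, score))
        # Candidate for promotion: the best-scoring model from the level below.
        best_model, best_below = max(survivors, key=lambda pair: pair[1])
        promoted = score(best_model, node.projects)  # re-score on the wider cluster
        if promoted >= best_below:                   # promotion does not hurt:
            return [(best_model, promoted)]          #   one model replaces many
        return survivors                             # promotion hurts: stop here

Run at the root, this would yield the small set of surviving models the abstract reports (a single model, in the defect-prediction case study).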
@article{krishna_menzies_layman_2017,
  title={Less is more: Minimizing code reorganization using XTREE},
  volume={88},
  ISSN={1873-6025},
  DOI={10.1016/j.infsof.2017.03.012},
  abstractNote={Context: Developers use bad code smells to guide code reorganization. Yet developers, textbooks, tools, and researchers disagree on which bad smells are important. Objective: To evaluate the likelihood that a code reorganization addressing bad code smells will improve the defect-proneness of the code. Method: We introduce XTREE, a tool that analyzes a historical log of defects seen previously in the code and generates a set of useful code changes. Any bad smell that requires changes outside of that set can be deprioritized (since there is no historical evidence that the bad smell causes any problems). Evaluation: We evaluate XTREE's recommendations for bad smell improvement against recommendations from previous work (Shatnawi, Alves, and Borges) using multiple data sets of code metrics and defect counts. Results: Code modules changed in response to XTREE's recommendations contain significantly fewer defects than modules changed in response to the recommendations of previous studies. Further, XTREE endorses changes to very few code metrics, and the bad smell recommendations (learned from previous studies) are not universal to all software projects. Conclusion: Before undertaking a code reorganization based on a bad smell report, use a tool like XTREE to check for, and ignore, any operations that are useless; i.e., ones that lack historical evidence that the change is useful. Note that this use case applies both to manual code reorganizations proposed by developers and to those conducted by automatic methods. This recommendation assumes that a historical record exists. If none does, then the results of this paper could be used as a guide.},
  journal={INFORMATION AND SOFTWARE TECHNOLOGY},
  author={Krishna, Rahul and Menzies, Tim and Layman, Lucas},
  year={2017},
  month={Aug},
  pages={53–66}
}

@article{krishna_menzies_fu_2016,
  title={Too Much Automation? The Bellwether Effect and Its Implications for Transfer Learning},
  ISSN={1527-1366},
  DOI={10.1145/2970276.2970339},
  abstractNote={“Transfer learning” is the process of translating quality predictors learned in one data set to another. Transfer learning has been the subject of much recent research. In practice, that research means changing models all the time, as transfer learners continually deliver new models to the current project. This paper offers a very simple “bellwether” transfer learner. Given N data sets, we find the one that produces the best predictions on all the others. This “bellwether” data set is then used for all subsequent predictions (or until its predictions start failing, at which point it is wise to seek another bellwether). Bellwethers are interesting since they are very simple to find (just wrap a for-loop around standard data miners). Also, they simplify the task of making general policies in SE since, as long as one bellwether remains useful, stable conclusions for N data sets can be reached just by reasoning over that bellwether. From this, we conclude that (1) the bellwether method is a useful (and very simple) transfer learning method; (2) bellwethers are a baseline against which future transfer learners should be compared; and (3) sometimes, when building increasingly complex automatic methods, researchers should pause and compare their supposedly more sophisticated method against simpler alternatives.},
  journal={2016 31ST IEEE/ACM INTERNATIONAL CONFERENCE ON AUTOMATED SOFTWARE ENGINEERING (ASE)},
  author={Krishna, Rahul and Menzies, Tim and Fu, Wei},
  year={2016},
  pages={122–131}
}
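A minimal sketch of the triage idea in the XTREE entry above (Krishna, Menzies & Layman, 2017): deprioritize any bad-smell fix whose required changes fall outside the set of code-metric changes that the project's history links to defect reduction. The record fields and the majority threshold below are illustrative assumptions, not XTREE's actual decision-tree mechanics.

    def useful_metric_changes(history):
        """history: per-release records such as
        {"deltas": {"loc": -120, "cbo": -2}, "defect_delta": -5}."""
        drops = [rec for rec in history if rec["defect_delta"] < 0]
        useful = set()
        for metric in history[0]["deltas"]:      # assumes a non-empty history
            changed = sum(1 for rec in drops if rec["deltas"][metric] != 0)
            # Keep metrics that moved in most of the defect-reducing releases.
            if drops and changed / len(drops) > 0.5:
                useful.add(metric)
        return useful

    def triage(smell_report, useful):
        """Keep only smells whose fix touches a historically useful metric."""
        return [s for s in smell_report if set(s["metrics_touched"]) & useful]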
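The bellwether search in the last entry (Krishna, Menzies & Fu, 2016) is simple enough to sketch directly; it is the abstract's "for-loop around standard data miners". The learner and scoring function here are illustrative stand-ins (any standard data miner would do), not the authors' exact experimental setup.

    from statistics import mean

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import f1_score

    def find_bellwether(datasets):
        """datasets: dict mapping name -> (X, y) feature matrix and labels."""
        scores = {}
        for source, (X_src, y_src) in datasets.items():
            model = RandomForestClassifier(random_state=0).fit(X_src, y_src)
            # Score the source's model on every *other* data set.
            transfer = [f1_score(y_tgt, model.predict(X_tgt))
                        for target, (X_tgt, y_tgt) in datasets.items()
                        if target != source]
            scores[source] = mean(transfer)
        # The bellwether is the data set whose model predicts best on the rest.
        return max(scores, key=scores.get)

find_bellwether returns the name of the data set to use for all subsequent predictions; per the abstract, the search is simply re-run if that bellwether's predictions start failing.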