@inproceedings{shu_xia_williams_menzies_2022a, title={Dazzle: Using Optimized Generative Adversarial Networks to Address Security Data Class Imbalance Issue}, ISSN={2160-1852}, DOI={10.1145/3524842.3528437}, abstractNote={Background: Machine learning techniques have been widely used and demonstrate promising performance in many software security tasks such as software vulnerability prediction. However, the class ratio within software vulnerability datasets is often highly imbalanced (since the percentage of observed vulnerabilities is usually very low). Goal: To help security practitioners address software security data class imbalance issues and further help build better prediction models with resampled datasets. Method: We introduce an approach called Dazzle, which is an optimized version of conditional Wasserstein Generative Adversarial Networks with gradient penalty (cWGAN-GP). Dazzle explores the architecture hyperparameters of cWGAN-GP with Bayesian Optimization. We use Dazzle to generate minority-class samples to resample the original imbalanced training dataset. Results: We evaluate Dazzle on three software security datasets, i.e., Moodle vulnerable files, Ambari bug reports, and JavaScript function code. We show that Dazzle is practical to use and demonstrates promising improvement over existing state-of-the-art oversampling techniques such as SMOTE (e.g., an average of about 60% improvement over SMOTE in recall across all datasets). Conclusion: Based on this study, we suggest the use of optimized GANs as an alternative method for addressing class imbalance in security vulnerability data.}, booktitle={2022 Mining Software Repositories Conference (MSR 2022)}, author={Shu, Rui and Xia, Tianpei and Williams, Laurie and Menzies, Tim}, year={2022}, pages={144–155} }

@inproceedings{majumder_xia_krishna_menzies_2022, title={Methods for Stabilizing Models Across Large Samples of Projects (with case studies on Predicting Defect and Project Health)}, ISSN={2160-1852}, DOI={10.1145/3524842.3527934}, abstractNote={Despite decades of research, Software Engineering (SE) lacks widely accepted models (that offer precise quantitative stable predictions) about what factors most influence software quality. This paper provides a promising result showing that such stable models can be generated using a new transfer learning framework called “STABILIZER”. Given a tree of recursively clustered projects (using project meta-data), STABILIZER promotes a model upwards if it performs best in the lower clusters (stopping when the promoted model performs worse than the models seen at a lower level). The number of models found by STABILIZER is minimal: one for defect prediction (756 projects) and fewer than a dozen for project health (1628 projects). Hence, via STABILIZER, it is possible to find a few projects which can be used for transfer learning and to make conclusions that hold across hundreds of projects at a time. Further, the models produced in this manner offer predictions that perform as well as or better than the prior state-of-the-art. To the best of our knowledge, STABILIZER is an order of magnitude faster than prior state-of-the-art transfer learners which seek conclusion stability, and these case studies are the largest demonstration of the generalizability of quantitative predictions of project quality yet reported in the SE literature.
In order to support open science, all our scripts and data are online at https://github.com/Anonymous633671/STABILIZER.}, booktitle={2022 Mining Software Repositories Conference (MSR 2022)}, author={Majumder, Suvodeep and Xia, Tianpei and Krishna, Rahul and Menzies, Tim}, year={2022}, pages={566–578} }

@article{shu_xia_williams_menzies_2022b, title={Omni: automated ensemble with unexpected models against adversarial evasion attack}, volume={27}, ISSN={1573-7616}, url={https://doi.org/10.1007/s10664-021-10064-8}, DOI={10.1007/s10664-021-10064-8}, abstractNote={Machine learning-based security detection models have become prevalent in modern malware and intrusion detection systems. However, previous studies show that such models are susceptible to adversarial evasion attacks. In this type of attack, inputs (i.e., adversarial examples) are specially crafted by intelligent malicious adversaries, with the aim of being misclassified by existing state-of-the-art models (e.g., deep neural networks). Once the attackers can fool a classifier into thinking that a malicious input is actually benign, they can render a machine learning-based malware or intrusion detection system ineffective. To help security practitioners and researchers build models that are more robust against non-adaptive, white-box, non-targeted adversarial evasion attacks, we propose an ensemble-based approach called Omni. The main idea of Omni is to create an ensemble of “unexpected models”, i.e., models whose control hyperparameters have a large distance to the hyperparameters of an adversary’s target model, with which we then make an optimized weighted ensemble prediction. In studies with five types of adversarial evasion attacks (FGSM, BIM, JSMA, DeepFool and Carlini-Wagner) on five security datasets (NSL-KDD, CIC-IDS-2017, CSE-CIC-IDS2018, CICAndMal2017 and the Contagio PDF dataset), we show that Omni is a promising defense strategy against adversarial attacks when compared with other baseline treatments. When employing ensemble defense against adversarial evasion attacks, we suggest creating the ensemble from unexpected models that are distant from the attacker’s expected model (i.e., target model) through methods such as hyperparameter optimization.}, number={1}, journal={Empirical Software Engineering}, publisher={Springer Science and Business Media LLC}, author={Shu, Rui and Xia, Tianpei and Williams, Laurie and Menzies, Tim}, year={2022}, month={Jan} }

@article{xia_fu_shu_agrawal_menzies_2022, title={Predicting health indicators for open source projects (using hyperparameter optimization)}, volume={27}, ISSN={1573-7616}, url={https://doi.org/10.1007/s10664-022-10171-0}, DOI={10.1007/s10664-022-10171-0}, abstractNote={Software developed on public platforms is a source of data that can be used to make predictions about those projects. While individual development activity may be random and hard to predict, development behavior at the project level can be predicted with good accuracy when large groups of developers work together on software projects. To demonstrate this, we use 64,181 months of data from 1,159 GitHub projects to make various predictions about the recent status of those projects (as of April 2020). We find that traditional estimation algorithms make many mistakes. Algorithms like k-nearest neighbors (KNN), support vector regression (SVR), random forest (RFT), linear regression (LNR), and regression trees (CART) have high error rates.
But that error rate can be greatly reduced using hyperparameter optimization. To the best of our knowledge, this is the largest study yet conducted that uses recent data to predict multiple health indicators of open-source projects. To facilitate open science (and replications and extensions of this work), all our materials are available online at https://github.com/arennax/Health_Indicator_Prediction.}, number={6}, journal={Empirical Software Engineering}, author={Xia, Tianpei and Fu, Wei and Shu, Rui and Agrawal, Rishabh and Menzies, Tim}, year={2022}, month={Nov} }

@article{xia_shu_shen_menzies_2022, title={Sequential Model Optimization for Software Effort Estimation}, volume={48}, ISSN={1939-3520}, url={https://doi.org/10.1109/TSE.2020.3047072}, DOI={10.1109/TSE.2020.3047072}, abstractNote={Many methods have been proposed to estimate how much effort is required to build and maintain software. Much of that research tries to recommend a single method – an approach that makes the dubious assumption that one method can handle the diversity of software project data. To address this drawback, we apply a configuration technique called “ROME” (Rapid Optimizing Methods for Estimation), which uses sequential model-based optimization (SMO) to find what configuration settings of effort estimation techniques work best for a particular data set. We test this method using data from 1161 traditional waterfall projects and 120 contemporary projects (from GitHub). In terms of magnitude of relative error and standardized accuracy, we find that ROME achieves better performance than the state-of-the-art methods for both traditional waterfall and contemporary projects. In addition, we conclude that we should not recommend one method for estimation. Rather, it is better to search through a wide range of different methods to find what works best for the local data. To the best of our knowledge, this is the largest effort estimation experiment yet attempted and the only one to test its methods on both traditional waterfall and contemporary projects.}, number={6}, journal={IEEE Transactions on Software Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Xia, Tianpei and Shu, Rui and Shen, Xipeng and Menzies, Tim}, year={2022}, month={Jun}, pages={1994–2009} }

@article{shu_xia_chen_williams_menzies_2021, title={How to Better Distinguish Security Bug Reports (Using Dual Hyperparameter Optimization)}, volume={26}, ISSN={1573-7616}, url={https://doi.org/10.1007/s10664-020-09906-8}, DOI={10.1007/s10664-020-09906-8}, number={3}, journal={Empirical Software Engineering}, publisher={Springer Science and Business Media LLC}, author={Shu, Rui and Xia, Tianpei and Chen, Jianfeng and Williams, Laurie and Menzies, Tim}, year={2021}, month={May} }