% Publication list: one record per work, one field per line.
% Conventions used below:
%   - citation keys are unique (a letter suffix disambiguates same-author/same-year works);
%   - conference papers use the inproceedings type with the venue in booktitle;
%   - month uses the predefined three-letter macros, unbraced;
%   - page ranges use a double hyphen;
%   - words in titles that must keep their capitalisation are brace-protected;
%   - abstractNote is a non-standard field ignored by standard styles; its text is kept verbatim.

@inproceedings{shu_xia_williams_menzies_2022,
  author       = {Shu, Rui and Xia, Tianpei and Williams, Laurie and Menzies, Tim},
  title        = {{Dazzle}: Using Optimized Generative Adversarial Networks to Address Security Data Class Imbalance Issue},
  booktitle    = {2022 Mining Software Repositories Conference ({MSR} 2022)},
  pages        = {144--155},
  year         = {2022},
  issn         = {2160-1852},
  doi          = {10.1145/3524842.3528437},
  abstractNote = {Background: Machine learning techniques have been widely used and demonstrate promising performance in many software security tasks such as software vulnerability prediction. However, the class ratio within software vulnerability datasets is often highly imbalanced (since the percentage of observed vulnerability is usually very low). Goal: To help security practitioners address software security data class imbalanced issues and further help build better prediction models with resampled datasets. Method: We introduce an approach called Dazzle which is an optimized version of conditional Wasserstein Generative Adversarial Networks with gradient penalty (cWGAN-GP). Dazzle explores the architecture hyperparameters of cWGAN-GP with a novel optimizer called Bayesian Optimization. We use Dazzle to generate minority class samples to resample the original imbalanced training dataset. Results: We evaluate Dazzle with three software security datasets, i.e., Moodle vulnerable files, Ambari bug reports, and JavaScript function code. We show that Dazzle is practical to use and demonstrates promising improvement over existing state-of-the-art oversampling techniques such as SMOTE (e.g., with an average of about 60% improvement rate over SMOTE in recall among all datasets). Conclusion: Based on this study, we would suggest the use of optimized GANs as an alternative method for security vulnerability data class imbalanced issues.},
}

@article{elder_zahan_shu_metro_kozarev_menzies_williams_2022,
  author       = {Elder, Sarah and Zahan, Nusrat and Shu, Rui and Metro, Monica and Kozarev, Valeri and Menzies, Tim and Williams, Laurie},
  title        = {Do {I} really need all this work to find vulnerabilities? {An} empirical case study comparing vulnerability detection techniques on a {Java} application},
  journal      = {Empirical Software Engineering},
  volume       = {27},
  number       = {6},
  year         = {2022},
  month        = nov,
  publisher    = {Springer Science and Business Media LLC},
  issn         = {1573-7616},
  doi          = {10.1007/s10664-022-10179-6},
  url          = {http://dx.doi.org/10.1007/s10664-022-10179-6},
}

% Key carries a "b" suffix: it previously duplicated the key of the Dazzle
% record above, which BibTeX and Biber reject as a repeated entry (the second
% record was dropped and could not be cited).
@article{shu_xia_williams_menzies_2022b,
  author       = {Shu, Rui and Xia, Tianpei and Williams, Laurie and Menzies, Tim},
  title        = {{Omni}: automated ensemble with unexpected models against adversarial evasion attack},
  journal      = {Empirical Software Engineering},
  volume       = {27},
  number       = {1},
  year         = {2022},
  month        = jan,
  publisher    = {Springer Science and Business Media LLC},
  issn         = {1573-7616},
  doi          = {10.1007/s10664-021-10064-8},
  url          = {https://doi.org/10.1007/s10664-021-10064-8},
  abstractNote = {Machine learning-based security detection models have become prevalent in modern malware and intrusion detection systems. However, previous studies show that such models are susceptible to adversarial evasion attacks. In this type of attack, inputs (i.e., adversarial examples) are specially crafted by intelligent malicious adversaries, with the aim of being misclassified by existing state-of-the-art models (e.g., deep neural networks). Once the attackers can fool a classifier to think that a malicious input is actually benign, they can render a machine learning-based malware or intrusion detection system ineffective. To help security practitioners and researchers build a more robust model against non-adaptive, white-box and non-targeted adversarial evasion attacks through the idea of ensemble model. We propose an approach called Omni, the main idea of which is to explore methods that create an ensemble of “unexpected models”; i.e., models whose control hyperparameters have a large distance to the hyperparameters of an adversary’s target model, with which we then make an optimized weighted ensemble prediction. In studies with five types of adversarial evasion attacks (FGSM, BIM, JSMA, DeepFool and Carlini-Wagner) on five security datasets (NSL-KDD, CIC-IDS-2017, CSE-CIC-IDS2018, CICAndMal2017 and the Contagio PDF dataset), we show Omni is a promising approach as a defense strategy against adversarial attacks when compared with other baseline treatments. When employing ensemble defense against adversarial evasion attacks, we suggest to create ensemble with unexpected models that are distant from the attacker’s expected model (i.e., target model) through methods such as hyperparameter optimization.},
}

@article{xia_fu_shu_agrawal_menzies_2022,
  author       = {Xia, Tianpei and Fu, Wei and Shu, Rui and Agrawal, Rishabh and Menzies, Tim},
  title        = {Predicting health indicators for open source projects (using hyperparameter optimization)},
  journal      = {Empirical Software Engineering},
  volume       = {27},
  number       = {6},
  year         = {2022},
  month        = nov,
  issn         = {1573-7616},
  doi          = {10.1007/s10664-022-10171-0},
  url          = {https://doi.org/10.1007/s10664-022-10171-0},
  abstractNote = {Software developed on public platform is a source of data that can be used to make predictions about those projects. While the individual developing activity may be random and hard to predict, the developing behavior on project level can be predicted with good accuracy when large groups of developers work together on software projects. To demonstrate this, we use 64,181 months of data from 1,159 GitHub projects to make various predictions about the recent status of those projects (as of April 2020). We find that traditional estimation algorithms make many mistakes. Algorithms like k-nearest neighbors (KNN), support vector regression (SVR), random forest (RFT), linear regression (LNR), and regression trees (CART) have high error rates. But that error rate can be greatly reduced using hyperparameter optimization. To the best of our knowledge, this is the largest study yet conducted, using recent data for predicting multiple health indicators of open-source projects. To facilitate open science (and replications and extensions of this work), all our materials are available online at https://github.com/arennax/Health_Indicator_Prediction .},
}

@article{xia_shu_shen_menzies_2022,
  author       = {Xia, Tianpei and Shu, Rui and Shen, Xipeng and Menzies, Tim},
  title        = {Sequential Model Optimization for Software Effort Estimation},
  journal      = {{IEEE} Transactions on Software Engineering},
  volume       = {48},
  number       = {6},
  pages        = {1994--2009},
  year         = {2022},
  month        = jun,
  publisher    = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn         = {1939-3520},
  doi          = {10.1109/TSE.2020.3047072},
  url          = {https://doi.org/10.1109/TSE.2020.3047072},
  abstractNote = {Many methods have been proposed to estimate how much effort is required to build and maintain software. Much of that research tries to recommend a single method – an approach that makes the dubious assumption that one method can handle the diversity of software project data. To address this drawback, we apply a configuration technique called “ROME” (Rapid Optimizing Methods for Estimation), which uses sequential model-based optimization (SMO) to find what configuration settings of effort estimation techniques work best for a particular data set. We test this method using data from 1161 traditional waterfall projects and 120 contemporary projects (from GitHub). In terms of magnitude of relative error and standardized accuracy, we find that ROME achieves better performance than the state-of-the-art methods for both traditional waterfall and contemporary projects. In addition, we conclude that we should not recommend one method for estimation. Rather, it is better to search through a wide range of different methods to find what works best for the local data. To the best of our knowledge, this is the largest effort estimation experiment yet attempted and the only one to test its methods on traditional waterfall and contemporary projects.},
}

@article{shu_xia_chen_williams_menzies_2021,
  author       = {Shu, Rui and Xia, Tianpei and Chen, Jianfeng and Williams, Laurie and Menzies, Tim},
  title        = {How to Better Distinguish Security Bug Reports (Using Dual Hyperparameter Optimization)},
  journal      = {Empirical Software Engineering},
  volume       = {26},
  number       = {3},
  year         = {2021},
  month        = may,
  publisher    = {Springer Science and Business Media LLC},
  issn         = {1573-7616},
  doi          = {10.1007/s10664-020-09906-8},
  url          = {https://doi.org/10.1007/s10664-020-09906-8},
}

@inproceedings{elder_zahan_kozarev_shu_menzies_williams_2021,
  author       = {Elder, Sarah E. and Zahan, Nusrat and Kozarev, Val and Shu, Rui and Menzies, Tim and Williams, Laurie},
  title        = {Structuring a Comprehensive Software Security Course Around the {OWASP} Application Security Verification Standard},
  booktitle    = {2021 {IEEE/ACM} 43rd International Conference on Software Engineering: Joint Track on Software Engineering Education and Training ({ICSE-JSEET} 2021)},
  pages        = {95--104},
  year         = {2021},
  publisher    = {IEEE},
  doi          = {10.1109/ICSE-SEET52601.2021.00019},
  url          = {http://dx.doi.org/10.1109/icse-seet52601.2021.00019},
  abstractNote = {Lack of security expertise among software practitioners is a problem with many implications. First, there is a deficit of security professionals to meet current needs. Additionally, even practitioners who do not plan to work in security may benefit from increased understanding of security. The goal of this paper is to aid software engineering educators in designing a comprehensive software security course by sharing an experience running a software security course for the eleventh time. Through all the eleven years of running the software security course, the course objectives have been comprehensive - ranging from security testing, to secure design and coding, to security requirements to security risk management. For the first time in this eleventh year, a theme of the course assignments was to map vulnerability discovery to the security controls of the Open Web Application Security Project (OWASP) Application Security Verification Standard (ASVS). Based upon student performance on a final exploratory penetration testing project, this mapping may have increased students' depth of understanding of a wider range of security topics. The students efficiently detected 191 unique and verified vulnerabilities of 28 different Common Weakness Enumeration (CWE) types during a three-hour period in the OpenMRS project, an electronic health record application in active use.},
}

@article{shu_wang_gorski_andow_nadkarni_deshotels_gionta_enck_gu_2016,
  author       = {Shu, Rui and Wang, Peipei and Gorski, Sigmund A. and Andow, Benjamin and Nadkarni, Adwait and Deshotels, Luke and Gionta, Jason and Enck, William and Gu, Xiaohui},
  title        = {A Study of Security Isolation Techniques},
  journal      = {{ACM} Computing Surveys},
  volume       = {49},
  number       = {3},
  year         = {2016},
  month        = dec,
  publisher    = {ACM},
  issn         = {1557-7341},
  doi          = {10.1145/2988545},
  abstractNote = {Security isolation is a foundation of computing systems that enables resilience to different forms of attacks. This article seeks to understand existing security isolation techniques by systematically classifying different approaches and analyzing their properties. We provide a hierarchical classification structure for grouping different security isolation techniques. At the top level, we consider two principal aspects: mechanism and policy. Each aspect is broken down into salient dimensions that describe key properties. We break the mechanism into two dimensions, enforcement location and isolation granularity, and break the policy aspect down into three dimensions: policy generation, policy configurability, and policy lifetime. We apply our classification to a set of representative articles that cover a breadth of security isolation techniques and discuss tradeoffs among different design choices and limitations of existing approaches.},
}