% Bibliography entries, newest first. Citation keys are stable identifiers
% used by citing documents and must not be renamed.
%
% Conventions used below:
%   - month uses the bare three-letter macros (jan .. dec) so styles can localise it;
%   - page ranges use a double hyphen;
%   - doi holds the bare DOI, url holds the resolver link;
%   - abstractNote is a non-standard field that standard styles ignore; its text
%     is kept verbatim from the export (it contains raw percent signs and
%     non-ASCII characters, so escape it before moving it to a printed field).

% Journal article: IEEE Transactions on Software Engineering 47(11).
@article{yu_theisen_williams_menzies_2021,
  author       = {Yu, Zhe and Theisen, Christopher and Williams, Laurie and Menzies, Tim},
  title        = {Improving Vulnerability Inspection Efficiency Using Active Learning},
  journal      = {{IEEE} Transactions on Software Engineering},
  year         = {2021},
  month        = nov,
  volume       = {47},
  number       = {11},
  pages        = {2401--2420},
  publisher    = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn         = {1939-3520},
  doi          = {10.1109/TSE.2019.2949275},
  url          = {https://doi.org/10.1109/TSE.2019.2949275},
  abstractNote = {Software engineers can find vulnerabilities with less effort if they are directed towards code that might contain more vulnerabilities. HARMLESS is an incremental support vector machine tool that builds a vulnerability prediction model from the source code inspected to date, then suggests what source code files should be inspected next. In this way, HARMLESS can reduce the time and effort required to achieve some desired level of recall for finding vulnerabilities. The tool also provides feedback on when to stop (at that desired level of recall) while at the same time, correcting human errors by double-checking suspicious files. This paper evaluates HARMLESS on Mozilla Firefox vulnerability data. HARMLESS found 80, 90, 95, 99 percent of the vulnerabilities by inspecting 10, 16, 20, 34 percent of the source code files. When targeting 90, 95, 99 percent recall, HARMLESS could stop after inspecting 23, 30, 47 percent of the source code files. Even when human reviewers fail to identify half of the vulnerabilities (50 percent false negative rate), HARMLESS could detect 96 percent of the missing vulnerabilities by double-checking half of the inspected files. Our results serve to highlight the very steep cost of protecting software from vulnerabilities (in our case study that cost is, for example, the human effort of inspecting 28,750 × 20% = 5,750 source code files to identify 95 percent of the vulnerabilities). While this result could benefit the mission-critical projects where human resources are available for inspecting thousands of source code files, the research challenge for future work is how to further reduce that cost. The conclusion of this paper discusses various ways that goal might be achieved.},
}

% Journal article: Information and Software Technology 119.
% NOTE(review): no pages or article number was present in the export; confirm and add.
@article{theisen_williams_2020,
  author       = {Theisen, Christopher and Williams, Laurie},
  title        = {Better together: Comparing vulnerability prediction models},
  journal      = {Information and Software Technology},
  year         = {2020},
  month        = mar,
  volume       = {119},
  issn         = {1873-6025},
  doi          = {10.1016/j.infsof.2019.106204},
  abstractNote = {Vulnerability Prediction Models (VPMs) are an approach for prioritizing security inspection and testing to find and fix vulnerabilities. VPMs have been created based on a variety of metrics and approaches, yet widespread adoption of VPM usage in practice has not occurred. Knowing which VPMs have strong prediction and which VPMs have low data requirements and resources usage would be useful for practitioners to match VPMs to their project’s needs. The low density of vulnerabilities compared to defects is also an obstacle for practical VPMs. The goal of the paper is to help security practitioners and researchers choose appropriate features for vulnerability prediction through a comparison of Vulnerability Prediction Models. We performed replications of VPMs on Mozilla Firefox with 28,750 source code files featuring 271 vulnerabilities using software metrics, text mining, and crash data. We then combined features from each VPM and reran our classifiers. We improved the F-score of the best VPM (.20 to 0.28) by combining features from three types of VPMs and using Naive Bayes as the classifier. The strongest features in the combined model were the number of times a file was involved in a crash, the number of outgoing calls from a file, and the string “nullptr”. Our results indicate that further work is needed to develop new features for input into classifiers. In addition, new analytic approaches for VPMs are needed for VPMs to be useful in practical situations, due to the low density of vulnerabilities in software (less than 1% for our dataset).},
}

% Journal article (review paper): Information and Software Technology 104.
% Exported with the misc type; typed as an article so that journal, volume and
% pages are actually rendered by standard styles.
@article{theisen_munaiah_al-zyoud_carver_meneely_williams_2018,
  author       = {Theisen, Christopher and Munaiah, Nuthan and Al-Zyoud, Mahran and Carver, Jeffrey C. and Meneely, Andrew and Williams, Laurie},
  title        = {Attack surface definitions: A systematic literature review},
  journal      = {Information and Software Technology},
  year         = {2018},
  month        = dec,
  volume       = {104},
  pages        = {94--103},
  issn         = {1873-6025},
  doi          = {10.1016/j.infsof.2018.07.008},
  abstractNote = {Michael Howard conceptualized the attack surface of a software system as a metaphor for risk assessment during the development and maintenance of software. While the phrase attack surface is used in a variety of contexts in cybersecurity, professionals have different conceptions of what the phrase means. The goal of this systematic literature review is to aid researchers and practitioners in reasoning about security in terms of attack surface by exploring various definitions of the phrase attack surface. We reviewed 644 works from prior literature, including research papers, magazine articles, and technical reports, that use the phrase attack surface and categorized them into those that provided their own definition; cited another definition; or expected the reader to intuitively understand the phrase. In our study, 71% of the papers used the phrase without defining it or citing another paper. Additionally, we found six themes of definitions for the phrase attack surface. Based on our analysis, we recommend practitioners choose a definition of attack surface appropriate for their domain based on the six themes we identified in our study.},
}

% Journal article: Few-Body Systems 58(2).
% NOTE(review): no pages or article number was present in the export; confirm and add.
@article{choi_ji_2017,
  author       = {Choi, Ho-Meoyng and Ji, Chueng-Ryong},
  title        = {Twist-3 Distribution Amplitudes of Pion in the Light-Front Quark Model},
  journal      = {Few-Body Systems},
  year         = {2017},
  month        = jan,
  volume       = {58},
  number       = {2},
  publisher    = {Springer Nature},
  issn         = {0177-7963 1432-5411},
  doi          = {10.1007/s00601-016-1208-8},
  url          = {https://doi.org/10.1007/s00601-016-1208-8},
  abstractNote = {We analyzed two twist-3 distribution amplitudes of pion, i.e. pseudoscalar $$\phi ^P_{3;\pi }(x)$$ and pseudotensor $$\phi ^\sigma _{3;\pi }(x)$$ , within the LFQM. Our LFQM descriptions both for twist-3 $$\phi ^P_{3;\pi }$$ and $$\phi ^\sigma _{3;\pi }$$ obtained from the Gaussian radial wave function not only satisfy the fundamental constraint required from the isospin symmetry, but also reproduce exactly the asymptotic forms anticipated from QCD’s conformal limit.},
}

% Conference paper: ICSE 2016 Companion.
@inproceedings{theisen_2016,
  author    = {Theisen, Christopher},
  title     = {Reusing stack traces: Automated attack surface approximation},
  booktitle = {2016 {IEEE/ACM} 38th International Conference on Software Engineering Companion ({ICSE-C})},
  year      = {2016},
  pages     = {859--862},
}

% Chapter in an edited book. Exported as a journal article with the book title
% in the journal field; typed as a contribution to a collection instead.
% NOTE(review): editor and publisher were not in the export and were added from
% the book's front matter; confirm against the publisher record.
@incollection{theisen_williams_2016,
  author    = {Theisen, Christopher and Williams, Laurie},
  title     = {Stack traces reveal attack surfaces},
  booktitle = {Perspectives on Data Science for Software Engineering},
  editor    = {Menzies, Tim and Williams, Laurie and Zimmermann, Thomas},
  publisher = {Morgan Kaufmann},
  year      = {2016},
  pages     = {73--76},
}

% Conference paper: ESEC/FSE 2015.
@inproceedings{theisen_2015,
  author    = {Theisen, Christopher},
  title     = {Automated attack surface approximation},
  booktitle = {2015 10th Joint Meeting of the European Software Engineering Conference and the {ACM} {SIGSOFT} Symposium on the Foundations of Software Engineering ({ESEC/FSE} 2015) Proceedings},
  year      = {2015},
  pages     = {1063--1065},
}