@misc{imtiaz_williams_2023, title={Are Your Dependencies Code Reviewed?: Measuring Code Review Coverage in Dependency Updates}, volume={49}, ISSN={["1939-3520"]}, DOI={10.1109/TSE.2023.3319509}, abstractNote={As modern software extensively uses free open source packages as dependencies, developers have to regularly pull in new third-party code through frequent updates. However, without a proper review of every incoming change, vulnerable and malicious code can sneak into the codebase through these dependencies. The goal of this study is to aid developers in securely accepting dependency updates by measuring if the code changes in an update have passed through a code review process. We implement Depdive, an update audit tool for packages in the Crates.io, npm, PyPI, and RubyGems registries. Depdive first (i) identifies the files and the code changes in an update that cannot be traced back to the package's source repository, i.e., phantom artifacts; and then (ii) measures what portion of changes in the update, excluding the phantom artifacts, has passed through a code review process, i.e., code review coverage. Using Depdive, we present an empirical study across the latest ten updates of the most downloaded 1000 packages in each of the four registries. We further evaluated our results through a maintainer agreement survey. We find that phantom artifacts are not uncommon in the updates (20.1% of the analyzed updates had at least one phantom file). The phantoms can appear either due to legitimate reasons, such as in the case of programmatically generated files, or from accidental inclusion, such as in the case of files that are ignored in the repository. Regarding code review coverage (CRC), we find the updates are typically only partially code-reviewed (52.5% of the time). Further, only 9.0% of the packages had all their updates in our data set fully code-reviewed, indicating that even the most used packages can introduce non-reviewed code in the software supply chain. We also observe that updates either tend to have high CRC or low CRC, suggesting that packages at opposite ends of the spectrum may require a separate set of treatments.}, number={11}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Imtiaz, Nasif and Williams, Laurie}, year={2023}, month={Nov}, pages={4932–4945} } @article{zahan_shohan_harris_williams_2023, title={Do Software Security Practices Yield Fewer Vulnerabilities?}, ISSN={["2832-7640"]}, DOI={10.1109/ICSE-SEIP58684.2023.00032}, abstractNote={Due to the ever-increasing number of security breaches, practitioners are motivated to produce more secure software. In the United States, the White House Office released a memorandum on Executive Order (EO) 14028 that mandates organizations provide self-attestation of the use of secure software development practices. The OpenSSF Scorecard project allows practitioners to measure the use of software security practices automatically. However, little research has been done to determine whether the use of security practices improves package security, particularly which security practices have the biggest impact on security outcomes.
The goal of this study is to assist practitioners and researchers in making informed decisions on which security practices to adopt through the development of models between software security practice scores and security vulnerability counts. To that end, we developed five supervised machine learning models for npm and PyPI packages using the OpenSSF Scorecard security practices scores and aggregate security scores as predictors and the number of externally-reported vulnerabilities as a target variable. Our models found that four security practices (Maintained, Code Review, Branch Protection, and Security Policy) were the most important practices influencing vulnerability count. However, we had low R2 (ranging from 9% to 12%) when we tested the models to predict vulnerability counts. Additionally, we observed that the number of reported vulnerabilities increased rather than reduced as the aggregate security score of the packages increased. Both findings indicate that additional factors may influence the package vulnerability count. Other factors, such as the scarcity of vulnerability data, time to implement security practices vs. time to detect vulnerabilities, and the need for more adequate scripts to detect security practices, may impede data-driven studies from indicating that a practice can aid in the reduction of externally-reported vulnerabilities. We suggest that vulnerability count and security score data be refined such that these measures may be used to provide actionable guidance on security practices.}, journal={2023 IEEE/ACM 45TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING IN PRACTICE, ICSE-SEIP}, author={Zahan, Nusrat and Shohan, Shohanuzzaman and Harris, Dan and Williams, Laurie}, year={2023}, pages={292–303} } @article{imtiaz_khanom_williams_2023, title={Open or Sneaky? Fast or Slow? Light or Heavy?: Investigating Security Releases of Open Source Packages}, volume={49}, ISSN={["1939-3520"]}, DOI={10.1109/TSE.2022.3181010}, abstractNote={Vulnerabilities in open source packages can be a security risk for the downstream client projects. When a new vulnerability is discovered, a package should quickly release a fix in a new version, referred to as a security release in this study. The security release should be well-documented and require minimal migration effort to facilitate fast adoption by the clients. However, to what extent the open source packages follow these recommendations is not known. In this paper, we study (1) the time lag between fix and release; (2) how security fixes are documented in the release notes; (3) code change characteristics (size and semantic versioning) of the release; and (4) the time lag between the release and an advisory publication for security releases over a dataset of 4,377 security advisories across seven package ecosystems. We find that the median security release becomes available within 4 days of the corresponding fix and contains 131 lines of code (LOC) change. However, one-fourth of the releases in our data set still came at least 20 days after the fix was made. Further, we find that 61.5% of the security releases come with a release note that documents the corresponding security fix. Still, Snyk and NVD, two popular databases, take a median of 17 days (from the release) to publish a security advisory, possibly resulting in delayed notifications to the client projects.
We also find that security releases may contain breaking change(s) as 13.2% indicated backward incompatibility through semantic versioning, while 6.4% mentioned breaking change(s) in the release notes. Based on our findings, we point out areas for future work, such as private forks for security fixes and standardized practices for announcing security releases.}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Imtiaz, Nasif and Khanom, Aniqa and Williams, Laurie}, year={2023}, month={Apr}, pages={1540–1560} } @article{zahan_kanakiya_hambleton_shohan_williams_2023, title={OpenSSF Scorecard: On the Path Toward Ecosystem-Wide Automated Security Metrics}, volume={21}, ISSN={["1558-4046"]}, url={https://doi.org/10.1109/MSEC.2023.3279773}, DOI={10.1109/MSEC.2023.3279773}, abstractNote={The OpenSSF Scorecard project is an automated tool to monitor the security health of open source software. This study evaluates the applicability of the Scorecard tool and compares the security practices and gaps in the npm and PyPI ecosystems.}, number={6}, journal={IEEE SECURITY & PRIVACY}, author={Zahan, Nusrat and Kanakiya, Parth and Hambleton, Brian and Shohan, Shohanuzzaman and Williams, Laurie}, year={2023}, month={Nov}, pages={76–88} } @article{basak_neil_reaves_williams_2023, title={SecretBench: A Dataset of Software Secrets}, ISSN={["2160-1852"]}, DOI={10.1109/MSR59073.2023.00053}, abstractNote={According to GitGuardian’s monitoring of public GitHub repositories, the exposure of secrets (API keys and other credentials) increased two-fold in 2021 compared to 2020, totaling more than six million secrets. However, no benchmark dataset is publicly available for researchers and tool developers to evaluate secret detection tools that produce many false positive warnings. The goal of our paper is to aid researchers and tool developers in evaluating and improving secret detection tools by curating a benchmark dataset of secrets through a systematic collection of secrets from open-source repositories. We present a labeled dataset of source codes containing 97,479 secrets (of which 15,084 are true secrets) of various secret types extracted from 818 public GitHub repositories. The dataset covers 49 programming languages and 311 file types.}, journal={2023 IEEE/ACM 20TH INTERNATIONAL CONFERENCE ON MINING SOFTWARE REPOSITORIES, MSR}, author={Basak, Setu Kumar and Neil, Lorenzo and Reaves, Bradley and Williams, Laurie}, year={2023}, pages={347–351} } @article{massacci_williams_2023, title={Software Supply Chain Security}, volume={21}, ISSN={["1558-4046"]}, DOI={10.1109/MSEC.2023.3321189}, abstractNote={Today, the Ancient Mariner would rhyme “Code, code every where, Not any drop to trust.” This special issue of IEEE Security & Privacy highlights software supply chain security research and experiences of value to practitioners and researchers alike.}, number={6}, journal={IEEE SECURITY & PRIVACY}, author={Massacci, Fabio and Williams, Laurie}, year={2023}, month={Nov}, pages={8–10} } @article{rahman_hezaveh_williams_2023, title={What Are the Attackers Doing Now? Automating Cyberthreat Intelligence Extraction from Text on Pace with the Changing Threat Landscape: A Survey}, volume={55}, ISSN={["1557-7341"]}, DOI={10.1145/3571726}, abstractNote={ Cybersecurity researchers have contributed to the automated extraction of cyberthreat intelligence (CTI) from textual sources, such as threat reports and online articles describing cyberattack strategies, procedures, and tools.
The goal of this article is to aid cybersecurity researchers in understanding the current techniques used for cyberthreat intelligence extraction from text through a survey of relevant studies in the literature. Our work finds 11 types of extraction purposes and 7 types of textual sources for CTI extraction. We observe the technical challenges associated with obtaining available clean and labeled data for replication, validation, and further extension of the studies. We advocate for building upon the current CTI extraction work to help cybersecurity practitioners with proactive decision-making, such as threat prioritization and mitigation strategy formulation, that utilizes knowledge from past cybersecurity incidents. }, number={12}, journal={ACM COMPUTING SURVEYS}, author={Rahman, Md Rayhanur and Hezaveh, Rezvan Mahdavi and Williams, Laurie}, year={2023}, month={Dec} } @article{basak_neil_reaves_williams_2023, title={What Challenges Do Developers Face About Checked-in Secrets in Software Artifacts?}, ISSN={["0270-5257"]}, DOI={10.1109/ICSE48619.2023.00141}, abstractNote={Throughout 2021, GitGuardian's monitoring of public GitHub repositories revealed a two-fold increase in the number of secrets (database credentials, API keys, and other credentials) exposed compared to 2020, accumulating more than six million secrets. To our knowledge, the challenges developers face to avoid checked-in secrets are not yet characterized. The goal of our paper is to aid researchers and tool developers in understanding and prioritizing opportunities for future research and tool automation for mitigating checked-in secrets through an empirical investigation of challenges and solutions related to checked-in secrets. We extract 779 questions related to checked-in secrets on Stack Exchange and apply qualitative analysis to determine the challenges and the solutions posed by others for each of the challenges. We identify 27 challenges and 13 solutions. The four most common challenges, in ranked order, are: (i) store/version of secrets during deployment; (ii) store/version of secrets in source code; (iii) ignore/hide of secrets in source code; and (iv) sanitize VCS history. The three most common solutions, in ranked order, are: (i) move secrets out of source code/version control and use template config file; (ii) secret management in deployment; and (iii) use local environment variables. Our findings indicate that the same solution has been mentioned to mitigate multiple challenges. However, our findings also identify an increasing trend in questions lacking accepted solutions, substantiating the need for future research and tool automation on managing secrets.}, journal={2023 IEEE/ACM 45TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING, ICSE}, author={Basak, Setu Kumar and Neil, Lorenzo and Reaves, Bradley and Williams, Laurie}, year={2023}, pages={1635–1647} } @article{shu_xia_williams_menzies_2022, title={Dazzle: Using Optimized Generative Adversarial Networks to Address Security Data Class Imbalance Issue}, ISSN={["2160-1852"]}, DOI={10.1145/3524842.3528437}, abstractNote={Background: Machine learning techniques have been widely used and demonstrate promising performance in many software security tasks such as software vulnerability prediction. However, the class ratio within software vulnerability datasets is often highly imbalanced (since the percentage of observed vulnerability is usually very low).
Goal: To help security practitioners address software security data class imbalanced issues and further help build better prediction models with resampled datasets. Method: We introduce an approach called Dazzle which is an optimized version of conditional Wasserstein Generative Adversarial Networks with gradient penalty (cWGAN-GP). Dazzle explores the architecture hyperparameters of cWGAN-GP with a novel optimizer called Bayesian Optimization. We use Dazzle to generate minority class samples to resample the original imbalanced training dataset. Results: We evaluate Dazzle with three software security datasets, i.e., Moodle vulnerable files, Ambari bug reports, and JavaScript function code. We show that Dazzle is practical to use and demonstrates promising improvement over existing state-of-the-art oversampling techniques such as SMOTE (e.g., with an average of about 60% improvement rate over SMOTE in recall among all datasets). Conclusion: Based on this study, we would suggest the use of optimized GANs as an alternative method for security vulnerability data class imbalanced issues.}, journal={2022 MINING SOFTWARE REPOSITORIES CONFERENCE (MSR 2022)}, author={Shu, Rui and Xia, Tianpei and Williams, Laurie and Menzies, Tim}, year={2022}, pages={144–155} } @article{elder_zahan_shu_metro_kozarev_menzies_williams_2022, title={Do I really need all this work to find vulnerabilities? An empirical case study comparing vulnerability detection techniques on a Java application}, volume={27}, ISSN={["1573-7616"]}, url={http://dx.doi.org/10.1007/s10664-022-10179-6}, DOI={10.1007/s10664-022-10179-6}, number={6}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Elder, Sarah and Zahan, Nusrat and Shu, Rui and Metro, Monica and Kozarev, Valeri and Menzies, Tim and Williams, Laurie}, year={2022}, month={Nov} } @article{weir_migues_williams_2022, title={Exploring the Shift in Security Responsibility}, ISSN={["1558-4046"]}, DOI={10.1109/MSEC.2022.3150238}, abstractNote={The Building Security in Maturity Model survey has been tracking software security activity adoption in 211 companies over 12 years. This article explores how organizations should adapt to the latest security challenges.}, journal={IEEE SECURITY & PRIVACY}, author={Weir, Charles and Migues, Samuel and Williams, Laurie}, year={2022}, month={Mar} } @article{mahdavi-hezaveh_ajmeri_williams_2022, title={Feature toggles as code: Heuristics and metrics for structuring feature toggles}, volume={145}, ISSN={["1873-6025"]}, url={https://research-information.bris.ac.uk/en/publications/cf3267f4-7537-48f9-9f5d-39eae5b5ced6}, DOI={10.1016/j.infsof.2021.106813}, abstractNote={Using feature toggles is a technique to turn a feature either on or off in program code by checking the value of a variable in a conditional statement. This technique is increasingly used by software practitioners to support continuous integration and continuous delivery (CI/CD). However, using feature toggles may increase code complexity, create dead code, and decrease the quality of a codebase. The goal of this research is to aid software practitioners in structuring feature toggles in the codebase by proposing and evaluating a set of heuristics and corresponding complexity, comprehensibility, and maintainability metrics based upon an empirical study of open source repositories. We identified 80 GitHub repositories that use feature toggles in their development cycle.
We conducted a qualitative analysis using 60 of the 80 repositories to identify heuristics and metrics. Then, we conducted a survey of practitioners of 80 repositories to obtain their feedback that the proposed heuristics can be used to guide the structure of feature toggles and to reduce technical debt. We also conducted a case study of all 80 repositories to analyze relations between heuristics and metrics. From the qualitative analysis, we proposed 7 heuristics to guide structuring feature toggles and identified 12 metrics to support the principles embodied in the heuristics. Our survey result shows that practitioners agree that managing feature toggles is difficult, and using identified heuristics can reduce technical debt. Based on our case study, we find a relationship between the adoption of heuristics and the values of metrics. Our results support that practitioners should have self-descriptive feature toggles, use feature toggles sparingly, avoid duplicate code in using feature toggles, and ensure complete removal of a feature toggle.}, note={Funding Information: The first author is funded by North Carolina State University. The second author is funded by the National Security Agency (Science of Security Lablet) at North Carolina State University. We thank all the reviewers for their valuable feedback. We also thank the members of the RealSearch group. Publisher Copyright: \textcopyright 2022 Elsevier B.V.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, publisher={Amsterdam:Elsevier}, author={Mahdavi-Hezaveh, Rezvan and Ajmeri, Nirav and Williams, Laurie}, year={2022}, month={May} } @article{shu_xia_williams_menzies_2022, title={Omni: automated ensemble with unexpected models against adversarial evasion attack}, volume={27}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-021-10064-8}, DOI={10.1007/s10664-021-10064-8}, abstractNote={Machine learning-based security detection models have become prevalent in modern malware and intrusion detection systems. However, previous studies show that such models are susceptible to adversarial evasion attacks. In this type of attack, inputs (i.e., adversarial examples) are specially crafted by intelligent malicious adversaries, with the aim of being misclassified by existing state-of-the-art models (e.g., deep neural networks). Once the attackers can fool a classifier to think that a malicious input is actually benign, they can render a machine learning-based malware or intrusion detection system ineffective. To help security practitioners and researchers build a more robust model against non-adaptive, white-box and non-targeted adversarial evasion attacks through the idea of ensemble models, we propose an approach called Omni, the main idea of which is to explore methods that create an ensemble of “unexpected models”; i.e., models whose control hyperparameters have a large distance to the hyperparameters of an adversary’s target model, with which we then make an optimized weighted ensemble prediction. In studies with five types of adversarial evasion attacks (FGSM, BIM, JSMA, DeepFool and Carlini-Wagner) on five security datasets (NSL-KDD, CIC-IDS-2017, CSE-CIC-IDS2018, CICAndMal2017 and the Contagio PDF dataset), we show Omni is a promising approach as a defense strategy against adversarial attacks when compared with other baseline treatments.
When employing ensemble defense against adversarial evasion attacks, we suggest creating ensembles with unexpected models that are distant from the attacker's expected model (i.e., target model) through methods such as hyperparameter optimization.}, number={1}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Shu, Rui and Xia, Tianpei and Williams, Laurie and Menzies, Tim}, year={2022}, month={Jan} } @article{enck_williams_2022, title={Top Five Challenges in Software Supply Chain Security: Observations From 30 Industry and Government Organizations}, volume={20}, ISSN={["1558-4046"]}, DOI={10.1109/MSEC.2022.3142338}, abstractNote={Software is complex, not only due to the code within a given project, but also due to the vast ecosystem of dependencies and transitive dependencies upon which each project relies. Recent years have observed a sharp uptick of attacks on the software supply chain, spurring invigorated interest by industry and government alike. We held three summits with a diverse set of organizations and report on the top five challenges in software supply chain security.}, number={2}, journal={IEEE SECURITY & PRIVACY}, publisher={IEEE}, author={Enck, William and Williams, Laurie}, year={2022}, pages={96–100} } @article{zahan_zimmermann_godefroid_murphy_maddila_williams_2022, title={What are Weak Links in the npm Supply Chain?}, url={http://dx.doi.org/10.1145/3510457.3513044}, DOI={10.1145/3510457.3513044}, abstractNote={Modern software development frequently uses third-party packages, raising the concern of supply chain security attacks. Many attackers target popular package managers, like npm, and their users with supply chain attacks. In 2021 there was a 650% year-on-year growth in security attacks by exploiting Open Source Software's supply chain. Proactive approaches are needed to predict package vulnerability to high-risk supply chain attacks. The goal of this work is to help software developers and security specialists in measuring npm supply chain weak link signals to prevent future supply chain attacks by empirically studying npm package metadata. In this paper, we analyzed the metadata of 1.63 million JavaScript npm packages. We propose six signals of security weaknesses in a software supply chain, such as the presence of install scripts, maintainer accounts associated with an expired email domain, and inactive packages with inactive maintainers. One of our case studies identified 11 malicious packages from the install scripts signal. We also found 2,818 maintainer email addresses associated with expired domains, allowing an attacker to hijack 8,494 packages by taking over the npm accounts. We obtained feedback on our weak link signals through a survey responded to by 470 npm package developers. The majority of the developers supported three out of our six proposed weak link signals. The developers also indicated that they would want to be notified about weak link signals before using third-party packages.
Additionally, we discussed eight new signals suggested by package developers.}, journal={2022 ACM/IEEE 44TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING IN PRACTICE (ICSE-SEIP 2022)}, publisher={ACM}, author={Zahan, Nusrat and Zimmermann, Thomas and Godefroid, Patrice and Murphy, Brendan and Maddila, Chandra and Williams, Laurie}, year={2022}, pages={331–340} } @article{basak_neil_reaves_williams_2022, title={What are the Practices for Secret Management in Software Artifacts?}, DOI={10.1109/SecDev53368.2022.00026}, abstractNote={Throughout 2021, GitGuardian's monitoring of public GitHub repositories revealed a two-fold increase in the number of secrets (database credentials, API keys, and other credentials) exposed compared to 2020, accumulating more than six million secrets. A systematic derivation of practices for managing secrets can help practitioners in secure development. The goal of our paper is to aid practitioners in avoiding the exposure of secrets by identifying secret management practices in software artifacts through a systematic derivation of practices disseminated in Internet artifacts. We conduct a grey literature review of Internet artifacts, such as blog articles and question and answer posts. We identify 24 practices grouped in six categories comprised of developer and organizational practices. Our findings indicate that using local environment variables and external secret management services are the most recommended practices to move secrets out of source code and to securely store secrets. We also observe that using version control system scanning tools and employing short-lived secrets are the most recommended practices to avoid accidentally committing secrets and limit secret exposure, respectively.}, journal={2022 IEEE SECURE DEVELOPMENT CONFERENCE (SECDEV 2022)}, author={Basak, Setu Kumar and Neil, Lorenzo and Reaves, Bradley and Williams, Laurie}, year={2022}, pages={69–76} } @article{rahman_imtiaz_storey_williams_2022, title={Why secret detection tools are not enough: It's not just about false positives-An industrial case study}, volume={27}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-021-10109-y}, abstractNote={Checked-in secrets in version-controlled software projects pose security risks to software and services. Secret detection tools can identify the presence of secrets in the code, commit changesets, and project version control history. As these tools can generate false positives, developers are provided with mechanisms to bypass the warnings generated from these tools. Providing this override mechanism can result in developers sometimes exposing secrets in software repositories. The goal of this article is to aid software security practitioners in understanding why secrets are checked into repositories, despite being warned by tools, through an industrial case study of analysis of usage data of a secret detection tool and a survey of developers who bypassed the tool alert. In this case study, we analyzed the usage data of a checked-in secret detection tool used widely by a software company and we surveyed developers who bypassed the warnings generated by the tool. From the case study, we found that, despite developers classifying 50% of the warnings as false positives, developers also bypassed the warnings due to time constraints, working with non-shipping projects, technical challenges of eliminating secrets completely from the version control history, technical debts, and perceptions that check-ins are low risk.
We advocate that practitioners and researchers investigate the findings of our study further to improve secret detection tools and related development practices. We also advocate that organizations should insert secondary checks, as is done by the company we studied, to capture occasions where developers incorrectly bypass secret detection tools.}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Rahman, Md Rayhanur and Imtiaz, Nasif and Storey, Margaret-Anne and Williams, Laurie}, year={2022}, month={May} } @article{rahman_williams_2021, title={Different Kind of Smells: Security Smells in Infrastructure as Code Scripts}, volume={19}, ISSN={["1558-4046"]}, DOI={10.1109/MSEC.2021.3065190}, abstractNote={In this article, we summarize our recent research findings related to infrastructure as code (IaC) scripts, where we have identified 67,801 occurrences of security smells that include 9,175 hard-coded passwords. We hope our work will facilitate awareness among practitioners who use IaC.}, number={3}, journal={IEEE SECURITY & PRIVACY}, author={Rahman, Akond and Williams, Laurie}, year={2021}, pages={33–41} } @article{shu_xia_chen_williams_menzies_2021, title={How to Better Distinguish Security Bug Reports (Using Dual Hyperparameter Optimization)}, volume={26}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-020-09906-8}, DOI={10.1007/s10664-020-09906-8}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Shu, Rui and Xia, Tianpei and Chen, Jianfeng and Williams, Laurie and Menzies, Tim}, year={2021}, month={May} } @article{yu_theisen_williams_menzies_2021, title={Improving Vulnerability Inspection Efficiency Using Active Learning}, volume={47}, ISSN={["1939-3520"]}, url={https://doi.org/10.1109/TSE.2019.2949275}, DOI={10.1109/TSE.2019.2949275}, abstractNote={Software engineers can find vulnerabilities with less effort if they are directed towards code that might contain more vulnerabilities. HARMLESS is an incremental support vector machine tool that builds a vulnerability prediction model from the source code inspected to date, then suggests what source code files should be inspected next. In this way, HARMLESS can reduce the time and effort required to achieve some desired level of recall for finding vulnerabilities. The tool also provides feedback on when to stop (at that desired level of recall) while, at the same time, correcting human errors by double-checking suspicious files. This paper evaluates HARMLESS on Mozilla Firefox vulnerability data. HARMLESS found 80, 90, 95, 99 percent of the vulnerabilities by inspecting 10, 16, 20, 34 percent of the source code files. When targeting 90, 95, 99 percent recall, HARMLESS could stop after inspecting 23, 30, 47 percent of the source code files. Even when human reviewers fail to identify half of the vulnerabilities (50 percent false negative rate), HARMLESS could detect 96 percent of the missing vulnerabilities by double-checking half of the inspected files. Our results serve to highlight the very steep cost of protecting software from vulnerabilities (in our case study that cost is, for example, the human effort of inspecting 28,750 × 20% = 5,750 source code files to identify 95 percent of the vulnerabilities). While this result could benefit the mission-critical projects where human resources are available for inspecting thousands of source code files, the research challenge for future work is how to further reduce that cost.
The conclusion of this paper discusses various ways that goal might be achieved.}, number={11}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Yu, Zhe and Theisen, Christopher and Williams, Laurie and Menzies, Tim}, year={2021}, month={Nov}, pages={2401–2420} } @article{weir_migues_ware_williams_2021, title={Infiltrating Security into Development: Exploring the World's Largest Software Security Study}, DOI={10.1145/3468264.3473926}, journal={PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)}, author={Weir, Charles and Migues, Sammy and Ware, Mike and Williams, Laurie}, year={2021}, pages={1326–1336} } @article{rahman_rahman_parnin_williams_2021, title={Security Smells in Ansible and Chef Scripts: A Replication Study}, volume={30}, ISSN={["1557-7392"]}, DOI={10.1145/3408897}, abstractNote={ Context: Security smells are recurring coding patterns that are indicative of security weakness and require further inspection. As infrastructure as code (IaC) scripts, such as Ansible and Chef scripts, are used to provision cloud-based servers and systems at scale, security smells in IaC scripts could be used to enable malicious users to exploit vulnerabilities in the provisioned systems. Goal: The goal of this article is to help practitioners avoid insecure coding practices while developing infrastructure as code scripts through an empirical study of security smells in Ansible and Chef scripts. Methodology: We conduct a replication study where we apply qualitative analysis with 1,956 IaC scripts to identify security smells for IaC scripts written in two languages: Ansible and Chef. We construct a static analysis tool called Security Linter for Ansible and Chef scripts (SLAC) to automatically identify security smells in 50,323 scripts collected from 813 open source software repositories. We also submit bug reports for 1,000 randomly selected smell occurrences. Results: We identify two security smells not reported in prior work: missing default in case statement and no integrity check. By applying SLAC we identify 46,600 occurrences of security smells that include 7,849 hard-coded passwords. We observe agreement for 65 of the responded 94 bug reports, which suggests the relevance of security smells for Ansible and Chef scripts amongst practitioners. Conclusion: We observe security smells to be prevalent in Ansible and Chef scripts, similarly to that of the Puppet scripts. We recommend that practitioners rigorously inspect the presence of the identified security smells in Ansible and Chef scripts using (i) code review, and (ii) static analysis tools. }, number={1}, journal={ACM TRANSACTIONS ON SOFTWARE ENGINEERING AND METHODOLOGY}, author={Rahman, Akond and Rahman, Md Rayhanur and Parnin, Chris and Williams, Laurie}, year={2021}, month={Jan} } @article{mahdavi-hezaveh_dremann_williams_2021, title={Software development with feature toggles: practices used by practitioners}, volume={26}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-020-09901-z}, abstractNote={Background: Using feature toggles is a technique that allows developers to either turn a feature on or off with a variable in a conditional statement. Feature toggles are increasingly used by software companies to facilitate continuous integration and continuous delivery.
However, using feature toggles inappropriately may cause problems which can have a severe impact, such as code complexity, dead code, and system failure. For example, the erroneous repurposing of an old feature toggle caused Knight Capital Group, an American global financial services firm, to go bankrupt due to the implications of the resultant incorrect system behavior. Aim: The goal of this research project is to aid software practitioners in the use of practices to support software development with feature toggles through an empirical study of feature toggle practice usage by practitioners. Method: We conducted a qualitative analysis of 99 artifacts from the grey literature and 10 peer-reviewed papers about feature toggles. We conducted a survey of practitioners from 38 companies. Results: We identified 17 practices in 4 categories: Management practices, Initialization practices, Implementation practices, and Clean-up practices. We observed that all of the survey respondents use a dedicated tool to create and manage feature toggles in their code. Documenting feature toggle's metadata, setting up the default value for feature toggles, and logging the changes made on feature toggles are also frequently-observed practices. Conclusions: The feature toggle development practices discovered and enumerated in this work can help practitioners more effectively use feature toggles. This work can enable future mining of code repositories to automatically identify feature toggle practices.}, number={1}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Mahdavi-Hezaveh, Rezvan and Dremann, Jacob and Williams, Laurie}, year={2021}, month={Jan} } @article{elder_zahan_kozarev_shu_menzies_williams_2021, title={Structuring a Comprehensive Software Security Course Around the OWASP Application Security Verification Standard}, url={http://dx.doi.org/10.1109/icse-seet52601.2021.00019}, DOI={10.1109/ICSE-SEET52601.2021.00019}, abstractNote={Lack of security expertise among software practitioners is a problem with many implications. First, there is a deficit of security professionals to meet current needs. Additionally, even practitioners who do not plan to work in security may benefit from increased understanding of security. The goal of this paper is to aid software engineering educators in designing a comprehensive software security course by sharing an experience running a software security course for the eleventh time. Through all the eleven years of running the software security course, the course objectives have been comprehensive - ranging from security testing, to secure design and coding, to security requirements, to security risk management. For the first time in this eleventh year, a theme of the course assignments was to map vulnerability discovery to the security controls of the Open Web Application Security Project (OWASP) Application Security Verification Standard (ASVS). Based upon student performance on a final exploratory penetration testing project, this mapping may have increased students' depth of understanding of a wider range of security topics. The students efficiently detected 191 unique and verified vulnerabilities of 28 different Common Weakness Enumeration (CWE) types during a three-hour period in the OpenMRS project, an electronic health record application in active use.}, journal={2021 IEEE/ACM 43RD INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: JOINT TRACK ON SOFTWARE ENGINEERING EDUCATION AND TRAINING (ICSE-JSEET 2021)}, publisher={IEEE}, author={Elder, Sarah E.
and Zahan, Nusrat and Kozarev, Val and Shu, Rui and Menzies, Tim and Williams, Laurie}, year={2021}, pages={95–104} } @article{rahman_mahdavi-hezaveh_williams_2020, title={A Literature Review on Mining Cyberthreat Intelligence from Unstructured Texts}, ISSN={["2375-9232"]}, DOI={10.1109/ICDMW51313.2020.00075}, abstractNote={Cyberthreat defense mechanisms have become more proactive these days, thus leading to the increasing incorporation of cyberthreat intelligence (CTI). Cybersecurity researchers and vendors are powering the CTI with large volumes of unstructured textual data containing information on threat events, threat techniques, and tactics. Hence, extracting cyberthreat-relevant information through text mining is an effective way to obtain actionable CTI to thwart cyberattacks. The goal of this research is to aid cybersecurity researchers in understanding the source, purpose, and approaches for mining cyberthreat intelligence from unstructured text through a literature review of peer-reviewed studies on this topic. We perform a literature review to identify and analyze existing research on mining CTI. By using search queries in the bibliographic databases, 28,484 articles are found. From those, 38 studies are identified through the filtering criteria, which include removing duplicates, non-English, non-peer-reviewed articles, and articles not about mining CTI. We find that the most prominent sources of unstructured threat data are the threat reports, Twitter feeds, and posts from hackers and security experts. We also observe that security researchers mined CTI from unstructured sources for Indicator of Compromise (IoC) extraction, threat-related topic extraction, and event detection. Finally, natural language processing (NLP)-based approaches (topic classification, keyword identification, and semantic relationship extraction among the keywords) are mostly used in the selected studies to mine CTI information from unstructured threat sources.}, journal={20TH IEEE INTERNATIONAL CONFERENCE ON DATA MINING WORKSHOPS (ICDMW 2020)}, author={Rahman, Md Rayhanur and Mahdavi-Hezaveh, Rezvan and Williams, Laurie}, year={2020}, pages={516–525} } @article{theisen_williams_2020, title={Better together: Comparing vulnerability prediction models}, volume={119}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2019.106204}, abstractNote={Vulnerability Prediction Models (VPMs) are an approach for prioritizing security inspection and testing to find and fix vulnerabilities. VPMs have been created based on a variety of metrics and approaches, yet widespread adoption of VPM usage in practice has not occurred. Knowing which VPMs have strong prediction and which VPMs have low data requirements and resources usage would be useful for practitioners to match VPMs to their project's needs. The low density of vulnerabilities compared to defects is also an obstacle for practical VPMs. The goal of the paper is to help security practitioners and researchers choose appropriate features for vulnerability prediction through a comparison of Vulnerability Prediction Models. We performed replications of VPMs on Mozilla Firefox with 28,750 source code files featuring 271 vulnerabilities using software metrics, text mining, and crash data. We then combined features from each VPM and reran our classifiers. We improved the F-score of the best VPM (0.20 to 0.28) by combining features from three types of VPMs and using Naive Bayes as the classifier.
The strongest features in the combined model were the number of times a file was involved in a crash, the number of outgoing calls from a file, and the string “nullptr”. Our results indicate that further work is needed to develop new features for input into classifiers. In addition, new analytic approaches for VPMs are needed for VPMs to be useful in practical situations, due to the low density of vulnerabilities in software (less than 1% for our dataset).}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Theisen, Christopher and Williams, Laurie}, year={2020}, month={Mar} } @article{rahman_farhana_parnin_williams_2020, title={Gang of Eight: A Defect Taxonomy for Infrastructure as Code Scripts}, ISSN={["0270-5257"]}, DOI={10.1145/3377811.3380409}, abstractNote={Defects in infrastructure as code (IaC) scripts can have serious consequences, for example, creating large-scale system outages. A taxonomy of IaC defects can be useful for understanding the nature of defects, and identifying activities needed to fix and prevent defects in IaC scripts. The goal of this paper is to help practitioners improve the quality of infrastructure as code (IaC) scripts by developing a defect taxonomy for IaC scripts through qualitative analysis. We develop a taxonomy of IaC defects by applying qualitative analysis on 1,448 defect-related commits collected from open source software (OSS) repositories of the Openstack organization. We conduct a survey with 66 practitioners to assess if they agree with the identified defect categories included in our taxonomy. We quantify the frequency of identified defect categories by analyzing 80,425 commits collected from 291 OSS repositories spanning across 2005 to 2019. Our defect taxonomy for IaC consists of eight categories, including a category specific to IaC called idempotency (i.e., defects that lead to incorrect system provisioning when the same IaC script is executed multiple times). We observe the surveyed 66 practitioners to agree most with idempotency. The most frequent defect category is configuration data i.e., providing erroneous configuration data in IaC scripts. Our taxonomy and the quantified frequency of the defect categories may help in advancing the science of IaC script quality.}, journal={2020 ACM/IEEE 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2020)}, author={Rahman, Akond and Farhana, Effat and Parnin, Chris and Williams, Laurie}, year={2020}, pages={752–764} } @article{rahman_farhana_williams_2020, title={The 'as code' activities: development anti-patterns for infrastructure as code}, volume={25}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-020-09841-8}, abstractNote={The ‘as code’ suffix in infrastructure as code (IaC) refers to applying software engineering activities, such as version control, to maintain IaC scripts. Without the application of these activities, defects that can have serious consequences may be introduced in IaC scripts. A systematic investigation of the development anti-patterns for IaC scripts can guide practitioners in identifying activities to avoid defects in IaC scripts. Development anti-patterns are recurring development activities that relate with defective IaC scripts. The goal of this paper is to help practitioners improve the quality of infrastructure as code (IaC) scripts by identifying development activities that relate with defective IaC scripts. 
We identify development anti-patterns by adopting a mixed-methods approach, where we apply quantitative analysis with 2,138 open source IaC scripts and conduct a survey with 51 practitioners. We observe five development activities to be related with defective IaC scripts from our quantitative analysis. We identify five development anti-patterns, namely 'boss is not around', 'many cooks spoil', 'minors are spoiler', 'silos', and 'unfocused contribution'. Our identified development anti-patterns suggest the importance of 'as code' activities in IaC because these activities are related to the quality of IaC scripts.}, number={5}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Rahman, Akond and Farhana, Effat and Williams, Laurie}, year={2020}, month={Sep}, pages={3430–3467} } @article{rahman_mandavi-hezaveh_williams_2019, title={A systematic mapping study of infrastructure as code research}, volume={108}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2018.12.004}, abstractNote={Context: Infrastructure as code (IaC) is the practice to automatically configure system dependencies and to provision local and remote instances. Practitioners consider IaC as a fundamental pillar to implement DevOps practices, which helps them to rapidly deliver software and services to end-users. Information technology (IT) organizations, such as Github, Mozilla, Facebook, Google, and Netflix, have adopted IaC. A systematic mapping study on existing IaC research can help researchers to identify potential research areas related to IaC, for example, the areas of defects and security flaws that may occur in IaC scripts. Objective: The objective of this paper is to help researchers identify research areas related to infrastructure as code (IaC) by conducting a systematic mapping study of IaC-related research. Methodology: We conduct our research study by searching six scholar databases. We collect a set of 33,887 publications by using seven search strings. By systematically applying inclusion and exclusion criteria, we identify 31 publications related to IaC. We identify topics addressed in these publications by applying qualitative analysis. Results: We identify four topics studied in IaC-related publications: (i) framework/tool for infrastructure as code; (ii) use of infrastructure as code; (iii) empirical study related to infrastructure as code; and (iv) testing in infrastructure as code. According to our analysis, 52% of the studied 31 publications propose a framework or tool to implement the practice of IaC or extend the functionality of an existing IaC tool. Conclusion: As defects and security flaws can have serious consequences for the deployment and development environments in DevOps, along with other topics, we observe the need for research studies that will study defects and security flaws for IaC.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Rahman, Akond and Mahdavi-Hezaveh, Rezvan and Williams, Laurie}, year={2019}, month={Apr}, pages={65–77} } @article{imtiaz_murphy_williams_2019, title={How Do Developers Act on Static Analysis Alerts? An Empirical Study of Coverity Usage}, ISSN={["1071-9458"]}, DOI={10.1109/ISSRE.2019.00040}, abstractNote={Static analysis tools (SATs) often fall short of developer satisfaction despite their many benefits. An understanding of how developers in the real world act on the alerts detected by SATs can help improve the utility of these tools and determine future research directions.
The goal of this paper is to aid researchers and tool makers in improving the utility of static analysis tools through an empirical study of developer action on the alerts detected by Coverity, a state-of-the-art static analysis tool. In this paper, we analyze five open source projects as case studies (Linux, Firefox, Samba, Kodi, and Ovirt-engine) that have been actively using Coverity over a period of at least five years. We investigate the alert occurrences and developer triage of the alerts from the Coverity database; identify the alerts that were fixed through code changes (i.e., actionable) by mining the commit history of the projects; analyze the time an alert remains in the code base (i.e., lifespan) and the complexity of code changes (i.e., fix complexity) in fixing the alert. We find that 27.4% to 49.5% (median: 36.7%) of the alerts are actionable across projects, a rate higher than previously reported. We also find that the fixes of Coverity alerts are generally low in complexity (2 to 7 lines of code changes in the affected file, median: 4). However, developers still take from 36 to 245 days (median: 96) to fix these alerts. Finally, our data suggest that severity and fix complexity may correlate with an alert's lifespan in some of the projects.}, journal={2019 IEEE 30TH INTERNATIONAL SYMPOSIUM ON SOFTWARE RELIABILITY ENGINEERING (ISSRE)}, author={Imtiaz, Nasif and Murphy, Brendan and Williams, Laurie}, year={2019}, pages={323–333} } @article{rahman_rahman_williams_2019, title={Share, But Be Aware: Security Smells in Python Gists}, ISSN={["1063-6773"]}, DOI={10.1109/ICSME.2019.00087}, abstractNote={Github Gist is a service provided by Github which is used by developers to share code snippets. While sharing, developers may inadvertently introduce security smells in code snippets as well, such as hard-coded passwords. Security smells are recurrent coding patterns that are indicative of security weaknesses, which could potentially lead to security breaches. The goal of this paper is to help software practitioners avoid insecure coding practices through an empirical study of security smells in publicly-available GitHub Gists. Through static analysis, we found 13 types of security smells with 4,403 occurrences in 5,822 publicly-available Python Gists. 1,817 of those Gists, which is around 31%, have at least one security smell, including 689 instances of hard-coded secrets. We also found no significant relation between the presence of these security smells and the reputation of the Gist author. Based on our findings, we advocate for increased awareness and rigorous code review efforts related to software security for Github Gists so that propagation of insecure coding practices is mitigated.}, journal={2019 IEEE INTERNATIONAL CONFERENCE ON SOFTWARE MAINTENANCE AND EVOLUTION (ICSME 2019)}, author={Rahman, Md Rayhanur and Rahman, Akond and Williams, Laurie}, year={2019}, pages={536–540} } @article{rahman_williams_2019, title={Source code properties of defective infrastructure as code scripts}, volume={112}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2019.04.013}, abstractNote={Context: In continuous deployment, software and services are rapidly deployed to end-users using an automated deployment pipeline. Defects in infrastructure as code (IaC) scripts can hinder the reliability of the automated deployment pipeline.
We hypothesize that certain properties of IaC source code, such as lines of code and hard-coded strings used as configuration values, show correlation with defective IaC scripts. Objective: The objective of this paper is to help practitioners in increasing the quality of infrastructure as code (IaC) scripts through an empirical study that identifies source code properties of defective IaC scripts. Methodology: We apply qualitative analysis on defect-related commits mined from open source software repositories to identify source code properties that correlate with defective IaC scripts. Next, we survey practitioners to assess their agreement level with the identified properties. We also construct defect prediction models using the identified properties for 2,439 scripts collected from four datasets. Results: We identify 10 source code properties that correlate with defective IaC scripts. Of the identified 10 properties, we observe lines of code and hard-coded string to show the strongest correlation with defective IaC scripts. Hard-coded string is the property of specifying configuration value as hard-coded string. According to our survey analysis, the majority of the practitioners show agreement for two properties: include, the property of executing external modules or scripts, and hard-coded string. Using the identified properties, our constructed defect prediction models show a precision of 0.70~0.78 and a recall of 0.54~0.67.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Rahman, Akond and Williams, Laurie}, year={2019}, month={Aug}, pages={148–163} } @article{rahman_parnin_williams_2019, title={The Seven Sins: Security Smells in Infrastructure as Code Scripts}, ISSN={["0270-5257"]}, DOI={10.1109/ICSE.2019.00033}, abstractNote={Practitioners use infrastructure as code (IaC) scripts to provision servers and development environments. While developing IaC scripts, practitioners may inadvertently introduce security smells. Security smells are recurring coding patterns that are indicative of security weakness and can potentially lead to security breaches. The goal of this paper is to help practitioners avoid insecure coding practices while developing infrastructure as code (IaC) scripts through an empirical study of security smells in IaC scripts. We apply qualitative analysis on 1,726 IaC scripts to identify seven security smells. Next, we implement and validate a static analysis tool called Security Linter for Infrastructure as Code scripts (SLIC) to identify the occurrence of each smell in 15,232 IaC scripts collected from 293 open source repositories. We identify 21,201 occurrences of security smells that include 1,326 occurrences of hard-coded passwords. We submitted bug reports for 1,000 randomly-selected security smell occurrences. We obtain 212 responses to these bug reports, of which 148 occurrences were accepted by the development teams to be fixed. We observe security smells can have a long lifetime, e.g., a hard-coded secret can persist for as long as 98 months, with a median lifetime of 20 months.}, journal={2019 IEEE/ACM 41ST INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2019)}, author={Rahman, Akond and Parnin, Chris and Williams, Laurie}, year={2019}, pages={164–175} } @article{morrison_pandita_xiao_chillarege_williams_2018, title={Are Vulnerabilities Discovered and Resolved like Other Defects?}, DOI={10.1145/3180155.3182553}, abstractNote={Context: Software defect data has long been used to drive software development process improvement.
If security defects (i.e., vulnerabilities) are discovered and resolved by different software development practices than non-security defects, the knowledge of that distinction could be applied to drive process improvement.}, journal={PROCEEDINGS 2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE)}, author={Morrison, Patrick J. and Pandita, Rahul and Xiao, Xusheng and Chillarege, Ram and Williams, Laurie}, year={2018}, pages={498–498} } @article{morrison_pandita_xiao_chillarege_williams_2018, title={Are vulnerabilities discovered and resolved like other defects?}, volume={23}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-017-9541-1}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Morrison, Patrick J. and Pandita, Rahul and Xiao, Xusheng and Chillarege, Ram and Williams, Laurie}, year={2018}, month={Jun}, pages={1383–1421} } @misc{theisen_munaiah_al-zyoud_carver_meneely_williams_2018, title={Attack surface definitions: A systematic literature review}, volume={104}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2018.07.008}, abstractNote={Michael Howard conceptualized the attack surface of a software system as a metaphor for risk assessment during the development and maintenance of software. While the phrase attack surface is used in a variety of contexts in cybersecurity, professionals have different conceptions of what the phrase means. The goal of this systematic literature review is to aid researchers and practitioners in reasoning about security in terms of attack surface by exploring various definitions of the phrase attack surface. We reviewed 644 works from prior literature, including research papers, magazine articles, and technical reports, that use the phrase attack surface and categorized them into those that provided their own definition; cited another definition; or expected the reader to intuitively understand the phrase. In our study, 71% of the papers used the phrase without defining it or citing another paper. Additionally, we found six themes of definitions for the phrase attack surface. Based on our analysis, we recommend practitioners choose a definition of attack surface appropriate for their domain based on the six themes we identified in our study.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Theisen, Christopher and Munaiah, Nuthan and Al-Zyoud, Mahran and Carver, Jeffrey C. and Meneely, Andrew and Williams, Laurie}, year={2018}, month={Dec}, pages={94–103} } @article{rahman_williams_2018, title={Characterizing Defective Configuration Scripts Used for Continuous Deployment}, ISSN={["2381-2834"]}, DOI={10.1109/icst.2018.00014}, abstractNote={In software engineering, validation and verification (V&V) resources are limited and characterization of defective software source files can help in efficiently allocating V&V resources. Similar to software source files, defects occur in the scripts used to automatically manage configurations and software deployment infrastructure, often known as infrastructure as code (IaC) scripts. Defects in IaC scripts can have dire consequences, for example, creating large-scale system outages. Identifying the characteristics of defective IaC scripts can help in mitigating these defects by allocating V&V efforts efficiently based upon these characteristics. The objective of this paper is to help software practitioners prioritize validation and verification efforts for infrastructure as code (IaC) scripts by identifying the characteristics of defective IaC scripts.
Researchers have previously extracted text features to characterize defective software source files written in general purpose programming languages. We investigate if text features can be used to identify properties that characterize defective IaC scripts. We use two text mining techniques to extract text features from IaC scripts: the bag-of-words technique and the term frequency-inverse document frequency (TF-IDF) technique. Using the extracted features and applying grounded theory, we characterize defective IaC scripts. We also use the text features to build defect prediction models with tuned statistical learners. We mine open source repositories from Mozilla, Openstack, and Wikimedia Commons to construct three case studies and evaluate our methodology. We identify three properties that characterize defective IaC scripts: filesystem operations, infrastructure provisioning, and managing user accounts. Using the bag-of-words technique, we observe a median F-Measure of 0.74, 0.71, and 0.73, respectively, for Mozilla, Openstack, and Wikimedia Commons. Using the TF-IDF technique, we observe a median F-Measure of 0.72, 0.74, and 0.70, respectively, for Mozilla, Openstack, and Wikimedia Commons.}, journal={2018 IEEE 11TH INTERNATIONAL CONFERENCE ON SOFTWARE TESTING, VERIFICATION AND VALIDATION (ICST)}, author={Rahman, Akond and Williams, Laurie}, year={2018}, pages={34–45} } @article{williams_2018, title={Continuously Integrating Security}, DOI={10.1145/3194707.3194717}, abstractNote={Continuous deployment is a software engineering process where incremental software changes are automatically tested and frequently deployed to production environments. With continuous deployment, the elapsed time for a change made by a developer to reach a customer can now be measured in days or even hours. To understand the emerging practices surrounding continuous deployment, three annual one-day Continuous Deployment Summits have been held at Facebook, Netflix, and Google in 2015-2017, where 17 companies have described how they used continuous deployment. This short paper will describe the practices and environment used by these companies as they strive to develop secure and privacy-preserving products while making ultra-fast changes.}, journal={2018 IEEE/ACM 1ST INTERNATIONAL WORKSHOP ON SECURITY AWARENESS FROM DESIGN TO DEPLOYMENT (SEAD)}, author={Williams, Laurie}, year={2018}, pages={1–2} } @article{morrison_oyetoyan_williams_2018, title={Identifying Security Issues in Software Development: Are Keywords Enough?}, ISSN={["2574-1926"]}, DOI={10.1145/3183440.3195040}, abstractNote={Identifying security issues before attackers do has become a critical concern for software development teams and software users. One approach to identifying security issues in software development artifacts is to use lists of security-related keywords to build classifiers for detecting security issues. However, generic keyword lists may miss project-specific vocabulary. The goal of this research is to support researchers and practitioners in identifying security issues in software development project artifacts by defining and evaluating a systematic scheme for identifying project-specific security vocabularies that can be used for keyword-based classification. We sampled and manually classified 5400 messages from the Apache Derby, Apache Camel, and Dolibarr projects to form an oracle.
In addition, we collected each project's publicly disclosed vulnerability data from the CVE and mapped them to the project's dataset to create a CVE-labelled dataset. We extracted project-specific vocabulary from each project and built classifiers to predict security-related issues in both the oracle and CVE dataset. In our data, we found that the vocabularies of each project included project-specific terms in addition to generic security keywords. Classifiers based on the project-specific security vocabularies increased recall performance by at least double (at varying costs to precision) compared with the previously published keyword lists we evaluated.}, journal={PROCEEDINGS 2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING - COMPANION (ICSE-COMPANION)}, author={Morrison, Patrick and Oyetoyan, Tosin Daniel and Williams, Laurie}, year={2018}, pages={426–427} } @article{morrison_moye_pandita_williams_2018, title={Mapping the field of software life cycle security metrics}, volume={102}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2018.05.011}, abstractNote={Context: Practitioners establish a piece of software's security objectives during the software development process. To support control and assessment, practitioners and researchers seek to measure security risks and mitigations during software development projects. Metrics provide one means for assessing whether software security objectives have been achieved. A catalog of security metrics for the software development life cycle could assist practitioners in choosing appropriate metrics, and researchers in identifying opportunities for refinement of security measurement. Objective: The goal of this research is to support practitioner and researcher use of security measurement in the software life cycle by cataloging security metrics presented in the literature, their validation, and the subjects they measure. Method: We conducted a systematic mapping study, beginning with 4818 papers and narrowing down to 71 papers reporting on 324 unique security metrics. For each metric, we identified the subject being measured, how the metric has been validated, and how the metric is used. We categorized the metrics, and give examples of metrics for each category. Results: In our data, 85% of security metrics have been proposed and evaluated solely by their authors, leaving room for replication and confirmation through field studies. Approximately 60% of the metrics have been empirically evaluated, by their authors or by others. The available metrics are weighted heavily toward the implementation and operations phases, with relatively few metrics for requirements, design, and testing phases of software development. Some artifacts and processes remain unmeasured. Measured by phase, Testing received the least attention, with 1.5% of the metrics. Conclusions: At present, the primary application of security metrics to the software development life cycle in the literature is to study the relationship between properties of source code and reported vulnerabilities. The most-cited and most used metric, vulnerability count, has multiple definitions and operationalizations. We suggest that researchers must check vulnerability count definitions when making comparisons between papers. In addition to refining vulnerability measurement, we see research opportunities for greater attention to metrics for the requirements, design, and testing phases of development.
We conjecture from our data that the field of software life cycle security metrics has yet to converge on an accepted set of metrics.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Morrison, Patrick and Moye, David and Pandita, Rahul and Williams, Laurie}, year={2018}, month={Oct}, pages={146–159} } @article{rahman_stallings_williams_2018, title={Poster: Defect Prediction Metrics for Infrastructure as Code Scripts in DevOps}, ISSN={["2574-1926"]}, DOI={10.1145/3183440.3195034}, abstractNote={Use of infrastructure as code (IaC) scripts helps software teams manage their configuration and infrastructure automatically. Information technology (IT) organizations use IaC scripts to create and manage automated deployment pipelines to deliver services rapidly. IaC scripts can be defective, resulting in dire consequences, such as creating wide-scale service outages for end-users. Prediction of defective IaC scripts can help teams to mitigate defects in these scripts by prioritizing their inspection efforts. The goal of this paper is to help software practitioners in prioritizing their inspection efforts for infrastructure as code (IaC) scripts by proposing defect prediction model-related metrics. IaC scripts use domain specific languages (DSL) that are fundamentally different from object-oriented programming (OOP) languages. Hence, the OOP-based metrics that researchers used in defect prediction might not be applicable for IaC scripts. We apply Constructivist Grounded Theory (CGT) on defect-related commits mined from version control systems to identify metrics suitable for IaC scripts. By applying CGT, we identify 18 metrics. Of these metrics, 13 are related to IaC, for example, count of string occurrences in a script. Four of the identified metrics are related to churn, and one metric is lines of code.}, journal={PROCEEDINGS 2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING - COMPANION (ICSE-COMPANION)}, author={Rahman, Akond and Stallings, Jonathan and Williams, Laurie}, year={2018}, pages={414–415} } @article{rahman_partho_morrison_williams_2018, title={What Questions Do Programmers Ask About Configuration as Code?}, DOI={10.1145/3194760.3194769}, abstractNote={Configuration as code (CaC) tools, such as Ansible and Puppet, help software teams to implement continuous deployment and deploy software changes rapidly. CaC tools are growing in popularity, yet the challenges programmers encounter with CaC tools have not been characterized. A systematic investigation of what questions are asked by programmers can help us identify potential technical challenges about CaC and can aid in successful use of CaC tools. The goal of this paper is to help current and potential configuration as code (CaC) adoptees in identifying the challenges related to CaC through an analysis of questions asked by programmers on a major question and answer website. We extract 2,758 Puppet-related questions asked by programmers from January 2010 to December 2016, posted on Stack Overflow. We apply qualitative analysis to identify the questions programmers ask about Puppet. We also investigate the trends in questions with unsatisfactory answers, and changes in question categories over time. From our empirical study, we synthesize 16 major categories of questions. The three most common question categories are: (i) syntax errors; (ii) provisioning instances; and (iii) assessing Puppet's feasibility to accomplish certain tasks.
Three categories of questions that yield the most unsatisfactory answers are (i) installation, (ii) security, and (iii) data separation.}, journal={PROCEEDINGS 2018 IEEE/ACM 4TH INTERNATIONAL WORKSHOP ON RAPID CONTINUOUS SOFTWARE ENGINEERING (RCOSE)}, author={Rahman, Akond and Partho, Asif and Morrison, Patrick and Williams, Laurie}, year={2018}, pages={16–22} } @article{williams_baldwin_2017, title={Highlights of the ACM Student Research Competition}, volume={60}, ISSN={["1557-7317"]}, DOI={10.1145/3145811}, abstractNote={No abstract available.}, number={11}, journal={COMMUNICATIONS OF THE ACM}, author={Williams, Laurie and Baldwin, Doug}, year={2017}, month={Nov}, pages={5–5} } @article{riaz_king_slankas_williams_massacci_quesada-lopez_jenkins_2017, title={Identifying the implied: Findings from three differentiated replications on the use of security requirements templates}, volume={22}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-016-9481-1}, DOI={10.1007/s10664-016-9481-1}, abstractNote={Identifying security requirements early on can lay the foundation for secure software development. Security requirements are often implied by existing functional requirements but are mostly left unspecified. The Security Discoverer (SD) process automatically identifies security implications of individual requirements sentences and suggests applicable security requirements templates. The objective of this research is to support requirements analysts in identifying security requirements by automating the suggestion of security requirements templates that are implied by existing functional requirements. We conducted a controlled experiment in a graduate-level security class at North Carolina State University (NCSU) to evaluate the SD process in eliciting implied security requirements in 2014. We have subsequently conducted three differentiated replications to evaluate the generalizability and applicability of the initial findings. The replications were conducted across three countries at the University of Trento, NCSU, and the University of Costa Rica. We evaluated the responses of the 205 total participants in terms of quality, coverage, relevance and efficiency. We also develop shared insights regarding the impact of context factors, such as time, motivation and support, on the study outcomes and provide lessons learned in conducting the replications. The treatment group, using the SD process, performed significantly better than the control group (at p-value <0.05) in terms of the coverage of the identified security requirements and efficiency of the requirements elicitation process in two of the three replications, supporting the findings of the original study. Participants in the treatment group identified 84% more security requirements in the oracle as compared to the control group on average. Overall, 80% of the 111 participants in the treatment group were favorable towards the use of templates in identifying security requirements. Our qualitative findings indicate that participants may be able to differentiate between relevant and extraneous template suggestions and be more inclined to fill in the templates with additional support. Security requirements templates capture the security knowledge of multiple experts and can support the security requirements elicitation process when automatically suggested, making the implied security requirements more evident.
However, individual participants may still miss out on identifying a number of security requirements due to empirical constraints as well as potential limitations on knowledge and security expertise.}, number={4}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Riaz, Maria and King, Jason and Slankas, John and Williams, Laurie and Massacci, Fabio and Quesada-Lopez, Christian and Jenkins, Marcelo}, year={2017}, month={Aug}, pages={2127–2178} } @article{pandita_jetley_sudarsan_menzies_williams_2017, title={TMAP: Discovering relevant API methods through text mining of API documentation}, volume={29}, ISSN={2047-7473}, url={http://dx.doi.org/10.1002/SMR.1845}, DOI={10.1002/SMR.1845}, number={12}, journal={Journal of Software: Evolution and Process}, publisher={Wiley}, author={Pandita, Rahul and Jetley, Raoul and Sudarsan, Sithu and Menzies, Timothy and Williams, Laurie}, year={2017}, month={Feb}, pages={e1845} } @article{williams_2017, title={The Rising Tide Lifts All Boats: The Advancement of Science in Cyber Security (Invited Talk)}, DOI={10.1145/3106237.3121272}, abstractNote={Stolen passwords, compromised medical records, taking the internet out through video cameras: cybersecurity breaches are in the news every day. Despite all this, the practice of cybersecurity today is generally reactive rather than proactive. That is, rather than improving their defenses in advance, organizations react to attacks once they have occurred by patching the individual vulnerabilities that led to those attacks. Researchers engineer solutions to the latest form of attack. What we need, instead, are scientifically founded design principles for building in security mechanisms from the beginning, giving protection against broad classes of attacks. Through scientific measurement, we can improve our ability to make decisions that are evidence-based, proactive, and long-sighted. Recognizing these needs, the US National Security Agency (NSA) devised a new framework for collaborative research, the "Lablet" structure, with the intent to more aggressively advance the science of cybersecurity. A key motivation was to catalyze a shift in relevant areas towards a more organized and cohesive scientific community. The NSA named Carnegie Mellon University, North Carolina State University, and the University of Illinois – Urbana Champaign its initial Lablets in 2011, and added the University of Maryland in 2014. This talk will reflect on the structure of the collaborative research efforts of the Lablets, lessons learned in the transition to more scientific concepts to cybersecurity, research results in solving five hard security problems, and methods that are being used for the measurement of scientific progress of the Lablet research.}, journal={ESEC/FSE 2017: PROCEEDINGS OF THE 2017 11TH JOINT MEETING ON FOUNDATIONS OF SOFTWARE ENGINEERING}, author={Williams, Laurie}, year={2017}, pages={1–1} } @article{parnin_helms_atlee_boughton_ghattas_glover_holman_micco_murphy_savor_et al._2017, title={The Top 10 Adages in Continuous Deployment}, volume={34}, ISSN={["1937-4194"]}, DOI={10.1109/ms.2017.86}, abstractNote={Continuous deployment involves automatically testing incremental software changes and frequently deploying them to production environments. With it, developers' changes can reach customers in days or even hours. Such ultrafast changes create a new reality in software development.
To understand the emerging practices surrounding continuous deployment, researchers facilitated a one-day Continuous Deployment Summit at the Facebook campus in July 2015, at which participants from 10 companies described how they used continuous deployment. From the resulting conversation, the researchers derived 10 adages about continuous-deployment practices. These adages represent a working set of approaches and beliefs that guide current practice and establish a tangible target for empirical validation by the research community.}, number={3}, journal={IEEE SOFTWARE}, author={Parnin, Chris and Helms, Eric and Atlee, Chris and Boughton, Harley and Ghattas, Mark and Glover, Andy and Holman, James and Micco, John and Murphy, Brendan and Savor, Tony and et al.}, year={2017}, pages={86–95} } @article{king_stallings_riaz_williams_2017, title={To log, or not to log: using heuristics to identify mandatory log events - a controlled experiment}, volume={22}, ISSN={["1573-7616"]}, url={https://doi.org/10.1007/s10664-016-9449-1}, DOI={10.1007/s10664-016-9449-1}, number={5}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={King, Jason and Stallings, Jon and Riaz, Maria and Williams, Laurie}, year={2017}, month={Oct}, pages={2684–2717} } @article{choi_ji_2017, title={Twist-3 Distribution Amplitudes of Pion in the Light-Front Quark Model}, volume={58}, ISSN={0177-7963 1432-5411}, url={http://dx.doi.org/10.1007/S00601-016-1208-8}, DOI={10.1007/s00601-016-1208-8}, abstractNote={We analyzed two twist-3 distribution amplitudes of pion, i.e. pseudoscalar $$\phi ^P_{3;\pi }(x)$$ and pseudotensor $$\phi ^\sigma _{3;\pi }(x)$$ , within the LFQM. Our LFQM descriptions both for twist-3 $$\phi ^P_{3;\pi }$$ and $$\phi ^\sigma _{3;\pi }$$ obtained from the Gaussian radial wave function not only satisfy the fundamental constraint required from the isospin symmetry, but also reproduce exactly the asymptotic forms anticipated from QCD’s conformal limit.}, number={2}, journal={Few-Body Systems}, publisher={Springer Nature}, author={Choi, Ho-Meoyng and Ji, Chueng-Ryong}, year={2017}, month={Jan} } @inproceedings{pandita_taneja_tung_williams_2016, title={ICON: Inferring temporal constraints from natural language API descriptions}, booktitle={32nd ieee international conference on software maintenance and evolution (icsme 2016)}, author={Pandita, R. and Taneja, K. and Tung, T. and Williams, L.}, year={2016}, pages={378–388} } @article{kafali_singh_williams_2016, title={NANE: Identifying Misuse Cases Using Temporal Norm Enactments}, ISSN={["2332-6441"]}, url={https://publons.com/publon/21294380/}, DOI={10.1109/re.2016.34}, abstractNote={Recent data breaches in domains such as healthcare where confidentiality of data is crucial indicate that breaches often originate from misuses, not only from vulnerabilities in the technical (software or hardware) architecture. Current requirements engineering (RE) approaches determine what access control mechanisms are needed to protect sensitive resources (assets). However, current RE approaches inadequately characterize how a user is expected to interact with others in relation to the relevant assets. Consequently, a requirements analyst cannot readily identify misuses by legitimate users. We adopt social norms as a natural, formal means of characterizing user interactions whereby potential misuses map to norm violations. Our research goal is to help analysts identify misuse cases by formal reasoning about norm enactments. 
We propose Nane, a formal framework for identifying such misuse cases using a semiautomated process. We demonstrate how Nane enables monitoring of potential misuses on a healthcare scenario.}, journal={2016 IEEE 24TH INTERNATIONAL REQUIREMENTS ENGINEERING CONFERENCE (RE)}, author={Kafali, Ozgur and Singh, Munindar P. and Williams, Laurie}, year={2016}, pages={136–145} } @article{rahman_williams_2016, title={Software Security in DevOps: Synthesizing Practitioners' Perceptions and Practices}, DOI={10.1145/2896941.2896946}, abstractNote={In organizations that use DevOps practices, software changes can be deployed as fast as 500 times or more per day. Without adequate involvement of the security team, rapidly deployed software changes are more likely to contain vulnerabilities due to lack of adequate reviews. The goal of this paper is to aid software practitioners in integrating security and DevOps by summarizing experiences in utilizing security practices in a DevOps environment. We analyzed a selected set of Internet artifacts and surveyed representatives of nine organizations that are using DevOps to systematically explore experiences in utilizing security practices. We observe that the majority of the software practitioners have expressed the potential of common DevOps activities, such as automated monitoring, to improve the security of a system. Furthermore, organizations that integrate DevOps and security utilize additional security activities, such as security requirements analysis and performing security configurations. Additionally, these teams also have established collaboration between the security team and the development and operations teams.}, journal={INTERNATIONAL WORKSHOP ON CONTINUOUS SOFTWARE EVOLUTION AND DELIVERY, CSED 2016}, author={Rahman, Akond Ashfaque Ur and Williams, Laurie}, year={2016}, pages={70–76} } @article{theisen_williams_2016, title={Stack traces reveal attack surfaces}, journal={Perspectives on Data Science for Software Engineering}, author={Theisen, C. and Williams, L.}, year={2016}, pages={73–76} } @article{alonso-mori_asa_bergmann_brewster_chatterjee_cooper_frei_fuller_goggins_gul_et al._2016, title={Towards characterization of photo-excited electron transfer and catalysis in natural and artificial systems using XFELs}, volume={194}, ISSN={["1364-5498"]}, DOI={10.1039/c6fd00084c}, abstractNote={The ultra-bright femtosecond X-ray pulses provided by X-ray Free Electron Lasers (XFELs) open capabilities for studying the structure and dynamics of a wide variety of biological and inorganic systems beyond what is possible at synchrotron sources. Although the structure and chemistry at the catalytic sites have been studied intensively in both biological and inorganic systems, a full understanding of the atomic-scale chemistry requires new approaches beyond the steady state X-ray crystallography and X-ray spectroscopy at cryogenic temperatures. Following the dynamic changes in the geometric and electronic structure at ambient conditions, while overcoming X-ray damage to the redox active catalytic center, is key for deriving reaction mechanisms. Such studies become possible by using the intense and ultra-short femtosecond X-ray pulses from an XFEL, where sample is probed before it is damaged. 
We have developed a methodology for simultaneously collecting X-ray diffraction data and X-ray emission spectra, using an energy dispersive spectrometer, at ambient conditions, and used this approach to study the room temperature structure and intermediate states of the photosynthetic water oxidizing metallo-protein, photosystem II. Moreover, we have also used this setup to simultaneously collect the X-ray emission spectra from multiple metals to follow the ultrafast dynamics of light-induced charge transfer between multiple metal sites. A Mn–Ti containing system was studied at an XFEL to demonstrate the efficacy and potential of this method.}, journal={FARADAY DISCUSSIONS}, author={Alonso-Mori, R. and Asa, K. and Bergmann, U. and Brewster, A. S. and Chatterjee, R. and Cooper, J. K. and Frei, H. M. and Fuller, F. D. and Goggins, E. and Gul, S. and et al.}, year={2016}, pages={621–638} } @inproceedings{xie_enck_2016, title={Tutorial: text analytics for security}, booktitle={Symposium and Bootcamp on the Science of Security}, author={Xie, T. and Enck, W.}, year={2016}, pages={124–125} } @inproceedings{systematically_developing_2016, title={Systematically Developing Prevention, Detection, and Response Patterns for Security Requirements}, booktitle={2016 IEEE 24th International Requirements Engineering Conference Workshops (REW)}, year={2016}, pages={62–67} } @inproceedings{pandita_jetley_sudarsan_williams_2015, title={Discovering likely mappings between APIs using text mining}, DOI={10.1109/scam.2015.7335419}, abstractNote={Developers often release different versions of their applications to support various platform/programming-language application programming interfaces (APIs). To migrate an application written using one API (source) to another API (target), a developer must know how the methods in the source API map to the methods in the target API. Given that a typical platform or language exposes a large number of API methods, manually writing API mappings is prohibitively resource-intensive and may be error prone. Recently, researchers proposed to automate the mapping process by mining API mappings from existing code-bases. However, these approaches require as input a manually ported (or at least functionally similar) code across source and target APIs. To address the shortcoming, this paper proposes TMAP: Text Mining based approach to discover likely API mappings using the similarity in the textual description of the source and target API documents. To evaluate our approach, we used TMAP to discover API mappings for 15 classes across: 1) Java and C# API, and 2) Java ME and Android API. We compared the discovered mappings with state-of-the-art source code analysis based approaches: Rosetta and StaMiner. Our results indicate that TMAP on average found relevant mappings for 57% more methods compared to previous approaches. Furthermore, our results also indicate that TMAP on average found exact mappings for 6.5 more methods per class with a maximum of 21 additional exact mappings for a single class as compared to previous approaches.}, booktitle={Ieee international working conference on source code analysis and manipulation}, author={Pandita, R. and Jetley, R. P. and Sudarsan, S. D. and Williams, L.}, year={2015}, pages={231–240} } @article{riaz_breaux_williams_2015, title={How have we evaluated software pattern application?
A systematic mapping study of research design practices}, volume={65}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2015.04.002}, abstractNote={Software patterns encapsulate expert knowledge for constructing successful solutions to recurring problems. Although a large collection of software patterns is available in literature, empirical evidence on how well various patterns help in problem solving is limited and inconclusive. The context of these empirical findings is also not well understood, limiting applicability and generalizability of the findings. To characterize the research design of empirical studies exploring software pattern application involving human participants. We conducted a systematic mapping study to identify and analyze 30 primary empirical studies on software pattern application, including 24 original studies and 6 replications. We characterize the research design in terms of the questions researchers have explored and the context of empirical research efforts. We also classify the studies in terms of measures used for evaluation, and threats to validity considered during study design and execution. Use of software patterns in maintenance is the most commonly investigated theme, explored in 16 studies. Object-oriented design patterns are evaluated in 14 studies while 4 studies evaluate architectural patterns. We identified 10 different constructs with 31 associated measures used to evaluate software patterns. Measures for ‘efficiency’ and ‘usability’ are commonly used to evaluate the problem solving process. While measures for ‘completeness’, ‘correctness’ and ‘quality’ are commonly used to evaluate the final artifact. Overall, ‘time to complete a task’ is the most frequently used measure, employed in 15 studies to measure ‘efficiency’. For qualitative measures, studies do not report approaches for minimizing biases 27% of the time. Nine studies do not discuss any threats to validity. Subtle differences in study design and execution can limit comparison of findings. Establishing baselines for participants’ experience level, providing appropriate training, standardizing problem sets, and employing commonly used measures to evaluate performance can support replication and comparison of results across studies.}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Riaz, Maria and Breaux, Travis and Williams, Laurie}, year={2015}, month={Sep}, pages={14–38} } @article{ur rahman_helms_williams_parnin_2015, title={Synthesizing Continuous Deployment Practices Used in Software Development}, DOI={10.1109/agile.2015.12}, abstractNote={Continuous deployment speeds up the process of existing agile methods, such as Scrum, and Extreme Programming (XP) through the automatic deployment of software changes to end-users upon passing of automated tests. Continuous deployment has become an emerging software engineering process amongst numerous software companies, such as Facebook, Github, Netflix, and Rally Software. A systematic analysis of software practices used in continuous deployment can facilitate a better understanding of continuous deployment as a software engineering process. Such analysis can also help software practitioners in having a shared vocabulary of practices and in choosing the software practices that they can use to implement continuous deployment. The goal of this paper is to aid software practitioners in implementing continuous deployment through a systematic analysis of software practices that are used by software companies. 
We studied the continuous deployment practices of 19 software companies by performing a qualitative analysis of Internet artifacts and by conducting follow-up inquiries. In total, we found 11 software practices that are used by 19 software companies. We also found that in terms of use, eight of the 11 software practices are common across 14 software companies. We observe that continuous deployment necessitates the consistent use of sound software engineering practices such as automated testing, automated deployment, and code review.}, journal={2015 AGILE CONFERENCE}, author={Ur Rahman, Akond Ashfaque and Helms, Eric and Williams, Laurie and Parnin, Chris}, year={2015}, pages={1–10} } @inbook{doyle_williams_cohn_rubin_2014, title={Agile Software Development in Practice}, ISBN={9783319068619 9783319068626}, ISSN={1865-1348 1865-1356}, url={http://dx.doi.org/10.1007/978-3-319-06862-6_3}, DOI={10.1007/978-3-319-06862-6_3}, abstractNote={Agile software development methods have been around since the mid-1990s. Over these years, teams have evolved the specific software development practices used. Aims: The goal of this paper is to provide a view of the agile practices used by new teams, and the relationship between the practices used, project outcomes, and the agile principles. Method: This paper provides a summary and analysis of 2,229 Comparative Agility™ (CA) assessment surveys completed between March 2011 and October 2012 by agile developers who knew about the survey. The CA tool assesses a team's agility and project outcomes using a 65-statement Likert survey. Results: The agile principle of respect for individuals occurs the most frequently, while simplicity occurs least. Progress/Planning is correlated strongly to nine principles. Conclusion: Subject to sampling issues, successful teams report more positive results for agile practices with the most important practice being teams knowing their velocity.}, booktitle={Lecture Notes in Business Information Processing}, publisher={Springer International Publishing}, author={Doyle, Maureen and Williams, Laurie and Cohn, Mike and Rubin, Kenneth S.}, year={2014}, pages={32–45} } @inproceedings{riaz_king_slankas_williams_2014, title={Hidden in plain sight: Automatically identifying security requirements from natural language artifacts}, DOI={10.1109/re.2014.6912260}, abstractNote={Natural language artifacts, such as requirements specifications, often explicitly state the security requirements for software systems. However, these artifacts may also imply additional security requirements that developers may overlook but should consider to strengthen the overall security of the system. The goal of this research is to aid requirements engineers in producing a more comprehensive and classified set of security requirements by (1) automatically identifying security-relevant sentences in natural language requirements artifacts, and (2) providing context-specific security requirements templates to help translate the security-relevant sentences into functional security requirements. Using machine learning techniques, we have developed a tool-assisted process that takes as input a set of natural language artifacts. Our process automatically identifies security-relevant sentences in the artifacts and classifies them according to the security objectives, either explicitly stated or implied by the sentences. We classified 10,963 sentences in six different documents from the healthcare domain and extracted corresponding security objectives.
Our manual analysis showed that 46% of the sentences were security-relevant. Of these, 28% explicitly mention security while 72% of the sentences are functional requirements with security implications. Using our tool, we correctly predict and classify 82% of the security objectives for all the sentences (precision). We identify 79% of all security objectives implied by the sentences within the documents (recall). Based on our analysis, we develop context-specific templates that can be instantiated into a set of functional security requirements by filling in key information from security-relevant sentences.}, booktitle={2014 ieee 22nd international requirements engineering conference (re)}, author={Riaz, M. and King, Jason and Slankas, J. and Williams, L.}, year={2014}, pages={183–192} } @article{rivers_vouk_williams_2014, title={On Coverage-Based Attack Profiles}, DOI={10.1109/sere-c.2014.15}, abstractNote={Automated cyber attacks tend to be schedule and resource limited. The primary progress metric is often "coverage" of pre-determined "known" vulnerabilities that may not have been patched, along with possible zero-day exploits (if such exist). We present and discuss a hypergeometric process model that describes such attack patterns. We used web request signatures from the logs of a production web server to assess the applicability of the model.}, journal={2014 IEEE EIGHTH INTERNATIONAL CONFERENCE ON SOFTWARE SECURITY AND RELIABILITY - COMPANION (SERE-C 2014)}, author={Rivers, Anthony T. and Vouk, Mladen A. and Williams, Laurie}, year={2014}, pages={5–6} } @inproceedings{hibshi_breaux_riaz_williams_2014, title={Towards a framework to measure security expertise in requirements analysis}, DOI={10.1109/espre.2014.6890522}, abstractNote={Research shows that commonly accepted security requirements are not generally applied in practice. Instead of relying on requirements checklists, security experts rely on their expertise and background knowledge to identify security vulnerabilities. To understand the gap between available checklists and practice, we conducted a series of interviews to encode the decision-making process of security experts and novices during security requirements analysis. Participants were asked to analyze two types of artifacts (source code and network diagrams) for vulnerabilities and to apply a requirements checklist to mitigate some of those vulnerabilities. We framed our study using Situation Awareness, a cognitive theory from psychology, to elicit responses that we later analyzed using coding theory and grounded analysis. We report our preliminary results of analyzing two interviews that reveal possible decision-making patterns that could characterize how analysts perceive, comprehend and project future threats which leads them to decide upon requirements and their specifications, in addition to how experts use assumptions to overcome ambiguity in specifications. Our goal is to build a model that researchers can use to evaluate their security requirements methods against how experts transition through different situation awareness levels in their decision-making process.}, booktitle={2014 IEEE 1st Workshop on Evolving Security and Privacy Requirements Engineering (ESPRE)}, author={Hibshi, H. and Breaux, T. and Riaz, M.
and Williams, L.}, year={2014}, pages={13–18} } @article{austin_holmgreen_williams_2013, title={A comparison of the efficiency and effectiveness of vulnerability discovery techniques}, volume={55}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2012.11.007}, abstractNote={Security vulnerabilities discovered later in the development cycle are more expensive to fix than those discovered early. Therefore, software developers should strive to discover vulnerabilities as early as possible. Unfortunately, the large size of code bases and lack of developer expertise can make discovering software vulnerabilities difficult. A number of vulnerability discovery techniques are available, each with their own strengths. The objective of this research is to aid in the selection of vulnerability discovery techniques by comparing the vulnerabilities detected by each and comparing their efficiencies. We conducted three case studies using three electronic health record systems to compare four vulnerability discovery techniques: exploratory manual penetration testing, systematic manual penetration testing, automated penetration testing, and automated static analysis. In our case study, we found empirical evidence that no single technique discovered every type of vulnerability. We discovered that the specific set of vulnerabilities identified by one tool was largely orthogonal to that of other tools. Systematic manual penetration testing found the most design flaws, while automated static analysis found the most implementation bugs. The most efficient discovery technique in terms of vulnerabilities discovered per hour was automated penetration testing. The results show that employing a single technique for vulnerability discovery is insufficient for finding all types of vulnerabilities. Each technique identified only a subset of the vulnerabilities, which, for the most part were independent of each other. Our results suggest that in order to discover the greatest variety of vulnerability types, at least systematic manual penetration testing and automated static analysis should be performed.}, number={7}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Austin, Andrew and Holmgreen, Casper and Williams, Laurie}, year={2013}, month={Jul}, pages={1279–1288} } @article{slankas_williams_2013, title={Access Control Policy Extraction from Unconstrained Natural Language Text}, DOI={10.1109/socialcom.2013.68}, abstractNote={While access control mechanisms have existed in computer systems since the 1960s, modern system developers often fail to ensure appropriate mechanisms are implemented within particular systems. Such failures allow for individuals, both benign and malicious, to view and manipulate information that they should not otherwise be able to access. The goal of our research is to help developers improve security by extracting the access control policies implicitly and explicitly defined in natural language project artifacts. Developers can then verify and implement the extracted access control policies within a system. We propose a machine-learning based process to parse existing, unaltered natural language documents, such as requirement or technical specifications to extract the relevant subjects, actions, and resources for an access control policy. To evaluate our approach, we analyzed a public requirements specification. We had a precision of 0.87 with a recall of 0.91 in classifying sentences as access control or not. 
Through a bootstrapping process utilizing dependency graphs, we correctly identified the subject, action, and object elements of the access control policies with a precision of 0.46 and a recall of 0.54.}, journal={2013 ASE/IEEE INTERNATIONAL CONFERENCE ON SOCIAL COMPUTING (SOCIALCOM)}, author={Slankas, John and Williams, Laurie}, year={2013}, pages={435–440} } @inproceedings{slankas_williams_2013, title={Automated extraction of non-functional requirements in available documentation}, DOI={10.1109/naturalise.2013.6611715}, abstractNote={While all systems have non-functional requirements (NFRs), they may not be explicitly stated in a formal requirements specification. Furthermore, NFRs may also be externally imposed via government regulations or industry standards. As some NFRs represent emergent system properties, those NFRs require appropriate analysis and design efforts to ensure they are met. When the specified NFRs are not met, projects incur costly re-work to correct the issues. The goal of our research is to aid analysts in more effectively extracting relevant non-functional requirements in available unconstrained natural language documents through automated natural language processing. Specifically, we examine which document types (data use agreements, install manuals, regulations, request for proposals, requirements specifications, and user manuals) contain NFRs categorized to 14 NFR categories (e.g., capacity, reliability, and security). We measure how effectively we can identify and classify NFR statements within these documents. In each of the documents evaluated, we found NFRs present. Using a word vector representation of the NFRs, a support vector machine algorithm performed twice as effectively compared to the same input to a multinomial naïve Bayes classifier. Our k-nearest neighbor classifier with a unique distance metric had an F1 measure of 0.54, outperforming in our experiments the optimal naïve Bayes classifier, which had an F1 measure of 0.32. We also found that stop word lists beyond common determiners had minimal performance effect.}, booktitle={2013 1st International Workshop on Natural Language Analysis in Software Engineering (NaturaLiSE)}, author={Slankas, J. and Williams, L.}, year={2013}, pages={9–16} } @article{shin_williams_2013, title={Can traditional fault prediction models be used for vulnerability prediction?}, volume={18}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-011-9190-8}, number={1}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Shin, Yonghee and Williams, Laurie}, year={2013}, month={Feb}, pages={25–59} } @inproceedings{subramani_vouk_williams_2013, title={Non-operational testing of software for security issues}, DOI={10.1109/issrew.2013.6688857}, abstractNote={We are studying the extension of the classical Software Reliability Engineering (SRE) methodology into the security space. We combine "classical" reliability modeling, when applied to reported vulnerabilities found under "normal" operational profile conditions, with safety oriented fault management processes. We illustrate with open source Fedora software.}, booktitle={2013 IEEE International Symposium on Software Reliability Engineering Workshops (ISSREW)}, author={Subramani, S. and Vouk, M.
and Williams, L.}, year={2013}, pages={21–22} } @article{morrison_holmgreen_massey_williams_2013, title={Proposing Regulatory-Driven Automated Test Suites}, DOI={10.1109/agile.2013.8}, abstractNote={In regulated domains such as finance and health care, failure to comply with regulation can lead to financial, civil and criminal penalties. While systems vary from organization to organization, the same regulations apply for all systems. As a result, efficiencies could be gained if the commonalities between systems could be captured in public, shared, test suites for regulations. We propose the use of Behavior-Driven-Development (BDD) technology to create these test suites. With BDD, desired system behavior with respect to regulatory requirements can be captured as constrained natural language 'scenarios'. The scenarios can then be automated through system-specific test drivers. The goal of this research is to enable organizations to compare their systems to regulation in a repeatable and traceable way through the use of BDD. To evaluate our approach, we developed seven scenarios based on the HITECH Act Meaningful Use (MU) regulations for healthcare. We then created system-specific code for three open-source electronic health record systems. We found that it was possible to create scenarios and system-specific code supporting scenario execution on three systems, that iTrust can be shown to be noncompliant, and that emergency access procedures are not defined clearly enough by the regulation to determine compliance or non-compliance.}, journal={2013 AGILE CONFERENCE (AGILE)}, author={Morrison, Patrick and Holmgreen, Casper and Massey, Aaron and Williams, Laurie}, year={2013}, pages={11–21} } @inproceedings{morrison_holmgreen_massey_williams_2013, title={Proposing regulatory-driven automated test suites for electronic health record systems}, DOI={10.1109/sehc.2013.6602477}, abstractNote={In regulated domains such as finance and health care, failure to comply with regulation can lead to financial, civil and criminal penalties. While systems vary from organization to organization, regulations apply across organizations. We propose the use of Behavior-Driven-Development (BDD) scenarios as the basis of an automated compliance test suite for standards such as regulation and interoperability. Such test suites could become a shared asset for use by all systems subject to these regulations and standards. Each system, then, need only create their own system-specific test driver code to automate their compliance checks. The goal of this research is to enable organizations to compare their systems to regulation in a repeatable and traceable way through the use of BDD. To evaluate our proposal, we developed an abbreviated HIPAA test suite and applied it to three open-source electronic health record systems. The scenarios covered all security behavior defined by the selected regulation. The system-specific test driver code covered all security behavior defined in the scenarios, and identified where the tested system lacked such behavior.}, booktitle={2013 5th international workshop on software engineering in health care (sehc)}, author={Morrison, P. and Holmgreen, C. and Massey, A. 
and Williams, L.}, year={2013}, pages={46–49} } @article{srikanth_banerjee_williams_osborne_2013, title={Towards the prioritization of system test cases}, volume={24}, ISSN={0960-0833}, url={http://dx.doi.org/10.1002/STVR.1500}, DOI={10.1002/stvr.1500}, number={4}, journal={Software Testing, Verification and Reliability}, publisher={Wiley}, author={Srikanth, Hema and Banerjee, Sean and Williams, Laurie and Osborne, Jason}, year={2013}, month={Jun}, pages={320–337} } @article{meneely_smith_williams_2012, title={Validating Software Metrics: A Spectrum of Philosophies}, volume={21}, ISSN={["1049-331X"]}, DOI={10.1145/2377656.2377661}, abstractNote={Context. Researchers proposing a new metric have the burden of proof to demonstrate to the research community that the metric is acceptable in its intended use. This burden of proof is provided through the multi-faceted, scientific, and objective process of software metrics validation. Over the last 40 years, however, researchers have debated what constitutes a "valid" metric.}, number={4}, journal={ACM TRANSACTIONS ON SOFTWARE ENGINEERING AND METHODOLOGY}, author={Meneely, Andrew and Smith, Ben and Williams, Laurie}, year={2012} } @article{williams_2012, title={What Agile Teams Think of Agile Principles}, volume={55}, ISSN={["0001-0782"]}, DOI={10.1145/2133806.2133823}, abstractNote={Even after almost a dozen years, they still deliver solid guidance for software development teams and their projects.}, number={4}, journal={COMMUNICATIONS OF THE ACM}, author={Williams, Laurie}, year={2012}, month={Apr}, pages={71–76} } @misc{heckman_williams_2011, title={A systematic literature review of actionable alert identification techniques for automated static code analysis}, volume={53}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2010.12.007}, abstractNote={Automated static analysis (ASA) identifies potential source code anomalies early in the software development lifecycle that could lead to field failures. Excessive alert generation and a large proportion of unimportant or incorrect alerts (unactionable alerts) may cause developers to reject the use of ASA. Techniques that identify anomalies important enough for developers to fix (actionable alerts) may increase the usefulness of ASA in practice. The goal of this work is to synthesize available research results to inform evidence-based selection of actionable alert identification techniques (AAIT). Relevant studies about AAITs were gathered via a systematic literature review. We selected 21 peer-reviewed studies of AAITs. The techniques use alert type selection; contextual information; data fusion; graph theory; machine learning; mathematical and statistical models; or dynamic detection to classify and prioritize actionable alerts. All of the AAITs are evaluated via an example with a variety of evaluation metrics. The selected studies support (with varying strength) the premise that the effective use of ASA is improved by supplementing ASA with an AAIT. Seven of the 21 selected studies reported the precision of the proposed AAITs. The two studies with the highest precision built models using the subject program's history. Precision measures how well a technique identifies true actionable alerts out of all predicted actionable alerts. Precision does not measure the number of actionable alerts missed by an AAIT or how well an AAIT identifies unactionable alerts.
Inconsistent use of evaluation metrics, subject programs, and ASAs in the selected studies precludes meta-analysis and prevents the current results from informing evidence-based selection of an AAIT. We propose building on an actionable alert identification benchmark for comparison and evaluation of AAITs from the literature on a standard set of subjects and utilizing a common set of evaluation metrics.}, number={4}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Heckman, Sarah and Williams, Laurie}, year={2011}, month={Apr}, pages={363–387} } @inproceedings{meneely_williams_2011, title={Socio-technical developer networks: Should we trust our measurements?}, DOI={10.1145/1985793.1985832}, abstractNote={Software development teams must be properly structured to provide effective collaboration to produce quality software. Over the last several years, social network analysis (SNA) has emerged as a popular method for studying the collaboration and organization of people working in large software development teams. Researchers have been modeling networks of developers based on socio-technical connections found in software development artifacts. Using these developer networks, researchers have proposed several SNA metrics that can predict software quality factors and describe the team structure. But do SNA metrics measure what they purport to measure? The objective of this research is to investigate if SNA metrics represent socio-technical relationships by examining if developer networks can be corroborated with developer perceptions. To measure developer perceptions, we developed an online survey that is personalized to each developer of a development team based on that developer's SNA metrics. Developers answered questions about other members of the team, such as identifying their collaborators and the project experts. A total of 124 developers responded to our survey from three popular open source projects: the Linux kernel, the PHP programming language, and the Wireshark network protocol analyzer. Our results indicate that connections in the developer network are statistically associated with the collaborators whom the developers named. Our results substantiate that SNA metrics represent socio-technical relationships in open source development projects, while also clarifying how the developer network can be interpreted by researchers and practitioners.}, booktitle={2011 33rd International Conference on Software Engineering (ICSE)}, author={Meneely, A.
and Williams, L.}, year={2011}, pages={281–290} } @article{williams_2010, title={Agile software development methodologies and practices}, volume={80}, journal={Advances in computers, vol 80}, author={Williams, L.}, year={2010}, pages={1–44} } @article{williams_2010, title={Guest editorial: Special issue on software reliability engineering}, volume={15}, ISSN={["1382-3256"]}, DOI={10.1007/s10664-010-9129-5}, number={4}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Williams, Laurie}, year={2010}, month={Aug}, pages={321–322} } @inbook{smith_williams_austin_2010, place={Berlin Heidelberg}, series={Lecture Notes in Computer Science}, title={Idea: Using System Level Testing for Revealing SQL Injection-Related Error Message Information Leaks}, ISBN={9783642117466 9783642117473}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-642-11747-3_15}, DOI={10.1007/978-3-642-11747-3_15}, abstractNote={Completely handling SQL injection consists of two activities: properly protecting the system from malicious input, and preventing any resultant error messages caused by SQL injection from revealing sensitive information. The goal of this research is to assess the relative effectiveness of unit and system level testing of web applications to reveal both error message information leak and SQL injection vulnerabilities. To produce 100% test coverage of 176 SQL statements in four open source web applications, we augmented the original automated unit test cases with our own system level tests that use both normal input and 132 forms of malicious input. Although we discovered no SQL injection vulnerabilities, we exposed 17 error message information leak vulnerabilities associated with SQL statements using system level testing. Our results suggest that security testers who use an iterative, test-driven development process should compose system level rather than unit level tests.}, booktitle={Engineering Secure Software and Systems. ESSoS 2010}, publisher={Springer}, author={Smith, Ben and Williams, Laurie and Austin, Andrew}, editor={Massacci, F. and Wallach, D. and Zannone, N.}, year={2010}, pages={192–200}, collection={Lecture Notes in Computer Science} } @article{williams_meneely_shipley_2010, title={Protection Poker: The New Software Security "Game"}, volume={8}, ISSN={["1540-7993"]}, DOI={10.1109/msp.2010.58}, abstractNote={Without infinite resources, software development teams must prioritize security fortification efforts to prevent the most damaging attacks. The Protection Poker "game" is a collaborative means for guiding this prioritization and has the potential to improve software security practices and team software security knowledge.}, number={3}, journal={IEEE SECURITY & PRIVACY}, author={Williams, Laurie and Meneely, Andrew and Shipley, Grant}, year={2010}, pages={14–20} } @article{thomas_williams_xie_2009, title={On automated prepared statement generation to remove SQL injection vulnerabilities}, volume={51}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2008.08.002}, abstractNote={Since 2002, over 10% of total cyber vulnerabilities were SQL injection vulnerabilities (SQLIVs).
This paper presents an algorithm of prepared statement replacement for removing SQLIVs by replacing SQL statements with prepared statements. Prepared statements have a static structure, which prevents SQL injection attacks from changing the logical structure of a prepared statement. We created a prepared statement replacement algorithm and a corresponding tool for automated fix generation. We conducted four case studies of open source projects to evaluate the capability of the algorithm and its automation. The empirical results show that prepared statement code correctly replaced 94% of the SQLIVs in these projects.}, number={3}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Thomas, Stephen and Williams, Laurie and Xie, Tao}, year={2009}, month={Mar}, pages={589–598} } @article{smith_williams_2009, title={On guiding the augmentation of an automated test suite via mutation analysis}, volume={14}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-008-9083-7}, abstractNote={Mutation testing has traditionally been used as a defect injection technique to assess the effectiveness of a test suite as represented by a “mutation score.” Recently, mutation testing tools have become more efficient, and industrial usage of mutation analysis is experiencing growth. Mutation analysis entails adding or modifying test cases until the test suite is sufficient to detect as many mutants as possible and the mutation score is satisfactory. The augmented test suite resulting from mutation analysis may reveal latent faults and provides a stronger test suite to detect future errors which might be injected. Software engineers often look for guidance on how to augment their test suite using information provided by line and/or branch coverage tools. As the use of mutation analysis grows, software engineers will want to know how the emerging technique compares with and/or complements coverage analysis for guiding the augmentation of an automated test suite. Additionally, software engineers can benefit from an enhanced understanding of efficient mutation analysis techniques. To address these needs for additional information about mutation analysis, we conducted an empirical study of the use of mutation analysis on two open source projects. Our results indicate that a focused effort on increasing mutation score leads to a corresponding increase in line and branch coverage to the point that line coverage, branch coverage and mutation score reach a maximum but leave some types of code structures uncovered. Mutation analysis guides the creation of additional “common programmer error” tests beyond those written to increase line and branch coverage. We also found that 74% of our chosen set of mutation operators is useful, on average, for producing new tests. The remaining 26% of mutation operators did not produce new test cases because their mutants were immediately detected by the initial test suite, indirectly detected by test suites we added to detect other mutants, or were not able to be detected by any test.}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Smith, Ben H. 
and Williams, Laurie}, year={2009}, month={Jun}, pages={341–369} } @inbook{williams_gegick_meneely_2009, place={Berlin Heidelberg}, series={Lecture Notes in Computer Science}, title={Protection Poker: Structuring Software Security Risk Assessment and Knowledge Transfer}, ISBN={9783642001987 9783642001994}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-642-00199-4_11}, DOI={10.1007/978-3-642-00199-4_11}, abstractNote={Discovery of security vulnerabilities is on the rise. As a result, software development teams must place a higher priority on preventing the injection of vulnerabilities in software as it is developed. Because the focus on software security has increased only recently, software development teams often do not have expertise in techniques for identifying security risk, understanding the impact of a vulnerability, or knowing the best mitigation strategy. We propose the Protection Poker activity as a collaborative and informal form of misuse case development and threat modeling that plays off the diversity of knowledge and perspective of the participants. An excellent outcome of Protection Poker is that security knowledge is passed around the team. Students in an advanced undergraduate software engineering course at North Carolina State University participated in a Protection Poker session conducted as a laboratory exercise. Students actively shared misuse cases, threat models, and their limited software security expertise as they discussed vulnerabilities in their course project. We observed students relating vulnerabilities to the business impacts of the system. Protection Poker led to a more effective software security learning experience than in prior semesters. A pilot of the use of Protection Poker with an industrial partner began in October 2008. The first security discussion structured via Protection Poker caused two requirements to be revised for added security fortification; led to the immediate identification of one vulnerability in the system; initiated a meeting on the prioritization of security defects; and instigated a call for an education session on preventing cross-site scripting vulnerabilities.}, booktitle={Engineering Secure Software and Systems. ESSoS 2009}, publisher={Springer}, author={Williams, Laurie and Gegick, Michael and Meneely, Andrew}, editor={Massacci, F. and Redwine, S.T. and Zannone, N.}, year={2009}, pages={122–134}, collection={Lecture Notes in Computer Science} } @inproceedings{meneely_williams_2009, title={Secure open source collaboration: An empirical study of Linus' law}, DOI={10.1145/1653662.1653717}, abstractNote={Open source software is often considered to be secure. One factor in this confidence in the security of open source software lies in leveraging large developer communities to find vulnerabilities in the code. Eric Raymond declares Linus' Law: "Given enough eyeballs, all bugs are shallow." Does Linus' Law hold up ad infinitum? Or, can the multitude of developers become "too many cooks in the kitchen", causing the system's security to suffer as a result? In this study, we examine the security of an open source project in the context of developer collaboration. By analyzing version control logs, we quantified notions of Linus' Law as well as the "too many cooks in the kitchen" viewpoint into developer activity metrics. We performed an empirical case study by examining correlations between the known security vulnerabilities in the open source Red Hat Enterprise Linux 4 kernel and developer activity metrics.
Files developed by otherwise-independent developer groups were more likely to have a vulnerability, supporting Linus' Law. However, files with changes from nine or more developers were 16 times more likely to have a vulnerability than files changed by fewer than nine developers, indicating that many developers changing code may have a detrimental effect on the system's security.}, booktitle={CCS'09: Proceedings of the 16th ACM Conference on Computer and Communications Security}, author={Meneely, A. and Williams, L.}, year={2009}, pages={453–462} } @article{smith_williams_2009, title={Should software testers use mutation analysis to augment a test set?}, volume={82}, ISSN={["1873-1228"]}, DOI={10.1016/j.jss.2009.06.031}, abstractNote={Mutation testing has historically been used to assess the fault-finding effectiveness of a test suite or other verification technique. Mutation analysis, rather, entails augmenting a test suite to detect all killable mutants. Concerns about the time efficiency of mutation analysis may prohibit its widespread, practical use. The goal of our research is to assess the effectiveness of the mutation analysis process when used by software testers to augment a test suite to obtain higher statement coverage scores. We conducted two empirical studies and have shown that mutation analysis can be used by software testers to effectively produce new test cases and to improve statement coverage scores in a feasible amount of time. Additionally, we find that our user study participants view mutation analysis as an effective but relatively expensive technique for writing new test cases. Finally, we have shown that the choice of mutation tool and operator set can play an important role in determining how efficient mutation analysis is for producing new test cases.}, number={11}, journal={JOURNAL OF SYSTEMS AND SOFTWARE}, author={Smith, Ben H. and Williams, Laurie}, year={2009}, month={Nov}, pages={1819–1832} } @inbook{gegick_rotella_williams_2009, place={Berlin Heidelberg}, series={Lecture Notes in Computer Science}, title={Toward Non-security Failures as a Predictor of Security Faults and Failures}, ISBN={9783642001987 9783642001994}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-642-00199-4_12}, DOI={10.1007/978-3-642-00199-4_12}, abstractNote={In the search for metrics that can predict the presence of vulnerabilities early in the software life cycle, there may be some benefit to choosing metrics from the non-security realm. We analyzed non-security and security failure data reported for the year 2007 of a Cisco software system. We used non-security failure reports as input variables into a classification and regression tree (CART) model to determine the probability that a component will have at least one vulnerability. Using CART, we ranked all of the system components in descending order of their probabilities and found that 57% of the vulnerable components were in the top nine percent of the total component ranking, but with a 48% false positive rate. The results indicate that non-security failures can be used as one of the input variables for security-related prediction models.}, booktitle={Engineering Secure Software and Systems. ESSoS 2009.}, publisher={Springer}, author={Gegick, Michael and Rotella, Pete and Williams, Laurie}, editor={Massacci, F. and Redwine, S.T. 
and Zannone, N.}, year={2009}, pages={135–149}, collection={Lecture Notes in Computer Science} } @article{layman_williams_slaten_berenson_vouk_2008, title={Addressing diverse needs through a balance of agile and plan-driven software development methodologies in the core software engineering course}, volume={24}, number={4}, journal={International Journal of Engineering Education}, author={Layman, L. and Williams, L. and Slaten, K. and Berenson, S. and Vouk, M.}, year={2008}, pages={659–670} } @article{nagappan_maximilien_bhat_williams_2008, title={Realizing quality improvement through test driven development: results and experiences of four industrial teams}, volume={13}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-008-9062-z}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Nagappan, Nachiappan and Maximilien, E. Michael and Bhat, Thirumalesh and Williams, Laurie}, year={2008}, month={Jun}, pages={289–302} } @inbook{prechelt_williams_2007, place={Berlin Heidelberg}, series={Lecture Notes in Computer Science}, title={Industry-Research Collaboration Working Group Results}, ISBN={9783540713005 9783540713012}, url={http://dx.doi.org/10.1007/978-3-540-71301-2_46}, DOI={10.1007/978-3-540-71301-2_46}, booktitle={Empirical Software Engineering Issues. Critical Assessment and Future Directions}, publisher={Springer}, author={Prechelt, Lutz and Williams, Laurie}, editor={Basili, V.R. and Rombach, D. and Schneider, K. and Kitchenham, B. and Pfahl, D. and Selby, R.W.}, year={2007}, month={Jun}, pages={153–157}, collection={Lecture Notes in Computer Science} } @article{gegick_williams_2007, title={On the design of more secure software-intensive systems by use of attack patterns}, volume={49}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2006.06.002}, abstractNote={Retrofitting security implementations to a released software-intensive system or to a system under development may require significant architectural or coding changes. These late changes can be difficult and more costly than if performed early in the software process. We have created regular expression-based attack patterns that show the sequential events that occur during an attack. By performing a Security Analysis for Existing Threats (SAFE-T), software engineers can match the symbols of a regular expression to their system design. An architectural analysis that identifies security vulnerabilities early in the software process can prepare software engineers for which security implementations are necessary when coding starts. A case study involving students in an upper-level undergraduate security course suggests that SAFE-T can be performed by relatively inexperienced engineers who are not experts in security. Data from the case study also suggest that the attack patterns do not restrict themselves to vulnerabilities in specific environments.}, number={4}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Gegick, Michael and Williams, Laurie}, year={2007}, month={Apr}, pages={381–397} } @inbook{williams_erdogmus_selby_2007, place={Berlin Heidelberg}, series={Lecture Notes in Computer Science}, title={Roadmapping Working Group 4 Results}, ISBN={9783540713005 9783540713012}, url={http://dx.doi.org/10.1007/978-3-540-71301-2_53}, DOI={10.1007/978-3-540-71301-2_53}, booktitle={Empirical Software Engineering Issues. Critical Assessment and Future Directions}, publisher={Springer}, author={Williams, Laurie and Erdogmus, Hakan and Selby, Rick}, editor={Basili, V.R. and Rombach, D. and Schneider, K. and Kitchenham, B.
and Pfahl, D. and Selby, R.W.}, year={2007}, month={Jun}, pages={181–183}, collection={Lecture Notes in Computer Science} } @inbook{williams_2007, place={Berlin Heidelberg}, series={Lecture Notes in Computer Science}, title={Structuring Families of Industrial Case Studies}, ISBN={9783540713005 9783540713012}, url={http://dx.doi.org/10.1007/978-3-540-71301-2_41}, DOI={10.1007/978-3-540-71301-2_41}, abstractNote={Practitioners are most influenced by results of research conducted in industrial settings. Evidence of the efficacy of a software development practice or process is best obtained through a triangulation of research findings obtained through a variety of empirical studies in various contexts. The use of an evaluation framework can enable a family of related industrial case studies in different contexts to be meta-analyzed and/or combined. Such an evaluation framework could consist of templates for specific quantitative measures to collect with associated instructions on what to include/exclude for consistent measurement collection as well as protocols for surveys and/or interviews. Groups of researchers interested in the same research question(s) can customize and evolve an evaluation framework for the technology under study.}, booktitle={Empirical Software Engineering Issues. Critical Assessment and Future Directions}, publisher={Springer}, author={Williams, Laurie}, editor={Basili, V.R. and Rombach, D. and Schneider, K. and Kitchenham, B. and Pfahl, D. and Selby, R.W.}, year={2007}, month={Jun}, pages={134–134}, collection={Lecture Notes in Computer Science} } @article{layman_williams_damian_bures_2006, title={Essential communication practices for Extreme Programming in a global software development team}, volume={48}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2006.01.004}, abstractNote={We conducted an industrial case study of a distributed team in the USA and the Czech Republic that used Extreme Programming. Our goal was to understand how this globally-distributed team created a successful project in a new problem domain using a methodology that is dependent on informal, face-to-face communication. We collected quantitative and qualitative data and used grounded theory to identify four key factors for communication in globally-distributed XP teams working within a new problem domain. Our study suggests that, if these critical enabling factors are addressed, methodologies dependent on informal communication can be used on global software development projects.}, number={9}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Layman, Lucas and Williams, Laurie and Damian, Daniela and Bures, Hynek}, year={2006}, month={Sep}, pages={781–794} } @article{layman_williams_cunningham_2006, title={Motivations and measurements in an agile case study}, volume={52}, ISSN={["1873-6165"]}, DOI={10.1016/j.sysarc.2006.06.009}, abstractNote={With the recent emergence of agile software development technologies, the software community is awaiting sound, empirical investigation of the impacts of agile practices in a live setting. One means of conducting such research is through industrial case studies. There are a number of influencing factors that contribute to the success of such a case study. In this paper, we describe a case study performed at Sabre Airline Solutions™ evaluating the effects of adopting Extreme Programming (XP) practices with a team that had characteristically plan-driven risk factors.
We compare the team’s business-related results (productivity and quality) to two published sources of industry averages. Our case study found that the Sabre team yielded above-average post-release quality and average to above-average productivity. We discuss our experience in conducting this case study, including specifics of how data was collected, the rationale behind our process of data collection, and what obstacles were encountered during the case study. We identify four factors that potentially impact the outcome of industrial case studies: availability of data, tool support, cooperative personnel and project status. Recognizing and planning for these factors is essential to conducting industrial case studies.}, number={11}, journal={JOURNAL OF SYSTEMS ARCHITECTURE}, author={Layman, Lucas and Williams, Laurie and Cunningham, Lynn}, year={2006}, month={Nov}, pages={654–667} } @article{zheng_williams_nagappan_snipes_hudepohl_vouk_2006, title={On the value of static analysis for fault detection in software}, volume={32}, ISSN={["1939-3520"]}, DOI={10.1109/TSE.2006.38}, abstractNote={No single software fault-detection technique is capable of addressing all fault-detection concerns. Similarly to software reviews and testing, static analysis tools (or automated static analysis) can be used to remove defects prior to release of a software product. To determine to what extent automated static analysis can help in the economic production of a high-quality product, we have analyzed static analysis faults and test and customer-reported failures for three large-scale industrial software systems developed at Nortel Networks. The data indicate that automated static analysis is an affordable means of software fault detection. Using the orthogonal defect classification scheme, we found that automated static analysis is effective at identifying assignment and checking faults, allowing the later software production phases to focus on more complex, functional, and algorithmic faults. A majority of the defects found by automated static analysis appear to be produced by a few key types of programmer errors and some of these types have the potential to cause security vulnerabilities. Statistical analysis results indicate the number of automated static analysis faults can be effective for identifying problem modules. Our results indicate static analysis tools are complementary to other fault-detection techniques for the economic production of a high-quality software product.}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Zheng, J and Williams, L and Nagappan, N and Snipes, W and Hudepohl, JP and Vouk, MA}, year={2006}, month={Apr}, pages={240–253} } @article{george_williams_2004, title={A structured experiment of test-driven development}, volume={46}, ISSN={["1873-6025"]}, DOI={10.1016/j.infsof.2003.09.011}, abstractNote={Test Driven Development (TDD) is a software development practice in which unit test cases are incrementally written prior to code implementation. We ran a set of structured experiments with 24 professional pair programmers. One group developed a small Java program using TDD while the other (control group), used a waterfall-like approach. Experimental results, subject to external validity concerns, tend to indicate that TDD programmers produce higher quality code because they passed 18% more functional black-box test cases. However, the TDD programmers took 16% more time. 
Statistical analysis of the results showed that a moderate statistical correlation existed between time spent and the resulting quality. Lastly, the programmers in the control group often did not write the required automated test cases after completing their code. Hence it could be perceived that waterfall-like approaches do not encourage adequate testing. This intuitive observation supports the perception that TDD has the potential for increasing the level of unit testing in the software industry.}, number={5}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={George, B and Williams, L}, year={2004}, month={Apr}, pages={337–342} } @inproceedings{katira_williams_wiebe_miller_balik_gehringer_2004, title={On understanding compatibility of student pair programmers}, ISBN={1581137982}, url={http://dx.doi.org/10.1145/971300.971307}, DOI={10.1145/971300.971307}, abstractNote={In recent years, educators have increasingly used pair programming in their computer science courses. Pair programming has been shown to be beneficial for both the teaching staff and the students in the courses. Occasionally, though, students are not compatible with their partners. An extensive study was done at North Carolina State University to provide guidance on forming student pairs to improve the chances that pairs will be compatible and have a productive work relationship. We examined compatibility among freshman, advanced undergraduate, and graduate students. We have found that the students' perception of their partner's skill level has a significant influence on their compatibility. Graduate students work well with partners of similar actual skill level. Freshmen seem to work better with partners with a different Myers-Briggs personality type. Students' self-esteem does not appear to be a major contributor to pair compatibility.}, booktitle={Proceedings of the 35th SIGCSE technical symposium on Computer science education - SIGCSE '04}, publisher={ACM Press}, author={Katira, Neha and Williams, Laurie and Wiebe, Eric and Miller, Carol and Balik, Suzanne and Gehringer, Ed}, year={2004} } @inbook{nagappan_williams_wiebe_miller_balik_ferzli_petlick_2003, title={Pair learning: With an eye toward future success}, volume={2753}, ISBN={354040662X}, DOI={10.1007/978-3-540-45122-8_21}, abstractNote={Pair programming is a practice in which two programmers work collaboratively at one computer on the same design, algorithm, or code. Prior research indicates that pair programmers produce higher quality code in essentially half the time taken by solo programmers. Pair programming is becoming increasingly popular in industry and in university curricula. An experiment was run at North Carolina State University over a period of one and a half years to assess the efficacy of pair programming as an alternative educational technique in an introductory programming course. We found that the retention rate of the students in the introductory programming courses is equal to or better than that of the students in the solo programming courses. Most students show a positive attitude towards collaborative programming, and students in paired classes continue to be successful in subsequent programming classes that require solo programming.
Pair programming also leads to a reduced workload for the course staff in terms of grading, questions answered, and teaching effort.}, booktitle={Extreme programming and agile methods: XP/Agile Universe 2003: Third XP Agile Universe Conference, New Orleans, LA, USA, August 10-13, 2003}, publisher={Berlin; New York: Springer}, author={Nagappan, N. and Williams, L. and Wiebe, Eric and Miller, C. and Balik, S. and Ferzli, M. and Petlick, J.}, year={2003}, pages={185–198} } @article{erdogmus_williams_2003, title={The Economics of Software Development by Pair Programmers}, volume={48}, ISSN={0013-791X 1547-2701}, url={http://dx.doi.org/10.1080/00137910309408770}, DOI={10.1080/00137910309408770}, abstractNote={Evidence suggests that pair programmers–two programmers working collaboratively on the same design, algorithm, code, or test–perform substantially better than the two working alone. Improved quality, teamwork, communication, knowledge management, and morale have been among the reported benefits of pair programming. This paper presents a comparative economic evaluation that strengthens the case for pair programming. The evaluation builds on the quantitative results of an empirical study conducted at the University of Utah. The evaluation is performed by interpreting these findings in the context of two different, idealized models of value realization. In the first model, consistent with the traditional waterfall process of software development, code produced by a development team is deployed in a single increment; its value is not realized until the full project completion. In the second model, consistent with agile software development processes such as Extreme Programming, code is produced and delivered in small increments; thus its value is realized in an equally incremental fashion. Under both models, our analysis demonstrates a distinct economic advantage of pair programmers over solo programmers. Based on these preliminary results, we recommend that organizations engaged in software development consider adopting pair programming as a practice that could improve their bottom line. To be able to perform quantitative analyses, several simplifying assumptions had to be made regarding alternative models of software development, the costs and benefits associated with these models, and how these costs and benefits are recognized. The implications of these assumptions are addressed in the paper.}, number={4}, journal={The Engineering Economist}, publisher={Informa UK Limited}, author={Erdogmus, Hakan and Williams, Laurie}, year={2003}, month={Jan}, pages={283–319} } @article{williams_2003, title={The XP programmer: The few-minutes programmer}, volume={20}, ISSN={["0740-7459"]}, DOI={10.1109/MS.2003.1196315}, number={3}, journal={IEEE SOFTWARE}, author={Williams, L}, year={2003}, pages={16–20} } @book{baheti_williams_gehringer_stotts_smith_2002, title={Distributed Pair Programming: Empirical Studies and Supporting Environments}, number={2002}, institution={Chapel Hill, NC: Dept. of Computer Science, University of North Carolina}, author={Baheti, P. and Williams, L. and Gehringer, E. and Stotts, D.
and Smith, J.}, year={2002}, month={Mar}, pages={TR02–010} } @book{extreme programming and agile methods xp/agile universe 2002 : second xp universe and first agile universe conference, chicago, il, usa, august 4-7, 2002 : proceedings_2002, publisher={Berlin; New York: Springer}, year={2002} } @article{williams_wiebe_yang_ferzli_miller_2002, title={In support of paired programming in the introductory computer science course}, volume={12}, DOI={10.1076/csed.12.3.197.8618}, abstractNote={A formal pair programming experiment was run at North Carolina State University to empirically assess the educational efficacy of the technique in a CS1 course. Results indicate that students who practice pair programming perform better on programming projects and are more likely to succeed by completing the class with a C or better. Student pairs are more self-sufficient, which reduces their reliance on the teaching staff. Qualitatively, paired students demonstrate higher order thinking skills than students who work alone. These results are supportive of pair programming as a collaborative learning technique.}, number={3}, journal={Computer Science Education}, author={Williams, L. and Wiebe, Eric and Yang, K. and Ferzli, M. and Miller, C.}, year={2002}, pages={197–212} } @article{hislop_lutz_naveda_mccracken_mead_williams_2002, title={Integrating Agile Practices into Software Engineering Courses}, volume={12}, ISSN={0899-3408 1744-5175}, url={http://dx.doi.org/10.1076/csed.12.3.169.8619}, DOI={10.1076/csed.12.3.169.8619}, abstractNote={Agile software development methodologies are gaining popularity in industry although they comprise a mix of accepted and controversial software engineering practices. It is quite likely that the software industry will find that specific project characteristics will determine the prudence of using an agile or a plan-driven methodology – or a hybrid of the two. Educators must assess the value and applicability of these emerging agile practices and decide what role they have in software engineering curricula. This paper provides a brief overview of several agile methodologies, including a discussion of evaluative research of agile practices in academia. The paper also considers instructional issues related to agile methods and the impact of agile methodologies on existing curricular references such as SWEBOK.}, number={3}, journal={Computer Science Education}, publisher={Informa UK Limited}, author={Hislop, Gregory W. and Lutz, Michael J. and Naveda, J. Fernando and McCracken, W. Michael and Mead, Nancy R. and Williams, Laurie A.}, year={2002}, month={Sep}, pages={169–185} } @misc{williams_2002, title={Letters - Try it, you'll like it}, volume={19}, number={1}, journal={IEEE Software}, author={Williams, L.}, year={2002}, pages={7} } @inproceedings{williams_yang_wiebe_ferzli_miller_2002, title={Pair programming in an introductory computer science course: Initial results and recommendations}, ISBN={1581134711}, booktitle={OOPSLA 2002: 17th ACM Conference on Object-Oriented Programming, Systems, Languages, and Applications : conference proceedings: November 4-8, 2002, Washington State Convention and Trade Center, Seattle, Washington, USA}, publisher={New York, NY: ACM Press}, author={Williams, L. and Yang, K. and Wiebe, E. and Ferzli, M.
and Miller, C.}, year={2002} } @article{borstler_carrington_hislop_lisack_olson_williams_2002, title={Teaching PSP: Challenges and lessons learned}, volume={19}, ISSN={["0740-7459"]}, DOI={10.1109/MS.2002.1032853}, abstractNote={Software engineering educators need to provide environments where students learn about the size and complexity of modern software systems and the techniques available for managing these difficulties. Five universities used the personal software process to teach software engineering concepts in a variety of contexts.}, number={5}, journal={IEEE SOFTWARE}, author={Borstler, J and Carrington, D and Hislop, GW and Lisack, S and Olson, K and Williams, L}, year={2002}, pages={42+} } @article{williams_kessler_2001, title={Experiments with Industry's “Pair-Programming” Model in the Computer Science Classroom}, volume={11}, ISSN={0899-3408 1744-5175}, url={http://dx.doi.org/10.1076/csed.11.1.7.3846}, DOI={10.1076/csed.11.1.7.3846}, abstractNote={Anecdotal evidence from several sources, primarily in industry, indicates that two programmers working collaboratively on the same design, algorithm, code, or test perform substantially better than the two working alone. Two courses taught at the University of Utah studied the use of this technique, often called pair-programming or collaborative programming, in the undergraduate computer science classroom. The students applied a positive form of “pair-pressure” on each other, which proved beneficial to the quality of their work products. The students also benefit from “pair-learning,” which allowed them to learn new languages faster and better than with solitary learning. The workload of the teaching staff is reduced because the students more often look to each other for technical support and advice.}, number={1}, journal={Computer Science Education}, publisher={Informa UK Limited}, author={Williams, Laurie A. and Kessler, Robert R.}, year={2001}, month={Jan}, pages={7–20} } @article{williams_kessler_cunningham_jeffries_2000, title={Strengthening the case for pair programming}, volume={17}, ISSN={["0740-7459"]}, DOI={10.1109/52.854064}, abstractNote={The software industry has practiced pair programming (two programmers working side by side at one computer on the same problem) with great success for years, but people who haven't tried it often reject the idea as a waste of resources. The authors demonstrate that using pair programming in the software development process yields better products in less time and happier, more confident programmers.}, number={4}, journal={IEEE SOFTWARE}, author={Williams, L and Kessler, RR and Cunningham, W and Jeffries, R}, year={2000}, pages={19+} }
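
Several of the entries above rest on concrete, reproducible techniques, and a few short illustrative sketches follow. First, the developer networks behind the SNA metrics of meneely_williams_2011 connect developers who changed the same artifacts. The Java sketch below builds such a network from (file, developer) pairs and computes a simple degree measure; it is a minimal illustration, not the paper's tooling, and every file and developer name in it is invented.

import java.util.*;

public class DeveloperNetworkSketch {
    public static void main(String[] args) {
        // Hypothetical version-control data: file -> developers who changed it.
        Map<String, List<String>> changes = Map.of(
                "net/ipv4/tcp.c", List.of("alice", "bob"),
                "net/core/dev.c", List.of("bob", "carol"),
                "fs/ext4/inode.c", List.of("carol"));

        // Connect two developers whenever they changed at least one common file.
        Map<String, Set<String>> network = new HashMap<>();
        for (List<String> devs : changes.values()) {
            for (String a : devs) {
                for (String b : devs) {
                    if (!a.equals(b)) {
                        network.computeIfAbsent(a, k -> new TreeSet<>()).add(b);
                    }
                }
            }
        }

        // Degree: the number of distinct collaborators per developer, one of
        // the simplest SNA metrics computable from such a network.
        network.forEach((dev, peers) ->
                System.out.println(dev + " collaborates with " + peers.size() + ": " + peers));
    }
}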
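
The automated fix described in thomas_williams_xie_2009 replaces string-concatenated SQL with prepared statements. The sketch below shows the before/after shape of that transformation in plain JDBC; it is a minimal illustration assuming an open java.sql.Connection, and the users table, name column, and method names are hypothetical rather than the paper's generated code.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class PreparedStatementSketch {

    // Vulnerable pattern: user input is concatenated into the SQL string,
    // so input such as  ' OR '1'='1  can change the query's logical structure.
    static ResultSet findUserVulnerable(Connection conn, String name) throws SQLException {
        Statement stmt = conn.createStatement();
        return stmt.executeQuery("SELECT * FROM users WHERE name = '" + name + "'");
    }

    // Replacement pattern: the statement's structure is fixed at prepare time;
    // the input is bound as data and cannot alter the parsed query.
    static ResultSet findUserPrepared(Connection conn, String name) throws SQLException {
        PreparedStatement ps = conn.prepareStatement("SELECT * FROM users WHERE name = ?");
        ps.setString(1, name);
        return ps.executeQuery();
    }
}

Because the query is parsed before any input is bound, malicious input is treated as a literal value, which is the static-structure property the abstract describes.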
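
Mutation analysis, as studied in smith_williams_2009 (On guiding the augmentation of an automated test suite via mutation analysis), augments a test suite until seeded mutants are detected, i.e., "killed." A minimal JUnit 4 sketch follows, with an invented method under test and a relational-operator mutant shown as a comment; it illustrates the kill condition only, not the paper's tooling or operator set.

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.junit.Test;

public class MutationSketchTest {

    // Hypothetical method under test.
    static boolean isAdult(int age) {
        return age >= 18;          // original
        // return age > 18;        // relational-operator mutant: >= becomes >
    }

    // A test that only checked, say, isAdult(30) would pass against both the
    // original and the mutant, leaving the mutant alive. The boundary inputs
    // below distinguish the two versions and therefore kill the mutant:
    // the original returns true for 18, the mutant returns false.
    @Test
    public void killsTheBoundaryMutant() {
        assertTrue(isAdult(18));
        assertFalse(isAdult(17));
    }
}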
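
The SAFE-T analysis in gegick_williams_2007 matches regular expression-based attack patterns against the sequential events of a system design. The encoding below is invented purely for illustration (single characters standing in for events); the paper's actual event alphabet and patterns differ.

import java.util.regex.Pattern;

public class AttackPatternSketch {
    public static void main(String[] args) {
        // Hypothetical single-character event encoding:
        // I = untrusted input received, V = input validated, Q = database query executed.
        // The pattern matches any event trace in which input reaches a query
        // with no intervening validation event.
        Pattern unvalidatedQuery = Pattern.compile("I[^V]*Q");

        for (String trace : new String[] {"IQ", "IVQ", "IVIQ"}) {
            boolean matches = unvalidatedQuery.matcher(trace).find();
            System.out.println(trace + " -> " + (matches ? "matches attack pattern" : "no match"));
        }
    }
}

Here "IQ" and "IVIQ" match (the second because its later input reaches the query unvalidated), while "IVQ" does not, mirroring how a designer would check event sequences against a pattern before coding starts.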
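
Finally, test-driven development, evaluated in george_williams_2004 and nagappan_maximilien_bhat_williams_2008, writes a failing unit test before the production code that satisfies it. A minimal JUnit 4 sketch with an invented Cart example; the red/green steps are annotated as comments, and none of the names come from the studies themselves.

import static org.junit.Assert.assertEquals;
import org.junit.Test;

public class TddSketchTest {

    // Step 1 (red): these tests are written first and fail until the
    // production code below exists, which is what drives its design.
    @Test
    public void sumsAnEmptyCartToZero() {
        assertEquals(0, Cart.total(new int[] {}));
    }

    @Test
    public void sumsItemPrices() {
        assertEquals(42, Cart.total(new int[] {40, 2}));
    }

    // Step 2 (green): the simplest production code that passes the tests;
    // in a real project this would live outside the test class.
    static class Cart {
        static int total(int[] prices) {
            int sum = 0;
            for (int p : prices) sum += p;
            return sum;
        }
    }
}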