% Journal article: Empirical Software Engineering 22(4), 2017.
@article{riaz_king_slankas_williams_massacci_quesada-lopez_jenkins_2017,
  author       = {Riaz, Maria and King, Jason and Slankas, John and Williams, Laurie and Massacci, Fabio and Quesada-Lopez, Christian and Jenkins, Marcelo},
  title        = {Identifying the implied: Findings from three differentiated replications on the use of security requirements templates},
  journal      = {Empirical Software Engineering},
  volume       = {22},
  number       = {4},
  pages        = {2127--2178},
  year         = {2017},
  month        = aug,
  issn         = {1573-7616},
  doi          = {10.1007/s10664-016-9481-1},
  url          = {https://doi.org/10.1007/s10664-016-9481-1},
  abstractNote = {Identifying security requirements early on can lay the foundation for secure software development. Security requirements are often implied by existing functional requirements but are mostly left unspecified. The Security Discoverer (SD) process automatically identifies security implications of individual requirements sentences and suggests applicable security requirements templates. The objective of this research is to support requirements analysts in identifying security requirements by automating the suggestion of security requirements templates that are implied by existing functional requirements. We conducted a controlled experiment in a graduate-level security class at North Carolina State University (NCSU) to evaluate the SD process in eliciting implied security requirements in 2014. We have subsequently conducted three differentiated replications to evaluate the generalizability and applicability of the initial findings. The replications were conducted across three countries at the University of Trento, NCSU, and the University of Costa Rica. We evaluated the responses of the 205 total participants in terms of quality, coverage, relevance and efficiency. We also develop shared insights regarding the impact of context factors such as time, motivation and support, on the study outcomes and provide lessons learned in conducting the replications. Treatment group, using the SD process, performed significantly better than the control group (at p-value <0.05) in terms of the coverage of the identified security requirements and efficiency of the requirements elicitation process in two of the three replications, supporting the findings of the original study. Participants in the treatment group identified 84 % more security requirements in the oracle as compared to the control group on average. Overall, 80 % of the 111 participants in the treatment group were favorable towards the use of templates in identifying security requirements. Our qualitative findings indicate that participants may be able to differentiate between relevant and extraneous templates suggestions and be more inclined to fill in the templates with additional support. Security requirements templates capture the security knowledge of multiple experts and can support the security requirements elicitation process when automatically suggested, making the implied security requirements more evident. However, individual participants may still miss out on identifying a number of security requirements due to empirical constraints as well as potential limitations on knowledge and security expertise.},
}

% Conference paper (MSR 2015); the venue belongs in booktitle, not journal.
@inproceedings{barik_lubick_smith_slankas_murphy-hill_2015,
  author       = {Barik, Titus and Lubick, Kevin and Smith, Justin and Slankas, John and Murphy-Hill, Emerson},
  title        = {{FUSE}: A Reproducible, Extendable, Internet-scale Corpus of Spreadsheets},
  booktitle    = {12th Working Conference on Mining Software Repositories ({MSR} 2015)},
  pages        = {486--489},
  year         = {2015},
  doi          = {10.1109/msr.2015.70},
  abstractNote = {Spreadsheets are perhaps the most ubiquitous form of end-user programming software. This paper describes a corpus, called Fuse, containing 2,127,284 URLs that return spreadsheets (and their HTTP server responses), and 249,376 unique spreadsheets, contained within a public web archive of over 26.83 billion pages. Obtained using nearly 60,000 hours of computation, the resulting corpus exhibits several useful properties over prior spreadsheet corpora, including reproducibility and extendability. Our corpus is unencumbered by any license agreements, available to all, and intended for wide usage by end-user software engineering researchers. In this paper, we detail the data and the spreadsheet extraction process, describe the data schema, and discuss the trade-offs of Fuse with other corpora.},
}

% Conference paper (RE 2014). Author given names expanded to match the other entries in this file.
@inproceedings{riaz_king_slankas_williams_2014,
  author       = {Riaz, Maria and King, Jason and Slankas, John and Williams, Laurie},
  title        = {Hidden in plain sight: Automatically identifying security requirements from natural language artifacts},
  booktitle    = {2014 {IEEE} 22nd International Requirements Engineering Conference ({RE})},
  pages        = {183--192},
  year         = {2014},
  doi          = {10.1109/re.2014.6912260},
  abstractNote = {Natural language artifacts, such as requirements specifications, often explicitly state the security requirements for software systems. However, these artifacts may also imply additional security requirements that developers may overlook but should consider to strengthen the overall security of the system. The goal of this research is to aid requirements engineers in producing a more comprehensive and classified set of security requirements by (1) automatically identifying security-relevant sentences in natural language requirements artifacts, and (2) providing context-specific security requirements templates to help translate the security-relevant sentences into functional security requirements. Using machine learning techniques, we have developed a tool-assisted process that takes as input a set of natural language artifacts. Our process automatically identifies security-relevant sentences in the artifacts and classifies them according to the security objectives, either explicitly stated or implied by the sentences. We classified 10,963 sentences in six different documents from healthcare domain and extracted corresponding security objectives. Our manual analysis showed that 46% of the sentences were security-relevant. Of these, 28% explicitly mention security while 72% of the sentences are functional requirements with security implications. Using our tool, we correctly predict and classify 82% of the security objectives for all the sentences (precision). We identify 79% of all security objectives implied by the sentences within the documents (recall). Based on our analysis, we develop context-specific templates that can be instantiated into a set of functional security requirements by filling in key information from security-relevant sentences.},
}

% Conference paper (SocialCom 2013); the venue belongs in booktitle, not journal.
% This entry keeps the original key so existing citations continue to resolve.
@inproceedings{slankas_williams_2013,
  author       = {Slankas, John and Williams, Laurie},
  title        = {Access Control Policy Extraction from Unconstrained Natural Language Text},
  booktitle    = {2013 {ASE}/{IEEE} International Conference on Social Computing ({SocialCom})},
  pages        = {435--440},
  year         = {2013},
  doi          = {10.1109/socialcom.2013.68},
  abstractNote = {While access control mechanisms have existed in computer systems since the 1960s, modern system developers often fail to ensure appropriate mechanisms are implemented within particular systems. Such failures allow for individuals, both benign and malicious, to view and manipulate information that they should not otherwise be able to access. The goal of our research is to help developers improve security by extracting the access control policies implicitly and explicitly defined in natural language project artifacts. Developers can then verify and implement the extracted access control policies within a system. We propose a machine-learning based process to parse existing, unaltered natural language documents, such as requirement or technical specifications to extract the relevant subjects, actions, and resources for an access control policy. To evaluate our approach, we analyzed a public requirements specification. We had a precision of 0.87 with a recall of 0.91 in classifying sentences as access control or not. Through a bootstrapping process utilizing dependency graphs, we correctly identified the subjects, actions, and objects elements of the access control policies with a precision of 0.46 and a recall of 0.54.},
}

% Workshop paper (NaturaLiSE 2013). The key previously duplicated slankas_williams_2013,
% which made this entry a "repeated entry" that was dropped; the "b" suffix makes it citable.
@inproceedings{slankas_williams_2013b,
  author       = {Slankas, John and Williams, Laurie},
  title        = {Automated extraction of non-functional requirements in available documentation},
  booktitle    = {2013 1st International Workshop on Natural Language Analysis in Software Engineering ({NaturaLiSE})},
  pages        = {9--16},
  year         = {2013},
  doi          = {10.1109/naturalise.2013.6611715},
  abstractNote = {While all systems have non-functional requirements (NFRs), they may not be explicitly stated in a formal requirements specification. Furthermore, NFRs may also be externally imposed via government regulations or industry standards. As some NFRs represent emergent system proprieties, those NFRs require appropriate analysis and design efforts to ensure they are met. When the specified NFRs are not met, projects incur costly re-work to correct the issues. The goal of our research is to aid analysts in more effectively extracting relevant non-functional requirements in available unconstrained natural language documents through automated natural language processing. Specifically, we examine which document types (data use agreements, install manuals, regulations, request for proposals, requirements specifications, and user manuals) contain NFRs categorized to 14 NFR categories (e.g. capacity, reliability, and security). We measure how effectively we can identify and classify NFR statements within these documents. In each of the documents evaluated, we found NFRs present. Using a word vector representation of the NFRs, a support vector machine algorithm performed twice as effectively compared to the same input to a multinomial naïve Bayes classifier. Our k-nearest neighbor classifier with a unique distance metric had an F1 measure of 0.54, outperforming in our experiments the optimal naïve Bayes classifier which had a F1 measure of 0.32. We also found that stop word lists beyond common determiners had no minimal performance effect.},
}

% Conference paper (ICSE 2013).
@inproceedings{slankas_2013,
  author       = {Slankas, John},
  title        = {Implementing database access control policy from unconstrained natural language text},
  booktitle    = {Proceedings of the 35th International Conference on Software Engineering ({ICSE} 2013)},
  pages        = {1357--1360},
  year         = {2013},
  doi          = {10.1109/icse.2013.6606716},
  abstractNote = {Although software can and does implement access control at the application layer, failure to enforce data access at the data layer often allows uncontrolled data access when individuals bypass application controls. The goal of this research is to improve security and compliance by ensuring access controls rules explicitly and implicitly defined within unconstrained natural language texts are appropriately enforced within a system's relational database. Access control implemented in both the application and data layers strongly supports a defense in depth strategy. We propose a tool-based process to 1) parse existing, unaltered natural language documents; 2) classify whether or not a statement implies access control and whether or not the statement implies database design; and, as appropriate, 3) extract policy elements; 4) extract database design; 5) map data objects found in the text to a database schema; and 6) automatically generate the necessary SQL commands to enable the database to enforce access control. Our initial studies of the first three steps indicate that we can effectively identify access control sentences and extract the relevant policy elements.},
}