% Bibliography of infrastructure-as-code / DevOps publications.
%
% Conventions used in this file:
%   * one field per line, lowercase entry types and field names;
%   * journal papers are article entries, conference and workshop papers are
%     inproceedings entries with the proceedings name in booktitle;
%   * page ranges use a double hyphen, months use the unquoted three-letter
%     macros (jan, feb, ...), ISSNs are bare values;
%   * proper nouns and acronyms are brace-protected so that styles which
%     sentence-case titles keep their capitalisation;
%   * abstract is informational only and is ignored by the standard styles.
%
% Key notes:
%   * rahman_2018 was previously used by two different works. The first
%     occurrence (ICSE-Companion doctoral paper) keeps the key; the MSR
%     mining-challenge paper is now rahman_2018b.
%   * ur_rahman_helms_williams_parnin_2015 previously contained a space in
%     its key, which is not a valid citation key.
%   * rahman_mandavi-hezaveh_williams_2019 keeps its historical key even
%     though the author field now carries the corrected surname spelling.

@article{rahman_williams_2021,
  author   = {Rahman, Akond and Williams, Laurie},
  title    = {Different Kind of Smells: Security Smells in Infrastructure as Code Scripts},
  journal  = {{IEEE} Security \& Privacy},
  volume   = {19},
  number   = {3},
  pages    = {33--41},
  year     = {2021},
  issn     = {1558-4046},
  doi      = {10.1109/MSEC.2021.3065190},
  abstract = {In this article, we summarize our recent research findings related to infrastructure as code (IaC) scripts, where we have identified 67,801 occurrences of security smells that include 9,175 hard-coded passwords. We hope our work will facilitate awareness among practitioners who use IaC.},
}

@article{rahman_rahman_parnin_williams_2021,
  author   = {Rahman, Akond and Rahman, Md Rayhanur and Parnin, Chris and Williams, Laurie},
  title    = {Security Smells in {Ansible} and {Chef} Scripts: A Replication Study},
  journal  = {{ACM} Transactions on Software Engineering and Methodology},
  volume   = {30},
  number   = {1},
  year     = {2021},
  month    = jan,
  issn     = {1557-7392},
  doi      = {10.1145/3408897},
  abstract = {Context: Security smells are recurring coding patterns that are indicative of security weakness and require further inspection. As infrastructure as code (IaC) scripts, such as Ansible and Chef scripts, are used to provision cloud-based servers and systems at scale, security smells in IaC scripts could be used to enable malicious users to exploit vulnerabilities in the provisioned systems. Goal: The goal of this article is to help practitioners avoid insecure coding practices while developing infrastructure as code scripts through an empirical study of security smells in Ansible and Chef scripts. Methodology: We conduct a replication study where we apply qualitative analysis with 1,956 IaC scripts to identify security smells for IaC scripts written in two languages: Ansible and Chef. We construct a static analysis tool called Security Linter for Ansible and Chef scripts (SLAC) to automatically identify security smells in 50,323 scripts collected from 813 open source software repositories. We also submit bug reports for 1,000 randomly selected smell occurrences. Results: We identify two security smells not reported in prior work: missing default in case statement and no integrity check. By applying SLAC we identify 46,600 occurrences of security smells that include 7,849 hard-coded passwords. We observe agreement for 65 of the responded 94 bug reports, which suggests the relevance of security smells for Ansible and Chef scripts amongst practitioners. Conclusion: We observe security smells to be prevalent in Ansible and Chef scripts, similarly to that of the Puppet scripts. We recommend practitioners to rigorously inspect the presence of the identified security smells in Ansible and Chef scripts using (i) code review, and (ii) static analysis tools.},
}

@inproceedings{rahman_farhana_parnin_williams_2020,
  author    = {Rahman, Akond and Farhana, Effat and Parnin, Chris and Williams, Laurie},
  title     = {Gang of Eight: A Defect Taxonomy for Infrastructure as Code Scripts},
  booktitle = {2020 {ACM/IEEE} 42nd International Conference on Software Engineering ({ICSE} 2020)},
  pages     = {752--764},
  year      = {2020},
  issn      = {0270-5257},
  doi       = {10.1145/3377811.3380409},
  abstract  = {Defects in infrastructure as code (IaC) scripts can have serious consequences, for example, creating large-scale system outages. A taxonomy of IaC defects can be useful for understanding the nature of defects, and identifying activities needed to fix and prevent defects in IaC scripts. The goal of this paper is to help practitioners improve the quality of infrastructure as code (IaC) scripts by developing a defect taxonomy for IaC scripts through qualitative analysis. We develop a taxonomy of IaC defects by applying qualitative analysis on 1,448 defect-related commits collected from open source software (OSS) repositories of the Openstack organization. We conduct a survey with 66 practitioners to assess if they agree with the identified defect categories included in our taxonomy. We quantify the frequency of identified defect categories by analyzing 80,425 commits collected from 291 OSS repositories spanning across 2005 to 2019. Our defect taxonomy for IaC consists of eight categories, including a category specific to IaC called idempotency (i.e., defects that lead to incorrect system provisioning when the same IaC script is executed multiple times). We observe the surveyed 66 practitioners to agree most with idempotency. The most frequent defect category is configuration data i.e., providing erroneous configuration data in IaC scripts. Our taxonomy and the quantified frequency of the defect categories may help in advancing the science of IaC script quality.},
}

@article{rahman_farhana_williams_2020,
  author   = {Rahman, Akond and Farhana, Effat and Williams, Laurie},
  title    = {The `as code' activities: development anti-patterns for infrastructure as code},
  journal  = {Empirical Software Engineering},
  volume   = {25},
  number   = {5},
  pages    = {3430--3467},
  year     = {2020},
  month    = sep,
  issn     = {1573-7616},
  doi      = {10.1007/s10664-020-09841-8},
  abstract = {The `as code' suffix in infrastructure as code (IaC) refers to applying software engineering activities, such as version control, to maintain IaC scripts. Without the application of these activities, defects that can have serious consequences may be introduced in IaC scripts. A systematic investigation of the development anti-patterns for IaC scripts can guide practitioners in identifying activities to avoid defects in IaC scripts. Development anti-patterns are recurring development activities that relate with defective IaC scripts. The goal of this paper is to help practitioners improve the quality of infrastructure as code (IaC) scripts by identifying development activities that relate with defective IaC scripts. We identify development anti-patterns by adopting a mixed-methods approach, where we apply quantitative analysis with 2,138 open source IaC scripts and conduct a survey with 51 practitioners. We observe five development activities to be related with defective IaC scripts from our quantitative analysis. We identify five development anti-patterns namely, `boss is not around', `many cooks spoil', `minors are spoiler', `silos', and `unfocused contribution'. Our identified development anti-patterns suggest the importance of `as code' activities in IaC because these activities are related to quality of IaC scripts.},
}

@article{rahman_mandavi-hezaveh_williams_2019,
  author   = {Rahman, Akond and Mahdavi-Hezaveh, Rezvan and Williams, Laurie},
  title    = {A systematic mapping study of infrastructure as code research},
  journal  = {Information and Software Technology},
  volume   = {108},
  pages    = {65--77},
  year     = {2019},
  month    = apr,
  issn     = {1873-6025},
  doi      = {10.1016/j.infsof.2018.12.004},
  abstract = {Context: Infrastructure as code (IaC) is the practice to automatically configure system dependencies and to provision local and remote instances. Practitioners consider IaC as a fundamental pillar to implement DevOps practices, which helps them to rapidly deliver software and services to end-users. Information technology (IT) organizations, such as Github, Mozilla, Facebook, Google and Netflix have adopted IaC. A systematic mapping study on existing IaC research can help researchers to identify potential research areas related to IaC, for example, the areas of defects and security flaws that may occur in IaC scripts. Objective: The objective of this paper is to help researchers identify research areas related to infrastructure as code (IaC) by conducting a systematic mapping study of IaC-related research. Methodology: We conduct our research study by searching six scholar databases. We collect a set of 33,887 publications by using seven search strings. By systematically applying inclusion and exclusion criteria, we identify 31 publications related to IaC. We identify topics addressed in these publications by applying qualitative analysis. Results: We identify four topics studied in IaC-related publications: (i) framework/tool for infrastructure as code; (ii) use of infrastructure as code; (iii) empirical study related to infrastructure as code; and (iv) testing in infrastructure as code. According to our analysis, 52\% of the studied 31 publications propose a framework or tool to implement the practice of IaC or extend the functionality of an existing IaC tool. Conclusion: As defects and security flaws can have serious consequences for the deployment and development environments in DevOps, along with other topics, we observe the need for research studies that will study defects and security flaws for IaC.},
}

@inproceedings{sarpatwar_ganapavarapu_shanmugam_rahman_vaculin_2019,
  author    = {Sarpatwar, Kanthi and Ganapavarapu, Venkata Sitaramagiridharganesh and Shanmugam, Karthikeyan and Rahman, Akond and Vaculin, Roman},
  title     = {Blockchain Enabled {AI} Marketplace: The Price You Pay For Trust},
  booktitle = {2019 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition Workshops ({CVPRW} 2019)},
  pages     = {2857--2866},
  year      = {2019},
  issn      = {2160-7508},
  doi       = {10.1109/CVPRW.2019.00345},
  abstract  = {There has been a considerable amount of interest in exploring blockchain technologies for enabling marketplaces of different kinds. In this work, we provide a blockchain implementation that enables an ``AI marketplace'': a platform where consumers and data providers can transact data and/or models and derive value. Preserving privacy and trust during these transactions is a paramount concern. As an enabling use case, we consider a transfer learning setting. In this setting, a consumer entity wants to acquire a large training set, from different private data providers, that matches a small validation dataset provided by the consumer. Data providers expect fair value for their contribution and the consumer also wants to maximize its benefit. We implement a distributed protocol on a blockchain that provides guarantees on privacy and consumer's benefit. We also demonstrate that our blockchain implementation plays a crucial role in addressing the issue of fair value attribution and privacy in a trustable way. We consider three different designs for a blockchain implementation that trades off trust requirements on different entities and the overhead in terms of time taken for completion of the task. The first design provides no trust guarantees. The second one guarantees trust with respect to other participants if the platform is trustworthy. The third one guarantees complete trust with no requirements. Our experiments show that the performance in the second and third cases, with partial/complete trust guarantees, degrade by roughly 2x and 5x respectively, compared to the baseline with no trust guarantees.},
}

@inproceedings{rahman_rahman_williams_2019,
  author    = {Rahman, Md Rayhanur and Rahman, Akond and Williams, Laurie},
  title     = {Share, But Be Aware: Security Smells in {Python} Gists},
  booktitle = {2019 {IEEE} International Conference on Software Maintenance and Evolution ({ICSME} 2019)},
  pages     = {536--540},
  year      = {2019},
  issn      = {1063-6773},
  doi       = {10.1109/ICSME.2019.00087},
  abstract  = {Github Gist is a service provided by Github which is used by developers to share code snippets. While sharing, developers may inadvertently introduce security smells in code snippets as well, such as hard-coded passwords. Security smells are recurrent coding patterns that are indicative of security weaknesses, which could potentially lead to security breaches. The goal of this paper is to help software practitioners avoid insecure coding practices through an empirical study of security smells in publicly-available GitHub Gists. Through static analysis, we found 13 types of security smells with 4,403 occurrences in 5,822 publicly-available Python Gists. 1,817 of those Gists, which is around 31\%, have at least one security smell including 689 instances of hard-coded secrets. We also found no significance relation between the presence of these security smells and the reputation of the Gist author. Based on our findings, we advocate for increased awareness and rigorous code review efforts related to software security for Github Gists so that propagation of insecure coding practices are mitigated.},
}

@article{rahman_williams_2019,
  author   = {Rahman, Akond and Williams, Laurie},
  title    = {Source code properties of defective infrastructure as code scripts},
  journal  = {Information and Software Technology},
  volume   = {112},
  pages    = {148--163},
  year     = {2019},
  month    = aug,
  issn     = {1873-6025},
  doi      = {10.1016/j.infsof.2019.04.013},
  abstract = {Context: In continuous deployment, software and services are rapidly deployed to end-users using an automated deployment pipeline. Defects in infrastructure as code (IaC) scripts can hinder the reliability of the automated deployment pipeline. We hypothesize that certain properties of IaC source code such as lines of code and hard-coded strings used as configuration values, show correlation with defective IaC scripts. Objective: The objective of this paper is to help practitioners in increasing the quality of infrastructure as code (IaC) scripts through an empirical study that identifies source code properties of defective IaC scripts. Methodology: We apply qualitative analysis on defect-related commits mined from open source software repositories to identify source code properties that correlate with defective IaC scripts. Next, we survey practitioners to assess the practitioner's agreement level with the identified properties. We also construct defect prediction models using the identified properties for 2,439 scripts collected from four datasets. Results: We identify 10 source code properties that correlate with defective IaC scripts. Of the identified 10 properties we observe lines of code and hard-coded string to show the strongest correlation with defective IaC scripts. Hard-coded string is the property of specifying configuration value as hard-coded string. According to our survey analysis, majority of the practitioners show agreement for two properties: include, the property of executing external modules or scripts, and hard-coded string. Using the identified properties, our constructed defect prediction models show a precision of 0.70--0.78, and a recall of 0.54--0.67.},
}

@inproceedings{rahman_parnin_williams_2019,
  author    = {Rahman, Akond and Parnin, Chris and Williams, Laurie},
  title     = {The Seven Sins: Security Smells in Infrastructure as Code Scripts},
  booktitle = {2019 {IEEE/ACM} 41st International Conference on Software Engineering ({ICSE} 2019)},
  pages     = {164--175},
  year      = {2019},
  issn      = {0270-5257},
  doi       = {10.1109/ICSE.2019.00033},
  abstract  = {Practitioners use infrastructure as code (IaC) scripts to provision servers and development environments. While developing IaC scripts, practitioners may inadvertently introduce security smells. Security smells are recurring coding patterns that are indicative of security weakness and can potentially lead to security breaches. The goal of this paper is to help practitioners avoid insecure coding practices while developing infrastructure as code (IaC) scripts through an empirical study of security smells in IaC scripts. We apply qualitative analysis on 1,726 IaC scripts to identify seven security smells. Next, we implement and validate a static analysis tool called Security Linter for Infrastructure as Code scripts (SLIC) to identify the occurrence of each smell in 15,232 IaC scripts collected from 293 open source repositories. We identify 21,201 occurrences of security smells that include 1,326 occurrences of hard-coded passwords. We submitted bug reports for 1,000 randomly-selected security smell occurrences. We obtain 212 responses to these bug reports, of which 148 occurrences were accepted by the development teams to be fixed. We observe security smells can have a long lifetime, e.g., a hard-coded secret can persist for as long as 98 months, with a median lifetime of 20 months.},
}

@inproceedings{rahman_2018,
  author    = {Rahman, Akond},
  title     = {Characteristics of Defective Infrastructure as Code Scripts in {DevOps}},
  booktitle = {Proceedings 2018 {IEEE/ACM} 40th International Conference on Software Engineering -- Companion ({ICSE-Companion})},
  pages     = {476--479},
  year      = {2018},
  issn      = {2574-1926},
  doi       = {10.1145/3183440.3183452},
  abstract  = {Defects in infrastructure as code (IaC) scripts can have serious consequences for organizations who adopt DevOps. By identifying which characteristics of IaC scripts correlate with defects, we can identify anti-patterns, and help software practitioners make informed decisions on better development and maintenance of IaC scripts, and increase quality of IaC scripts. The goal of this paper is to help practitioners increase the quality of IaC scripts by identifying characteristics of IaC scripts and IaC development process that correlate with defects, and violate security and privacy objectives. We focus on characteristics of IaC scripts and IaC development that (i) correlate with IaC defects, and (ii) violate security and privacy-related objectives namely, confidentiality, availability, and integrity. For our initial studies, we mined open source version control systems from three organizations: Mozilla, Openstack, and Wikimedia, to identify the defect-related characteristics and conduct our case studies. From our empirical analysis, we identify (i) 14 IaC code and four churn characteristics that correlate with defects; and (ii) 12 process characteristics such as, frequency of changes, and ownership of IaC scripts that correlate with defects. We propose the following studies: (i) identify structural characteristics that correlate with defects; (ii) with respect to prediction performance, compare which characteristics of IaC scripts are more correlated with defects; and (iii) identify characteristics that violate security and privacy objectives.},
}

@inproceedings{rahman_agrawal_krishna_sobran_2018,
  author    = {Rahman, Akond and Agrawal, Amritanshu and Krishna, Rahul and Sobran, Alexander},
  title     = {Characterizing the Influence of Continuous Integration},
  booktitle = {Proceedings of the 4th {ACM SIGSOFT} International Workshop on Software Analytics ({SWAN}'18)},
  pages     = {8--14},
  year      = {2018},
  doi       = {10.1145/3278142.3278149},
  abstract  = {Continuous integration (CI) tools integrate code changes by automatically compiling, building, and executing test cases upon submission of code changes. Use of CI tools is getting increasingly popular, yet how proprietary projects reap the benefits of CI remains unknown. To investigate the influence of CI on software development, we analyze 150 open source software (OSS) projects, and 123 proprietary projects. For OSS projects, we observe the expected benefits after CI adoption, e.g., improvements in bug and issue resolution. However, for the proprietary projects, we cannot make similar observations. Our findings indicate that only adoption of CI might not be enough to the improve software development process. CI can be effective for software development if practitioners use CI's feedback mechanism efficiently, by applying the practice of making frequent commits. For our set of proprietary projects we observe practitioners commit less frequently, and hence not use CI effectively for obtaining feedback on the submitted code changes. Based on our findings we recommend industry practitioners to adopt the best practices of CI to reap the benefits of CI tools for example, making frequent commits.},
}

@inproceedings{rahman_2018b,
  author    = {Rahman, Akond},
  title     = {Comprehension Effort and Programming Activities: Related? Or Not Related?},
  booktitle = {2018 {IEEE/ACM} 15th International Conference on Mining Software Repositories ({MSR})},
  pages     = {66--69},
  year      = {2018},
  issn      = {2160-1852},
  doi       = {10.1145/3196398.3196470},
  abstract  = {Researchers have observed programmers to allocate considerable amount of effort in program comprehension. But, how does program comprehension effort relate with programming activities? We answer this question by conducting an empirical study using the MSR 2018 Mining Challenge Dataset. We quantify programmers' comprehension effort, and investigate the relationship between program comprehension effort and four programming activities: navigating, editing, building projects, and debugging. We observe when programmers are involved in high comprehension effort they navigate and make edits at a significantly slower rate. However, we do not observe any significant differences in programmers' build and debugging behavior, when programmers are involved in high comprehension effort. Our findings suggest that the relationship between program comprehension effort and programming activities is nuanced, as not all programming activities associate with program comprehension effort.},
}

@inproceedings{rahman_stallings_williams_2018,
  author    = {Rahman, Akond and Stallings, Jonathan and Williams, Laurie},
  title     = {Poster: Defect Prediction Metrics for Infrastructure as Code Scripts in {DevOps}},
  booktitle = {Proceedings 2018 {IEEE/ACM} 40th International Conference on Software Engineering -- Companion ({ICSE-Companion})},
  pages     = {414--415},
  year      = {2018},
  issn      = {2574-1926},
  doi       = {10.1145/3183440.3195034},
  abstract  = {Use of infrastructure as code (IaC) scripts helps software teams manage their configuration and infrastructure automatically. Information technology (IT) organizations use IaC scripts to create and manage automated deployment pipelines to deliver services rapidly. IaC scripts can be defective, resulting in dire consequences, such as creating wide-scale service outages for end-users. Prediction of defective IaC scripts can help teams to mitigate defects in these scripts by prioritizing their inspection efforts. The goal of this paper is to help software practitioners in prioritizing their inspection efforts for infrastructure as code (IaC) scripts by proposing defect prediction model-related metrics. IaC scripts use domain specific languages (DSL) that are fundamentally different from object-oriented programming (OOP) languages. Hence, the OOP-based metrics that researchers used in defect prediction might not be applicable for IaC scripts. We apply Constructivist Grounded Theory (CGT) on defect-related commits mined from version control systems to identify metrics suitable for IaC scripts. By applying CGT, we identify 18 metrics. Of these metrics, 13 are related to IaC, for example, count of string occurrences in a script. Four of the identified metrics are related to churn, and one metric is lines of code.},
}

@inproceedings{rahman_partho_morrison_williams_2018,
  author    = {Rahman, Akond and Partho, Asif and Morrison, Patrick and Williams, Laurie},
  title     = {What Questions Do Programmers Ask About Configuration as Code?},
  booktitle = {Proceedings 2018 {IEEE/ACM} 4th International Workshop on Rapid Continuous Software Engineering ({RCOSE})},
  pages     = {16--22},
  year      = {2018},
  doi       = {10.1145/3194760.3194769},
  abstract  = {Configuration as code (CaC) tools, such as Ansible and Puppet, help software teams to implement continuous deployment and deploy software changes rapidly. CaC tools are growing in popularity, yet what challenges programmers encounter about CaC tools, have not been characterized. A systematic investigation on what questions are asked by programmers, can help us identify potential technical challenges about CaC, and can aid in successful use of CaC tools. The goal of this paper is to help current and potential configuration as code (CaC) adoptees in identifying the challenges related to CaC through an analysis of questions asked by programmers on a major question and answer website. We extract 2,758 Puppet-related questions asked by programmers from January 2010 to December 2016, posted on Stack Overflow. We apply qualitative analysis to identify the questions programmers ask about Puppet. We also investigate the trends in questions with unsatisfactory answers, and changes in question categories over time. From our empirical study, we synthesize 16 major categories of questions. The three most common question categories are: (i) syntax errors, (ii) provisioning instances; and (iii) assessing Puppet's feasibility to accomplish certain tasks. Three categories of questions that yield the most unsatisfactory answers are (i) installation, (ii) security, and (iii) data separation.},
}

@inproceedings{rahman_williams_2016,
  author    = {Rahman, Akond Ashfaque Ur and Williams, Laurie},
  title     = {Software Security in {DevOps}: Synthesizing Practitioners' Perceptions and Practices},
  booktitle = {International Workshop on Continuous Software Evolution and Delivery, {CSED} 2016},
  pages     = {70--76},
  year      = {2016},
  doi       = {10.1145/2896941.2896946},
  abstract  = {In organizations that use DevOps practices, software changes can be deployed as fast as 500 times or more per day. Without adequate involvement of the security team, rapidly deployed software changes are more likely to contain vulnerabilities due to lack of adequate reviews. The goal of this paper is to aid software practitioners in integrating security and DevOps by summarizing experiences in utilizing security practices in a DevOps environment. We analyzed a selected set of Internet artifacts and surveyed representatives of nine organizations that are using DevOps to systematically explore experiences in utilizing security practices. We observe that the majority of the software practitioners have expressed the potential of common DevOps activities, such as automated monitoring, to improve the security of a system. Furthermore, organizations that integrate DevOps and security utilize additional security activities, such as security requirements analysis and performing security configurations. Additionally, these teams also have established collaboration between the security team and the development and operations teams.},
}

@inproceedings{xie_enck_2016,
  author    = {Xie, T. and Enck, W.},
  title     = {Tutorial: text analytics for security},
  booktitle = {Symposium and Bootcamp on the Science of Security},
  pages     = {124--125},
  year      = {2016},
}

@inproceedings{ur_rahman_helms_williams_parnin_2015,
  author    = {Rahman, Akond Ashfaque Ur and Helms, Eric and Williams, Laurie and Parnin, Chris},
  title     = {Synthesizing Continuous Deployment Practices Used in Software Development},
  booktitle = {2015 Agile Conference},
  pages     = {1--10},
  year      = {2015},
  doi       = {10.1109/agile.2015.12},
  abstract  = {Continuous deployment speeds up the process of existing agile methods, such as Scrum, and Extreme Programming (XP) through the automatic deployment of software changes to end-users upon passing of automated tests. Continuous deployment has become an emerging software engineering process amongst numerous software companies, such as Facebook, Github, Netflix, and Rally Software. A systematic analysis of software practices used in continuous deployment can facilitate a better understanding of continuous deployment as a software engineering process. Such analysis can also help software practitioners in having a shared vocabulary of practices and in choosing the software practices that they can use to implement continuous deployment. The goal of this paper is to aid software practitioners in implementing continuous deployment through a systematic analysis of software practices that are used by software companies. We studied the continuous deployment practices of 19 software companies by performing a qualitative analysis of Internet artifacts and by conducting follow-up inquiries. In total, we found 11 software practices that are used by 19 software companies. We also found that in terms of use, eight of the 11 software practices are common across 14 software companies. We observe that continuous deployment necessitates the consistent use of sound software engineering practices such as automated testing, automated deployment, and code review.},
}