@article{imtiaz_williams_2023, title={Are Your Dependencies Code Reviewed?: Measuring Code Review Coverage in Dependency Updates}, volume={49}, ISSN={1939-3520}, DOI={10.1109/TSE.2023.3319509}, abstractNote={As modern software extensively uses free open source packages as dependencies, developers have to regularly pull in new third-party code through frequent updates. However, without a proper review of every incoming change, vulnerable and malicious code can sneak into the codebase through these dependencies. The goal of this study is to aid developers in securely accepting dependency updates by measuring if the code changes in an update have passed through a code review process. We implement Depdive, an update audit tool for packages in the Crates.io, npm, PyPI, and RubyGems registries. Depdive first (i) identifies the files and the code changes in an update that cannot be traced back to the package's source repository, i.e., phantom artifacts; and then (ii) measures what portion of changes in the update, excluding the phantom artifacts, has passed through a code review process, i.e., code review coverage. Using Depdive, we present an empirical study across the latest ten updates of the most downloaded 1000 packages in each of the four registries. We further evaluated our results through a maintainer agreement survey. We find that phantom artifacts are not uncommon in the updates (20.1% of the analyzed updates had at least one phantom file). The phantoms can appear either due to legitimate reasons, such as in the case of programmatically generated files, or from accidental inclusion, such as in the case of files that are ignored in the repository. Regarding code review coverage (CRC), we find the updates are typically only partially code-reviewed (52.5% of the time). Further, only 9.0% of the packages had all their updates in our data set fully code-reviewed, indicating that even the most used packages can introduce non-reviewed code into the software supply chain. We also observe that updates tend to have either high CRC or low CRC, suggesting that packages at the opposite ends of the spectrum may require a separate set of treatments.}, number={11}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Imtiaz, Nasif and Williams, Laurie}, year={2023}, month={Nov}, pages={4932–4945} }
@article{imtiaz_khanom_williams_2023, title={Open or Sneaky? Fast or Slow? Light or Heavy?: Investigating Security Releases of Open Source Packages}, volume={49}, ISSN={1939-3520}, DOI={10.1109/TSE.2022.3181010}, abstractNote={Vulnerabilities in open source packages can be a security risk for the downstream client projects. When a new vulnerability is discovered, a package should quickly release a fix in a new version, referred to as a security release in this study. The security release should be well-documented and require minimal migration effort to facilitate fast adoption by the clients. However, to what extent the open source packages follow these recommendations is not known. In this paper, we study (1) the time lag between fix and release; (2) how security fixes are documented in the release notes; (3) code change characteristics (size and semantic versioning) of the release; and (4) the time lag between the release and an advisory publication for security releases, over a dataset of 4,377 security advisories across seven package ecosystems. We find that the median security release becomes available within 4 days of the corresponding fix and contains 131 lines of code (LOC) change.
However, one-fourth of the releases in our data set still came at least 20 days after the fix was made. Further, we find that 61.5% of the security releases come with a release note that documents the corresponding security fix. Still, Snyk and NVD, two popular databases, take a median of 17 days (from the release) to publish a security advisory, possibly resulting in delayed notifications to the client projects. We also find that security releases may contain breaking change(s), as 13.2% indicated backward incompatibility through semantic versioning, while 6.4% mentioned breaking change(s) in the release notes. Based on our findings, we point out areas for future work, such as private forks for security fixes and standardized practices for announcing security releases.}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Imtiaz, Nasif and Khanom, Aniqa and Williams, Laurie}, year={2023}, month={Apr}, pages={1540–1560} }
@article{rahman_imtiaz_storey_williams_2022, title={Why secret detection tools are not enough: It's not just about false positives - An industrial case study}, volume={27}, ISSN={1573-7616}, DOI={10.1007/s10664-021-10109-y}, abstractNote={Checked-in secrets in version-controlled software projects pose security risks to software and services. Secret detection tools can identify the presence of secrets in the code, commit changesets, and project version control history. As these tools can generate false positives, developers are provided with mechanisms to bypass the warnings generated by these tools. Providing this override mechanism can result in developers sometimes exposing secrets in software repositories. The goal of this article is to aid software security practitioners in understanding why secrets are checked into repositories, despite being warned by tools, through an industrial case study of the usage data of a secret detection tool and a survey of developers who bypassed the tool's alerts. In this case study, we analyzed the usage data of a checked-in secret detection tool used widely by a software company, and we surveyed developers who bypassed the warnings generated by the tool. From the case study, we found that, although developers classified 50% of the warnings as false positives, developers also bypassed the warnings due to time constraints, working with non-shipping projects, technical challenges of eliminating secrets completely from the version control history, technical debt, and perceptions that check-ins are low risk. We advocate that practitioners and researchers investigate the findings of our study further to improve secret detection tools and related development practices. We also advocate that organizations insert secondary checks, as is done by the company we studied, to capture occasions where developers incorrectly bypass secret detection tools.}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Rahman, Md Rayhanur and Imtiaz, Nasif and Storey, Margaret-Anne and Williams, Laurie}, year={2022}, month={May} }
@article{imtiaz_murphy_williams_2019, title={How Do Developers Act on Static Analysis Alerts? An Empirical Study of Coverity Usage}, ISSN={1071-9458}, DOI={10.1109/ISSRE.2019.00040}, abstractNote={Static analysis tools (SATs) often fall short of developer satisfaction despite their many benefits. An understanding of how developers in the real world act on the alerts detected by SATs can help improve the utility of these tools and determine future research directions.
The goal of this paper is to aid researchers and tool makers in improving the utility of static analysis tools through an empirical study of developer action on the alerts detected by Coverity, a state-of-the-art static analysis tool. In this paper, we analyze five open source projects as case studies (Linux, Firefox, Samba, Kodi, and Ovirt-engine) that have been actively using Coverity over a period of at least five years. We investigate the alert occurrences and developer triage of the alerts from the Coverity database; identify the alerts that were fixed through code changes (i.e., actionable) by mining the commit history of the projects; and analyze the time an alert remains in the code base (i.e., lifespan) and the complexity of code changes (i.e., fix complexity) in fixing the alert. We find that 27.4% to 49.5% (median: 36.7%) of the alerts are actionable across projects, a rate higher than previously reported. We also find that the fixes of Coverity alerts are generally low in complexity (2 to 7 lines of code changes in the affected file, median: 4). However, developers still take from 36 to 245 days (median: 96) to fix these alerts. Finally, our data suggest that severity and fix complexity may correlate with an alert's lifespan in some of the projects.}, journal={2019 IEEE 30TH INTERNATIONAL SYMPOSIUM ON SOFTWARE RELIABILITY ENGINEERING (ISSRE)}, author={Imtiaz, Nasif and Murphy, Brendan and Williams, Laurie}, year={2019}, pages={323–333} }
@article{imtiaz_middleton_chakraborty_robson_bai_murphy-hill_2019, title={Investigating the Effects of Gender Bias on GitHub}, ISSN={0270-5257}, DOI={10.1109/ICSE.2019.00079}, abstractNote={Diversity, including gender diversity, is valued by many software development organizations, yet the field remains dominated by men. One reason for this lack of diversity is gender bias. In this paper, we study the effects of that bias by using an existing framework derived from the gender studies literature. We adapt the four main effects proposed in the framework by posing hypotheses about how they might manifest on GitHub, then evaluate those hypotheses quantitatively. While our results show that the effects of gender bias are largely invisible on the GitHub platform itself, there are still signals of women concentrating their work in fewer places and being more restrained in communication than men.}, journal={2019 IEEE/ACM 41ST INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2019)}, author={Imtiaz, Nasif and Middleton, Justin and Chakraborty, Joymallya and Robson, Neill and Bai, Gina and Murphy-Hill, Emerson}, year={2019}, pages={700–711} }
@article{farhana_imtiaz_rahman_2019, title={Synthesizing Program Execution Time Discrepancies in Julia Used for Scientific Software}, ISSN={1063-6773}, DOI={10.1109/ICSME.2019.00083}, abstractNote={Scientific software is defined as software that is used to analyze data to investigate unanswered research questions in the scientific community. Developers use programming languages such as Julia to build scientific software. When programming with Julia, developers experience program execution time discrepancies, i.e., not obtaining the desired program execution time, which hinders them from efficiently completing their tasks. The goal of this paper is to help developers achieve the desired program execution time for Julia by identifying the causes of program execution time discrepancies through an empirical study of Stack Overflow posts.
We conduct an empirical study of 263 Julia-related posts collected from Stack Overflow and apply qualitative analysis to the collected posts. We identify 9 categories of program execution time discrepancies for Julia, which include discrepancies related to data structure usage, such as arrays and dictionaries. We also identify 10 causes that explain why the program execution time discrepancies happen. For example, we identify that program execution time discrepancies happen when developers unnecessarily allocate memory by using array comprehensions.}, journal={2019 IEEE INTERNATIONAL CONFERENCE ON SOFTWARE MAINTENANCE AND EVOLUTION (ICSME 2019)}, author={Farhana, Effat and Imtiaz, Nasif and Rahman, Akond}, year={2019}, pages={496–500} }