@article{massacci_papotti_paramitha_2024, title={Addressing combinatorial experiments and scarcity of subjects by provably orthogonal and crossover experimental designs}, url={https://doi.org/10.1016/j.jss.2024.111990}, DOI={10.1016/j.jss.2024.111990}, abstractNote={Experimentation in Software and Security Engineering is a common research practice, in particular with human subjects. The combinatorial nature of software configurations and the difficulty of recruiting experienced subjects or running complex and expensive experiments make full factorial experiments unfeasible for obtaining statistically significant results. We provide comprehensive alternative Designs of Experiments (DoE) based on orthogonal designs or crossover designs that provably meet desired requirements such as balanced pair-wise configurations or balanced ordering of scenarios to mitigate bias or learning effects. We also discuss and formalize the statistical implications of these design choices, in particular for crossover designs. We made available the algorithmic construction of the designs for ℓ=2,3,4,5 levels and arbitrary K factors and illustrated their use with examples from security and software engineering research.}, journal={Journal of Systems and Software}, author={Massacci, Fabio and Papotti, Aurora and Paramitha, Ranindya}, year={2024}, month={May} }
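A minimal illustration of the balanced-ordering requirement formalized in the entry above: for an even number of scenarios, the classic Williams Latin square schedules every scenario once per period and every ordered pair of adjacent scenarios exactly once, mitigating learning and carryover effects. The sketch below is the standard textbook construction, not necessarily the algorithm published by Massacci et al.

# Sketch: Williams design for an even number n of scenarios. Rows are subject
# groups, columns are periods, cell values are scenario indices.
from collections import Counter

def williams_square(n):
    assert n % 2 == 0, "this classic construction assumes an even n"
    base = [0]
    for k in range(1, n // 2 + 1):
        base.append(k)
        if k != n - k:
            base.append(n - k)  # interleave 1, n-1, 2, n-2, ... to balance carryover
    return [[(v + i) % n for v in base] for i in range(n)]

def carryover_balanced(square):
    """Check that every ordered pair of adjacent scenarios occurs exactly once."""
    pairs = Counter((row[j], row[j + 1]) for row in square for j in range(len(row) - 1))
    n = len(square)
    return len(pairs) == n * (n - 1) and all(c == 1 for c in pairs.values())

square = williams_square(4)
print(square)                      # [[0, 1, 3, 2], [1, 2, 0, 3], [2, 3, 1, 0], [3, 0, 2, 1]]
print(carryover_balanced(square))  # True: each of the 12 ordered adjacent pairs occurs once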
@inproceedings{scalco_paramitha_2024, title={Hash4Patch: A Lightweight Low False Positive Tool for Finding Vulnerability Patch Commits}, url={http://dx.doi.org/10.1145/3643991.3644871}, DOI={10.1145/3643991.3644871}, booktitle={Proceedings of the 21st International Conference on Mining Software Repositories}, publisher={ACM}, author={Scalco, Simone and Paramitha, Ranindya}, year={2024}, month={Apr}, pages={733–737} }
@article{sabetta_ponta_lozoya_bezzi_sacchetti_greco_balogh_hegedűs_ferenc_paramitha_etal_2024, title={Known Vulnerabilities of Open Source Projects: Where Are the Fixes?}, url={https://doi.org/10.1109/MSEC.2023.3343836}, DOI={10.1109/MSEC.2023.3343836}, abstractNote={Every day, developers have the daunting task of tracing vulnerabilities back in a morass of commits. In this article, we report on the experience with Prospector, an industrial open source tool that supports developers in this task.}, journal={IEEE Security \& Privacy}, author={Sabetta, Antonino and Ponta, Serena Elisa and Lozoya, Rocio Cabrera and Bezzi, Michele and Sacchetti, Tommaso and Greco, Matteo and Balogh, Gergő and Hegedűs, Péter and Ferenc, Rudolf and Paramitha, Ranindya and others}, year={2024} }
@article{papotti_paramitha_massacci_2024, title={On the acceptance by code reviewers of candidate security patches suggested by Automated Program Repair tools}, url={https://doi.org/10.1007/s10664-024-10506-z}, DOI={10.1007/s10664-024-10506-z}, abstractNote={Objective: We investigated whether (possibly wrong) security patches suggested by Automated Program Repair (APR) tools for real-world projects are recognized by human reviewers. We also investigated whether knowing that a patch was produced by an allegedly specialized tool changes the decision of human reviewers. Method: We perform an experiment with n = 72 Master students in Computer Science. In the first phase, using a balanced design, we propose to human reviewers a combination of patches proposed by APR tools for different vulnerabilities and ask reviewers to adopt or reject the proposed patches. In the second phase, we tell participants that some of the proposed patches were generated by security-specialized tools (even if the tool was actually a ‘normal’ APR tool) and measure whether the human reviewers would change their decision to adopt or reject a patch. Results: It is easier to identify wrong patches than correct patches, and correct patches are not confused with partially correct patches. Also, patches from security-specialized APR tools are adopted more often than patches suggested by generic APR tools, but there is not enough evidence to verify whether ‘bogus’ security claims are distinguishable from ‘true security’ claims. Finally, the number of switches to the patches suggested by the security tool is significantly higher after the security information is revealed, irrespective of correctness. Limitations: The experiment was conducted in an academic setting and focused on a limited sample of popular APR tools and popular vulnerability types.}, journal={Empirical Software Engineering}, author={Papotti, Aurora and Paramitha, Ranindya and Massacci, Fabio}, year={2024}, month={Sep} }
@article{bui_paramitha_vu_massacci_scandariato_2023, title={APR4Vul: an empirical study of automatic program repair techniques on real-world Java vulnerabilities}, volume={29}, ISSN={1382-3256 1573-7616}, url={http://dx.doi.org/10.1007/s10664-023-10415-7}, DOI={10.1007/s10664-023-10415-7}, abstractNote={Security vulnerability fixes could be a promising research avenue for Automated Program Repair (APR) techniques. In recent years, APR tools have been thoroughly developed for fixing generic bugs. However, the area is still relatively unexplored when it comes to fixing security bugs or vulnerabilities. In this paper, we evaluate nine state-of-the-art APR tools and one vulnerability-specific repair tool. In particular, we investigate their ability to generate patches for 79 real-world Java vulnerabilities in the Vul4J dataset, as well as the level of trustworthiness of these patches. We evaluate the tools with respect to their ability to generate security patches that (i) are testable, (ii) have the positive effect of closing the vulnerability, and (iii) do not have side effects from a functional point of view. Our results show that the evaluated APR tools were able to generate testable patches for around 20% of the considered vulnerabilities. On average, nearly 73% of the testable patches indeed eliminate the vulnerabilities, but only 44% of them could actually fix the security bugs while maintaining functionality. To understand the root cause of this phenomenon, we conduct a detailed comparative study of the general bug fix patterns in Defects4J and the vulnerability fix patterns in ExtraVul (which we extend from Vul4J). Our investigation shows that, although security patches are short in terms of lines of code, they contain unique characteristics in their fix patterns compared to general bugs. For example, many security fixes require adding method calls. These method calls contain specific input validation-related keywords, such as encode, normalize, and trim.
In this regard, our study suggests that additional repair patterns should be implemented for existing APR tools to fix more types of security vulnerabilities.}, number={1}, journal={Empirical Software Engineering}, publisher={Springer Science and Business Media LLC}, author={Bui, Quang-Cuong and Paramitha, Ranindya and Vu, Duc-Ly and Massacci, Fabio and Scandariato, Riccardo}, year={2023}, month={Dec} }
@article{paramitha_massacci_2023, title={Technical leverage analysis in the Python ecosystem}, volume={28}, ISSN={1382-3256 1573-7616}, url={http://dx.doi.org/10.1007/s10664-023-10355-2}, DOI={10.1007/s10664-023-10355-2}, abstractNote={Context: Technical leverage is the ratio between dependencies (other people's code) and own code of a software package. It has been shown to be useful for characterizing the Java ecosystem, and studies on the NPM ecosystem are also available. Objective: By using this metric we aim to analyze the Python ecosystem, how it evolves, and how secure it is, as a developer would perceive it when deciding to adopt or update (or not) a library. Method: We collected a dataset of the top 600 Python packages (corresponding to 21,205 versions) and used a number of innovative approaches for its analysis, including a two-part statistical model to deal with excess zeros and a closed mathematical formulation to estimate vulnerabilities, which we confirm with bootstrapping on the actual dataset. Results: Small Python package versions have a median technical leverage of 6.9x their own code, while bigger package versions rely on dependency code for a tenth of their own (median leverage of 0.1). In terms of evolution, Python packages tend to have stable technical leverage throughout their evolution (once highly leveraged, always leveraged). On security, the chance of getting a safe package version when choosing a package is actually better than previous research has shown based on the ratio of safe package versions in the ecosystem. Conclusions: Python packages ship a lot of other people's code and tend to keep doing so. However, developers have a good chance of choosing a safe package version.}, number={6}, journal={Empirical Software Engineering}, publisher={Springer Science and Business Media LLC}, author={Paramitha, Ranindya and Massacci, Fabio}, year={2023}, month={Oct}, pages={13901–13931} }
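For quick reference, the metric defined in the entry above can be restated as a one-line formula (notation mine, following the abstract's definition of technical leverage as other people's code over own code):

\lambda = \frac{\mathrm{LOC}_{\mathrm{dependencies}}}{\mathrm{LOC}_{\mathrm{own}}}

A package version shipping, say, 6,900 lines of dependency code on 1,000 lines of its own code thus has \lambda = 6.9, the median reported above for small Python package versions.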
@inproceedings{mecenero_paramitha_pashchenko_massacci_2022, place={New York, NY, USA}, title={Lightweight Parsing and Slicing for Bug Identification in C}, url={https://iris.unitn.it/handle/11572/369758}, DOI={10.1145/3538969.3543828}, abstractNote={Program slicing has been used to semi- or fully-automatically help developers find errors and vulnerabilities in their programs. For example, Dashevskyi et al. (IEEE TSE 2018) introduced a lightweight slicer for Java that can be used for vulnerability analysis. However, a similar lightweight slicer for C/C++ is still missing. In this work we propose a comparison method for parsers, evaluate it on two commonly used parsers, and develop a lightweight slicer for C/C++ using the "better" parser from our comparison. From our evaluation, the Joern parsing method (island grammar) could parse non-standard C/C++ code, but its resulting structure may contain semantic errors that can affect subsequent analysis. ANTLR4 is faster in returning a result and, when the input is manually cleared of non-standard C/C++ code, it is more accurate than Joern. We then built our C/C++ thin slicer extension using ANTLR4 and observed that it is promising from both the precision and performance perspectives. As future work, we plan to improve the logic for processing pointers; in particular, we consider performing deeper pointer analysis.}, booktitle={Proceedings of the 17th International Conference on Availability, Reliability and Security}, publisher={Association for Computing Machinery}, author={Mecenero, Luca and Paramitha, Ranindya and Pashchenko, Ivan and Massacci, Fabio}, year={2022}, month={Jan}, pages={1–10} }
@article{papotti_paramitha_massacci_2022, place={Helsinki, Finland}, title={On the acceptance by code reviewers of candidate security patches suggested by Automated Program Repair tools}, url={https://arxiv.org/abs/2209.07211}, DOI={10.48550/ARXIV.2209.07211}, abstractNote={Background: Testing and validation of the semantic correctness of patches provided by tools for Automated Program Repair (APR) has received a lot of attention. Yet, the eventual acceptance or rejection of suggested patches for real-world projects by human patch reviewers has received limited attention. Objective: To address this issue, we plan to investigate whether (possibly incorrect) security patches suggested by APR tools are recognized by human reviewers. We also want to investigate whether knowing that a patch was produced by an allegedly specialized tool changes the decision of human reviewers. Method: In the first phase, using a balanced design, we propose to human reviewers a combination of patches proposed by APR tools for different vulnerabilities and ask reviewers to adopt or reject the proposed patches. In the second phase, we tell participants that some of the proposed patches were generated by security-specialized tools (even if the tool was actually a `normal' APR tool) and measure whether the human reviewers would change their decision to adopt or reject a patch. Limitations: The experiment will be conducted in an academic setting, and to maintain power, it will focus on a limited sample of popular APR tools and popular vulnerability types.}, journal={ESEM 2022 Registered Reports}, publisher={arXiv}, author={Papotti, Aurora and Paramitha, Ranindya and Massacci, Fabio}, year={2022} }
@inproceedings{scalco_paramitha_vu_duc_massacci_2022, place={New York, NY, USA}, title={On the feasibility of detecting injections in malicious npm packages}, url={https://iris.unitn.it/handle/11572/369757}, DOI={10.1145/3538969.3543815}, abstractNote={Open-source packages typically have their source code available on a source code repository (e.g., on GitHub), but developers prefer to use pre-built artifacts directly from the package repositories (such as npm for JavaScript). Between the source code and the distributed artifacts, there could be differences that pose security risks (e.g., attackers deploying malicious code during package installation) in the software supply chain. Existing package scanners focus on the entire artifact of a package to detect this kind of attack. These procedures are not only time-consuming but also generate many irrelevant alerts (false positives). An approach called LastPyMile by Vu et al. (ESEC/FSE’21) has been shown to be effective in detecting discrepancies and reducing false alerts when vetting Python packages on PyPI by focusing only on the differences between the source and the package. In this work, we propose to port that approach to scan JavaScript packages in the npm ecosystem. We present a preliminary evaluation of our implementation on a set of real malicious npm packages and the most popular packages. The results show that, while being 20.7x faster than the git-log approach, our approach reduces the percentage of false alerts produced by a package scanner by 69%.}, booktitle={Proceedings of the 17th International Conference on Availability, Reliability and Security}, publisher={Association for Computing Machinery}, author={Scalco, Simone and Paramitha, Ranindya and Vu Duc, Ly and Massacci, Fabio}, year={2022}, month={Jan}, pages={1151–1158} }
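The two package-vetting entries above share one core idea: scan only what differs between the source repository and the distributed artifact. Below is a minimal sketch of that idea; the directory names are hypothetical, and the real LastPyMile-style pipeline also compares against the repository history (the git-log baseline mentioned in the abstract), which this sketch omits.

# Sketch: flag artifact files whose contents never appear in the source repo.
# These "phantom" files are the only material a scanner still needs to vet,
# which is what cuts scanning time and false alerts.
import hashlib
import pathlib

def file_hashes(root):
    """Map SHA-256 digest of file contents -> relative path, for all files under root."""
    root = pathlib.Path(root)
    return {
        hashlib.sha256(p.read_bytes()).hexdigest(): str(p.relative_to(root))
        for p in root.rglob("*") if p.is_file()
    }

def phantom_files(source_repo, artifact_dir):
    """Files shipped in the artifact with no content match anywhere in the source."""
    source_digests = set(file_hashes(source_repo))
    return sorted(
        path for digest, path in file_hashes(artifact_dir).items()
        if digest not in source_digests
    )

# Hypothetical usage with a checked-out repo and an unpacked npm tarball:
# print(phantom_files("checkout/some-package", "unpacked/some-package-1.2.3"))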
@inproceedings{paramitha_asnar_2021a, title={Mining Software Repository for Security Smell Code Review}, url={http://dx.doi.org/10.1109/icodse53690.2021.9648484}, DOI={10.1109/icodse53690.2021.9648484}, abstractNote={Security smells found in source code can be a subtle sign of future security vulnerabilities. Developers can perform security code reviews to find those signs; however, this takes a lot of time for a large code base. To increase security code review efficiency, developers should focus on files that are most likely high-risk. In finding those high-risk files, mining software repositories is a promising method, as it has been used for bug localization in prior research. Our analysis of 34 CVE-related (smelly) Java files from 7 different projects found that ordinal LOC, ordinal commit count, and ordinal author count have a positive correlation with the likelihood of a file being smelly (related to known security vulnerabilities). We then built a tool that returns a list of files considered smelly, using ordinal LOC, ordinal commit count, and ordinal author count information. We applied this tool to 2 test projects and analyzed the results manually. The manual analysis showed that we could find as many as 69 smelly files containing identified security smells by manually inspecting only 185 files rather than all 4,383 files. This means we could reduce the time needed for security code review by as much as 84.63%. We also found a positive correlation between smelly files and known security vulnerabilities, which shows that mining software repositories for finding security smells has the potential to prevent security vulnerabilities in the future.}, booktitle={2021 International Conference on Data and Software Engineering (ICoDSE)}, publisher={IEEE}, author={Paramitha, Ranindya and Asnar, Yudistira Dwi Wardhana}, year={2021}, month={Nov} }
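A rough sketch of the ranking idea in the entry above: order files by the three signals the study found to correlate with smelly files (LOC, commit count, author count), then review only the top of the list. The git invocations are standard; combining the signals into a single weighted score is an illustrative assumption, since the paper works with ordinal values rather than these weights.

# Sketch: rank files by LOC, commit count, and author count so that security
# code review can focus on likely high-risk files first.
import subprocess
from collections import Counter

def git_lines(repo, *args):
    result = subprocess.run(["git", "-C", repo, *args],
                            capture_output=True, text=True, check=True)
    return result.stdout.splitlines()

def rank_files(repo):
    commits, authors = Counter(), {}
    author = ""
    # One pass over history: per file, count commits and distinct authors.
    for line in git_lines(repo, "log", "--name-only", "--pretty=format:>%ae"):
        if line.startswith(">"):
            author = line[1:]
        elif line:
            commits[line] += 1
            authors.setdefault(line, set()).add(author)
    scores = {}
    for path in git_lines(repo, "ls-files"):
        try:
            with open(f"{repo}/{path}", errors="ignore") as f:
                loc = sum(1 for _ in f)
        except OSError:
            continue
        # Weights below are arbitrary placeholders, not taken from the paper.
        scores[path] = loc + 100 * commits[path] + 500 * len(authors.get(path, ()))
    return sorted(scores, key=scores.get, reverse=True)

# Hypothetical usage: inspect the top 20 files instead of the whole code base.
# print(rank_files(".")[:20])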
@inproceedings{paramitha_asnar_2021b, title={Static Code Analysis Tool for Laravel Framework Based Web Application}, url={http://dx.doi.org/10.1109/icodse53690.2021.9648519}, DOI={10.1109/icodse53690.2021.9648519}, abstractNote={To increase and maintain web application security, developers can use several methods, one of which is static code analysis. This method can find security vulnerabilities in source code without running the program. It can also be automated using tools, which is considered more efficient than manual review. One specific method commonly used in static code analysis is taint analysis. Taint analysis usually utilizes source code modeling to prepare the code for the analysis process, which detects untrusted data flowing into security-sensitive computations. While this kind of analysis can be very helpful, static code analysis tools for Laravel-based web applications are still quite rare, despite the framework's popularity. Therefore, in this research, we want to know how static code (taint) analysis can be utilized to detect security vulnerabilities and how Laravel-based projects should be modeled to facilitate this analysis. We developed a static analysis tool that models the application's source code using an AST and a dictionary as the basis of the taint analysis. The tool first parses the route file of a Laravel project to get a list of controller files. Each file in that list is then parsed to build the source code representation before being analyzed using the taint analysis method. Experiments with this tool show that it (with taint analysis) could detect 13 security vulnerabilities in 6 Laravel-based projects, with one false negative. An ineffective sanitizer was the suspected cause of this false negative. This also shows that the proposed modeling technique can be helpful in facilitating taint analysis in Laravel-based projects. For future development and studies, this tool should be tested with more Laravel-based, and even other framework-based, web applications with a wider range of security vulnerabilities.}, booktitle={2021 International Conference on Data and Software Engineering (ICoDSE)}, publisher={IEEE}, author={Paramitha, Ranindya and Asnar, Yudistira Dwi Wardhana}, year={2021}, month={Nov} }
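To make the source-to-sink idea in the last entry concrete, here is a toy taint check. The paper's tool targets PHP/Laravel; this sketch uses Python's own ast module purely for illustration, and the SOURCES/SINKS/SANITIZERS names are invented, as is the single-pass, control-flow-free traversal.

# Toy source-to-sink taint check, illustrating the analysis style described in
# the entry above. Hypothetical source/sink/sanitizer names; no control-flow
# or alias handling, unlike a real tool.
import ast

SOURCES = {"request_input"}   # functions assumed to return untrusted data
SINKS = {"run_query"}         # security-sensitive computations (assumed)
SANITIZERS = {"escape"}       # functions assumed to neutralize taint

def call_name(node):
    """Name of the called function, if it is a plain identifier; else None."""
    if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
        return node.func.id
    return None

def is_tainted(node, tainted_vars):
    """Rough taint test: a tainted variable, or an unsanitized call reaching a source."""
    if isinstance(node, ast.Name):
        return node.id in tainted_vars
    name = call_name(node)
    if name in SANITIZERS:
        return False              # sanitized values are considered clean
    if name in SOURCES:
        return True
    if isinstance(node, ast.Call):
        return any(is_tainted(arg, tainted_vars) for arg in node.args)
    return False

def find_flows(source_code):
    """Line numbers of sink calls whose arguments are (transitively) tainted."""
    tainted, flows = set(), []
    for node in ast.walk(ast.parse(source_code)):
        if isinstance(node, ast.Assign) and isinstance(node.targets[0], ast.Name):
            if is_tainted(node.value, tainted):
                tainted.add(node.targets[0].id)
        if call_name(node) in SINKS and any(is_tainted(a, tainted) for a in node.args):
            flows.append(node.lineno)
    return flows

print(find_flows("x = request_input()\nrun_query(x)\nrun_query(escape(x))"))  # [2]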