@article{bai_sthapit_heckman_price_stolee_2023,
  title={An Experience Report on Introducing Explicit Strategies into Testing Checklists for Advanced Beginners},
  url={https://doi.org/10.1145/3587102.3588781},
  DOI={10.1145/3587102.3588781},
  abstractNote={Software testing is a critical skill for computing students, but learning and practicing testing can be challenging, particularly for beginners. A recent study suggests that a lightweight testing checklist that contains testing strategies and tutorial information could assist students in writing quality tests. However, students expressed a desire for more support in knowing how to test the code/scenario. Moreover, the potential costs and benefits of the testing checklist are not yet examined in a classroom setting. To that end, we improved the checklist by integrating explicit testing strategies to it (ETS Checklist), which provide step-by-step guidance on how to transfer semantic information from instructions to the possible testing scenarios. In this paper, we report our experiences in designing explicit strategies in unit testing, as well as adapting the ETS Checklist as optional tool support in a CS1.5 course. With the quantitative and qualitative analysis of the survey responses and lab assignment submissions generated by students, we discuss students' engagement with the ETS Checklists. Our results suggest that students who used the checklist intervention had significantly higher quality in their student-authored test code, in terms of code coverage, compared to those who did not, especially for assignments earlier in the course. We also observed students' unawareness of their need for help in writing high-quality tests.},
  journal={PROCEEDINGS OF THE 2023 CONFERENCE ON INNOVATION AND TECHNOLOGY IN COMPUTER SCIENCE EDUCATION, ITICSE 2023, VOL 1},
  author={Bai, Gina R. and Sthapit, Sandeep and Heckman, Sarah and Price, Thomas W. and Stolee, Kathryn T.},
  year={2023},
  pages={194–200}
}

@article{majumder_chakraborty_bai_stolee_menzies_2023,
  title={Fair Enough: Searching for Sufficient Measures of Fairness},
  volume={32},
  ISSN={1557-7392},
  url={https://doi.org/10.1145/3585006},
  DOI={10.1145/3585006},
  abstractNote={Testing machine learning software for ethical bias has become a pressing current concern. In response, recent research has proposed a plethora of new fairness metrics, for example, the dozens of fairness metrics in the IBM AIF360 toolkit. This raises the question: How can any fairness tool satisfy such a diverse range of goals? While we cannot completely simplify the task of fairness testing, we can certainly reduce the problem. This article shows that many of those fairness metrics effectively measure the same thing. Based on experiments using seven real-world datasets, we find that (a) 26 classification metrics can be clustered into seven groups and (b) four dataset metrics can be clustered into three groups. Further, each reduced set may actually predict different things. Hence, it is no longer necessary (or even possible) to satisfy all fairness metrics. In summary, to simplify the fairness testing problem, we recommend the following steps: (1) determine what type of fairness is desirable (and we offer a handful of such types), then (2) lookup those types in our clusters, and then (3) just test for one item per cluster.},
  number={6},
  journal={ACM TRANSACTIONS ON SOFTWARE ENGINEERING AND METHODOLOGY},
  author={Majumder, Suvodeep and Chakraborty, Joymallya and Bai, Gina R. and Stolee, Kathryn T. and Menzies, Tim},
  year={2023},
  month={Nov}
}

@article{bai_presler-marshall_price_stolee_2022,
  title={Check It Off: Exploring the Impact of a Checklist Intervention on the Quality of Student-authored Unit Tests},
  DOI={10.1145/3502718.3524799},
  abstractNote={Software testing is an essential skill for computer science students. Prior work reports that students desire support in determining what code to test and which scenarios should be tested. In response to this, we present a lightweight testing checklist that contains both tutorial information and testing strategies to guide students in what and how to test. To assess the impact of the testing checklist, we conducted an experimental, controlled A/B study with 32 undergraduate and graduate students. The study task was writing a test suite for an existing program. Students were given either the testing checklist (the experimental group) or a tutorial on a standard coverage tool with which they were already familiar (the control group). By analyzing the combination of student-written tests and survey responses, we found students with the checklist performed as well as or better than the coverage tool group, suggesting a potential positive impact of the checklist (or at minimum, a non-negative impact). This is particularly noteworthy given the control condition of the coverage tool is the state of the practice. These findings suggest that the testing tool support does not need to be sophisticated to be effective.},
  journal={PROCEEDINGS OF THE 27TH ACM CONFERENCE ON INNOVATION AND TECHNOLOGY IN COMPUTER SCIENCE EDUCATION, ITICSE 2022, VOL 1},
  author={Bai, Gina R. and Presler-Marshall, Kai and Price, Thomas W. and Stolee, Kathryn T.},
  year={2022},
  pages={276–282}
}

@article{bai_2020,
  title={Improving Students' Testing Practices},
  ISSN={0270-5257},
  DOI={10.1145/3377812.3381401},
  abstractNote={Software testing prevents and detects the introduction of faults and bugs during the process of evolving and delivering reliable software. As an important software development activity, testing has been intensively studied to measure test code quality and effectiveness, and assist professional developers and testers with automated test generation tools. In recent years, testing has been attracting educators' attention and has been integrated into some Computer Science education programs. Understanding challenges and problems faced by students can help inform educators the topics that require extra attention and practice when presenting testing concepts and techniques. In my research, I study how students implement and modify source code given unit tests, and how they perceive and perform unit testing. I propose to quantitatively measure the quality of student-written test code, and qualitatively identify the common mistakes and bad smells observed in student-written test code. We compare the performance of students and professionals, who vary in prior testing experience, to investigate the factors that lead to high-quality test code. The ultimate goal of my research is to address the challenges students encountered during test code composition and improve their testing skills with supportive tools or guidance.},
  journal={2020 ACM/IEEE 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: COMPANION PROCEEDINGS (ICSE-COMPANION 2020)},
  author={Bai, Gina R.},
  year={2020},
  pages={218–221}
}