@article{bai_sthapit_heckman_price_stolee_2023, title={An Experience Report on Introducing Explicit Strategies into Testing Checklists for Advanced Beginners}, url={https://doi.org/10.1145/3587102.3588781}, DOI={10.1145/3587102.3588781}, abstractNote={Software testing is a critical skill for computing students, but learning and practicing testing can be challenging, particularly for beginners. A recent study suggests that a lightweight testing checklist that contains testing strategies and tutorial information could assist students in writing quality tests. However, students expressed a desire for more support in knowing how to test the code/scenario. Moreover, the potential costs and benefits of the testing checklist are not yet examined in a classroom setting. To that end, we improved the checklist by integrating explicit testing strategies to it (ETS Checklist), which provide step-by-step guidance on how to transfer semantic information from instructions to the possible testing scenarios. In this paper, we report our experiences in designing explicit strategies in unit testing, as well as adapting the ETS Checklist as optional tool support in a CS1.5 course. With the quantitative and qualitative analysis of the survey responses and lab assignment submissions generated by students, we discuss students' engagement with the ETS Checklists. Our results suggest that students who used the checklist intervention had significantly higher quality in their student-authored test code, in terms of code coverage, compared to those who did not, especially for assignments earlier in the course. We also observed students' unawareness of their need for help in writing high-quality tests.}, journal={PROCEEDINGS OF THE 2023 CONFERENCE ON INNOVATION AND TECHNOLOGY IN COMPUTER SCIENCE EDUCATION, ITICSE 2023, VOL 1}, author={Bai, Gina R. and Sthapit, Sandeep and Heckman, Sarah and Price, Thomas W. and Stolee, Kathryn T.}, year={2023}, pages={194–200} } @article{majumder_chakraborty_bai_stolee_menzies_2023, title={Fair Enough: Searching for Sufficient Measures of Fairness}, volume={32}, ISSN={["1557-7392"]}, url={https://doi.org/10.1145/3585006}, DOI={10.1145/3585006}, abstractNote={Testing machine learning software for ethical bias has become a pressing current concern. In response, recent research has proposed a plethora of new fairness metrics, for example, the dozens of fairness metrics in the IBM AIF360 toolkit. This raises the question: How can any fairness tool satisfy such a diverse range of goals? While we cannot completely simplify the task of fairness testing, we can certainly reduce the problem. This article shows that many of those fairness metrics effectively measure the same thing. Based on experiments using seven real-world datasets, we find that (a) 26 classification metrics can be clustered into seven groups and (b) four dataset metrics can be clustered into three groups. Further, each reduced set may actually predict different things. Hence, it is no longer necessary (or even possible) to satisfy all fairness metrics. In summary, to simplify the fairness testing problem, we recommend the following steps: (1) determine what type of fairness is desirable (and we offer a handful of such types), then (2) lookup those types in our clusters, and then (3) just test for one item per cluster.}, number={6}, journal={ACM TRANSACTIONS ON SOFTWARE ENGINEERING AND METHODOLOGY}, author={Majumder, Suvodeep and Chakraborty, Joymallya and Bai, Gina R. and Stolee, Kathryn T. and Menzies, Tim}, year={2023}, month={Nov} } @article{presler-marshall_heckman_stolee_2023, title={Improving Grading Outcomes in Software Engineering Projects Through Automated Contributions Summaries}, ISSN={["2832-756X"]}, DOI={10.1109/ICSE-SEET58685.2023.00030}, abstractNote={Teaming is a key aspect of most professional software engineering positions, and consequently, team-based learning (TBL) features heavily in many undergraduate computer science (CS) and software engineering programs. However, while TBL offers many pedagogical benefits, it is not without challenges. One such challenge is assessment, as the course teaching staff must be able to accurately identify individual students’ contributions to both encourage and reward participation. In this paper, we study improvements to grading practises in the context of a CS1.5 introductory software engineering course, where assessing individual students’ contributions to weekly lab assignments is done manually by teaching assistants (TAs). We explore the impact of presenting TAs with automated summaries of individual student contributions to their team’s GitHub repository. To do so, we propose a novel algorithm, and implement a tool based off of it, AutoVCS. We measure the impact on grading metrics in terms of grading speed, grading consistency, and TA satisfaction. We evaluate our algorithm, as implemented in AutoVCS, in a controlled experimental study on Java-based lab assignments from a recent offering of NC State University’s CS1.5 course. We find our automated summaries help TAs grade more consistently and provides students with more actionable feedback. Although TAs grade no faster using automated summaries, they nonetheless strongly prefer grading with the support of them than without. We conclude with recommendations for future work to explore improving consistency in contribution grading for student software engineering teams.}, journal={2023 IEEE/ACM 45TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING-SOFTWARE ENGINEERING EDUCATION AND TRAINING, ICSE-SEET}, author={Presler-Marshall, Kai and Heckman, Sarah and Stolee, Kathryn T.}, year={2023}, pages={259–270} } @article{bai_presler-marshall_price_stolee_2022, title={Check It Off: Exploring the Impact of a Checklist Intervention on the Quality of Student-authored Unit Tests}, DOI={10.1145/3502718.3524799}, abstractNote={Software testing is an essential skill for computer science students. Prior work reports that students desire support in determining what code to test and which scenarios should be tested. In response to this, we present a lightweight testing checklist that contains both tutorial information and testing strategies to guide students in what and how to test. To assess the impact of the testing checklist, we conducted an experimental, controlled A/B study with 32 undergraduate and graduate students. The study task was writing a test suite for an existing program. Students were given either the testing checklist (the experimental group) or a tutorial on a standard coverage tool with which they were already familiar (the control group). By analyzing the combination of student-written tests and survey responses, we found students with the checklist performed as well as or better than the coverage tool group, suggesting a potential positive impact of the checklist (or at minimum, a non-negative impact). This is particularly noteworthy given the control condition of the coverage tool is the state of the practice. These findings suggest that the testing tool support does not need to be sophisticated to be effective.}, journal={PROCEEDINGS OF THE 27TH ACM CONFERENCE ON INNOVATION AND TECHNOLOGY IN COMPUTER SCIENCE EDUCATION, ITICSE 2022, VOL 1}, author={Bai, Gina R. and Presler-Marshall, Kai and Price, Thomas W. and Stolee, Kathryn T.}, year={2022}, pages={276–282} } @article{wang_brown_jennings_stolee_2022, title={Demystifying regular expression bugs A comprehensive study on regular expression bug causes, fixes, and testing}, volume={27}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-021-10033-1}, abstractNote={Regular expressions cause string-related bugs and open security vulnerabilities for DOS attacks. However, beyond ReDoS (Regular expression Denial of Service), little is known about the extent to which regular expression issues affect software development and how these issues are addressed in practice. We conduct an empirical study of 356 regex-related bugs from merged pull requests in Apache, Mozilla, Facebook, and Google GitHub repositories. We identify and classify the nature of the regular expression problems, the fixes, and the related changes in the test code. The most important findings in this paper are as follows: 1) incorrect regular expression semantics is the dominant root cause of regular expression bugs (165/356, 46.3%). The remaining root causes are incorrect API usage (9.3%) and other code issues that require regular expression changes in the fix (29.5%), 2) fixing regular expression bugs is nontrivial as it takes more time and more lines of code to fix them compared to the general pull requests, 3) most (51%) of the regex-related pull requests do not contain test code changes. Certain regex bug types (e.g., compile error, performance issues, regex representation) are less likely to include test code changes than others, and 4) the dominant type of test code changes in regex-related pull requests is test case addition (75%). The results of this study contribute to a broader understanding of the practical problems faced by developers when using, fixing, and testing regular expressions.}, number={1}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Wang, Peipei and Brown, Chris and Jennings, Jamie A. and Stolee, Kathryn T.}, year={2022}, month={Jan} } @article{presler-marshall_heckman_stolee_2022, title={Identifying Struggling Teams in Software Engineering Courses ThroughWeekly Surveys}, DOI={10.1145/3478431.3499367}, abstractNote={Teaming is increasingly a core aspect of professional software engineering and most undergraduate computer science curricula. At NC State University, we teach communication and project-management skills explicitly through a junior-level software engineering course. However, some students may have a dysfunctional team experience that imperils their ability to learn these skills. Identifying these teams during a team project is important so the teaching staff can intervene early and hopefully alleviate the issues. We propose a weekly reflection survey to help the course teaching staff proactively identify teams that may not be on track to learn the course outcomes. The questions on the survey focus on team communication and collaboration over the previous week. We evaluate our survey on two semesters of the undergraduate software engineering course by comparing teams with poor end-of-project grades or peer evaluations against teams flagged on a weekly basis through the surveys. We find that the survey can identify most teams that later struggled on the project, typically by the half-way mark of the project, and thus may provide instructors with an actionable early-warning about struggling teams. Furthermore, a majority of students (64.4%) found the survey to be a helpful tool for keeping their team on track. Finally, we discuss future work for improving the survey and engaging with student teams.}, journal={PROCEEDINGS OF THE 53RD ACM TECHNICAL SYMPOSIUM ON COMPUTER SCIENCE EDUCATION (SIGCSE 2022), VOL 1}, author={Presler-Marshall, Kai and Heckman, Sarah and Stolee, Kathryn T.}, year={2022}, pages={126–132} } @article{mathew_stolee_2021, title={Cross-Language Code Search using Static and Dynamic Analyses}, DOI={10.1145/3468264.3468538}, abstractNote={As code search permeates most activities in software development,code-to-code search has emerged to support using code as a query and retrieving similar code in the search results. Applications include duplicate code detection for refactoring, patch identification for program repair, and language translation. Existing code-to-code search tools rely on static similarity approaches such as the comparison of tokens and abstract syntax trees (AST) to approximate dynamic behavior, leading to low precision. Most tools do not support cross-language code-to-code search, and those that do, rely on machine learning models that require labeled training data. We present Code-to-Code Search Across Languages (COSAL), a cross-language technique that uses both static and dynamic analyses to identify similar code and does not require a machine learning model. Code snippets are ranked using non-dominated sorting based on code token similarity, structural similarity, and behavioral similarity. We empirically evaluate COSAL on two datasets of 43,146Java and Python files and 55,499 Java files and find that 1) code search based on non-dominated ranking of static and dynamic similarity measures is more effective compared to single or weighted measures; and 2) COSAL has better precision and recall compared to state-of-the-art within-language and cross-language code-to-code search tools. We explore the potential for using COSAL on large open-source repositories and discuss scalability to more languages and similarity metrics, providing a gateway for practical,multi-language code-to-code search.}, journal={PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)}, author={Mathew, George and Stolee, Kathryn T.}, year={2021}, pages={205–217} } @article{afzal_motwani_stolee_brun_le goues_2021, title={SOSRepair: Expressive Semantic Search for Real-World Program Repair}, volume={47}, ISSN={["1939-3520"]}, DOI={10.1109/TSE.2019.2944914}, abstractNote={Automated program repair holds the potential to significantly reduce software maintenance effort and cost. However, recent studies have shown that it often produces low-quality patches that repair some but break other functionality. We hypothesize that producing patches by replacing likely faulty regions of code with semantically-similar code fragments, and doing so at a higher level of granularity than prior approaches can better capture abstraction and the intended specification, and can improve repair quality. We create SOSRepair, an automated program repair technique that uses semantic code search to replace candidate buggy code regions with behaviorally-similar (but not identical) code written by humans. SOSRepair is the first such technique to scale to real-world defects in real-world systems. On a subset of the ManyBugs benchmark of such defects, SOSRepair produces patches for 22 (34%) of the 65 defects, including 3, 5, and 6 defects for which previous state-of-the-art techniques Angelix, Prophet, and GenProg do not, respectively. On these 22 defects, SOSRepair produces more patches (9, 41%) that pass all independent tests than the prior techniques. We demonstrate a relationship between patch granularity and the ability to produce patches that pass all independent tests. We then show that fault localization precision is a key factor in SOSRepair's success. Manually improving fault localization allows SOSRepair to patch 23 (35%) defects, of which 16 (70%) pass all independent tests. We conclude that (1) higher-granularity, semantic-based patches can improve patch quality, (2) semantic search is promising for producing high-quality real-world defect repairs, (3) research in fault localization can significantly improve the quality of program repair techniques, and (4) semi-automated approaches in which developers suggest fix locations may produce high-quality patches.}, number={10}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Afzal, Afsoon and Motwani, Manish and Stolee, Kathryn T. and Brun, Yuriy and Le Goues, Claire}, year={2021}, month={Oct}, pages={2162–2181} } @article{presler-marshall_heckman_stolee_2021, title={SQLRepair: Identifying and Repairing Mistakes in Student-Authored SQL Queries}, DOI={10.1109/ICSE-SEET52601.2021.00030}, abstractNote={Computer science educators seek to understand the types of mistakes that students make when learning a new (programming) language so that they can help students avoid those mistakes in the future. While educators know what mistakes students regularly make in languages such as C and Python, students struggle with SQL and regularly make mistakes when working with it. We present an analysis of mistakes that students made when first working with SQL, classify the types of errors introduced, and provide suggestions on how to avoid them going forward. In addition, we present an automated tool, SQLRepair, that is capable of repairing errors introduced by undergraduate programmers when writing SQL queries. Our results show that students find repairs produced by our tool comparable in understandability to queries written by themselves or by other students, suggesting that SQL repair tools may be useful in an educational context. We also provide to the community a benchmark of SQL queries written by the students in our study that we used for evaluation of SQLRepair.}, journal={2021 IEEE/ACM 43RD INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: JOINT TRACK ON SOFTWARE ENGINEERING EDUCATION AND TRAINING (ICSE-JSEET 2021)}, author={Presler-Marshall, Kai and Heckman, Sarah and Stolee, Kathryn T.}, year={2021}, pages={199–210} } @article{mathew_parnin_stolee_2020, title={SLACC: Simion-based Language Agnostic Code Clones}, ISSN={["0270-5257"]}, DOI={10.1145/3377811.3380407}, abstractNote={Successful cross-language clone detection could enable researchers and developers to create robust language migration tools, facilitate learning additional programming languages once one is mastered, and promote reuse of code snippets over a broader codebase. However, identifying cross-language clones presents special challenges to the clone detection problem. A lack of common underlying representation between arbitrary languages means detecting clones requires one of the following solutions: 1) a static analysis framework replicated across each targeted language with annotations matching language features across all languages, or 2) a dynamic analysis framework that detects clones based on runtime behavior. In this work, we demonstrate the feasibility of the latter solution, a dynamic analysis approach called SLACC for cross-language clone detection. Like prior clone detection techniques, we use input/output behavior to match clones, though we overcome limitations of prior work by amplifying the number of inputs and covering more data types; and as a result, achieve better clusters than prior attempts. Since clusters are generated based on input/output behavior, SLACC supports cross-language clone detection. As an added challenge, we target a static typed language, Java, and a dynamic typed language, Python. Compared to HitoshiIO, a recent clone detection tool for Java, SLACC retrieves 6 times as many clusters and has higher precision (86.7% vs. 30.7%). This is the first work to perform clone detection for dynamic typed languages (precision = 87.3%) and the first to perform clone detection across languages that lack a common underlying representation (precision = 94.1%). It provides a first step towards the larger goal of scalable language migration tools.}, journal={2020 ACM/IEEE 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2020)}, author={Mathew, George and Parnin, Chris and Stolee, Kathryn T.}, year={2020}, pages={210–221} } @article{heckman_stolee_parnin_2018, title={10+Years of Teaching Software Engineering with iTrust: the Good, the Bad, and the Ugly}, ISSN={["0270-5257"]}, DOI={10.1145/3183377.3183393}, abstractNote={This paper presents an experience report with a junior-level software engineering course at North Carolina State University. We provide an overview of the course structure and the course project, iTrust, that has been developed by students over 25 semesters. We summarize reflections from faculty, teaching assistants, and students (through course evaluations). From our lessons learned, we present our course improvements as we prepare for the next ten years of software engineering courses. Our main lessons learned are 1) course technologies have a lifespan and require periodic updating to balance student learning and working with a legacy system; 2) teaching assistant longevity and support is critical to course success; and 3) the value of working with a large, legacy system in a semester long course is supported by faculty, teaching assistants, and eventually students.}, journal={2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING EDUCATION AND TRAINING (ICSE-SEET)}, author={Heckman, Sarah and Stolee, Kathryn T. and Parnin, Christopher}, year={2018}, pages={1–4} } @article{rahman_barson_paul_kayani_andres lois_fernandez quezada_parnin_stolee_ray_2018, title={Evaluating How Developers Use General-Purpose Web-Search for Code Retrieval}, ISSN={["2160-1852"]}, DOI={10.1145/3196398.3196425}, abstractNote={Search is an integral part of a software development process. Developers often use search engines to look for information during development, including reusable code snippets, API understanding, and reference examples. Developers tend to prefer general-purpose search engines like Google, which are often not optimized for code related documents and use search strategies and ranking techniques that are more optimized for generic, non-code related information. In this paper, we explore whether a general purpose search engine like Google is an optimal choice for code-related searches. In particular, we investigate whether the performance of searching with Google varies for code vs. non-code related searches. To analyze this, we collect search logs from 310 developers that contains nearly 150,000 search queries from Google and the associated result clicks. To di?erentiate between code-related searches and non-code related searches, we build a model which identifies code intent of queries. Leveraging this model, we build an automatic classifier that detects a code and non-code related query. We confirm the e?ectiveness of the classifier on manually annotated queries where the classifier achieves a precision of 87%, a recall of 86%, and an F1-score of 87%. We apply this classifier to automatically annotate all the queries in the dataset. Analyzing this dataset, we observe that code related searching often requires more e?ort (e.g., time, result clicks, and query modifications) than general non-code search, which indicates code search performance with a general search engine is less effective.}, journal={2018 IEEE/ACM 15TH INTERNATIONAL CONFERENCE ON MINING SOFTWARE REPOSITORIES (MSR)}, author={Rahman, Md Masudur and Barson, Jed and Paul, Sydney and Kayani, Joshua and Andres Lois, Federico and Fernandez Quezada, Sebastian and Parnin, Christopher and Stolee, Kathryn T. and Ray, Baishakhi}, year={2018}, pages={465–475} } @article{wang_stolee_2018, title={How Well Are Regular Expressions Tested in the Wild?}, DOI={10.1145/3236024.3236072}, abstractNote={Developers report testing their regular expressions less than the rest of their code. In this work, we explore how thoroughly tested regular expressions are by examining open source projects. Using standard metrics of coverage, such as line and branch coverage, gives an incomplete picture of the test coverage of regular expressions. We adopt graph-based coverage metrics for the DFA representation of regular expressions, providing fine-grained test coverage metrics. Using over 15,000 tested regular expressions in 1,225 Java projects on GitHub, we measure node, edge, and edge-pair coverage. Our results show that only 17% of the regular expressions in the repositories are tested at all. For those that are tested, the median number of test inputs is two. For nearly 42% of the tested regular expressions, only one test input is used. Average node and edge coverage levels on the DFAs for tested regular expressions are 59% and 29%, respectively. Due to the lack of testing of regular expressions, we explore whether a string generation tool for regular expressions, Rex, achieves high coverage levels. With some exceptions, we found that tools such as Rex can be used to write test inputs with similar coverage to the developer tests.}, journal={ESEC/FSE'18: PROCEEDINGS OF THE 2018 26TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING}, author={Wang, Peipei and Stolee, Kathryn T.}, year={2018}, pages={668–678} } @article{hill_pasareanu_stolee_2018, title={Poster: Automated Program Repair with Canonical Constraints}, ISSN={["2574-1926"]}, DOI={10.1145/3183440.3194999}, abstractNote={Automated program repair (APR) seeks to improve the speed and decrease the cost of repairing software bugs. Existing APR approaches use unit tests or constraint solving to find and validate program patches.We propose Canonical Search And Repair (CSAR), a program repair technique based on semantic search which uses a canonical form of the path conditions to characterize buggy and patch code and allows for easy storage and retrieval of software patches, without the need for expensive constraint solving. CSAR uses string metrics over the canonical forms to cheaply measure semantic distance between patches and buggy code and uses a classifier to identify situations in which test suite executions are unnecessary–and to provide a finer-grained means of differentiating between potential patches. We evaluate CSAR on the IntroClass benchmark, and show that CSAR finds more correct patches (96% increase) than previous semantic search approaches, and more correct patches (34% increase) than other previous state-of-the-art in program repair.}, journal={PROCEEDINGS 2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING - COMPANION (ICSE-COMPANION}, author={Hill, Andrew and Pasareanu, Corina S. and Stolee, Kathryn T.}, year={2018}, pages={339–341} } @inproceedings{shriver_elbaum_stolee_2017, title={At the end of synthesis: Narrowing program candidates}, booktitle={2017 IEEE/ACM 39th International Conference on Software Engineering: New Ideas and Emerging Technologies Results Track (ICSE-NIER)}, author={Shriver, D. and Elbaum, S. and Stolee, K. T.}, year={2017}, pages={19–22} } @article{stolee_elbaum_dwyer_2016, title={Code search with input/output queries: Generalizing, ranking, and assessment}, volume={116}, ISSN={0164-1212}, url={http://dx.doi.org/10.1016/J.JSS.2015.04.081}, DOI={10.1016/J.JSS.2015.04.081}, abstractNote={In this work we generalize, improve, and extensively assess our semantic source code search engine through which developers use an input/output query model to specify what behavior they want instead of how it may be implemented. Under this approach a code repository contains programs encoded as constraints and an SMT solver finds encoded programs that match an input/output query. The search engine returns a list of source code snippets that match the specification. The initial instantiation of this approach showed potential but was limited. It only encoded single-path programs, reported just complete matches, did not rank the results, and was only partly assessed. In this work, we explore the use of symbolic execution to address some of these technical shortcomings. We implemented a tool, Satsy, that uses symbolic execution to encode multi-path programs as constraints and a novel ranking algorithm based on the strength of the match between an input/output query and the program paths traversed by symbolic execution. An assessment about the relevance of Satsy’s results versus other search engines, Merobase and Google, on eight novice-level programming tasks gathered from StackOverflow, using the opinions of 30 study participants, reveals that Satsy often out-performs the competition in terms of precision, and that matches are found in seconds.}, journal={Journal of Systems and Software}, publisher={Elsevier BV}, author={Stolee, Kathryn T. and Elbaum, Sebastian and Dwyer, Matthew B.}, year={2016}, month={Jun}, pages={35–48} } @inproceedings{sun_stolee_2016, title={Exploring crowd consistency in a mechanical turk survey}, booktitle={2016 IEEE/ACM 3rd International Workshop on Crowdsourcing in Software Engineering (CSI-SE)}, author={Sun, P. and Stolee, K. T.}, year={2016}, pages={8–14} } @inproceedings{hermans_stolee_hoepelman_2016, title={Smells in block-based programming languages}, booktitle={2016 ieee symposium on visual languages and human-centric computing (vl/hcc)}, author={Hermans, F. and Stolee, K. T. and Hoepelman, D.}, year={2016}, pages={68–72} } @article{stolee_elbaum_sarma_2013, title={Discovering how end-user programmers and their communities use public repositories: A study on Yahoo! Pipes}, volume={55}, ISSN={0950-5849}, url={http://dx.doi.org/10.1016/j.infsof.2012.10.004}, DOI={10.1016/j.infsof.2012.10.004}, abstractNote={End-user programmers are numerous, write software that matters to an increasingly large number of users, and face software engineering challenges that are similar to their professionals counterparts. Yet, we know little about how these end-user programmers create and share artifacts in repositories as part of a community. This work aims to gain a better understanding of end-user programmer communities, the characteristics of artifacts in community repositories, and how authors evolve over time. An artifact-based analysis of 32,000 mashups from the Yahoo! Pipes repository was performed. The popularity, configurability, complexity, and diversity of the artifacts were measured. Additionally, for the most prolific authors, we explore their submission trends over time. Similar to other online communities, there is great deal of attrition but authors who persevere tend to improve over time, creating pipes that are more configurable, diverse, complex, and popular. We also discovered, however, that end-user programmers do not effectively reuse existing programs, submit pipes that are highly similar to others already in the repository, and in most cases do not have an awareness of the community or the richness of artifacts that exist in repositories. There is a need for better end-user programmer support in several stages of the software lifecycle, including development, maintenance, search, and program understanding. Without such support, the community repositories will continue to be cluttered with highly-similar artifacts and authors may not be able to take full advantage of the community resources.}, number={7}, journal={Information and Software Technology}, publisher={Elsevier BV}, author={Stolee, Kathryn T. and Elbaum, Sebastian and Sarma, Anita}, year={2013}, month={Jul}, pages={1289–1303} }