@article{horton_parnin_2022, title={Dozer: Migrating Shell Commands to Ansible Modules via Execution Profiling and Synthesis}, DOI={10.1145/3510457.3513060}, abstractNote={Software developers frequently use the system shell to perform configuration management tasks. Unfortunately, the shell does not scale well to large systems, and configuration management tools like Ansible are more difficult to learn. We address this problem with Dozer, a technique to help developers push their shell commands into Ansible task definitions. It operates by tracing and comparing system calls to find Ansible modules with similar behaviors to shell commands, then generating and validating migrations to find the task which produces the most similar changes to the system. Dozer is syntax agnostic, which should allow it to generalize to other configuration management platforms. We evaluate Dozer using datasets from open source configuration scripts.}, journal={2022 ACM/IEEE 44TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING IN PRACTICE (ICSE-SEIP 2022)}, author={Horton, Eric and Parnin, Chris}, year={2022}, pages={147–148} } @article{parnin_orso_2021, title={Automated Debugging: Past, Present, and Future (ISSTA Impact Paper Award)}, DOI={10.1145/3460319.3472397}, abstractNote={The paper titled “Are Automated Debugging Techniques Actually Helping Programmers?” was published in the proceedings of the International Symposium on Software Testing and Analysis (ISSTA) in 2011, and has been selected to receive the ISSTA 2021 Impact Paper Award. The paper investigated, through two user studies, how developers used and benefited from popular automated debugging techniques. The results of the studies provided (1) evidence that several assumptions made by automated debugging techniques did not hold in practice and (2) insights on limitations of existing approaches and how these limitations could be addressed. 
In this talk, we revisit the original paper and the work that led to it. We then assess the impact of that research by reviewing how the area of automated debugging has evolved since the paper was published. Finally, we conclude the talk by reflecting on the current state of the art in this area and discussing open issues and potential directions for future work.}, journal={ISSTA '21: PROCEEDINGS OF THE 30TH ACM SIGSOFT INTERNATIONAL SYMPOSIUM ON SOFTWARE TESTING AND ANALYSIS}, author={Parnin, Chris and Orso, Alessandro}, year={2021}, pages={1–1} } @article{brown_parnin_2021, title={Nudging Students Toward Better Software Engineering Behaviors}, DOI={10.1109/BotSE52550.2021.00010}, abstractNote={Student experiences in large undergraduate Computer Science courses are increasingly impacted by automated systems. Bots, or agents of software automation, are useful for efficiently grading and generating feedback. Current efforts at automation in CS education focus on supporting instructional tasks, but do not address student struggles due to poor behaviors, such as procrastination. In this paper, we explore using bots to improve the software engineering behaviors of students using developer recommendation choice architectures, a framework incorporating behavioral science concepts in recommendations to improve the actions of programmers. We implemented this framework in class-bot, a novel system designed to nudge students to make better choices while working on programming assignments. This work presents a preliminary evaluation integrating this tool in an introductory programming course. 
Our results show that class-bot is beneficial for improving student development behaviors increasing code quality and productivity.}, journal={2021 IEEE/ACM THIRD INTERNATIONAL WORKSHOP ON BOTS IN SOFTWARE ENGINEERING (BOTSE 2021)}, author={Brown, Chris and Parnin, Chris}, year={2021}, pages={11–15} } @article{peitek_apel_parnin_brechmann_siegmund_2021a, title={Program Comprehension and Code Complexity Metrics: A Replication Package of an fMRI Study}, ISSN={["2574-1926"]}, DOI={10.1109/ICSE-Companion52605.2021.00071}, abstractNote={In this artifact, we document our publicly shared data set of our functional magnetic resonance imaging (fMRI) study on programmers. We have conducted an fMRI study with 19 participants observing program comprehension of short code snippets at varying complexity levels. We dissected four classes of code complexity metrics and their relationship to neuronal, behavioral, and subjective correlates of program comprehension. Our data corroborate that complexity metrics can—to a limited degree—explain programmers' cognition in program comprehension. In the paper on the fMRI study, we outline several follow-up experiments investigating fine-grained effects of code complexity and describe possible refinements to code complexity metrics. We view our conducted experiment as a starting point to link code complexity metrics to neural and behavioral correlates. 
To enable future research to continue this line of work, we aim to provide as much support as possible to conduct similar studies with this artifact.}, journal={2021 IEEE/ACM 43RD INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: COMPANION PROCEEDINGS (ICSE-COMPANION 2021)}, author={Peitek, Norman and Apel, Sven and Parnin, Chris and Brechmann, Andre and Siegmund, Janet}, year={2021}, pages={168–169} } @article{peitek_apel_parnin_brechmann_siegmund_2021b, title={Program Comprehension and Code Complexity Metrics: An fMRI Study}, ISSN={["0270-5257"]}, DOI={10.1109/ICSE43902.2021.00056}, abstractNote={Background: Researchers and practitioners have been using code complexity metrics for decades to predict how developers comprehend a program. While it is plausible and tempting to use code metrics for this purpose, their validity is debated, since they rely on simple code properties and rarely consider particularities of human cognition. Aims: We investigate whether and how code complexity metrics reflect difficulty of program comprehension. Method: We have conducted a functional magnetic resonance imaging (fMRI) study with 19 participants observing program comprehension of short code snippets at varying complexity levels. We dissected four classes of code complexity metrics and their relationship to neuronal, behavioral, and subjective correlates of program comprehension, overall analyzing more than 41 metrics. Results: While our data corroborate that complexity metrics can-to a limited degree-explain programmers' cognition in program comprehension, fMRI allowed us to gain insights into why some code properties are difficult to process. In particular, a code's textual size drives programmers' attention, and vocabulary size burdens programmers' working memory. Conclusion: Our results provide neuro-scientific evidence supporting warnings of prior research questioning the validity of code complexity metrics and pin down factors relevant to program comprehension. 
Future Work: We outline several follow-up experiments investigating fine-grained effects of code complexity and describe possible refinements to code complexity metrics.}, journal={2021 IEEE/ACM 43RD INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2021)}, author={Peitek, Norman and Apel, Sven and Parnin, Chris and Brechmann, Andre and Siegmund, Janet}, year={2021}, pages={524–536} } @article{rahman_rahman_parnin_williams_2021, title={Security Smells in Ansible and Chef Scripts: A Replication Study}, volume={30}, ISSN={["1557-7392"]}, DOI={10.1145/3408897}, abstractNote={ Context: Security smells are recurring coding patterns that are indicative of security weakness and require further inspection. As infrastructure as code (IaC) scripts, such as Ansible and Chef scripts, are used to provision cloud-based servers and systems at scale, security smells in IaC scripts could be used to enable malicious users to exploit vulnerabilities in the provisioned systems. Goal: The goal of this article is to help practitioners avoid insecure coding practices while developing infrastructure as code scripts through an empirical study of security smells in Ansible and Chef scripts. Methodology: We conduct a replication study where we apply qualitative analysis with 1,956 IaC scripts to identify security smells for IaC scripts written in two languages: Ansible and Chef. We construct a static analysis tool called Security Linter for Ansible and Chef scripts (SLAC) to automatically identify security smells in 50,323 scripts collected from 813 open source software repositories. We also submit bug reports for 1,000 randomly selected smell occurrences. Results: We identify two security smells not reported in prior work: missing default in case statement and no integrity check. By applying SLAC we identify 46,600 occurrences of security smells that include 7,849 hard-coded passwords. 
We observe agreement for 65 of the responded 94 bug reports, which suggests the relevance of security smells for Ansible and Chef scripts amongst practitioners. Conclusion: We observe security smells to be prevalent in Ansible and Chef scripts, similarly to that of the Puppet scripts. We recommend practitioners to rigorously inspect the presence of the identified security smells in Ansible and Chef scripts using (i) code review, and (ii) static analysis tools. }, number={1}, journal={ACM TRANSACTIONS ON SOFTWARE ENGINEERING AND METHODOLOGY}, author={Rahman, Akond and Rahman, Md Rayhanur and Parnin, Chris and Williams, Laurie}, year={2021}, month={Jan} } @article{behroozi_shirolkar_barik_parnin_2020, title={Debugging Hiring: What Went Right and What Went Wrong in the Technical Interview Process}, DOI={10.1145/3377815.3381372}, abstractNote={The typical hiring pipeline for software engineering occurs over several stages—from phone screening and technical on-site interviews, to offer and negotiation. When these hiring pipelines are “leaky,” otherwise qualified candidates are lost at some stage of the pipeline. These leaky pipelines impact companies in several ways, including hindering a company’s ability to recruit competitive candidates and build diverse software teams.To understand where candidates become disengaged in the hiring pipeline—and what companies can do to prevent it—we conducted a qualitative study on over 10,000 reviews on 19 companies from Glassdoor, a website where candidates can leave reviews about their hiring process experiences. We identified several poor practices which prematurely sabotage the hiring process—for example, not adequately communicating hiring criteria, conducting interviews with inexperienced interviewers, and ghosting candidates. 
Our findings provide a set of guidelines to help companies improve their hiring pipeline practices—such as being deliberate about phrasing and language during initial contact with the candidate, providing candidates with constructive feedback after their interviews, and bringing salary transparency and long-term career discussions into offers and negotiations. Operationalizing these guidelines helps make the hiring pipeline more transparent, fair, and inclusive.}, journal={2020 IEEE/ACM 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING IN SOCIETY (ICSE-SEIS 2021)}, author={Behroozi, Mahnaz and Shirolkar, Shivani and Barik, Titus and Parnin, Chris}, year={2020}, pages={71–80} } @article{rahman_farhana_parnin_williams_2020, title={Gang of Eight: A Defect Taxonomy for Infrastructure as Code Scripts}, ISSN={["0270-5257"]}, DOI={10.1145/3377811.3380409}, abstractNote={Defects in infrastructure as code (IaC) scripts can have serious consequences, for example, creating large-scale system outages. A taxonomy of IaC defects can be useful for understanding the nature of defects, and identifying activities needed to fix and prevent defects in IaC scripts. The goal of this paper is to help practitioners improve the quality of infrastructure as code (IaC) scripts by developing a defect taxonomy for IaC scripts through qualitative analysis. We develop a taxonomy of IaC defects by applying qualitative analysis on 1,448 defect-related commits collected from open source software (OSS) repositories of the Openstack organization. We conduct a survey with 66 practitioners to assess if they agree with the identified defect categories included in our taxonomy. We quantify the frequency of identified defect categories by analyzing 80,425 commits collected from 291 OSS repositories spanning across 2005 to 2019. 
Our defect taxonomy for IaC consists of eight categories, including a category specific to IaC called idempotency (i.e., defects that lead to incorrect system provisioning when the same IaC script is executed multiple times). We observe the surveyed 66 practitioners to agree most with idempotency. The most frequent defect category is configuration data i.e., providing erroneous configuration data in IaC scripts. Our taxonomy and the quantified frequency of the defect categories may help in advancing the science of IaC script quality.}, journal={2020 ACM/IEEE 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2020)}, author={Rahman, Akond and Farhana, Effat and Parnin, Chris and Williams, Laurie}, year={2020}, pages={752–764} } @article{shrestha_botta_barik_parnin_2020, title={Here We Go Again: Why Is It Difficult for Developers to Learn Another Programming Language?}, ISSN={["0270-5257"]}, DOI={10.1145/3377811.3380352}, abstractNote={Once a programmer knows one language, they can leverage concepts and knowledge already learned, and easily pick up another programming language. But is that always the case? To understand if programmers have difficulty learning additional programming languages, we conducted an empirical study of Stack Overflow questions across 18 different programming languages. We hypothesized that previous knowledge could potentially interfere with learning a new programming language. From our inspection of 450 Stack Overflow questions, we found 276 instances of interference that occurred due to faulty assumptions originating from knowledge about a different language. To understand why these difficulties occurred, we conducted semi-structured interviews with 16 professional programmers. The interviews revealed that programmers make failed attempts to relate a new programming language with what they already know. 
Our findings inform design implications for technical authors, toolsmiths, and language designers, such as designing documentation and automated tools that reduce interference, anticipating uncommon language transitions during language design, and welcoming programmers not just into a language, but its entire ecosystem.}, journal={2020 ACM/IEEE 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2020)}, author={Shrestha, Nischal and Botta, Colton and Barik, Titus and Parnin, Chris}, year={2020}, pages={691–701} } @article{mathew_parnin_stolee_2020, title={SLACC: Simion-based Language Agnostic Code Clones}, ISSN={["0270-5257"]}, DOI={10.1145/3377811.3380407}, abstractNote={Successful cross-language clone detection could enable researchers and developers to create robust language migration tools, facilitate learning additional programming languages once one is mastered, and promote reuse of code snippets over a broader codebase. However, identifying cross-language clones presents special challenges to the clone detection problem. A lack of common underlying representation between arbitrary languages means detecting clones requires one of the following solutions: 1) a static analysis framework replicated across each targeted language with annotations matching language features across all languages, or 2) a dynamic analysis framework that detects clones based on runtime behavior. In this work, we demonstrate the feasibility of the latter solution, a dynamic analysis approach called SLACC for cross-language clone detection. Like prior clone detection techniques, we use input/output behavior to match clones, though we overcome limitations of prior work by amplifying the number of inputs and covering more data types; and as a result, achieve better clusters than prior attempts. Since clusters are generated based on input/output behavior, SLACC supports cross-language clone detection. 
As an added challenge, we target a static typed language, Java, and a dynamic typed language, Python. Compared to HitoshiIO, a recent clone detection tool for Java, SLACC retrieves 6 times as many clusters and has higher precision (86.7% vs. 30.7%). This is the first work to perform clone detection for dynamic typed languages (precision = 87.3%) and the first to perform clone detection across languages that lack a common underlying representation (precision = 94.1%). It provides a first step towards the larger goal of scalable language migration tools.}, journal={2020 ACM/IEEE 42ND INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2020)}, author={Mathew, George and Parnin, Chris and Stolee, Kathryn T.}, year={2020}, pages={210–221} } @article{siegmund_peitek_brechmann_parnin_apel_2020, title={Studying Programming in the Neuroage: Just a Crazy Idea?}, volume={63}, ISSN={["1557-7317"]}, DOI={10.1145/3347093}, abstractNote={Programming research has entered the Neuroage.}, number={6}, journal={COMMUNICATIONS OF THE ACM}, author={Siegmund, Janet and Peitek, Norman and Brechmann, Andre and Parnin, Chris and Apel, Sven}, year={2020}, month={Jun}, pages={30–34} } @article{ford_behroozi_serebrenik_parnin_2019, title={Beyond the Code Itself: How Programmers Really Look at Pull Requests}, DOI={10.1109/ICSE-SEIS.2019.00014}, abstractNote={Developers in open source projects must make decisions on contributions from other community members, such as whether or not to accept a pull request. However, secondary factors-beyond the code itself-can influence those decisions. For example, signals from GitHub profiles, such as a number of followers, activity, names, or gender can also be considered when developers make decisions. In this paper, we examine how developers use these signals (or not) when making decisions about code contributions. 
To evaluate this question, we evaluate how signals related to perceived gender identity and code quality influenced decisions on accepting pull requests. Unlike previous work, we analyze this decision process with data collected from an eye-tracker. We analyzed differences in what signals developers said are important for themselves versus what signals they actually used to make decisions about others. We found that after the code snippet (x=57%), the second place programmers spent their time fixating is on supplemental technical signals (x=32%), such as previous contributions and popular repositories. Diverging from what participants reported themselves, we also found that programmers fixated on social signals more than recalled.}, journal={2019 IEEE/ACM 41ST INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING IN SOCIETY (ICSE-SEIS 2019)}, author={Ford, Denae and Behroozi, Mahnaz and Serebrenik, Alexander and Parnin, Chris}, year={2019}, pages={51–60} } @article{horton_parnin_2019a, title={DockerizeMe: Automatic Inference of Environment Dependencies for Python Code Snippets}, ISSN={["0270-5257"]}, DOI={10.1109/ICSE.2019.00047}, abstractNote={Platforms like Stack Overflow and GitHub's gist system promote the sharing of ideas and programming techniques via the distribution of code snippets designed to illustrate particular tasks. Python, a popular and fast-growing programming language, sees heavy use on both sites, with nearly one million questions asked on Stack Overflow and 400 thousand public gists on GitHub. Unfortunately, around 75% of the Python example code shared through these sites cannot be directly executed. When run in a clean environment, over 50% of public Python gists fail due to an import error for a missing library. We present DockerizeMe, a technique for inferring the dependencies needed to execute a Python code snippet without import error. 
DockerizeMe starts with offline knowledge acquisition of the resources and dependencies for popular Python packages from the Python Package Index (PyPI). It then builds Docker specifications using a graph-based inference procedure. Our inference procedure resolves import errors in 892 out of nearly 3,000 gists from the Gistable dataset for which Gistable's baseline approach could not find and install all dependencies.}, journal={2019 IEEE/ACM 41ST INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2019)}, author={Horton, Eric and Parnin, Chris}, year={2019}, pages={328–338} } @article{rahman_parnin_williams_2019, title={The Seven Sins: Security Smells in Infrastructure as Code Scripts}, ISSN={["0270-5257"]}, DOI={10.1109/ICSE.2019.00033}, abstractNote={Practitioners use infrastructure as code (IaC) scripts to provision servers and development environments. While developing IaC scripts, practitioners may inadvertently introduce security smells. Security smells are recurring coding patterns that are indicative of security weakness and can potentially lead to security breaches. The goal of this paper is to help practitioners avoid insecure coding practices while developing infrastructure as code (IaC) scripts through an empirical study of security smells in IaC scripts. We apply qualitative analysis on 1,726 IaC scripts to identify seven security smells. Next, we implement and validate a static analysis tool called Security Linter for Infrastructure as Code scripts (SLIC) to identify the occurrence of each smell in 15,232 IaC scripts collected from 293 open source repositories. We identify 21,201 occurrences of security smells that include 1,326 occurrences of hard-coded passwords. We submitted bug reports for 1,000 randomly-selected security smell occurrences. We obtain 212 responses to these bug reports, of which 148 occurrences were accepted by the development teams to be fixed. 
We observe security smells can have a long lifetime, e.g., a hard-coded secret can persist for as long as 98 months, with a median lifetime of 20 months.}, journal={2019 IEEE/ACM 41ST INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING (ICSE 2019)}, author={Rahman, Akond and Parnin, Chris and Williams, Laurie}, year={2019}, pages={164–175} } @article{horton_parnin_2019b, title={V2: Fast Detection of Configuration Drift in Python}, DOI={10.1109/ASE.2019.00052}, abstractNote={Code snippets are prevalent, but are hard to reuse because they often lack an accompanying environment configuration. Most are not actively maintained, allowing for drift between the most recent possible configuration and the code snippet as the snippet becomes out-of-date over time. Recent work has identified the problem of validating and detecting out-of-date code snippets as the most important consideration for code reuse. However, determining if a snippet is correct, but simply out-of-date, is a non-trivial task. In the best case, breaking changes are well documented, allowing developers to manually determine when a code snippet contains an out-of-date API usage. In the worst case, determining if and when a breaking change was made requires an exhaustive search through previous dependency versions. We present V2, a strategy for determining if a code snippet is out-of-date by detecting discrete instances of configuration drift, where the snippet uses an API which has since undergone a breaking change. Each instance of configuration drift is classified by a failure encountered during validation and a configuration patch, consisting of dependency version changes, which fixes the underlying fault. V2 uses feedback-directed search to explore the possible configuration space for a code snippet, reducing the number of potential environment configurations that need to be validated. 
When run on a corpus of public Python snippets from prior research, V2 identifies 248 instances of configuration drift.}, journal={34TH IEEE/ACM INTERNATIONAL CONFERENCE ON AUTOMATED SOFTWARE ENGINEERING (ASE 2019)}, author={Horton, Eric and Parnin, Chris}, year={2019}, pages={477–488} } @article{heckman_stolee_parnin_2018, title={10+Years of Teaching Software Engineering with iTrust: the Good, the Bad, and the Ugly}, ISSN={["0270-5257"]}, DOI={10.1145/3183377.3183393}, abstractNote={This paper presents an experience report with a junior-level software engineering course at North Carolina State University. We provide an overview of the course structure and the course project, iTrust, that has been developed by students over 25 semesters. We summarize reflections from faculty, teaching assistants, and students (through course evaluations). From our lessons learned, we present our course improvements as we prepare for the next ten years of software engineering courses. Our main lessons learned are 1) course technologies have a lifespan and require periodic updating to balance student learning and working with a legacy system; 2) teaching assistant longevity and support is critical to course success; and 3) the value of working with a large, legacy system in a semester long course is supported by faculty, teaching assistants, and eventually students.}, journal={2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: SOFTWARE ENGINEERING EDUCATION AND TRAINING (ICSE-SEET)}, author={Heckman, Sarah and Stolee, Kathryn T. and Parnin, Christopher}, year={2018}, pages={1–4} } @article{peitek_siegmund_apel_kastner_parnin_bethmann_leich_saake_brechmann_2020, title={A Look into Programmers' Heads}, volume={46}, ISSN={["1939-3520"]}, DOI={10.1109/TSE.2018.2863303}, abstractNote={Program comprehension is an important, but hard to measure cognitive process. 
This makes it difficult to provide suitable programming languages, tools, or coding conventions to support developers in their everyday work. Here, we explore whether functional magnetic resonance imaging (fMRI) is feasible for soundly measuring program comprehension. To this end, we observed 17 participants inside an fMRI scanner while they were comprehending source code. The results show a clear, distinct activation of five brain regions, which are related to working memory, attention, and language processing, which all fit well to our understanding of program comprehension. Furthermore, we found reduced activity in the default mode network, indicating the cognitive effort necessary for program comprehension. We also observed that familiarity with Java as underlying programming language reduced cognitive effort during program comprehension. To gain confidence in the results and the method, we replicated the study with 11 new participants and largely confirmed our findings. Our results encourage us and, hopefully, others to use fMRI to observe programmers and, in the long run, answer questions, such as: How should we train programmers? Can we train someone to become an excellent programmer? How effective are new languages and tools for program comprehension?}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Peitek, Norman and Siegmund, Janet and Apel, Sven and Kastner, Christian and Parnin, Chris and Bethmann, Anja and Leich, Thomas and Saake, Gunter and Brechmann, Andre}, year={2020}, month={Apr}, pages={442–462} } @article{behroozi_lui_moore_ford_parnin_2018, title={Dazed: Measuring the Cognitive Load of Solving Technical Interview Problems at the Whiteboard}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85049810426&partnerID=MN8TOARS}, DOI={10.1145/3183399.3183415}, abstractNote={Problem-solving on a whiteboard is a popular technical interview technique used in industry. 
However, several critics have raised concerns that whiteboard interviews can cause excessive stress and cognitive load on candidates, ultimately reinforcing bias in hiring practices. Unfortunately, many sensors used for measuring cognitive state are not robust to movement. In this paper, we describe an approach where we use a head-mounted eye-tracker and computer vision algorithms to collect robust metrics of cognitive state. To demonstrate the feasibility of the approach, we study two proposed interview settings: on the whiteboard and on paper with 11 participants. Our preliminary results suggest that the whiteboard setting pressures candidates into keeping shorter attention lengths and experiencing higher levels of cognitive load compared to solving the same problems on paper. For instance, we observed 60ms shorter fixation durations and 3x more regressions when solving problems on the whiteboard. Finally, we describe a vision for creating a more inclusive technical interview process through future studies of interventions that lower cognitive load and stress.}, journal={2018 IEEE/ACM 40TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING: NEW IDEAS AND EMERGING TECHNOLOGIES RESULTS (ICSE-NIER)}, author={Behroozi, Mahnaz and Lui, Alison and Moore, Ian and Ford, Denae and Parnin, Chris}, year={2018}, pages={93–96} } @article{rahman_barson_paul_kayani_andres lois_fernandez quezada_parnin_stolee_ray_2018, title={Evaluating How Developers Use General-Purpose Web-Search for Code Retrieval}, ISSN={["2160-1852"]}, DOI={10.1145/3196398.3196425}, abstractNote={Search is an integral part of a software development process. Developers often use search engines to look for information during development, including reusable code snippets, API understanding, and reference examples. 
Developers tend to prefer general-purpose search engines like Google, which are often not optimized for code related documents and use search strategies and ranking techniques that are more optimized for generic, non-code related information. In this paper, we explore whether a general purpose search engine like Google is an optimal choice for code-related searches. In particular, we investigate whether the performance of searching with Google varies for code vs. non-code related searches. To analyze this, we collect search logs from 310 developers that contains nearly 150,000 search queries from Google and the associated result clicks. To differentiate between code-related searches and non-code related searches, we build a model which identifies code intent of queries. Leveraging this model, we build an automatic classifier that detects a code and non-code related query. We confirm the effectiveness of the classifier on manually annotated queries where the classifier achieves a precision of 87%, a recall of 86%, and an F1-score of 87%. We apply this classifier to automatically annotate all the queries in the dataset. Analyzing this dataset, we observe that code related searching often requires more effort (e.g., time, result clicks, and query modifications) than general non-code search, which indicates code search performance with a general search engine is less effective.}, journal={2018 IEEE/ACM 15TH INTERNATIONAL CONFERENCE ON MINING SOFTWARE REPOSITORIES (MSR)}, author={Rahman, Md Masudur and Barson, Jed and Paul, Sydney and Kayani, Joshua and Andres Lois, Federico and Fernandez Quezada, Sebastian and Parnin, Christopher and Stolee, Kathryn T. 
and Ray, Baishakhi}, year={2018}, pages={465–475} } @article{horton_parnin_2018, title={Gistable: Evaluating the Executability of Python Code Snippets on GitHub}, ISSN={["1063-6773"]}, DOI={10.1109/ICSME.2018.00031}, abstractNote={Software developers create and share code online to demonstrate programming language concepts and programming tasks. Code snippets can be a useful way to explain and demonstrate a programming concept, but may not always be directly executable. A code snippet can contain parse errors, or fail to execute if the environment contains unmet dependencies. This paper presents an empirical analysis of the executable status of Python code snippets shared through the GitHub gist system, and the ability of developers familiar with software configuration to correctly configure and run them. We find that 75.6% of gists require non-trivial configuration to overcome missing dependencies, configuration files, reliance on a specific operating system, or some other environment configuration. Our study also suggests the natural assumption developers make about resource names when resolving configuration errors is correct less than half the time. We also present Gistable, a database and extensible framework built on GitHub's gist system, which provides executable code snippets to enable reproducible studies in software engineering. Gistable contains 10,259 code snippets, approximately 5,000 with a Dockerfile to configure and execute them without import error. Gistable is publicly available at https://github.com/gistable/gistable.}, journal={PROCEEDINGS 2018 IEEE INTERNATIONAL CONFERENCE ON SOFTWARE MAINTENANCE AND EVOLUTION (ICSME)}, author={Horton, Eric and Parnin, Chris}, year={2018}, pages={217–227} } @article{barik_ford_murphy-hill_parnin_2018, title={How Should Compilers Explain Problems to Developers?}, DOI={10.1145/3236024.3236040}, abstractNote={Compilers primarily give feedback about problems to developers through the use of error messages. 
Unfortunately, developers routinely find these messages to be confusing and unhelpful. In this paper, we postulate that because error messages present poor explanations, theories of explanation---such as Toulmin's model of argument---can be applied to improve their quality. To understand how compilers should present explanations to developers, we conducted a comparative evaluation with 68 professional software developers and an empirical study of compiler error messages found in Stack Overflow questions across seven different programming languages. Our findings suggest that, given a pair of error messages, developers significantly prefer the error message that employs proper argument structure over a deficient argument structure when neither offers a resolution---but will accept a deficient argument structure if it provides a resolution to the problem. Human-authored explanations on Stack Overflow converge to one of the three argument structures: those that provide a resolution to the error, simple arguments, and extended arguments that provide additional evidence for the problem. Finally, we contribute three practical design principles to inform the design and evaluation of compiler error messages.}, journal={ESEC/FSE'18: PROCEEDINGS OF THE 2018 26TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING}, author={Barik, Titus and Ford, Denae and Murphy-Hill, Emerson and Parnin, Chris}, year={2018}, pages={633–643} } @article{peitek_siegmund_parnin_apel_hofmeister_brechmann_2018, title={Simultaneous Measurement of Program Comprehension with fMRI and Eye Tracking: A Case Study}, DOI={10.1145/3239235.3240495}, abstractNote={Background Researchers have recently started to validate decades-old program-comprehension models using functional magnetic resonance imaging (fMRI). 
While fMRI helps us to understand neural correlates of cognitive processes during program comprehension, its comparatively low temporal resolution (i.e., seconds) cannot capture fast cognitive subprocesses (i.e., milliseconds). Aims To increase the explanatory power of fMRI measurement of programmers, we are exploring in this methodological paper the feasibility of adding simultaneous eye tracking to fMRI measurement. By developing a method to observe programmers with two complementary measures, we aim at obtaining a more comprehensive understanding of program comprehension. Method We conducted a controlled fMRI experiment of 22 student participants with simultaneous eye tracking. Results We have been able to successfully capture fMRI and eye-tracking data, although with limitations regarding partial data loss and spatial imprecision. The biggest issue that we experienced is the partial loss of data: for only 10 participants, we could collect a complete set of high-precision eye-tracking data. Since some participants of fMRI studies show excessive head motion, the proportion of full and high-quality data on fMRI and eye tracking is rather low. Still, the remaining data allowed us to confirm our prior hypothesis of semantic recall during program comprehension, which was not possible with fMRI alone. Conclusions Simultaneous measurement of program comprehension with fMRI and eye tracking is promising, but with limitations. By adding simultaneous eye tracking to our fMRI study framework, we can conduct more fine-grained fMRI analyses, which in turn helps us to understand programmer behavior better.}, journal={PROCEEDINGS OF THE 12TH ACM/IEEE INTERNATIONAL SYMPOSIUM ON EMPIRICAL SOFTWARE ENGINEERING AND MEASUREMENT (ESEM 2018)}, author={Peitek, Norman and Siegmund, Janet and Parnin, Chris and Apel, Sven and Hofmeister, Johannes C. 
and Brechmann, Andre}, year={2018} } @article{parnin_fabry_2018, title={Special edition of the Journal of Software: Evolution and Process for the 4th IEEE Working Conference on Software Visualization (VISSOFT 2016)}, volume={30}, ISSN={["2047-7481"]}, DOI={10.1002/smr.1940}, abstractNote={It is our pleasure to present this special edition of the Journal of Software: Evolution and Process for the 4th IEEE Working Conference on Software Visualization (VISSOFT 2016). The technical program of the conference featured 9 full papers selected out of a total of 21 submissions, and of these 9 we invited 4 to submit an extended version to this special issue. We are happy to report that we received extended versions of all 4 of these papers and that all of these submissions to the journal have been accepted after a thorough review process with at least 2 expert reviewers. The papers are as follows:}, number={2}, journal={JOURNAL OF SOFTWARE-EVOLUTION AND PROCESS}, author={Parnin, Christopher and Fabry, Johan}, year={2018}, month={Feb} } @article{sarkar_parnin_2017, title={Characterizing and Predicting Mental Fatigue during Programming Tasks}, DOI={10.1109/semotion.2017.2}, abstractNote={Mental fatigue reduces one's cognitive and physical abilities. In tasks requiring continuous attention, such as driving, fatigue is a well-known risk. However, when fatigued during daily tasks, such as programming, the nature of risk is more diffuse and accumulative, yet the consequences can be just as severe (e.g. defects in autopilot software). Identifying risks of fatigue in the context of programming can lead to interventions that prevent introduction of defects and introduce coping mechanisms. 
To characterize and predict these risks, we conducted two studies: a survey study in which we asked 311 software developers to rate the severity and frequency of their fatigue and to recall a recent experience of being fatigued while programming, and an observational study with 9 professional software developers to investigate the feasibility of predicting fatigue from interaction history. From the survey, we found that a majority of developers report severe (66%) and frequent (59%) issues with fatigue. Further, we categorized their experiences into seven effects on programming tasks, which include reduced motivation and reduced ability to handle tasks involving large mental workloads. From our observational study, our results found how several measures, such as focus duration, key press time, error rates, and increases in software quality warnings, may be applied for detecting fatigue levels. Together, these results aim to support developers and the industry for improving software quality and work conditions for software developers.}, journal={2017 IEEE/ACM 2ND INTERNATIONAL WORKSHOP ON EMOTION AWARENESS IN SOFTWARE ENGINEERING (SEMOTION 2017)}, author={Sarkar, Saurabh and Parnin, Chris}, year={2017}, pages={32–37} } @article{terrell_kofink_middleton_rainear_murphy-hill_parnin_stallings_2017, title={Gender differences and bias in open source: pull request acceptance of women versus men}, journal={PeerJ Computer Science}, author={Terrell, J. and Kofink, A. and Middleton, J. and Rainear, C. and Murphy-Hill, E. and Parnin, C. and Stallings, J.}, year={2017} } @article{ford_barik_rand-pickett_parnin_2017, title={The Tech-Talk Balance: What Technical Interviewers Expect from Technical Candidates}, DOI={10.1109/chase.2017.8}, abstractNote={Software engineer job candidates are not succeeding at technical interviews. 
Although candidates are able to answer technical questions, there is a mismatch of what candidates think interviewers assess versus what criteria is used in practice. This mismatch in expectations can cost candidates a job opportunity. To determine what criteria interviewers value, we conducted mock technical interviews with software engineer candidates at a university and collected evaluations from interviewers. We analyzed 70 interview evaluations from 9 software companies. Using a grounded theory approach, we compared interviewer interpretations of criteria including: performing a problem solving walkthrough, applying previous experience to problem solving, and the ability to engage in conversation beyond writing code. From these findings, we provide implications on what candidates can expect to be evaluated on during technical interviews across companies, which can sometimes vary significantly.}, journal={2017 IEEE/ACM 10TH INTERNATIONAL WORKSHOP ON COOPERATIVE AND HUMAN ASPECTS OF SOFTWARE ENGINEERING (CHASE 2017)}, author={Ford, Denae and Barik, Titus and Rand-Pickett, Leslie and Parnin, Chris}, year={2017}, pages={43–48} } @article{parnin_helms_atlee_boughton_ghattas_glover_holman_micco_murphy_savor_et_al._2017, title={The Top 10 Adages in Continuous Deployment}, volume={34}, ISSN={["1937-4194"]}, DOI={10.1109/ms.2017.86}, abstractNote={Continuous deployment involves automatically testing incremental software changes and frequently deploying them to production environments. With it, developers' changes can reach customers in days or even hours. Such ultrafast changes create a new reality in software development. To understand the emerging practices surrounding continuous deployment, researchers facilitated a one-day Continuous Deployment Summit at the Facebook campus in July 2015, at which participants from 10 companies described how they used continuous deployment. 
From the resulting conversation, the researchers derived 10 adages about continuous-deployment practices. These adages represent a working set of approaches and beliefs that guide current practice and establish a tangible target for empirical validation by the research community.}, number={3}, journal={IEEE SOFTWARE}, author={Parnin, Chris and Helms, Eric and Atlee, Chris and Boughton, Harley and Ghattas, Mark and Glover, Andy and Holman, James and Micco, John and Murphy, Brendan and Savor, Tony and et al.}, year={2017}, pages={86–95} } @article{acharya_parnin_kraft_dagnino_qu_2016, title={Code Drones}, DOI={10.1145/2889160.2889211}, abstractNote={We propose and explore a new paradigm called Code Drones in which every software artifact such as a class is an intelligent and socially active entity. In this paradigm, humanized artifacts take the lead and choreograph (socially, in collaboration with other intelligent software artifacts and humans) automated software engineering solutions to a myriad of development and maintenance challenges,including API migration, reuse, documentation, testing, patching, and refactoring. We discuss the implications of having social and intelligent/cognitive software artifacts that guide their own self-improvement.}, journal={2016 IEEE/ACM 38TH INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING COMPANION (ICSE-C)}, author={Acharya, Mithun P. and Parnin, Chris and Kraft, Nicholas A. and Dagnino, Aldo and Qu, Xiao}, year={2016}, pages={785–788} } @inproceedings{parnin_begel_2016, title={Invited session: hands-on sensors 101}, booktitle={2016 IEEE/ACM 1st International Workshop on Emotion Awareness in Software Engineering (Semotion)}, author={Parnin, C. 
and Begel, A.}, year={2016}, pages={28–29} } @article{ford_parnin_2015, title={Exploring Causes of Frustration for Software Developers}, DOI={10.1109/chase.2015.19}, abstractNote={When learning to program, frustrating experiences contribute to negative learning outcomes and poor retention in the field. Defining a common framework that explains why these experiences occur can lead to better interventions and learning mechanisms. To begin constructing such a framework, we asked 45 software developers about the severity of their frustration and to recall their most recent frustrating programming experience. As a result, 67% considered their frustration to be severe. Further, we distilled the reported experiences into 11 categories, which include issues with mapping behaviors to code and broken programming tools. Finally, we discuss future directions for defining our framework and designing future interventions.}, journal={2015 IEEE/ACM 8TH INTERNATIONAL WORKSHOP ON COOPERATIVE AND HUMAN ASPECTS OF SOFTWARE ENGINEERING CHASE 2015}, author={Ford, Denae and Parnin, Chris}, year={2015}, pages={115–116} } @article{ur_rahman_helms_williams_parnin_2015, title={Synthesizing Continuous Deployment Practices Used in Software Development}, DOI={10.1109/agile.2015.12}, abstractNote={Continuous deployment speeds up the process of existing agile methods, such as Scrum, and Extreme Programming (XP) through the automatic deployment of software changes to end-users upon passing of automated tests. Continuous deployment has become an emerging software engineering process amongst numerous software companies, such as Facebook, Github, Netflix, and Rally Software. A systematic analysis of software practices used in continuous deployment can facilitate a better understanding of continuous deployment as a software engineering process. 
Such analysis can also help software practitioners in having a shared vocabulary of practices and in choosing the software practices that they can use to implement continuous deployment. The goal of this paper is to aid software practitioners in implementing continuous deployment through a systematic analysis of software practices that are used by software companies. We studied the continuous deployment practices of 19 software companies by performing a qualitative analysis of Internet artifacts and by conducting follow-up inquiries. In total, we found 11 software practices that are used by 19 software companies. We also found that in terms of use, eight of the 11 software practices are common across 14 software companies. We observe that continuous deployment necessitates the consistent use of sound software engineering practices such as automated testing, automated deployment, and code review.}, journal={2015 AGILE CONFERENCE}, author={Ur Rahman, Akond Ashfaque and Helms, Eric and Williams, Laurie and Parnin, Chris}, year={2015}, pages={1–10} } @article{elliott_peiris_parnin_2015, title={Virtual Reality in Software Engineering: Affordances, Applications, and Challenges}, DOI={10.1109/icse.2015.191}, abstractNote={Software engineers primarily interact with source code using a keyboard and mouse, and typically view software on a small number of 2D monitors. This interaction paradigm does not take advantage of many affordances of natural human movement and perception. Virtual reality (VR) can use these affordances more fully than existing developer environments to enable new creative opportunities and potentially result in higher productivity, lower learning curves, and increased user satisfaction. 
This paper describes the affordances offered by VR, demonstrates the benefits of VR and software engineering in prototypes for live coding and code review, and discusses future work, open questions, and the challenges of VR.}, journal={2015 IEEE/ACM 37TH IEEE INTERNATIONAL CONFERENCE ON SOFTWARE ENGINEERING, VOL 2}, author={Elliott, Anthony and Peiris, Brian and Parnin, Chris}, year={2015}, pages={547–550} } @article{parnin_bird_murphy-hill_2012, title={Adoption and use of Java generics}, volume={18}, ISSN={1382-3256 1573-7616}, url={http://dx.doi.org/10.1007/S10664-012-9236-6}, DOI={10.1007/S10664-012-9236-6}, number={6}, journal={Empirical Software Engineering}, publisher={Springer Science and Business Media LLC}, author={Parnin, Chris and Bird, Christian and Murphy-Hill, Emerson}, year={2012}, month={Dec}, pages={1047–1089} } @article{parnin_rugaber_2011, title={Resumption strategies for interrupted programming tasks}, volume={19}, ISSN={0963-9314 1573-1367}, url={http://dx.doi.org/10.1007/S11219-010-9104-9}, DOI={10.1007/S11219-010-9104-9}, number={1}, journal={Software Quality Journal}, publisher={Springer Science and Business Media LLC}, author={Parnin, Chris and Rugaber, Spencer}, year={2011}, pages={5–34} }