@article{shao_qiu_yu_yang_jin_xie_wu_2020, title={Database-Access Performance Antipatterns in Database-Backed Web Applications}, ISSN={1063-6773}, DOI={10.1109/ICSME46990.2020.00016}, abstractNote={Database-backed web applications are prone to performance bugs related to database accesses. While much work has been conducted on database-access antipatterns, with some recent work focusing on performance impact, a comprehensive view of database-access performance antipatterns in database-backed web applications is still lacking. To date, no existing work systematically reports known antipatterns in the literature, and no existing work has studied database-access performance bugs in major types of web applications that access databases differently. To address this issue, we first summarize all known database-access performance antipatterns found through our literature survey, and we report all of them in this paper. We further collect database-access performance bugs from web applications that access databases through language-provided SQL interfaces, which have been largely ignored by recent work, to check how extensively the known antipatterns can cover these bugs. For bugs not covered by the known antipatterns, we extract new database-access performance antipatterns based on real-world performance bugs from such web applications. Our study in total reports 24 known and 10 new database-access performance antipatterns. Our results can guide future work to develop effective tool support for different types of web applications.}, journal={2020 IEEE INTERNATIONAL CONFERENCE ON SOFTWARE MAINTENANCE AND EVOLUTION (ICSME 2020)}, author={Shao, Shudi and Qiu, Zhengyi and Yu, Xiao and Yang, Wei and Jin, Guoliang and Xie, Tao and Wu, Xintao}, year={2020}, pages={58–69} } @article{ghandehari_lei_kacker_kuhn_xie_kung_2020, title={A Combinatorial Testing-Based Approach to Fault Localization}, volume={46}, ISSN={1939-3520}, DOI={10.1109/TSE.2018.2865935}, abstractNote={Combinatorial testing has been shown to be a very effective strategy for software testing. After a failure is detected, the next task is to identify one or more faulty statements in the source code that have caused the failure. In this paper, we present a fault localization approach, called BEN, which produces a ranking of statements in terms of their likelihood of being faulty by leveraging the result of combinatorial testing. BEN consists of two major phases. In the first phase, BEN identifies a combination that is very likely to be failure-inducing. A combination is failure-inducing if it causes any test in which it appears to fail. In the second phase, BEN takes as input a failure-inducing combination identified in the first phase and produces a ranking of statements in terms of their likelihood to be faulty. We conducted an experiment in which our approach was applied to the Siemens suite and four real-world programs, flex, grep, gzip and sed, from the Software-artifact Infrastructure Repository (SIR).
The experimental results show that our approach can effectively and efficiently localize the faulty statements in these programs.}, number={6}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Ghandehari, Laleh Sh. and Lei, Yu and Kacker, Raghu and Kuhn, Richard and Xie, Tao and Kung, David}, year={2020}, month={Jun}, pages={616–645} } @inproceedings{li_zhou_lin_xiao_lin_lin_xie_2013, title={A characteristic study on failures of production distributed data-parallel programs}, DOI={10.1109/icse.2013.6606646}, abstractNote={SCOPE is adopted by thousands of developers from tens of different product teams in Microsoft Bing for daily web-scale data processing, including index building, search ranking, and advertisement display. A SCOPE job is composed of declarative SQL-like queries and imperative C# user-defined functions (UDFs), which are executed in pipeline by thousands of machines. There are tens of thousands of SCOPE jobs executed on Microsoft clusters per day, while some of them fail after a long execution time and thus waste tremendous resources. Reducing SCOPE failures would save significant resources. This paper presents a comprehensive characteristic study on 200 SCOPE failures/fixes and 50 SCOPE failures with debugging statistics from Microsoft Bing, investigating not only major failure types, failure sources, and fixes, but also current debugging practice. Our major findings include (1) most of the failures (84.5%) are caused by defects in data processing rather than defects in code logic; (2) table-level failures (22.5%) are mainly caused by programmers' mistakes and frequent data-schema changes, while row-level failures (62%) are mainly caused by exceptional data; (3) 93% of fixes do not change data processing logic; (4) 8% of failures have root causes not at the failure-exposing stage, making current debugging practice insufficient in these cases. Our study results provide valuable guidelines for future development of data-parallel programs. We believe that these guidelines are not limited to SCOPE, but can also be generalized to other similar data-parallel platforms.}, booktitle={Proceedings of the 35th International Conference on Software Engineering (ICSE 2013)}, author={Li, S. H. and Zhou, H. C. and Lin, H. X. and Xiao, T. and Lin, H. B. and Lin, W. and Xie, T.}, year={2013}, pages={963–972} } @inproceedings{xie_tillmann_halleux_2013, title={Educational software engineering: Where software engineering, education, and gaming meet}, DOI={10.1109/gas.2013.6632588}, abstractNote={We define and advocate the subfield of educational software engineering (i.e., software engineering for education), which develops software engineering technologies (e.g., software testing and analysis, software analytics) for general educational tasks, going beyond educational tasks for software engineering. In this subfield, gaming technologies often play an important role together with software engineering technologies. We expect that researchers in educational software engineering will be among the key players in the education domain and in the coming age of Massive Open Online Courses (MOOCs). Educational software engineering can and will contribute significant solutions to address various critical challenges in education, especially in MOOCs, such as automatic grading, intelligent tutoring, problem generation, and plagiarism detection.
In this position paper, we define educational software engineering and illustrate Pex for Fun (Pex4Fun for short), one of our recent examples of leveraging software engineering and gaming technologies to address educational tasks in teaching and learning programming and software engineering skills.}, booktitle={2013 3rd International Workshop on Games and Software Engineering: Engineering Computer Games to Enable Positive, Progressive Change (GAS)}, author={Xie, T. and Tillmann, N. and Halleux, J.}, year={2013}, pages={36–39} } @inproceedings{xie_2013, title={The synergy of human and artificial intelligence in software engineering}, DOI={10.1109/raise.2013.6615197}, abstractNote={To reduce human efforts and the burden on human intelligence in software-engineering activities, Artificial Intelligence (AI) techniques have been employed to assist or automate these activities. On the other hand, humans' domain knowledge can serve as starting points for designing AI techniques. Furthermore, the results of AI techniques are often interpreted or verified by human users. Such user feedback can be incorporated to further improve the AI techniques, forming a continuous feedback loop. We recently proposed cooperative testing and analysis, including human-tool cooperation (consisting of human-assisted computing and human-centric computing) and human-human cooperation. In this paper, we present example software-engineering problems with solutions that leverage the synergy of human and artificial intelligence, and illustrate how cooperative testing and analysis can help realize such synergy.}, booktitle={International Workshop on Realizing Artificial Intelligence Synergies in Software Engineering (RAISE)}, author={Xie, T.}, year={2013}, pages={4–6} } @inproceedings{pandita_xiao_zhong_xie_oney_paradkar_2012, title={Inferring method specifications from natural language API descriptions}, DOI={10.1109/icse.2012.6227137}, abstractNote={Application Programming Interface (API) documents are a typical way of describing legal usage of reusable software libraries, thus facilitating software reuse. However, even with such documents, developers often overlook some documents and build software systems that are inconsistent with the legal usage of those libraries. Existing software verification tools require formal specifications (such as code contracts), and therefore cannot directly verify the legal usage described in natural language text in API documents against code using that library. However, in practice, most libraries do not come with formal specifications, thus hindering tool-based verification. To address this issue, we propose a novel approach to infer formal specifications from natural language text of API documents. Our evaluation results show that our approach achieves an average of 92% precision and 93% recall in identifying sentences that describe code contracts from more than 2500 sentences of API documents. Furthermore, our results show that our approach has an average 83% accuracy in inferring specifications from over 1600 sentences describing code contracts.}, booktitle={2012 34th International Conference on Software Engineering (ICSE)}, author={Pandita, R. and Xiao, X. S. and Zhong, H. and Xie, T. and Oney, S. and Paradkar, A.}, year={2012}, pages={815–825} }
@article{wang_zhang_xie_mei_sun_2013, title={Locating Need-to-Externalize Constant Strings for Software Internationalization with Generalized String-Taint Analysis}, volume={39}, ISSN={1939-3520}, DOI={10.1109/tse.2012.40}, abstractNote={Nowadays, a software product usually faces a global market. To meet the requirements of different local users, the software product must be internationalized. In an internationalized software product, user-visible hard-coded constant strings are externalized to resource files so that local versions can be generated by translating the resource files. In many cases, a software product is not internationalized at the beginning of the software development process. To internationalize an existing product, the developers must locate the user-visible constant strings that should be externalized. This locating process is tedious and error-prone due to 1) the large number of both user-visible and non-user-visible constant strings and 2) the complex data flows from constant strings to the Graphical User Interface (GUI). In this paper, we propose an automatic approach to locating need-to-externalize constant strings in the source code of a software product. Given a list of precollected API methods that output values of their string argument variables to the GUI and the source code of the software product under analysis, our approach traces from the invocation sites (within the source code) of these methods back to the need-to-externalize constant strings using generalized string-taint analysis. In our empirical evaluation, we used our approach to locate need-to-externalize constant strings in the uninternationalized versions of seven real-world open source software products. The results of our evaluation demonstrate that our approach is able to effectively locate need-to-externalize constant strings in uninternationalized software products. Furthermore, to help developers understand why a constant string requires translation and properly translate the need-to-externalize strings, we provide visual representation of the string dependencies related to the need-to-externalize strings.}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Wang, Xiaoyin and Zhang, Lu and Xie, Tao and Mei, Hong and Sun, Jiasu}, year={2013}, month={Apr}, pages={516–536} } @inproceedings{hwang_xie_el kateb_mouelhi_le traon_2012, title={Selection of regression system tests for security policy evolution}, DOI={10.1145/2351676.2351719}, abstractNote={As security requirements of software often change, developers may modify security policies such as access control policies (policies in short) according to evolving requirements. To increase confidence that the modification of policies is correct, developers conduct regression testing. However, rerunning all of the existing system test cases could be costly and time-consuming. To address this issue, we develop a regression-test-selection approach, which selects every system test case that may reveal regression faults caused by policy changes. Our evaluation results show that our test-selection approach reduces a substantial number of system test cases efficiently.}, booktitle={2012 Proceedings of the 27th IEEE/ACM International Conference on Automated Software Engineering (ASE)}, author={Hwang, J. H. and Xie, T. and El Kateb, D. and Mouelhi, T. and Le Traon, Y.}, year={2012}, pages={266–269} }
@article{thummalapenta_xie_2011, title={Alattin: mining alternative patterns for defect detection}, volume={18}, ISSN={1573-7535}, DOI={10.1007/s10515-011-0086-z}, abstractNote={To improve software quality, static or dynamic defect-detection tools accept programming rules as input and detect their violations in software as defects. As these programming rules are often not well documented in practice, previous work developed various approaches that mine programming rules as frequent patterns from program source code. Then these approaches use static or dynamic defect-detection techniques to detect pattern violations in source code under analysis. However, these existing approaches often produce many false positives due to various factors. To reduce false positives produced by these mining approaches, we develop a novel approach, called Alattin, that includes new mining algorithms and a technique for detecting neglected conditions based on our mining algorithm. Our new mining algorithms mine patterns in four pattern formats: conjunctive, disjunctive, exclusive-disjunctive, and combinations of these patterns. We show the benefits and limitations of these four pattern formats with respect to false positives and false negatives among detected violations by applying those patterns to the problem of detecting neglected conditions.}, number={3-4}, journal={AUTOMATED SOFTWARE ENGINEERING}, author={Thummalapenta, Suresh and Xie, Tao}, year={2011}, month={Dec}, pages={293–323} } @inproceedings{xiao_xie_tillmann_halleux_2011, title={Covana: Precise identification of problems in Pex}, DOI={10.1145/1985793.1985976}, abstractNote={Achieving high structural coverage is an important goal of software testing. Instead of manually producing test inputs that achieve high structural coverage, testers or developers can employ tools built based on automated test-generation approaches, such as Pex, to automatically generate such test inputs. Although these tools can easily generate test inputs that achieve high structural coverage for simple programs, when applied on complex programs in practice, these tools face various problems, such as the problems of dealing with method calls to external libraries or generating method-call sequences to produce desired object states. Since these tools are currently not powerful enough to deal with these various problems in testing complex programs, we propose cooperative developer testing, where developers provide guidance to help tools achieve higher structural coverage. In this demo, we present Covana, a tool that precisely identifies and reports problems that prevent Pex from achieving high structural coverage. Covana identifies problems primarily by determining whether branch statements containing not-covered branches have data dependencies on problem candidates.}, booktitle={2011 33rd International Conference on Software Engineering (ICSE)}, author={Xiao, X. S. and Xie, T. and Tillmann, N. and Halleux, J.}, year={2011}, pages={1004–1006} } @inproceedings{ge_taneja_xie_tillmann_2011, title={DyTa: Dynamic symbolic execution guided with static verification results}, DOI={10.1145/1985793.1985971}, abstractNote={Software-defect detection is an increasingly important research topic in software engineering. To detect defects in a program, static verification and dynamic test generation are two important proposed techniques. However, both of these techniques face their respective issues.
Static verification produces false positives, while dynamic test generation is often time-consuming. To address the limitations of static verification and dynamic test generation, we present an automated defect-detection tool, called DyTa, that combines both static verification and dynamic test generation. DyTa consists of a static phase and a dynamic phase. The static phase detects potential defects with a static checker; the dynamic phase generates test inputs through dynamic symbolic execution to confirm these potential defects. DyTa reduces the number of false positives compared to static verification and performs more efficiently compared to dynamic test generation.}, booktitle={2011 33rd International Conference on Software Engineering (ICSE)}, author={Ge, X. and Taneja, K. and Xie, T. and Tillmann, N.}, year={2011}, pages={992–994} } @article{zhang_ma_lu_xie_tillmann_halleux_2012, title={Environmental Modeling for Automated Cloud Application Testing}, volume={29}, ISSN={0740-7459}, DOI={10.1109/ms.2011.158}, abstractNote={Platforms such as Windows Azure let applications conduct data-intensive cloud computing. Unit testing can help ensure high-quality development of such applications, but the results depend on test inputs and the cloud environment's state. Manually providing various test inputs and cloud states is laborious and time-consuming. However, automated test generation must simulate various cloud states to achieve effective testing. To address this challenge, a proposed approach models the cloud environment and applies dynamic symbolic execution to generate test inputs and cloud states. Applying this approach to open-source Azure cloud applications shows that it can achieve high structural coverage.}, number={2}, journal={IEEE SOFTWARE}, author={Zhang, Linghao and Ma, Xiaoxing and Lu, Jian and Xie, Tao and Tillmann, Nikolai and Halleux, Peli}, year={2012}, pages={30–35} } @article{zhong_zhang_xie_mei_2011, title={Inferring specifications for resources from natural language API documentation}, volume={18}, ISSN={1573-7535}, DOI={10.1007/s10515-011-0082-3}, number={3-4}, journal={AUTOMATED SOFTWARE ENGINEERING}, author={Zhong, Hao and Zhang, Lu and Xie, Tao and Mei, Hong}, year={2011}, month={Dec}, pages={227–261} } @article{hu_kuhn_xie_hwang_2011, title={Model Checking for Verification of Mandatory Access Control Models and Properties}, volume={21}, ISSN={1793-6403}, DOI={10.1142/s021819401100513x}, abstractNote={Mandatory access control (MAC) mechanisms control which users or processes have access to which resources in a system. MAC policies are increasingly specified to facilitate managing and maintaining access control. However, the correct specification of the policies is a very challenging problem. To formally and precisely capture the security properties that MAC should adhere to, MAC models are usually written to bridge the rather wide gap in abstraction between policies and mechanisms. In this paper, we propose a general approach for property verification for MAC models. The approach defines a standardized structure for MAC models, providing for both property verification and automated generation of test cases. The approach expresses MAC models in the specification language of a model checker and expresses generic access control properties in the property language.
Then the approach uses the model checker to verify the integrity, coverage, and confinement of these properties for the MAC models, and finally generates test cases via a combinatorial covering array for the system implementations of the models.}, number={1}, journal={INTERNATIONAL JOURNAL OF SOFTWARE ENGINEERING AND KNOWLEDGE ENGINEERING}, author={Hu, Vincent C. and Kuhn, D. Richard and Xie, Tao and Hwang, Jeehyun}, year={2011}, month={Feb}, pages={103–127} } @inproceedings{xiao_xie_tillmann_halleux_2011, title={Precise identification of problems for structural test generation}, DOI={10.1145/1985793.1985876}, abstractNote={An important goal of software testing is to achieve at least high structural coverage. To reduce the manual efforts of producing such high-covering test inputs, testers or developers can employ tools built based on automated structural test-generation approaches. Although these tools can easily achieve high structural coverage for simple programs, when they are applied on complex programs in practice, these tools face various problems, such as (1) the external-method-call problem (EMCP), where tools cannot deal with method calls to external libraries; and (2) the object-creation problem (OCP), where tools fail to generate method-call sequences to produce desirable object states. Since these tools are currently not powerful enough to deal with these problems in testing complex programs in practice, we propose cooperative developer testing, where developers provide guidance to help tools achieve higher structural coverage. To reduce the efforts of developers in providing guidance to tools, in this paper, we propose a novel approach, called Covana, which precisely identifies and reports problems that prevent the tools from achieving high structural coverage, primarily by determining whether branch statements containing not-covered branches have data dependencies on problem candidates. We provide two techniques to instantiate Covana to identify EMCPs and OCPs. Finally, we conduct evaluations on two open source projects to show the effectiveness of Covana in identifying EMCPs and OCPs.}, booktitle={2011 33rd International Conference on Software Engineering (ICSE)}, author={Xiao, X. S. and Xie, T. and Tillmann, N. and Halleux, J.}, year={2011}, pages={611–620} } @article{thummalapenta_marri_xie_tillmann_halleux_2011, title={Retrofitting unit tests for parameterized unit testing}, volume={6603}, journal={Fundamental Approaches to Software Engineering}, author={Thummalapenta, S. and Marri, M. R. and Xie, T. and Tillmann, N. and Halleux, J.}, year={2011}, pages={294–309} } @article{thummalapenta_xie_tillmann_halleux_su_2011, title={Synthesizing Method Sequences for High-Coverage Testing}, volume={46}, ISSN={1558-1160}, DOI={10.1145/2076021.2048083}, abstractNote={High-coverage testing is challenging. Modern object-oriented programs present additional challenges for testing. One key difficulty is the generation of proper method sequences to construct desired objects as method parameters. In this paper, we cast the problem as an instance of program synthesis that automatically generates candidate programs to satisfy a user-specified intent. In our setting, candidate programs are method sequences, and desired object states specify an intent. Automatic generation of desired method sequences is difficult due to its large search space---sequences often involve methods from multiple classes and require specific primitive values.
This paper introduces a novel approach, called Seeker, to intelligently navigate the large search space. Seeker synergistically combines static and dynamic analyses: (1) dynamic analysis generates method sequences to cover branches; (2) static analysis uses dynamic analysis information for not-covered branches to generate candidate sequences; and (3) dynamic analysis explores and eliminates statically generated sequences. For evaluation, we have implemented Seeker and demonstrate its effectiveness on four subject applications totaling 28K LOC. We show that Seeker achieves higher branch coverage and def-use coverage than existing state-of-the-art approaches. We also show that Seeker detects 34 new defects missed by existing tools.}, number={10}, journal={ACM SIGPLAN NOTICES}, author={Thummalapenta, Suresh and Xie, Tao and Tillmann, Nikolai and Halleux, Jonathan and Su, Zhendong}, year={2011}, month={Oct}, pages={189–206} } @article{liu_chen_hwang_xie_2011, title={Designing Fast and Scalable XACML Policy Evaluation Engines}, volume={60}, ISSN={1557-9956}, DOI={10.1109/tc.2010.274}, abstractNote={Most prior research on policies has focused on correctness. While correctness is an important issue, the adoption of policy-based computing may be limited if the resulting systems are not implemented efficiently and thus perform poorly. To increase the effectiveness and adoption of policy-based computing, in this paper, we propose fast policy evaluation algorithms that can be adapted to support various policy languages. We focus on XACML policy evaluation because XACML has become the de facto standard for specifying access control policies, has been widely used on web servers, and is the most complex among existing policy languages. We implemented our algorithms in a policy evaluation system called XEngine and conducted a side-by-side comparison with Sun Policy Decision Point (PDP), the industrial standard for XACML policy evaluation. The results show that XEngine is orders of magnitude faster than Sun PDP. The performance difference grows almost linearly with the number of rules in an XACML policy. To the best of our knowledge, there is no prior work on improving XACML policy evaluation performance. This paper represents the first step in exploring this unknown space.}, number={12}, journal={IEEE TRANSACTIONS ON COMPUTERS}, author={Liu, Alex X. and Chen, Fei and Hwang, JeeHyun and Xie, Tao}, year={2011}, month={Dec}, pages={1802–1817} } @inproceedings{pandita_xie_tillmann_halleux_2010, title={Guided test generation for coverage criteria}, DOI={10.1109/icsm.2010.5609565}, abstractNote={Test coverage criteria, including boundary-value and logical coverage such as Modified Condition/Decision Coverage (MC/DC), have been increasingly used in safety-critical or mission-critical domains, complementing the more popularly used structural coverage criteria such as block or branch coverage. However, existing automated test-generation approaches often target block or branch coverage for test generation and selection, and therefore do not support testing against boundary-value coverage or logical coverage. To address this issue, we propose a general approach that uses instrumentation to guide existing test-generation approaches to generate test inputs that achieve boundary-value and logical coverage for the program under test.
Our preliminary evaluation shows that our approach effectively helps an approach based on Dynamic Symbolic Execution (DSE) to improve boundary-value and logical coverage of generated test inputs. The evaluation results show a 30.5% maximum (23% average) increase in boundary-value coverage and a 26% maximum (21.5% average) increase in logical coverage of the subject programs under test with our approach compared to without it. In addition, our approach improves the fault-detection capability of generated test inputs by 12.5% maximum (11% average) compared to the test inputs generated without using our approach.}, booktitle={2010 IEEE International Conference on Software Maintenance}, author={Pandita, R. and Xie, T. and Tillmann, N. and Halleux, J.}, year={2010} } @article{li_xie_jin_liu_2010, title={Perturbation-based user-input-validation testing of web applications}, volume={83}, ISSN={1873-1228}, DOI={10.1016/j.jss.2010.07.007}, abstractNote={User-input validation (UIV) is the first barricade that protects web applications from application-level attacks. Most UIV test tools cannot detect semantics-related vulnerabilities in validators, such as filling a five-digit number into a field that accepts a year. To address this issue, we propose a new approach to generate test inputs for UIV based on the analysis of client-side information. In particular, we use input-field information to generate valid inputs, and then perturb valid inputs to generate invalid test inputs. We conducted an empirical study to evaluate our approach. The empirical results show that our approach is more effective than existing vulnerability scanners in finding semantics-related UIV vulnerabilities in web applications.}, number={11}, journal={JOURNAL OF SYSTEMS AND SOFTWARE}, author={Li, Nuo and Xie, Tao and Jin, Maozhong and Liu, Chao}, year={2010}, month={Nov}, pages={2263–2274} } @article{thummalapenta_xie_2009, title={Alattin: Mining Alternative Patterns for Detecting Neglected Conditions}, ISSN={1527-1366}, DOI={10.1109/ase.2009.72}, abstractNote={To improve software quality, static or dynamic verification tools accept programming rules as input and detect their violations in software as defects. As these programming rules are often not well documented in practice, previous work developed various approaches that mine programming rules as frequent patterns from program source code. Then these approaches use static defect-detection techniques to detect pattern violations in source code under analysis. These existing approaches often produce many false positives due to various factors. To reduce false positives produced by these mining approaches, we develop a novel approach, called Alattin, that includes a new mining algorithm and a technique for detecting neglected conditions based on our mining algorithm. Our new mining algorithm mines alternative patterns in the example form "P1 or P2", where P1 and P2 are alternative rules, such as condition checks on method arguments or return values related to the same API method. We conduct two evaluations to show the effectiveness of our Alattin approach.
Our evaluation results show that (1) alternative patterns reach more than 40% of all mined patterns for APIs provided by six open source libraries; and (2) the mining of alternative patterns helps reduce nearly 28% of false positives among detected violations.}, journal={2009 IEEE/ACM INTERNATIONAL CONFERENCE ON AUTOMATED SOFTWARE ENGINEERING, PROCEEDINGS}, author={Thummalapenta, Suresh and Xie, Tao}, year={2009}, pages={283–294} } @article{xie_thummalapenta_lo_liu_2009, title={Data Mining for Software Engineering}, volume={42}, ISSN={1558-0814}, DOI={10.1109/MC.2009.256}, abstractNote={To improve software productivity and quality, software engineers are increasingly applying data mining algorithms to various software engineering tasks. However, mining SE data poses several challenges. The authors present various algorithms to effectively mine sequences, graphs, and text from such data.}, number={8}, journal={COMPUTER}, author={Xie, Tao and Thummalapenta, Suresh and Lo, David and Liu, Chao}, year={2009}, month={Aug}, pages={55–62} } @article{hwang_xie_hu_2009, title={Detection of Multiple-Duty-Related Security Leakage in Access Control Policies}, ISBN={978-0-7695-3758-0}, DOI={10.1109/ssiri.2009.63}, abstractNote={Access control mechanisms control which subjects (such as users or processes) have access to which resources. To facilitate managing access control, policy authors increasingly write access control policies in XACML. Access control policies written in XACML could be amenable to multiple-duty-related security leakage, which grants unauthorized access to a user when the user takes multiple duties (e.g., multiple roles in role-based access control policies). To help policy authors detect multiple-duty-related security leakage, we develop a novel framework that analyzes policies and detects cases that potentially cause the leakage. In such cases, a user taking multiple roles (e.g., both r1 and r2) is given a different access decision from the decision given to a user taking an individual role (e.g., r1 and r2, respectively). We conduct experiments on 11 XACML policies and our empirical results show that our framework effectively pinpoints potential multiple-duty-related security leakage for policy authors to inspect.}, journal={2009 THIRD IEEE INTERNATIONAL CONFERENCE ON SECURE SOFTWARE INTEGRATION AND RELIABILITY IMPROVEMENT, PROCEEDINGS}, author={Hwang, JeeHyun and Xie, Tao and Hu, Vincent C.}, year={2009}, pages={65–74} } @article{hwang_xie_chen_liu_2009, title={Fault Localization for Firewall Policies}, ISBN={978-0-7695-3826-6}, ISSN={1060-9857}, DOI={10.1109/srds.2009.38}, abstractNote={Firewalls are the mainstay of enterprise security and the most widely adopted technology for protecting private networks. Ensuring the correctness of firewall policies through testing is important. In firewall policy testing, test inputs are packets and test outputs are decisions. Packets with unexpected (expected) evaluated decisions are classified as failed (passed) tests. Given failed tests together with passed tests, policy testers need to debug the policy to detect fault locations (such as faulty rules). Such a process is often time-consuming. To help reduce the effort of detecting fault locations, we propose an approach to reduce the number of rules for inspection based on information collected during the evaluation of failed tests. Our approach ranks the reduced rules to decide which rules should be inspected first. We performed experiments on applying our approach.
The empirical results show that our approach can reduce the number of rules required for inspection in fault localization by 56%.}, journal={2009 28TH IEEE INTERNATIONAL SYMPOSIUM ON RELIABLE DISTRIBUTED SYSTEMS, PROCEEDINGS}, author={Hwang, JeeHyun and Xie, Tao and Chen, Fei and Liu, Alex X.}, year={2009}, pages={100+} } @article{xie_tillmann_halleux_schulte_2009, title={Fitness-Guided Path Exploration in Dynamic Symbolic Execution}, ISBN={978-1-4244-4422-9}, ISSN={1530-0889}, DOI={10.1109/dsn.2009.5270315}, abstractNote={Dynamic symbolic execution is a structural testing technique that systematically explores feasible paths of the program under test by running the program with different test inputs to improve code coverage. To address the space-explosion issue in path exploration, we propose a novel approach called Fitnex, a search strategy that uses state-dependent fitness values (computed through a fitness function) to guide path exploration. The fitness function measures how close an already discovered feasible path is to a particular test target (e.g., covering a not-yet-covered branch). Our new fitness-guided search strategy is integrated with other strategies that are effective for exploration problems where the fitness heuristic fails. We implemented the new approach in Pex, an automated structural testing tool developed at Microsoft Research. We evaluated our new approach by comparing it with existing search strategies. The empirical results show that our approach is effective since it consistently achieves high code coverage faster than existing search strategies.}, journal={2009 IEEE/IFIP INTERNATIONAL CONFERENCE ON DEPENDABLE SYSTEMS & NETWORKS (DSN 2009)}, author={Xie, Tao and Tillmann, Nikolai and Halleux, Jonathan and Schulte, Wolfram}, year={2009}, pages={359+} } @article{hao_zhang_xie_mei_sun_2009, title={Interactive Fault Localization Using Test Information}, volume={24}, ISSN={1860-4749}, DOI={10.1007/s11390-009-9270-z}, number={5}, journal={JOURNAL OF COMPUTER SCIENCE AND TECHNOLOGY}, author={Hao, Dan and Zhang, Lu and Xie, Tao and Mei, Hong and Sun, Jia-Su}, year={2009}, month={Sep}, pages={962–974} } @inproceedings{zhong_xie_zhang_pei_mei_2009, title={MAPO: mining and recommending API usage patterns}, volume={5653}, booktitle={ECOOP 2009 – Object-Oriented Programming}, author={Zhong, H. and Xie, T. and Zhang, L. and Pei, J. and Mei, H.}, year={2009}, pages={318–343} } @article{li_xie_tillmann_halleux_schulte_2009, title={Reggae: Automated Test Generation for Programs using Complex Regular Expressions}, ISSN={1527-1366}, DOI={10.1109/ase.2009.67}, abstractNote={Test coverage such as branch coverage is commonly measured to assess the sufficiency of test inputs. To reduce tedious manual efforts in generating high-covering test inputs, various automated techniques have been proposed. Some recent effective techniques include Dynamic Symbolic Execution (DSE) based on path exploration. However, these existing DSE techniques cannot generate high-covering test inputs for programs using complex regular expressions due to the large exploration space; these complex regular expressions are commonly used for input validation and information extraction. To address this issue, we propose an approach, named Reggae, to reduce the exploration space of DSE in test generation. In our evaluation, we apply Reggae on various input-validation programs that use complex regular expressions.
Empirical results show that Reggae helps a test-generation tool generate test inputs that achieve 79% branch coverage of validators, up from the 29% achieved without Reggae.}, journal={2009 IEEE/ACM INTERNATIONAL CONFERENCE ON AUTOMATED SOFTWARE ENGINEERING, PROCEEDINGS}, author={Li, Nuo and Xie, Tao and Tillmann, Nikolai and Halleux, Jonathan and Schulte, Wolfram}, year={2009}, pages={515–519} } @article{hao_xie_zhang_wang_sun_mei_2010, title={Test input reduction for result inspection to facilitate fault localization}, volume={17}, ISSN={1573-7535}, DOI={10.1007/s10515-009-0056-x}, number={1}, journal={AUTOMATED SOFTWARE ENGINEERING}, author={Hao, Dan and Xie, Tao and Zhang, Lu and Wang, Xiaoyin and Sun, Jiasu and Mei, Hong}, year={2010}, month={Mar}, pages={5–31} } @article{csallner_smaragdakis_xie_2008, title={DSD-Crasher: A hybrid analysis tool for bug finding}, volume={17}, ISSN={1557-7392}, DOI={10.1145/1348250.1348254}, abstractNote={DSD-Crasher is a bug-finding tool that follows a three-step approach to program analysis: D. Capture the program's intended execution behavior with dynamic invariant detection. The derived invariants exclude many unwanted values from the program's input domain. S. Statically analyze the program within the restricted input domain to explore many paths. D. Automatically generate test cases that focus on reproducing the predictions of the static analysis. Thereby, confirmed results are feasible. This three-step approach yields benefits compared to past two-step combinations in the literature. In our evaluation with third-party applications, we demonstrate higher precision over tools that lack a dynamic step and higher efficiency over tools that lack a static step.}, number={2}, journal={ACM TRANSACTIONS ON SOFTWARE ENGINEERING AND METHODOLOGY}, author={Csallner, Christoph and Smaragdakis, Yannis and Xie, Tao}, year={2008} } @article{thomas_williams_xie_2009, title={On automated prepared statement generation to remove SQL injection vulnerabilities}, volume={51}, ISSN={1873-6025}, DOI={10.1016/j.infsof.2008.08.002}, abstractNote={Since 2002, over 10% of total cyber vulnerabilities were SQL injection vulnerabilities (SQLIVs). This paper presents a prepared-statement replacement algorithm for removing SQLIVs by replacing SQL statements with prepared statements. Prepared statements have a static structure, which prevents SQL injection attacks from changing the logical structure of a prepared statement. We created the prepared-statement replacement algorithm and a corresponding tool for automated fix generation. We conducted four case studies of open source projects to evaluate the capability of the algorithm and its automation. The empirical results show that prepared statement code correctly replaced 94% of the SQLIVs in these projects.}, number={3}, journal={INFORMATION AND SOFTWARE TECHNOLOGY}, author={Thomas, Stephen and Williams, Laurie and Xie, Tao}, year={2009}, month={Mar}, pages={589–598} } @article{xie_2006, title={Augmenting automatically generated unit-test suites with regression oracle checking}, volume={4067}, journal={Lecture Notes in Computer Science}, author={Xie, T.}, year={2006}, pages={380–403} }