% Bibliography database, one entry per publication, ordered by year (newest first).
% Conventions used below: lowercase entry types and field names, one field per line,
% page ranges written with a double hyphen, months given as the unquoted three-letter
% macros, ISBN/ISSN as plain identifiers, and acronyms in titles protected with braces.
% Abstracts are kept verbatim in the non-standard abstractNote field (ignored by styles).

@article{chirkova_yu_2017,
  author       = {Chirkova, Rada and Yu, Ting},
  title        = {Exact Detection of Information Leakage: Decidability and Complexity},
  journal      = {Transactions on Large-Scale Data- and Knowledge-Centered Systems {XXXII}},
  volume       = {10420},
  pages        = {1--23},
  year         = {2017},
  isbn         = {978-3-662-55607-8},
  issn         = {1611-3349},
  doi          = {10.1007/978-3-662-55608-5_1},
  abstractNote = {Elaborate security policies often require organizations to restrict user data access in a fine-grained manner, instead of traditional table- or column-level access control. Not surprisingly, managing fine-grained access control in software is rather challenging. In particular, if access is not configured carefully, information leakage may happen: Users may infer sensitive information through the data explicitly accessible to them. In this paper we formalize this information-leakage problem, by modeling sensitive information as answers to “secret queries,” and by modeling access-control rules as views. We focus on the scenario where sensitive information can be deterministically derived by adversaries. We review a natural data-exchange based inference model for detecting information leakage, and show its capabilities and limitation. We then introduce and formally study a new inference model, view-verified data exchange, that overcomes the limitation for the query language under consideration. Our formal study provides correctness and complexity results for the proposed inference model in the context of queries belonging to a frequent realistic query type and common types of integrity constraints on the data.},
}

@inproceedings{choo_yu_chi_2015,
  author    = {Choo, E. and Yu, T. and Chi, M.},
  title     = {Detecting opinion spammer groups through community discovery and sentiment analysis},
  booktitle = {Data and Applications Security and Privacy {XXIX}},
  volume    = {9149},
  pages     = {170--187},
  year      = {2015},
}

@inproceedings{ong_rojcewicz_farnan_lee_chrysanthis_yu_2015,
  author       = {Ong, N. R. and Rojcewicz, S. E. and Farnan, N. L. and Lee, A. J. and Chrysanthis, P. K. and Yu, T.},
  title        = {Interactive preference-aware query optimization},
  booktitle    = {2015 {IEEE} 31st International Conference on Data Engineering ({ICDE})},
  pages        = {1512--1515},
  year         = {2015},
  doi          = {10.1109/icde.2015.7113414},
  abstractNote = {PASQL is an extension to SQL that allows users of a distributed database to specify privacy constraints on an SQL query evaluation plan. However, privacy constraints can be difficult for users to specify, and worse yet, all possible situations that could lead to a privacy violation may not be known to the user a priori. To address these challenges, we propose a GUI-based interactive process for detecting such violations and generating appropriate constraints. In this work, we demonstrate two approaches to implementing such a GUI that provide different ways of analyzing and interactively optimizing a PASQL query plan.},
}

@inproceedings{wei_yu_2014,
  author    = {Wei, W. and Yu, T.},
  title     = {Integrity assurance for outsourced databases without {DBMS} modification},
  booktitle = {Data and Applications Security and Privacy {XXVIII}},
  volume    = {8566},
  pages     = {1--16},
  year      = {2014},
}

@inproceedings{farnan_lee_chrysanthis_yu_2014,
  author       = {Farnan, N. L. and Lee, A. J. and Chrysanthis, P. K. and Yu, T.},
  title        = {{PAQO}: Preference-aware query optimization for decentralized database systems},
  booktitle    = {2014 {IEEE} 30th International Conference on Data Engineering ({ICDE})},
  pages        = {424--435},
  year         = {2014},
  doi          = {10.1109/icde.2014.6816670},
  abstractNote = {The declarative nature of SQL has traditionally been a major strength. Users simply state what information they are interested in, and the database management system determines the best plan for retrieving it. A consequence of this model is that should a user ever want to specify some aspect of how their queries are evaluated (e.g., a preference to read data from a specific replica, or a requirement for all joins to be performed by a single server), they are unable to. This can leave database administrators shoehorning evaluation preferences into database cost models. Further, for distributed database users, it can result in query evaluation plans that violate data handling best practices or the privacy of the user. To address such issues, we have developed a framework for declarative, user-specified constraints on the query optimization process and implemented it within PosgreSQL. Our Preference-Aware Query Optimizer (PAQO) upholds both strict requirements and partially ordered preferences that are issued alongside of the queries that it processes. In this paper, we present the design of PAQO and thoroughly evaluate its performance.},
}

@article{du_dean_tan_gu_yu_2014,
  author       = {Du, Juan and Dean, Daniel J. and Tan, Yongmin and Gu, Xiaohui and Yu, Ting},
  title        = {Scalable Distributed Service Integrity Attestation for Software-as-a-Service Clouds},
  journal      = {{IEEE} Transactions on Parallel and Distributed Systems},
  volume       = {25},
  number       = {3},
  pages        = {730--739},
  year         = {2014},
  month        = mar,
  issn         = {1558-2183},
  doi          = {10.1109/tpds.2013.62},
  abstractNote = {Software-as-a-service (SaaS) cloud systems enable application service providers to deliver their applications via massive cloud computing infrastructures. However, due to their sharing nature, SaaS clouds are vulnerable to malicious attacks. In this paper, we present IntTest, a scalable and effective service integrity attestation framework for SaaS clouds. IntTest provides a novel integrated attestation graph analysis scheme that can provide stronger attacker pinpointing power than previous schemes. Moreover, IntTest can automatically enhance result quality by replacing bad results produced by malicious attackers with good results produced by benign service providers. We have implemented a prototype of the IntTest system and tested it on a production cloud computing infrastructure using IBM System S stream processing applications. Our experimental results show that IntTest can achieve higher attacker pinpointing accuracy than existing approaches. IntTest does not require any special hardware or secure kernel support and imposes little performance impact to the application, which makes it practical for large-scale cloud systems.},
}

@article{yuan_chen_yu_yu_2013,
  author       = {Yuan, Mingxuan and Chen, Lei and Yu, Philip S. and Yu, Ting},
  title        = {Protecting Sensitive Labels in Social Network Data Anonymization},
  journal      = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume       = {25},
  number       = {3},
  pages        = {633--647},
  year         = {2013},
  month        = mar,
  issn         = {1558-2191},
  doi          = {10.1109/tkde.2011.259},
  abstractNote = {Privacy is one of the major concerns when publishing or sharing social network data for social science research and business analysis. Recently, researchers have developed privacy models similar to k-anonymity to prevent node reidentification through structure information. However, even when these privacy models are enforced, an attacker may still be able to infer one's private information if a group of nodes largely share the same sensitive labels (i.e., attributes). In other words, the label-node relationship is not well protected by pure structure anonymization methods. Furthermore, existing approaches, which rely on edge editing or node clustering, may significantly alter key graph properties. In this paper, we define a k-degree-l-diversity anonymity model that considers the protection of structural information as well as sensitive labels of individuals. We further propose a novel anonymization methodology based on adding noise nodes. We develop a new algorithm by adding noise nodes into the original graph with the consideration of introducing the least distortion to graph properties. Most importantly, we provide a rigorous analysis of the theoretical bounds on the number of noise nodes added and their impacts on an important graph property. We conduct extensive experiments to evaluate the effectiveness of the proposed technique.},
}

@article{cormode_srivastava_yu_zhang_2010,
  author  = {Cormode, Graham and Srivastava, Divesh and Yu, Ting and Zhang, Qing},
  title   = {Anonymizing bipartite graph data using safe groupings},
  journal = {{VLDB} Journal},
  volume  = {19},
  number  = {1},
  pages   = {115--139},
  year    = {2010},
  month   = feb,
  issn    = {0949-877X},
  doi     = {10.1007/s00778-009-0167-9},
}

@article{deng_xie_liu_wu_zhao_yu_2009,
  author       = {Deng, Zengan and Xie, Lian and Liu, Bin and Wu, Kejian and Zhao, Dongliang and Yu, Ting},
  title        = {Coupling winds to ocean surface currents over the global ocean},
  journal      = {Ocean Modelling},
  volume       = {29},
  number       = {4},
  pages        = {261--268},
  year         = {2009},
  issn         = {1463-5011},
  doi          = {10.1016/j.ocemod.2009.05.003},
  abstractNote = {A Wind stress–Current Coupled System (WCCS) consisting of the HYbrid Coordinate Ocean Model (HYCOM) and an improved wind stress algorithm based on Donelan et al. [Donelan, W.M., Drennan, Katsaros, K.B., 1997. The air–sea momentum flux in mixed wind sea and swell conditions. J. Phys. Oceanogr. 27, 2087–2099] is developed by using the Earth System Modeling Framework (ESMF). The WCCS is applied to the global ocean to study the interactions between the wind stress and the ocean surface currents. In this study, the ocean surface current velocity is taken into consideration in the wind stress calculation and air–sea heat flux calculation. The wind stress that contains the effect of ocean surface current velocity will be used to force the HYCOM. The results indicate that the ocean surface velocity exerts an important influence on the wind stress, which, in turn, significantly affects the global ocean surface currents, air–sea heat fluxes, and the thickness of ocean surface boundary layer. Comparison with the TOGA TAO buoy data, the sea surface temperature from the wind–current coupled simulation showed noticeable improvement over the stand-alone HYCOM simulation.},
}

@article{li_li_wang_yu_2009,
  author       = {Li, Jiangtao and Li, Ninghui and Wang, XiaoFeng and Yu, Ting},
  title        = {Denial of service attacks and defenses in decentralized trust management},
  journal      = {International Journal of Information Security},
  volume       = {8},
  number       = {2},
  pages        = {89--101},
  year         = {2009},
  month        = apr,
  issn         = {1615-5270},
  doi          = {10.1007/s10207-008-0068-8},
  abstractNote = {Trust management is an approach to scalable and flexible access control in decentralized systems. In trust management, a server often needs to evaluate a chain of credentials submitted by a client; this requires the server to perform multiple expensive digital signature verifications. In this paper, we study low-bandwidth Denial-of-Service (DoS) attacks that exploit the existence of trust management systems to deplete server resources. Although the threat of DoS attacks has been studied for some application-level protocols such as authentication protocols, we show that it is especially destructive for trust management systems. Exploiting the delegation feature in trust management languages, an attacker can forge a long credential chain to force a server to consume a large amount of computing resource. Using game theory as an analytic tool, we demonstrate that unprotected trust management servers will easily fall prey to a witty attacker who moves smartly. We report our empirical study of existing trust management systems, which manifests the gravity of this threat. We also propose a defense technique using credential caching, and show that it is effective in the presence of intelligent attackers.},
}

@article{zhang_wei_yu_2009,
  author  = {Zhang, Qing and Wei, Wei and Yu, Ting},
  title   = {On the Modeling of Honest Players in Reputation Systems},
  journal = {Journal of Computer Science and Technology},
  volume  = {24},
  number  = {5},
  pages   = {808--819},
  year    = {2009},
  month   = sep,
  issn    = {1860-4749},
  doi     = {10.1007/s11390-009-9271-y},
}

% Conference paper: the venue is a proceedings title, so it is stored in booktitle
% under the inproceedings type (the export had typed it as a journal article).
@inproceedings{wei_du_yu_gu_2009,
  author       = {Wei, Wei and Du, Juan and Yu, Ting and Gu, Xiaohui},
  title        = {{SecureMR}: A Service Integrity Assurance Framework for {Map Reduce}},
  booktitle    = {25th Annual Computer Security Applications Conference},
  pages        = {73--82},
  year         = {2009},
  isbn         = {978-0-7695-3919-5},
  doi          = {10.1109/acsac.2009.17},
  abstractNote = {MapReduce has become increasingly popular as a powerful parallel data processing model. To deploy MapReduce as a data processing service over open systems such as service oriented architecture, cloud computing, and volunteer computing, we must provide necessary security mechanisms to protect the integrity of MapReduce data processing services. In this paper, we present SecureMR, a practical service integrity assurance framework for MapReduce. SecureMR consists of five security components, which provide a set of practical security mechanisms that not only ensure MapReduce service integrity as well as to prevent replay and Denial of Service (DoS) attacks, but also preserve the simplicity, applicability and scalability of MapReduce. We have implemented a prototype of SecureMR based on Hadoop, an open source MapReduce implementation. Our analytical study and experimental results show that SecureMR can ensure data processing service integrity while imposing low performance overhead.},
}

% No pages field in the source record for this entry.
@article{zhang_yu_ning_2008,
  author       = {Zhang, Qing and Yu, Ting and Ning, Peng},
  title        = {A framework for identifying compromised nodes in wireless sensor networks},
  journal      = {{ACM} Transactions on Information and System Security},
  volume       = {11},
  number       = {3},
  year         = {2008},
  month        = mar,
  issn         = {1557-7406},
  doi          = {10.1145/1341731.1341733},
  abstractNote = {Sensor networks are often subject to physical attacks. Once a node's cryptographic key is compromised, an attacker may completely impersonate it and introduce arbitrary false information into the network. Basic cryptographic mechanisms are often not effective in this situation. Most techniques to address this problem focus on detecting and tolerating false information introduced by compromised nodes. They cannot pinpoint exactly where the false information is introduced and who is responsible for it.},
}

@article{anton_bertino_li_yu_2007,
  author       = {Anton, Annie I. and Bertino, Elisa and Li, Ninghui and Yu, Ting},
  title        = {A roadmap for comprehensive online privacy policy management},
  journal      = {Communications of the {ACM}},
  volume       = {50},
  number       = {7},
  pages        = {109--116},
  year         = {2007},
  month        = jul,
  issn         = {1557-7317},
  doi          = {10.1145/1272516.1272522},
  abstractNote = {A framework supporting the privacy policy life cycle helps guide the kind of research to consider before sound privacy answers may be realized.},
}

% The original key for this book contained spaces, which BibTeX cannot parse as a
% citation key; spaces are replaced by underscores here, so any citation of this
% entry must use the new key.
% The title is reconstructed from the text of the original key.
% NOTE(review): the record has no author/editor (required for a book entry) and the
% abstract is cut off mid-word in the export; confirm both against the DOI record.
@book{secure_data_management_in_decentralized_systems_2007,
  title        = {Secure Data Management in Decentralized Systems},
  publisher    = {Springer},
  address      = {New York},
  year         = {2007},
  isbn         = {0387276947},
  doi          = {10.1007/978-0-387-27696-0},
  abstractNote = {Database security is one of the classical topics in the research of information system security. Ever since the early years of database management systems, a great deal of research activity has been c},
}

@article{li_yu_anton_2006,
  author  = {Li, N. and Yu, T. and Anton, A.},
  title   = {A semantics based approach to privacy languages},
  journal = {Computer Systems Science and Engineering},
  volume  = {21},
  number  = {5},
  pages   = {339--352},
  year    = {2006},
}

@article{guha_jagadish_koudas_srivastava_yu_2006,
  author       = {Guha, Sudipto and Jagadish, H. V. and Koudas, Nick and Srivastava, Divesh and Yu, Ting},
  title        = {Integrating {XML} data sources using approximate joins},
  journal      = {{ACM} Transactions on Database Systems},
  volume       = {31},
  number       = {1},
  pages        = {161--207},
  year         = {2006},
  month        = mar,
  issn         = {1557-4644},
  doi          = {10.1145/1132863.1132868},
  abstractNote = {XML is widely recognized as the data interchange standard of tomorrow because of its ability to represent data from a variety of sources. Hence, XML is likely to be the format through which data from multiple sources is integrated. In this article, we study the problem of integrating XML data sources through correlations realized as join operations. A challenging aspect of this operation is the XML document structure. Two documents might convey approximately or exactly the same information but may be quite different in structure. Consequently, an approximate match in structure, in addition to content, has to be folded into the join operation. We quantify an approximate match in structure and content for pairs of XML documents using well defined notions of distance. We show how notions of distance that have metric properties can be incorporated in a framework for joins between XML data sources and introduce the idea of reference sets to facilitate this operation. Intuitively, a reference set consists of data elements used to project the data space. We characterize what constitutes a good choice of a reference set, and we propose sampling-based algorithms to identify them. We then instantiate our join framework using the tree edit distance between a pair of trees. We next turn our attention to utilizing well known index structures to improve the performance of approximate XML join operations. We present a methodology enabling adaptation of index structures for this problem, and we instantiate it in terms of the R-tree. We demonstrate the practical utility of our solutions using large collections of real and synthetic XML data sets, varying parameters of interest, and highlighting the performance benefits of our approach.},
}

% Author initials were glued together without periods in the export (e.g. LVS, HV);
% they are separated here so BibTeX parses each initial as its own name token.
@article{yu_srivastava_lakshmanan_jagadish_2004,
  author       = {Yu, T. and Srivastava, D. and Lakshmanan, L. V. S. and Jagadish, H. V.},
  title        = {A compressed accessibility map for {XML}},
  journal      = {{ACM} Transactions on Database Systems},
  volume       = {29},
  number       = {2},
  pages        = {363--402},
  year         = {2004},
  month        = jun,
  issn         = {1557-4644},
  doi          = {10.1145/1005566.1005570},
  abstractNote = {XML is the undisputed standard for data representation and exchange. As companies transact business over the Internet, letting authorized customers directly access, and even modify, XML data offers many advantages in terms of cost, accuracy, and timeliness. Given the complex business relationships between companies, and the sensitive nature of information, access must be provided selectively, using sophisticated access control specifications. Using the specification directly to determine if a user has access to an XML data item can be extremely inefficient. The alternative of fully materializing, for each data item, the users authorized to access it can be space-inefficient. In this article, we introduce a compressed accessibility map (CAM) as a space- and time-efficient solution to the access control problem for XML data. A CAM compactly identifies the XML data items to which a user has access, by exploiting structural locality of accessibility in tree-structured data. We present a CAM lookup algorithm for determining if a user has access to a data item that takes time proportional to the product of the depth of the item in the XML data and logarithm of the CAM size. We develop an algorithm for building an optimal size CAM that takes time linear in the size of the XML data set. While optimality cannot be preserved incrementally under data item updates, we provide an algorithm for incrementally maintaining near-optimality. Finally, we experimentally demonstrate the effectiveness of the CAM for multiple users on a variety of real and synthetic data sets.},
}