% Journal article: survey of twig pattern matching techniques for querying
% large XML data repositories (relational approach vs. native approach).
@article{gou_chirkova_2007,
  author   = {Gou, Gang and Chirkova, Rada},
  title    = {Efficiently querying large {XML} data repositories: A survey},
  journal  = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume   = {19},
  number   = {10},
  pages    = {1381--1403},
  year     = {2007},
  month    = oct,
  issn     = {1041-4347},
  doi      = {10.1109/tkde.2007.1060},
  abstract = {Extensible markup language (XML) is emerging as a de facto standard for information exchange among various applications on the World Wide Web. There has been a growing need for developing high-performance techniques to query large XML data repositories efficiently. One important problem in XML query processing is twig pattern matching, that is, finding in an XML data tree D all matches that satisfy a specified twig (or path) query pattern Q. In this survey, we review, classify, and compare major techniques for twig pattern matching. Specifically, we consider two classes of major XML query processing techniques: the relational approach and the native approach. The relational approach directly utilizes existing relational database systems to store and query XML data, which enables the use of all important techniques that have been developed for relational databases, whereas in the native approach, specialized storage and query processing systems tailored for XML data are developed from scratch to further improve XML query performance. As implied by existing work, XML data querying and management are developing in the direction of integrating the relational approach with the native approach, which could result in higher query processing performance and also significantly reduce system reengineering costs.},
}

% Journal article: A* search algorithm for materialized view selection in
% data warehouses under a disk-space constraint.
@article{gou_yu_lu_2006,
  author   = {Gou, Gang and Yu, Jeffrey Xu and Lu, Hongjun},
  title    = {{A*} search: An efficient and flexible approach to materialized view selection},
  journal  = {{IEEE} Transactions on Systems, Man, and Cybernetics, Part {C} (Applications and Reviews)},
  volume   = {36},
  number   = {3},
  pages    = {411--425},
  year     = {2006},
  month    = may,
  issn     = {1558-2442},
  doi      = {10.1109/TSMCC.2004.843248},
  abstract = {Decision support systems issue a large number of online analytical processing (OLAP) queries to access very large databases. A data warehouse needs to precompute or materialize some of such OLAP queries in order to improve the system throughput, since many coming queries can benefit greatly from these materialized views. Materialized view selection with resource constraint is one of the most important issues in the management of data warehouses. It addresses how to fully utilize the limited resource, disk space, or maintenance time to minimize the total query processing cost. This paper revisits the problem of materialized view selection under a disk-space constraint S. Many efficient greedy algorithms have been developed to address this problem. The quality of greedy solutions is guaranteed by a lower bound. However, it is observed that, when S is small, this lower bound can be very small and even be negative. In such cases, their solution quality will not be guaranteed well. In order to improve further the solution quality in such cases, a new competitive A* algorithm is proposed. It is shown that it is just the distinctive topological structure of the dependent lattice that makes the A* search a very competitive strategy for this problem. Both theoretical and experimental results show that the proposed algorithm is a powerful, efficient, and flexible approach to this problem.},
}