Conference-paper entry. The abstractNote field is not a standard BibTeX
field; standard styles ignore unknown fields, so it acts as an annotation only.

@inproceedings{tan_chirkova_gadepally_mattson_2017,
  author       = {Tan, R. and Chirkova, R. and Gadepally, V. and Mattson, T. G.},
  title        = {Enabling query processing across heterogeneous data models: A survey},
  booktitle    = {2017 {IEEE} International Conference on Big Data (Big Data)},
  year         = {2017},
  pages        = {3211--3220},
  doi          = {10.1109/bigdata.2017.8258302},
  abstractNote = {Modern applications often need to manage and analyze widely diverse datasets that span multiple data models [1], [2], [3], [4], [5]. Warehousing the data through Extract-Transform-Load (ETL) processes can be expensive in such scenarios. Transforming disparate data into a single data model may degrade performance. Further, curating diverse datasets and maintaining the pipeline can prove to be labor intensive. As a result, an emerging trend is to shift the focus to federating specialized data stores and enabling query processing across heterogeneous data models [6]. This shift can bring many advantages: First, systems can natively leverage multiple data models, which can translate to maximizing the semantic expressiveness of underlying interfaces and leveraging the internal processing capabilities of component data stores. Second, federated architectures support query-specific data integration with just-in-time transformation and migration, which has the potential to significantly reduce the operational complexity and overhead. Projects that focus on developing systems in this research area stem from various backgrounds and address diverse concerns, which could make it difficult to form a consistent view of the work in this area. In this survey, we introduce a taxonomy for describing the state of the art and propose a systematic evaluation framework conducive to understanding of query-processing characteristics in the relevant systems. We use the framework to assess four representative implementations: BigDAWG [7], [8], CloudMdsQL [9], [10], Myria [11], [12], and Apache Drill [13].},
}