@article{shen_zhang_dea_andow_arroyo-fang_gafter_george_grueter_meijer_shivers_et al._2021, title={Coarsening Optimization for Differentiable Programming}, volume={5}, ISSN={["2475-1421"]}, url={https://doi.org/10.1145/3485507}, DOI={10.1145/3485507}, abstractNote={This paper presents a novel optimization for differentiable programming named coarsening optimization. It offers a systematic way to synergize symbolic differentiation and algorithmic differentiation (AD). Through it, the granularity of the computations differentiated by each step in AD can become much larger than a single operation, and hence lead to much reduced runtime computations and data allocations in AD. To circumvent the difficulties that control flow creates to symbolic differentiation in coarsening, this work introduces phi-calculus, a novel method to allow symbolic reasoning and differentiation of computations that involve branches and loops. It further avoids "expression swell" in symbolic differentiation and balance reuse and coarsening through the design of reuse-centric segment of interest identification. Experiments on a collection of real-world applications show that coarsening optimization is effective in speeding up AD, producing several times to two orders of magnitude speedups.}, number={OOPSLA}, journal={PROCEEDINGS OF THE ACM ON PROGRAMMING LANGUAGES-PACMPL}, publisher={Association for Computing Machinery (ACM)}, author={Shen, Xipeng and Zhang, Guoqiang and Dea, Irene and Andow, Samantha and Arroyo-Fang, Emilio and Gafter, Neal and George, Johann and Grueter, Melissa and Meijer, Erik and Shivers, Olin Grigsby and et al.}, year={2021}, month={Oct} } @article{zhang_xu_shen_dillig_2021, title={UDF to SQL Translation through Compositional Lazy Inductive Synthesis}, volume={5}, ISSN={["2475-1421"]}, url={https://doi.org/10.1145/3485489}, DOI={10.1145/3485489}, abstractNote={ Many data processing systems allow SQL queries that call user-defined functions (UDFs) written in conventional programming languages. While such SQL extensions provide convenience and flexibility to users, queries involving UDFs are not as efficient as their pure SQL counterparts that invoke SQL’s highly-optimized built-in functions. Motivated by this problem, we propose a new technique for translating SQL queries with UDFs to pure SQL expressions. Unlike prior work in this space, our method is not based on syntactic rewrite rules and can handle a much more general class of UDFs. At a high-level, our method is based on counterexample-guided inductive synthesis (CEGIS) but employs a novel compositional strategy that decomposes the synthesis task into simpler sub-problems. However, because there is no universal decomposition strategy that works for all UDFs, we propose a novel lazy inductive synthesis approach that generates a sequence of decompositions that correspond to increasingly harder inductive synthesis problems. Because most realistic UDF-to-SQL translation tasks are amenable to a fine-grained decomposition strategy, our lazy inductive synthesis method scales significantly better than traditional CEGIS. We have implemented our proposed technique in a tool called CLIS for optimizing Spark SQL programs containing Scala UDFs. To evaluate CLIS, we manually study 100 randomly selected UDFs and find that 63 of them can be expressed in pure SQL. Our evaluation on these 63 UDFs shows that CLIS can automatically synthesize equivalent SQL expressions in 92% of the cases and that it can solve 2.4× more benchmarks compared to a baseline that does not use our compositional approach. We also show that CLIS yields an average speed-up of 3.5× for individual UDFs and 1.3× to 3.1× in terms of end-to-end application performance.}, number={OOPSLA}, journal={PROCEEDINGS OF THE ACM ON PROGRAMMING LANGUAGES-PACMPL}, publisher={Association for Computing Machinery (ACM)}, author={Zhang, Guoqiang and Xu, Yuanchao and Shen, Xipeng and Dillig, Isil}, year={2021}, month={Oct} }