% Bibliography records (five entries), one field per line.
%
% Conventions used below:
%   * Conference papers use the inproceedings entry type with the proceedings
%     name in booktitle; journal papers use the article type with journal.
%   * Page ranges use the ASCII double hyphen (--), months use the unquoted
%     three-letter macros, and ISSN/DOI values are stored bare.
%   * Acronyms and proper nouns in titles and venue names are brace-protected
%     so that styles applying sentence casing do not lowercase them.
%   * abstractNote is not a standard BibTeX field; standard styles ignore
%     unknown fields, so it is carried along purely as metadata.

% Conference paper (IPDPS 2022).
@inproceedings{zhai_ibrahim_qiu_boemer_chen_titov_lyashevsky_2022,
  author       = {Zhai, Yujia and Ibrahim, Mohannad and Qiu, Yiqin and Boemer, Fabian and Chen, Zizhong and Titov, Alexey and Lyashevsky, Alexander},
  title        = {Accelerating Encrypted Computing on {Intel} {GPUs}},
  booktitle    = {2022 {IEEE} 36th International Parallel and Distributed Processing Symposium ({IPDPS} 2022)},
  pages        = {705--716},
  year         = {2022},
  issn         = {1530-2075},
  doi          = {10.1109/IPDPS53621.2022.00074},
  abstractNote = {Homomorphic Encryption (HE) is an emerging encryption scheme that allows computations to be performed directly on encrypted messages. This property provides promising applications such as privacy-preserving deep learning and cloud computing. Prior works have been proposed to enable practical privacy-preserving applications with architectural-aware optimizations on CPUs, CUDA-enabled GPUs and FPGAs. However, there is no systematic optimization for the whole HE pipeline on Intel GPUs. In this paper, we present the first-ever SYCL-based GPU backend for Microsoft SEAL APIs. We perform optimizations from instruction level, algorithmic level and application level to accelerate our HE library based on the Cheon, Kim, Kim and Song (CKKS) scheme on Intel GPUs. The performance is validated on two latest Intel GPUs. Experimental results show that our staged optimizations together with optimizations including low-level optimizations and kernel fusion accelerate the Number Theoretic Transform (NTT), a key algorithm for HE, by up to 9.93X compared with the naive GPU baseline. The roofline analysis confirms that our optimized NTT reaches 79.8% and 85.7% of the peak performance on two GPU devices. Through the highly optimized NTT and the assembly-level optimization, we obtain 2.32X – 3.05X acceleration for HE evaluation routines. In addition, our all-together systematic optimizations improve the performance of encrypted element-wise polynomial matrix multiplication application by up to 3.11X.},
}

% Journal paper.
@article{ibrahim_bennett_mason_rodgers_abolhasani_2022,
  author       = {Ibrahim, Malek Y. S. and Bennett, Jeffrey A. and Mason, Dawn and Rodgers, Jody and Abolhasani, Milad},
  title        = {Flexible homogeneous hydroformylation: on-demand tuning of aldehyde branching with a cyclic fluorophosphite ligand},
  journal      = {Journal of Catalysis},
  volume       = {409},
  pages        = {105--117},
  year         = {2022},
  month        = may,
  issn         = {1090-2694},
  doi          = {10.1016/j.jcat.2022.03.030},
  abstractNote = {Tuning aldehyde regioselectivity via homogeneous hydroformylation of olefins using the same catalyst system remains a challenge. Here, we present flexible rhodium (Rh)-catalyzed hydroformylation of 1-octene and propylene with a bulky cyclic monofluorophosphite ligand L. Hydroformylation of 1-octene with Rh/L catalyst achieves, for the first time, turnover frequencies exceeding 75,000 mol ald.mol Rh−1.h−1 (at 30% conversion) in segmented flow, while enabling access to an unmatched tunable aldehyde branching (0.06 < linear/branched < 15) with the same ligand L. Our mechanistic studies demonstrate that L provides a viable alternative to traditional bidentate phosphine/phosphite ligands for high activity with the added benefit of tunable selectivity. The unique high flexibility feature of L over traditional linear- or branched-selective ligands allows for on-demand tuning from 90% linear to 75% branched aldehyde in a continuous flow reactor without the need for ligand/catalyst alteration. Furthermore, when starting from the internal olefins, Rh/L catalyst achieves high regioselectivity (>90%) toward the two positional aldehyde isomers. The high turnover frequencies obtained with L in flow will enhance the economics of the production of aldehydes and their isotopically labeled analogues by significantly reducing the reaction time, thereby enabling better utilization of the increasingly expensive Rh catalyst and minimizing the need for catalyst/ligand separation and recycle.},
}

% Journal paper.
% NOTE(review): volume 9 with no pages or issue number looks incomplete or
% mis-mapped in the export -- confirm against the publisher record for the DOI.
@article{han_ibrahim_abolhasani_2021,
  author       = {Han, Suyong and Ibrahim, Malek Y. S. and Abolhasani, Milad},
  title        = {Intensified recovery of switchable hydrophilicity solvents in flow},
  journal      = {Chemical Communications},
  volume       = {9},
  year         = {2021},
  month        = sep,
  publisher    = {Royal Society of Chemistry (RSC)},
  issn         = {1364-548X},
  doi          = {10.1039/d1cc03819b},
  url          = {https://doi.org/10.1039/D1CC03819B},
  abstractNote = {Intensified continuous extraction and recovery of switchable hydrophilicity solvents is presented, offering an energy-efficient solvent utilization strategy for green synthesis.},
}

% Conference paper (IPDPS 2021).
@inproceedings{mughrabi_ibrahim_byrd_2021,
  author       = {Mughrabi, Abdullah T. and Ibrahim, Mohannad and Byrd, Gregory T.},
  title        = {{QPR}: Quantizing {PageRank} with Coherent Shared Memory Accelerators},
  booktitle    = {2021 {IEEE} 35th International Parallel and Distributed Processing Symposium ({IPDPS})},
  pages        = {962--972},
  year         = {2021},
  issn         = {1530-2075},
  doi          = {10.1109/IPDPS49936.2021.00105},
  abstractNote = {Graph algorithms often require fine-grained, random access across substantially large data structures. Previous work on FPGA-based acceleration has required significant preprocessing and restructuring to transform the memory access patterns into a streaming format that is more friendly to off-chip hardware. However, the emergence of cache-coherent shared memory interfaces, such as CAPI, allows designers to more easily work with the natural in-memory organization of the data. This paper introduces a vertex-centric shared-memory accelerator for the PageRank algorithm, optimized for high performance while effectively using coherent caching on the FPGA hardware. The proposed design achieves up to 14.9x speedups by selectively caching graph data for the accelerator while taking into account locality and reuse, compared to naively using the shared address space access and DRAM only. We also introduce PageRank Quantization, an innovative technique to represent page-ranks with 32-bit quantized fixed-point values. This approach is up to 1.5x faster than 64-bit fixed-point while keeping precision within a tolerable error margin. As a result, we maintain both the hardware scalability of fixed-point representation and the cache performance of 32-bit floating-point.},
}

% Workshop paper (MLCAD '20).
@inproceedings{turtletaub_li_ibrahim_franzon_2020,
  author       = {Turtletaub, Isaac and Li, George and Ibrahim, Mohannad and Franzon, Paul},
  title        = {Application of Quantum Machine Learning to {VLSI} Placement},
  booktitle    = {Proceedings of the 2020 {ACM}/{IEEE} 2nd Workshop on Machine Learning for {CAD} ({MLCAD} '20)},
  pages        = {61--66},
  year         = {2020},
  doi          = {10.1145/3380446.3430644},
  abstractNote = {Considerable advances in quantum computing with functioning noisy, near-term devices have allowed for the application space to grow as a emerging field for problems with large solution spaces. However, current quantum hardware is limited in scale and noisy in generated data, necessitating hybrid quantum-classical solutions for viability of results and convergence. A quantum backend generates data for classical algorithms to optimize control parameters with, creating a hybrid quantum-classical computing loop. VLSI placement problems have shown potential for utilization, where traditionally heuristic solutions such as Kernighan-Lin (KL) are used. The Variational Quantum Eigensolver (VQE) is used to formulate a recursive Balanced Min-Cut (BMC) algorithm, and we suggest that quantum machine learning techniques can lower error rates and allow for faster convergence to an optimal solution.},
}