% Bibliography entries, newest first (2022 down to 2019).
% Layout: one field per line, lowercase field names, "--" page ranges,
% bare three-letter month macros, brace-protected acronyms in titles.
% Conference papers are typed as inproceedings with the venue in booktitle,
% and 8-digit serial numbers are stored in issn rather than isbn.
% Citation keys are unchanged.

@article{zhao_gao_nie_zhou_2022,
  author = {Zhao, Chen and Gao, Wu and Nie, Feiping and Zhou, Huiyang},
  title = {A Survey of {GPU} Multitasking Methods Supported by Hardware Architecture},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume = {33},
  number = {6},
  pages = {1451--1463},
  year = {2022},
  month = jun,
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  doi = {10.1109/TPDS.2021.3115630},
}

@inproceedings{yuan_yudha_solihin_zhou_2021,
  author = {Yuan, Shougang and Yudha, Ardhi Wiratama Baskara and Solihin, Yan and Zhou, Huiyang},
  title = {Analyzing Secure Memory Architecture for {GPUs}},
  booktitle = {2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS 2021)},
  year = {2021},
  doi = {10.1109/ISPASS51385.2021.00017},
}

@inproceedings{ravi_nguyen_zhou_becchi_2021,
  author = {Ravi, John and Nguyen, Tri and Zhou, Huiyang and Becchi, Michela},
  title = {{PILOT}: a Runtime System to Manage Multi-tenant {GPU} Unified Memory Footprint},
  booktitle = {2021 IEEE 28th International Conference on High Performance Computing, Data, and Analytics (HiPC 2021)},
  year = {2021},
  doi = {10.1109/HiPC53243.2021.00063},
}

@inproceedings{liu_bello_zhou_2021,
  author = {Liu, Ji and Bello, Luciano and Zhou, Huiyang},
  title = {Relaxed Peephole Optimization: A Novel Compiler Optimization for Quantum Circuits},
  booktitle = {CGO '21: Proceedings of the 2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)},
  year = {2021},
  doi = {10.1109/CGO51591.2021.9370310},
}

@inproceedings{liu_zhou_2021,
  author = {Liu, Ji and Zhou, Huiyang},
  title = {Systematic Approaches for Precise and Approximate Quantum State Runtime Assertion},
  booktitle = {2021 27th IEEE International Symposium on High-Performance Computer Architecture (HPCA 2021)},
  year = {2021},
  doi = {10.1109/HPCA51647.2021.00025},
}

@article{mao_zhou_gui_shen_2020,
  author = {Mao, Yonghua and Zhou, Huiyang and Gui, Xiaolin and Shen, Junjie},
  title = {Exploring Convolution Neural Network for Branch Prediction},
  journal = {IEEE Access},
  volume = {8},
  pages = {152008--152016},
  year = {2020},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  doi = {10.1109/ACCESS.2020.3017196},
}

@article{zhao_gao_nie_wang_zhou_2020,
  author = {Zhao, Chen and Gao, Wu and Nie, Feiping and Wang, Fei and Zhou, Huiyang},
  title = {Fair and cache blocking aware warp scheduling for concurrent kernel execution on {GPU}},
  journal = {Future Generation Computer Systems-The International Journal of eScience},
  volume = {112},
  pages = {1093--1105},
  year = {2020},
  month = nov,
  issn = {1872-7115},
  doi = {10.1016/j.future.2020.05.023},
}

@inproceedings{liu_byrd_zhou_2020,
  author = {Liu, J. and Byrd, G. and Zhou, H.},
  title = {Quantum Circuits for Dynamic Runtime Assertions in Quantum Computation},
  booktitle = {ASPLOS '20: Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages = {1017--1030},
  year = {2020},
  address = {New York, NY, USA},
  publisher = {Association for Computing Machinery},
  doi = {10.1145/3373376.3378488},
}

@inproceedings{liu_zhou_2020,
  author = {Liu, Ji and Zhou, Huiyang},
  title = {Reliability Modeling of {NISQ}-Era Quantum Computers},
  booktitle = {2020 IEEE International Symposium on Workload Characterization (IISWC 2020)},
  year = {2020},
  doi = {10.1109/IISWC50251.2020.00018},
}

@inproceedings{yudha_kimura_zhou_solihin_2020,
  author = {Yudha, Ardhi Wiratama Baskara and Kimura, Keiji and Zhou, Huiyang and Solihin, Yan},
  title = {Scalable and Fast Lazy Persistency on {GPUs}},
  booktitle = {2020 IEEE International Symposium on Workload Characterization (IISWC 2020)},
  year = {2020},
  doi = {10.1109/IISWC50251.2020.00032},
}

@article{lin_dai_mantor_zhou_2019,
  author = {Lin, Zhen and Dai, Hongwen and Mantor, Michael and Zhou, Huiyang},
  title = {Coordinated {CTA} Combination and Bandwidth Partitioning for {GPU} Concurrent Kernel Execution},
  journal = {ACM Transactions on Architecture and Code Optimization},
  volume = {16},
  number = {3},
  year = {2019},
  month = aug,
  issn = {1544-3973},
  doi = {10.1145/3326124},
}
% Bibliography entries, newest first (2019 down to 1996).
% Layout: one field per line, lowercase field names, "--" page ranges,
% bare three-letter month macros, brace-protected acronyms in titles.
%
% Keys: a key that occurred more than once keeps its name on the first
% occurrence; later occurrences carry a b/c suffix so every key is unique.
% The two keys that contained whitespace (and so could not be cited) are
% rewritten with underscores.
%
% Authors: the bare given name "Huiyang" is written "Zhou, Huiyang", the
% form the other entries use for the same author.
%
% Types: entries that carry an institution are typed techreport, preprints
% without a journal are typed misc, chapters with a booktitle are typed
% incollection, and workshop/conference papers are typed inproceedings.

@inproceedings{lin_alshboul_solihin_zhou_2019,
  author = {Lin, Zhen and Alshboul, Mohammad and Solihin, Yan and Zhou, Huiyang},
  title = {Exploring Memory Persistency Models for {GPUs}},
  booktitle = {28th International Conference on Parallel Architectures and Compilation Techniques (PACT)},
  series = {International Conference on Parallel Architectures and Compilation Techniques},
  pages = {310--322},
  year = {2019},
  issn = {1089-795X},
  doi = {10.1109/PACT.2019.00032},
  collection = {International Conference on Parallel Architectures and Compilation Techniques},
}

% NOTE(review): publisher and address look like export defaults for this
% proceedings series -- confirm against the publisher record.
@inproceedings{guan_ning_lin_shen_zhou_lim_2019,
  author = {Guan, H. and Ning, L. and Lin, Z. and Shen, X. and Zhou, H. and Lim, S.},
  title = {In-Place Zero-Space Memory Protection for {CNN}},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {Wallach, H. and Larochelle, H. and Beygelzimer, A. and d'Alch{\'e}-Buc, F. and Fox, E. and Garnett, R.},
  volume = {32},
  year = {2019},
  address = {San Mateo, CA},
  publisher = {Morgan Kaufmann Publishers},
}

@article{zhou_byrd_2019,
  author = {Zhou, Huiyang and Byrd, Gregory T.},
  title = {Quantum Circuits for Dynamic Runtime Assertions in Quantum Computation},
  journal = {IEEE Computer Architecture Letters},
  volume = {18},
  number = {2},
  pages = {111--114},
  year = {2019},
  month = jul,
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn = {1556-6056 1556-6064 2473-2575},
  doi = {10.1109/LCA.2019.2935049},
  url = {http://dx.doi.org/10.1109/LCA.2019.2935049},
}

@misc{liu_byrd_zhou_2019,
  author = {Liu, Ji and Byrd, Greg and Zhou, Huiyang},
  title = {Quantum Circuits for Dynamic Runtime Assertions in Quantum Computation},
  howpublished = {TechRxiv preprint},
  year = {2019},
  month = dec,
  doi = {10.36227/techrxiv.11319929},
}

% Version-specific DOI of the preprint above.
@misc{liu_byrd_zhou_2019b,
  author = {Liu, Ji and Byrd, Greg and Zhou, Huiyang},
  title = {Quantum Circuits for Dynamic Runtime Assertions in Quantum Computation},
  howpublished = {TechRxiv preprint},
  year = {2019},
  month = dec,
  doi = {10.36227/techrxiv.11319929.v1},
}

@inproceedings{lin_mathur_zhou_2019,
  author = {Lin, Zhen and Mathur, Utkarsh and Zhou, Huiyang},
  title = {Scatter-and-Gather Revisited: High-Performance Side-Channel-Resistant {AES} on {GPUs}},
  booktitle = {12th Workshop on General Purpose Processing Using GPUs (GPGPU 12)},
  pages = {2--11},
  year = {2019},
  isbn = {978-1-4503-6255-9},
  doi = {10.1145/3300053.3319415},
}

@inproceedings{dai_lin_li_zhao_wang_zheng_zhou_2018,
  author = {Dai, H. and Lin, Z. and Li, C. and Zhao, C. and Wang, F. and Zheng, N. and Zhou, H.},
  title = {Accelerate {GPU} Concurrent Kernel Execution by Mitigating Memory Pipeline Stalls},
  booktitle = {2018 IEEE International Symposium on High Performance Computer Architecture (HPCA)},
  year = {2018},
  month = feb,
  doi = {10.1109/HPCA.2018.00027},
}

@article{zhong_li_huiyang_wang_2018,
  author = {Zhong, Yaning and Li, Chao and Zhou, Huiyang and Wang, Gufeng},
  title = {Developing Noise-Resistant Three-Dimensional Single Particle Tracking Using Deep Neural Networks},
  journal = {Analytical Chemistry},
  volume = {90},
  number = {18},
  pages = {10748--10757},
  year = {2018},
  doi = {10.1021/acs.analchem.8b01334},
}

@article{lin_mantor_huiyang_2018,
  author = {Lin, Zhen and Mantor, M. and Zhou, Huiyang},
  title = {{GPU} performance vs. thread-level parallelism: Scalability analysis and a novel way to improve {TLP}},
  journal = {ACM Transactions on Architecture and Code Optimization},
  volume = {15},
  number = {1},
  year = {2018},
  doi = {10.1145/3177964},
}

@inproceedings{verma_huiyang_booth_king_coole_keep_marshall_feng_2017,
  author = {Verma, A. and Zhou, Huiyang and Booth, S. and King, R. and Coole, J. and Keep, A. and Marshall, J. and Feng, W. C.},
  title = {Developing dynamic profiling and debugging support in {OpenCL} for {FPGAs}},
  booktitle = {Proceedings of the 2017 54th ACM/EDAC/IEEE Design Automation Conference (DAC)},
  year = {2017},
  doi = {10.1145/3061639.3062230},
}

@inproceedings{chen_zhao_shen_zhou_2017,
  author = {Chen, G. and Zhao, Y. and Shen, X. and Zhou, H.},
  title = {{EffiSha}: A Software Framework for Enabling Efficient Preemptive Scheduling of {GPU}},
  booktitle = {PPoPP '17: Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  pages = {3--16},
  year = {2017},
  doi = {10.1145/3018743.3018748},
}

@techreport{mao_zhou_gui_2017,
  author = {Mao, Y. and Zhou, H. and Gui, X.},
  title = {Exploring deep neural networks for branch prediction},
  institution = {Electrical and Computer Engineering Department, N.C. State University},
  year = {2017},
  month = sep,
  address = {Raleigh, NC},
  url = {https://people.engr.ncsu.edu/hzhou/CNN_DBN_zhou_2017.pdf},
}

@article{cramer_pohlmann_gomez_mark_kornegay_hall_siraliev-perez_walavalkar_sperlazza_bilinovich_et_al_2017,
  author = {Cramer, J. M. and Pohlmann, D. and Gomez, F. and Mark, L. and Kornegay, B. and Hall, C. and Siraliev-Perez, E. and Walavalkar, N. M. and Sperlazza, M. J. and Bilinovich, S. and others},
  title = {Methylation specific targeting of a chromatin remodeling complex from sponges to humans},
  journal = {Scientific Reports},
  volume = {7},
  year = {2017},
  doi = {10.1038/srep40674},
}

@inproceedings{dai_li_lin_zhou_2017,
  author = {Dai, H. and Li, C. and Lin, Z. and Zhou, H.},
  title = {The Demand for a Sound Baseline in {GPU} Memory Architecture Research},
  booktitle = {14th Annual Workshop on Duplicating, Deconstructing and Debunking (WDDD)},
  year = {2017},
  url = {https://people.engr.ncsu.edu/hzhou/Hongwen_WDDD2017.pdf},
}

@article{zhang_li_yan_zhou_2016,
  author = {Zhang, Yunquan and Li, Shigang and Yan, Shengen and Zhou, Huiyang},
  title = {A Cross-Platform {SpMV} Framework on Many-Core Architectures},
  journal = {ACM Transactions on Architecture and Code Optimization},
  volume = {13},
  number = {4},
  pages = {1--25},
  year = {2016},
  month = oct,
  publisher = {Association for Computing Machinery (ACM)},
  issn = {1544-3566},
  doi = {10.1145/2994148},
  url = {http://dx.doi.org/10.1145/2994148},
}

% NOTE(review): the title reads "CPU cache bypassing"; given the subject of
% the neighbouring entries this may be an export typo for "GPU" -- confirm.
@inproceedings{dai_li_huiyang_gupta_kartsaklis_mantor_2016,
  author = {Dai, H. W. and Li, C. and Zhou, Huiyang and Gupta, S. and Kartsaklis, C. and Mantor, M.},
  title = {A model-driven approach to warp/thread-block level {CPU} cache bypassing},
  booktitle = {2016 53rd ACM/EDAC/IEEE Design Automation Conference (DAC)},
  year = {2016},
  doi = {10.1145/2897937.2897966},
}

@inproceedings{lin_nyland_huiyang_2016,
  author = {Lin, Zhen and Nyland, L. and Zhou, Huiyang},
  title = {Enabling efficient preemption for {SIMT} architectures with lightweight context switching},
  booktitle = {SC '16: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages = {898--908},
  year = {2016},
  doi = {10.1109/sc.2016.76},
}

@inproceedings{chen_huiyang_shen_gahm_venkat_booth_marshall_2016,
  author = {Chen, G. Y. and Zhou, Huiyang and Shen, Xipeng and Gahm, J. and Venkat, N. and Booth, S. and Marshall, J.},
  title = {{OpenCL}-based erasure coding on heterogeneous architectures},
  booktitle = {IEEE International Conference on Application-Specific Systems},
  pages = {33--40},
  year = {2016},
  doi = {10.1109/asap.2016.7760770},
}

@inproceedings{li_yang_feng_chakradhar_huiyang_2016,
  author = {Li, C. and Yang, Y. and Feng, M. and Chakradhar, S. and Zhou, Huiyang},
  title = {Optimizing memory efficiency for deep convolutional neural networks on {GPUs}},
  booktitle = {SC '16: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages = {633--644},
  year = {2016},
  doi = {10.1109/sc.2016.53},
}

@inproceedings{zhao_wang_lin_zhou_zheng_2016,
  author = {Zhao, C. and Wang, F. and Lin, Z. and Zhou, H. and Zheng, N.},
  title = {Selective {GPU} Cache Bypassing for Un-Coalesced Loads},
  booktitle = {22nd IEEE International Conference on Parallel and Distributed Systems : ICPADS 2016 : proceedings : 13--16 December 2016, Wuhan, Hubei, China},
  editor = {Liao, Xiaofei},
  year = {2016},
  address = {Los Alamitos, CA},
  publisher = {IEEE Computer Society},
  doi = {10.1109/ICPADS.2016.0122},
}

@inproceedings{jia_huiyang_2016,
  author = {Jia, Q. and Zhou, Huiyang},
  title = {Tuning stencil codes in {OpenCL} for {FPGAs}},
  booktitle = {Proceedings of the 34th IEEE International Conference on Computer Design (ICCD)},
  pages = {249--256},
  year = {2016},
  doi = {10.1109/iccd.2016.7753287},
}

@inproceedings{jia_padia_amboju_zhou_2015,
  author = {Jia, Q. and Padia, M. B. and Amboju, K. and Zhou, H.},
  title = {An Optimized {AMPM}-based Prefetcher Coupled with Configurable Cache Line Sizing},
  booktitle = {JILP Workshop on Computer Architecture Competitions (JWAC): 2nd Data Prefetching Championship (DPC2)},
  year = {2015},
  month = jun,
}

@inproceedings{mayank_dai_wei_huiyang_2015,
  author = {Mayank, K. and Dai, H. W. and Wei, J. Z. and Zhou, Huiyang},
  title = {Analyzing graphics processor unit ({GPU}) instruction set architectures},
  booktitle = {IEEE International Symposium on Performance Analysis of Systems and Software},
  pages = {155--156},
  year = {2015},
  doi = {10.1109/ispass.2015.7095794},
}

@inproceedings{li_yang_lin_huiyang_2015,
  author = {Li, C. and Yang, Y. and Lin, Zhen and Zhou, Huiyang},
  title = {Automatic data placement into {GPU} on-chip memory resources},
  booktitle = {2015 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)},
  pages = {23--33},
  year = {2015},
  doi = {10.1109/cgo.2015.7054184},
}

@article{yang_li_huiyang_2015,
  author = {Yang, Y. and Li, C. and Zhou, Huiyang},
  title = {{CUDA-NP}: Realizing nested thread-level parallelism in {GPGPU} applications},
  journal = {Journal of Computer Science and Technology},
  volume = {30},
  number = {1},
  pages = {3--19},
  year = {2015},
  doi = {10.1007/s11390-015-1500-y},
}

@inproceedings{li_song_dai_sidelnik_hari_zhou_2015,
  author = {Li, C. and Song, S. and Dai, H. and Sidelnik, A. and Hari, S. and Zhou, H.},
  title = {Locality-Driven Dynamic {GPU} Cache Bypassing},
  booktitle = {ICS '15: Proceedings of the 29th ACM on International Conference on Supercomputing},
  pages = {61--77},
  year = {2015},
  month = jun,
  address = {New York},
  publisher = {ACM},
  doi = {10.1145/2751205.2751237},
}

@inproceedings{xiang_yang_mantor_rubin_zhou_2015,
  author = {Xiang, P. and Yang, Y. and Mantor, M. and Rubin, N. and Zhou, H.},
  title = {Revisiting {ILP} Designs for Throughput-Oriented {GPGPU} Architecture},
  booktitle = {Proceedings of the 2015 15th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing},
  pages = {121--130},
  year = {2015},
  month = may,
  address = {New Jersey},
  publisher = {IEEE},
  isbn = {9781479980062},
  doi = {10.1109/CCGrid.2015.14},
}

@inproceedings{gupta_huiyang_2015,
  author = {Gupta, S. and Zhou, Huiyang},
  title = {Spatial locality-aware cache partitioning for effective cache sharing},
  booktitle = {2015 44th International Conference on Parallel Processing (ICPP)},
  pages = {150--159},
  year = {2015},
  doi = {10.1109/icpp.2015.24},
}

@inproceedings{yang_xiang_mantor_rubin_hsu_dong_zhou_2014,
  author = {Yang, Y. and Xiang, P. and Mantor, M. and Rubin, N. and Hsu, L. and Dong, Q. and Zhou, H.},
  title = {A Case for a Flexible Scalar Unit in {SIMT} Architecture},
  booktitle = {Proceedings of 2014 IEEE 28th International Parallel and Distributed Processing Symposium},
  year = {2014},
  month = may,
  address = {New Jersey},
  publisher = {IEEE},
  isbn = {9781479938001 9781479937998},
  doi = {10.1109/IPDPS.2014.21},
}

@incollection{yang_zhou_2014,
  author = {Yang, Yi and Zhou, Huiyang},
  title = {A Highly Efficient {FFT} Using Shared-Memory Multiplexing},
  booktitle = {Numerical Computations with GPUs},
  pages = {363--377},
  year = {2014},
  publisher = {Springer International Publishing},
  isbn = {9783319065472 9783319065489},
  doi = {10.1007/978-3-319-06548-9_17},
  url = {http://dx.doi.org/10.1007/978-3-319-06548-9_17},
}

@article{yang_huiyang_2014,
  author = {Yang, Y. and Zhou, Huiyang},
  title = {{CUDA-NP}: Realizing nested thread-level parallelism in {GPGPU} applications},
  journal = {ACM SIGPLAN Notices},
  volume = {49},
  number = {8},
  pages = {93--105},
  year = {2014},
  doi = {10.1145/2692916.2555254},
}

@inproceedings{li_yang_dai_yan_mueller_zhou_2014,
  author = {Li, C. and Yang, Y. and Dai, H. W. and Yan, S. G. and Mueller, F. and Zhou, H. Y.},
  title = {Understanding the tradeoffs between software-managed vs. hardware-managed caches in {GPUs}},
  booktitle = {IEEE International Symposium on Performance Analysis of Systems and Software},
  pages = {231--241},
  year = {2014},
}

@inproceedings{xiang_yang_huiyang_2014,
  author = {Xiang, P. and Yang, Y. and Zhou, Huiyang},
  title = {Warp-level divergence in {GPUs}: Characterization, impact, and mitigation},
  booktitle = {International Symposium on High-Performance Computer Architecture},
  pages = {284--295},
  year = {2014},
  doi = {10.1109/hpca.2014.6835939},
}

@inproceedings{yan_li_zhang_zhou_2014,
  author = {Yan, S. and Li, C. and Zhang, Y. and Zhou, H.},
  title = {{yaSpM}: Yet Another {SpMV} Framework on {GPUs}},
  booktitle = {Proceedings of the 19th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  volume = {49},
  number = {8},
  pages = {107--118},
  year = {2014},
  month = feb,
  address = {New York},
  publisher = {Association for Computing Machinery},
  doi = {10.1145/2692916.2555255},
}

@inproceedings{gupta_gao_huiyang_2013,
  author = {Gupta, S. and Gao, H. L. and Zhou, Huiyang},
  title = {Adaptive cache bypassing for inclusive last level caches},
  booktitle = {IEEE 27th International Parallel and Distributed Processing Symposium (IPDPS 2013)},
  pages = {1243--1253},
  year = {2013},
  doi = {10.1109/ipdps.2013.16},
}

@inproceedings{gupta_xiang_zhou_2013,
  author = {Gupta, S. and Xiang, P. and Zhou, H.},
  title = {Analyzing locality of memory references in {GPU} architectures},
  booktitle = {MSPC '13: Proceedings of the ACM SIGPLAN Workshop on Memory Systems Performance and Correctness},
  volume = {6},
  year = {2013},
  month = jun,
  address = {New York},
  publisher = {Association for Computing Machinery},
  doi = {10.1145/2492408.2492423},
}

@article{kong_aciicmez_seifert_huiyang_2013,
  author = {Kong, J. F. and Aciicmez, O. and Seifert, J. P. and Zhou, Huiyang},
  title = {Architecting against software cache-based side-channel attacks},
  journal = {IEEE Transactions on Computers},
  volume = {62},
  number = {7},
  pages = {1276--1288},
  year = {2013},
  doi = {10.1109/tc.2012.78},
}

@inproceedings{xiang_yang_mantor_rubin_hsu_zhou_2013,
  author = {Xiang, P. and Yang, Y. and Mantor, M. and Rubin, N. and Hsu, L. and Zhou, H.},
  title = {Exploiting Uniform Vector Instructions for {GPGPU} Performance, Energy Efficiency, and Opportunistic Reliability Enhancement},
  booktitle = {Proceedings of the 27th International ACM Conference on International Conference on Supercomputing},
  pages = {433--442},
  year = {2013},
  month = jun,
  address = {New York},
  publisher = {Association for Computing Machinery},
  doi = {10.1145/2464996.2465022},
}

@article{gupta_xiang_yang_huiyang_2013,
  author = {Gupta, S. and Xiang, P. and Yang, Y. and Zhou, Huiyang},
  title = {Locality principle revisited: A probability-based quantitative approach},
  journal = {Journal of Parallel and Distributed Computing},
  volume = {73},
  number = {7},
  pages = {1011--1027},
  year = {2013},
  doi = {10.1016/j.jpdc.2013.01.010},
}

@article{yang_huiyang_2013,
  author = {Yang, Y. and Zhou, Huiyang},
  title = {The implementation of a high performance {GPGPU} compiler},
  journal = {International Journal of Parallel Programming},
  volume = {41},
  number = {6},
  pages = {768--781},
  year = {2013},
  doi = {10.1007/s10766-012-0228-3},
}

@article{yang_xiang_kong_mantor_huiyang_2012,
  author = {Yang, Y. and Xiang, P. and Kong, J. F. and Mantor, M. and Zhou, Huiyang},
  title = {A unified optimizing compiler framework for different {GPGPU} architectures},
  journal = {ACM Transactions on Architecture and Code Optimization},
  volume = {9},
  number = {2},
  year = {2012},
  doi = {10.1145/2207222.2207225},
}

@inproceedings{yang_xiang_mantor_zhou_2012,
  author = {Yang, Y. and Xiang, P. and Mantor, M. and Zhou, H. Y.},
  title = {{CPU}-assisted {GPGPU} on fused {CPU-GPU} architectures},
  booktitle = {International Symposium on High-Performance Computer Architecture},
  pages = {103--114},
  year = {2012},
}

@inproceedings{yang_xiang_mantor_zhou_2012b,
  author = {Yang, Yi and Xiang, Ping and Mantor, Mike and Zhou, Huiyang},
  title = {Fixing Performance Bugs: An Empirical Study of Open-Source {GPGPU} Programs},
  booktitle = {2012 41st International Conference on Parallel Processing},
  year = {2012},
  month = sep,
  publisher = {IEEE},
  isbn = {9781467325080 9780769547961},
  doi = {10.1109/icpp.2012.30},
  url = {http://dx.doi.org/10.1109/icpp.2012.30},
}

@inproceedings{gupta_xiang_yang_huiyang_2012,
  author = {Gupta, S. and Xiang, P. and Yang, Y. and Zhou, Huiyang},
  title = {Locality principle revisited: A probability-based quantitative approach},
  booktitle = {2012 IEEE 26th International Parallel and Distributed Processing Symposium (IPDPS)},
  pages = {995--1009},
  year = {2012},
  doi = {10.1109/ipdps.2012.93},
}

@inproceedings{yang_xiang_mantor_rubin_zhou_2012,
  author = {Yang, Y. and Xiang, P. and Mantor, M. and Rubin, N. and Zhou, H.},
  title = {Shared Memory Multiplexing: A Novel Way to Improve {GPGPU} Throughput},
  booktitle = {Proceedings of the 2012 21st International Conference on Parallel Architectures and Compilation Techniques (PACT)},
  year = {2012},
  month = sep,
  isbn = {9781509066094 9781450311823},
}

@article{dimitrov_zhou_2011,
  author = {Dimitrov, M. and Zhou, H.},
  title = {Combining Local and Global History for High Performance Data Prefetching},
  journal = {Journal of Instruction-Level Parallelism},
  volume = {13},
  pages = {1--14},
  year = {2011},
  month = feb,
}

@inproceedings{yang_zhou_2011,
  author = {Yang, Y. and Zhou, H.},
  title = {Developing a High Performance {GPGPU} Compiler using {Cetus}},
  booktitle = {Proceedings of the Cetus Users and Compiler Infrastructure Workshop, International Conference on Parallel Architectures and Compilation Techniques (PACT'11)},
  year = {2011},
  month = oct,
}

@inproceedings{bhansali_panirwla_zhou_2011,
  author = {Bhansali, N. and Panirwla, C. and Zhou, H.},
  title = {Exploring Correlation for Indirect Branch Prediction},
  booktitle = {2nd JILP Workshop on Computer Architecture Competitions (JWAC-2): Championship Branch Prediction},
  year = {2011},
  month = jun,
}

@inproceedings{dimitrov_zhou_2011b,
  author = {Dimitrov, Martin and Zhou, Huiyang},
  title = {Time-Ordered Event Traces: A New Debugging Primitive for Concurrency Bugs},
  booktitle = {2011 IEEE International Parallel \& Distributed Processing Symposium},
  year = {2011},
  month = may,
  publisher = {IEEE},
  isbn = {9781612843728},
  doi = {10.1109/ipdps.2011.38},
  url = {http://dx.doi.org/10.1109/ipdps.2011.38},
}

@inproceedings{yang_xiang_kong_huiyang_2010,
  author = {Yang, Y. and Xiang, P. and Kong, J. F. and Zhou, Huiyang},
  title = {A {GPGPU} compiler for memory optimization and parallelism management},
  booktitle = {ACM SIGPLAN Notices},
  volume = {45},
  number = {6},
  pages = {86--97},
  year = {2010},
  doi = {10.1145/1809028.1806606},
}

@inproceedings{kong_dimitrov_yang_liyanage_cao_staples_mantor_zhou_2010,
  author = {Kong, J. and Dimitrov, M. and Yang, Y. and Liyanage, J. and Cao, L. and Staples, J. and Mantor, M. and Zhou, H.},
  title = {Accelerating {MATLAB} Image Processing Toolbox Functions on {GPUs}},
  booktitle = {Proceedings of the 3rd Workshop on General-Purpose Computation on Graphics Processing Units},
  pages = {75--85},
  year = {2010},
  month = mar,
  address = {New York},
  publisher = {Association for Computing Machinery},
  isbn = {9781605589350},
  doi = {10.1145/1735688.1735703},
}

@article{yang_xiang_kong_huiyang_2010b,
  author = {Yang, Y. and Xiang, P. and Kong, J. F. and Zhou, Huiyang},
  title = {An optimizing compiler for {GPGPU} programs with input-data sharing},
  journal = {ACM SIGPLAN Notices},
  volume = {45},
  number = {5},
  pages = {343--344},
  year = {2010},
  doi = {10.1145/1837853.1693505},
}

@inproceedings{yang_xiang_kong_huiyang_2010c,
  author = {Yang, Y. and Xiang, P. and Kong, J. F. and Zhou, Huiyang},
  title = {An optimizing compiler for {GPGPU} programs with input-data sharing},
  booktitle = {ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP)},
  pages = {343--344},
  year = {2010},
  doi = {10.1145/1693453.1693505},
}

@inproceedings{kong_zhou_2010,
  author = {Kong, Jingfei and Zhou, Huiyang},
  title = {Improving privacy and lifetime of {PCM}-based main memory},
  booktitle = {2010 IEEE/IFIP International Conference on Dependable Systems \& Networks (DSN)},
  year = {2010},
  month = jun,
  publisher = {IEEE},
  isbn = {9781424475001},
  doi = {10.1109/dsn.2010.5544298},
  url = {http://dx.doi.org/10.1109/dsn.2010.5544298},
}

@inproceedings{dimitrov_zhou_2009,
  author = {Dimitrov, Martin and Zhou, Huiyang},
  title = {Anomaly-based bug prediction, isolation, and validation},
  booktitle = {Proceeding of the 14th international conference on Architectural support for programming languages and operating systems - ASPLOS '09},
  year = {2009},
  publisher = {ACM Press},
  isbn = {9781605584065},
  doi = {10.1145/1508244.1508252},
  url = {http://dx.doi.org/10.1145/1508244.1508252},
}

@inproceedings{kong_aciicmez_seifert_zhou_2009,
  author = {Kong, J. and Aciicmez, O. and Seifert, J.-P. and Zhou, Huiyang},
  title = {Hardware-software integrated approaches to defend against software cache-based side channel attacks},
  booktitle = {2009 IEEE 15th International Symposium on High Performance Computer Architecture},
  year = {2009},
  month = feb,
  publisher = {IEEE},
  isbn = {9781424429325},
  doi = {10.1109/hpca.2009.4798277},
  url = {http://dx.doi.org/10.1109/hpca.2009.4798277},
}

@inproceedings{dimitrov_mantor_zhou_2009,
  author = {Dimitrov, Martin and Mantor, Mike and Zhou, Huiyang},
  title = {Understanding software approaches for {GPGPU} reliability},
  booktitle = {Proceedings of 2nd Workshop on General Purpose Processing on Graphics Processing Units - GPGPU-2},
  year = {2009},
  publisher = {ACM Press},
  isbn = {9781605585178},
  doi = {10.1145/1513895.1513907},
  url = {http://dx.doi.org/10.1145/1513895.1513907},
}

@inproceedings{gao_ma_dimitrov_zhou_2008,
  author = {Gao, Hongliang and Ma, Yi and Dimitrov, Martin and Zhou, Huiyang},
  title = {Address-branch correlation: A novel locality for long-latency hard-to-predict branches},
  booktitle = {2008 IEEE 14th International Symposium on High Performance Computer Architecture},
  year = {2008},
  month = feb,
  publisher = {IEEE},
  isbn = {9781424420704},
  issn = {1530-0897},
  doi = {10.1109/hpca.2008.4658629},
  url = {http://dx.doi.org/10.1109/hpca.2008.4658629},
}

@inproceedings{kong_aciicmez_seifert_zhou_2008,
  author = {Kong, Jingfei and Aciicmez, Onur and Seifert, Jean-Pierre and Zhou, Huiyang},
  title = {Deconstructing new cache designs for thwarting software cache-based side channel attacks},
  booktitle = {Proceedings of the 2nd ACM workshop on Computer security architectures - CSAW '08},
  year = {2008},
  publisher = {ACM Press},
  isbn = {9781605583006},
  doi = {10.1145/1456508.1456514},
  url = {http://dx.doi.org/10.1145/1456508.1456514},
}

@article{ma_gao_dimitrov_huiyang_2007,
  author = {Ma, Y. and Gao, H. and Dimitrov, M. and Zhou, Huiyang},
  title = {Optimizing dual-core execution for power efficiency and transient-fault recovery},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume = {18},
  number = {8},
  pages = {1080--1093},
  year = {2007},
  doi = {10.1109/tpds.2007.4288106},
}

@article{gao_zhou_2007,
  author = {Gao, H. and Zhou, H.},
  title = {{PMPM}: Prediction by combining multiple partial matches},
  journal = {Journal of Instruction-Level Parallelism},
  volume = {9},
  pages = {1--18},
  year = {2007},
}

@inproceedings{dimitrov_zhou_2007,
  author = {Dimitrov, Martin and Zhou, Huiyang},
  title = {Unified Architectural Support for Soft-Error Protection or Software Bug Detection},
  booktitle = {16th International Conference on Parallel Architecture and Compilation Techniques (PACT 2007)},
  year = {2007},
  month = sep,
  publisher = {IEEE},
  isbn = {0769529445 9780769529448},
  issn = {1089-795X},
  doi = {10.1109/pact.2007.4336201},
  url = {http://dx.doi.org/10.1109/pact.2007.4336201},
}

@inproceedings{ma_zhou_2006,
  author = {Ma, Yi and Zhou, Huiyang},
  title = {Efficient Transient-Fault Tolerance for Multithreaded Processors Using Dual-Thread Execution},
  booktitle = {2006 International Conference on Computer Design},
  year = {2006},
  month = oct,
  publisher = {IEEE},
  isbn = {9780780397064 9780780397071},
  issn = {1063-6404},
  doi = {10.1109/iccd.2006.4380804},
  url = {http://dx.doi.org/10.1109/iccd.2006.4380804},
}

@inproceedings{kong_zou_zhou_2006,
  author = {Kong, Jingfei and Zou, Cliff C. and Zhou, Huiyang},
  title = {Improving software security via runtime instruction-level taint checking},
  booktitle = {Proceedings of the 1st workshop on Architectural and system support for improving software dependability - ASID '06},
  year = {2006},
  publisher = {ACM Press},
  isbn = {1595935762},
  doi = {10.1145/1181309.1181313},
  url = {http://dx.doi.org/10.1145/1181309.1181313},
}

@inproceedings{dimitrov_zhou_2006,
  author = {Dimitrov, M. and Zhou, H.},
  title = {Locality-based Information Redundancy for Processor Reliability},
  booktitle = {2nd Workshop on Architectural Reliability (WAR-2) held in conjunction with 39th International Symposium on Microarchitecture (MICRO-39)},
  pages = {29--36},
  year = {2006},
  month = dec,
}

@inproceedings{gao_zhou_2006,
  author = {Gao, H. and Zhou, H.},
  title = {{PMPM}: Prediction by Combining Multiple Partial Matches},
  booktitle = {2nd Championship Branch Prediction (CBP-2) held with the 39th International Symposium on Microarchitecture (MICRO-39)},
  pages = {19--24},
  year = {2006},
  month = dec,
}

@article{ma_gao_zhou_2006,
  author = {Ma, Y. and Gao, H. and Zhou, H.},
  title = {Using index functions to reduce conflict aliasing in branch prediction tables},
  journal = {IEEE Transactions on Computers},
  volume = {55},
  number = {8},
  pages = {1057--1061},
  year = {2006},
}

@article{zhou_2005,
  author = {Zhou, H.},
  title = {A case for fault tolerance and performance enhancement using chip multi-processors},
  journal = {IEEE Computer Architecture Letters},
  volume = {4},
  pages = {1--4},
  year = {2005},
}

@article{gao_zhou_2005,
  author = {Gao, H. and Zhou, H.},
  title = {Adaptive information processing: an effective way to improve perceptron branch predictors},
  journal = {Journal of Instruction-Level Parallelism},
  volume = {7},
  pages = {1--10},
  year = {2005},
}

% NOTE(review): the DOI suffix (intera.2002) points to a 2002 venue while
% year is 2005 -- confirm the publication year against the publisher record.
@inproceedings{zhou_conte_2005,
  author = {Zhou, Huiyang and Conte, T. M.},
  title = {Code size efficiency in global scheduling for {ILP} processors},
  booktitle = {Proceedings Sixth Annual Workshop on Interaction between Compilers and Computer Architectures},
  year = {2005},
  month = aug,
  publisher = {IEEE Comput. Soc},
  isbn = {0769515347},
  doi = {10.1109/intera.2002.995845},
  url = {http://dx.doi.org/10.1109/intera.2002.995845},
}

% NOTE(review): the booktitle and DOI name a 2003 symposium while year is
% 2005 -- confirm the publication year against the publisher record.
@inproceedings{zhou_flanagan_conte_2005,
  author = {Zhou, Huiyang and Flanagan, J. and Conte, T. M.},
  title = {Detecting global stride locality in value streams},
  booktitle = {30th Annual International Symposium on Computer Architecture, 2003. Proceedings.},
  year = {2005},
  month = apr,
  publisher = {IEEE Comput. Soc},
  isbn = {0769519458},
  doi = {10.1109/isca.2003.1207011},
  url = {http://dx.doi.org/10.1109/isca.2003.1207011},
}

@inproceedings{zhou_2005b,
  author = {Zhou, Huiyang},
  title = {Dual-core execution: building a highly scalable single-thread instruction window},
  booktitle = {14th International Conference on Parallel Architectures and Compilation Techniques (PACT'05)},
  year = {2005},
  publisher = {IEEE},
  isbn = {076952429X},
  doi = {10.1109/pact.2005.18},
  url = {http://dx.doi.org/10.1109/pact.2005.18},
}

@article{huiyang_conte_2005,
  author = {Zhou, Huiyang and Conte, T. M.},
  title = {Enhancing memory-level parallelism via recovery-free value prediction},
  journal = {IEEE Transactions on Computers},
  volume = {54},
  pages = {897--912},
  year = {2005},
  doi = {10.1109/tc.2005.117},
}

@inproceedings{gao_zhou_2004,
  author = {Gao, H. and Zhou, H.},
  title = {Adaptive Information Processing: An Effective Way to Improve Perceptron Branch Predictors},
  booktitle = {1st Championship Branch Prediction (CBP-1) held with the 37th International Symposium on Microarchitecture (MICRO-37)},
  year = {2004},
  month = dec,
}

@article{huiyang_toburen_rotenberg_conte_2003,
  author = {Zhou, Huiyang and Toburen, M. C. and Rotenberg, E. and Conte, T. M.},
  title = {Adaptive mode control: A static-power-efficient cache design},
  journal = {ACM Transactions on Embedded Computing Systems},
  volume = {2},
  number = {3},
  pages = {347--372},
  year = {2003},
  doi = {10.1145/860176.860181},
}

@techreport{zhou_2003,
  author = {Zhou, H.},
  title = {Code size aware compilation for real-time applications},
  institution = {Computer Science Department, University of Central Florida},
  year = {2003},
  month = jul,
}

@inproceedings{zhou_conte_2003,
  author = {Zhou, H. and Conte, T. M.},
  title = {Enhancing Memory Level Parallelism via Recovery-Free Value Prediction},
  booktitle = {The 2003 International Conference on Supercomputing (ICS'03)},
  pages = {326--335},
  year = {2003},
  month = jun,
}

@techreport{zhou_conte_2003b,
  author = {Zhou, H. and Conte, T. M.},
  title = {Performance modeling of memory latency hiding techniques},
  institution = {Department of Electrical and Computer Engineering, North Carolina State University},
  year = {2003},
  month = jan,
  address = {Raleigh, NC},
}

@incollection{zhou_jennings_conte_2003,
  author = {Zhou, Huiyang and Jennings, Matthew D. and Conte, Thomas M.},
  title = {Tree Traversal Scheduling: A Global Instruction Scheduling Technique for {VLIW/EPIC} Processors},
  booktitle = {Languages and Compilers for Parallel Computing},
  volume = {2624},
  pages = {223--238},
  year = {2003},
  publisher = {Springer Berlin Heidelberg},
  isbn = {9783540040293 9783540357674},
  issn = {0302-9743},
  doi = {10.1007/3-540-35767-x_15},
  url = {http://dx.doi.org/10.1007/3-540-35767-x_15},
}

@techreport{zhou_conte_2002,
  author = {Zhou, H. and Conte, T. M.},
  title = {Using Performance Bounds to Guide Pre-scheduling Code Optimizations},
  institution = {Department of Electrical and Computer Engineering, North Carolina State University},
  year = {2002},
  month = sep,
  address = {Raleigh, NC},
}

@techreport{jennings_zhou_conte_2001,
  author = {Jennings, M. D. and Zhou, H. and Conte, T. M.},
  title = {A Treegion-based Unified Approach to Speculation and Predication in Global Instruction Scheduling},
  institution = {Department of Electrical and Computer Engineering, North Carolina State University},
  year = {2001},
  month = aug,
  address = {Raleigh, NC},
}

@techreport{zhou_fu_rotenberg_conte_2001,
  author = {Zhou, H. and Fu, C. and Rotenberg, E. and Conte, T.},
  title = {A study of value speculative execution and mispeculation recovery in superscalar microprocessors},
  institution = {Department of Electrical and Computer Engineering, North Carolina State University},
  year = {2001},
  month = jan,
  address = {Raleigh, NC},
}

@inproceedings{huiyang_toburen_rotenberg_conte_2001,
  author = {Zhou, Huiyang and Toburen, M. C. and Rotenberg, E. and Conte, T. M.},
  title = {Adaptive mode control: A static-power-efficient cache design},
  booktitle = {2001 International Conference on Parallel Architectures and Compilation Techniques: Proceedings: 8--12 September, 2001, Barcelona, Catalunya, Spain},
  pages = {61--70},
  year = {2001},
  address = {Los Alamitos, CA},
  publisher = {IEEE Computer Society},
  isbn = {0769513638},
  doi = {10.1109/pact.2001.953288},
}

@techreport{zhou_toburen_rotenberg_conte_2000,
  author = {Zhou, H. and Toburen, M. and Rotenberg, E. and Conte, T.},
  title = {Adaptive Mode Control: A Low-Leakage Power-Efficient Cache Design},
  institution = {Department of Electrical and Computer Engineering, North Carolina State University},
  year = {2000},
  month = nov,
  address = {Raleigh, NC},
}

% NOTE(review): the third author was exported as "Raganath"; spelled here as
% in the 1998 entry from the same journal -- confirm against the article.
@article{kassim_huiyang_raganath_2000,
  author = {Kassim, A. A. and Zhou, Huiyang and Ranganath, S.},
  title = {Automatic {IC} orientation checks},
  journal = {Machine Vision and Applications},
  volume = {12},
  number = {3},
  pages = {107--112},
  year = {2000},
  doi = {10.1007/s001380050129},
}

@article{huiyang_kassim_ranganath_1998,
  author = {Zhou, Huiyang and Kassim, A. A. and Ranganath, S.},
  title = {A fast algorithm for detecting die extrusion defects in {IC} packages},
  journal = {Machine Vision and Applications},
  volume = {11},
  number = {1},
  pages = {37--41},
  year = {1998},
  doi = {10.1007/s001380050088},
}

@article{huiyang_qu_li_1996,
  author = {Zhou, Huiyang and Qu, L. and Li, A.},
  title = {Test sequencing and diagnosis in electrical system with decision table},
  journal = {Microelectronics and Reliability},
  volume = {36},
  number = {9},
  pages = {1167--1175},
  year = {1996},
  doi = {10.1016/0026-2714(95)00142-5},
}