% Conference paper, ISQED 2017 (standard-cell based multi-ported memories).
% Acronyms in title/booktitle are brace-protected so sentence-casing styles keep them.
@inproceedings{ku_forbes_chowdhury_rotenberg_2017,
  author       = {Ku, S. and Forbes, E. and Chowdhury, R. B. R. and Rotenberg, E.},
  title        = {A case for standard-cell based {RAMs} in highly-ported superscalar processor structures},
  booktitle    = {Proceedings of the Eighteenth International Symposium on Quality Electronic Design ({ISQED})},
  year         = {2017},
  pages        = {131--137},
  doi          = {10.1109/isqed.2017.7918305},
  abstractNote = {Highly-ported memories are pervasive within superscalar processors. Accordingly, they have been targets for full-custom design using multi-ported versions of the 6T SRAM bitcell. Unfortunately, full-custom design of highly-ported memories is becoming exceedingly difficult in deep sub-micron technologies. This paper makes the case for implementing highly-ported memories with standard cells (flip-flops, muxes, clock buffers). In lieu of exotic peripheral circuits for each port, standard-cell SRAMs use muxes. Consequently, area differences between full-custom and standard-cell designs are greatly reduced at a high number of ports. To also compete with full-custom memories in terms of timing and power, we introduce a standard-cell memory compiler with three key features: (i) per-row clock gating, (ii) a new tri-state based mux standard cell, and (iii) a modular layout strategy, which is the centerpiece of the memory compiler. For a 16-read/8-write 128-entry register file, our modular standard-cell memory consumes 13% more area and 4% more power, and is 35% faster, than the custom memory produced by FabMem. The automatic (built-in) robustness of standard cell designs further weigh in their favor, contrasted with exquisite transistor sizing/tuning of intertwined sub-circuits in a full-custom design.},
}

% Conference paper, ISPASS 2016 (AnyCore adaptive superscalar RTL model).
% NOTE(review): the exported booktitle was cut off after "systems and"; it has been
% completed to the ISPASS conference name implied by the DOI prefix -- confirm against the publisher record.
@inproceedings{chowdhury_kannepalli_ku_rotenberg_2016,
  author       = {Chowdhury, R. B. R. and Kannepalli, A. K. and Ku, S. and Rotenberg, E.},
  title        = {{AnyCore}: A synthesizable {RTL} model for exploring and fabricating adaptive superscalar cores},
  booktitle    = {{IEEE} International Symposium on Performance Analysis of Systems and Software ({ISPASS})},
  year         = {2016},
  pages        = {214--224},
  doi          = {10.1109/ispass.2016.7482096},
  abstractNote = {Adaptive superscalar cores have the ability to dynamically adjust their execution resources to match the instruction-level parallelism (ILP) of different program phases. The goal of adaptivity is to maximize performance in as energy-efficient a manner as possible. This is achieved by disabling execution resources that contribute only marginally to performance for the code at hand. Researchers have proposed many adaptive features, including structures, superscalar width, and pipeline depth. The benefits of adaptivity are eroded by its circuit-level overheads. Unfortunately, circuit-level overheads cannot be effectively estimated or appreciated without a hardware design. To this end, we developed a register-transfer-level (RTL) design of a highly adaptive superscalar core, called AnyCore. AnyCore can be used to quantify logic overheads of an adaptive core with respect to fixed cores, synthesize and compare different adaptive cores, synthesize and compare an adaptive core to a multi-core comprised of multiple fixed core types, and fabricate adaptive superscalar cores. We provide examples of these use-cases.},
}