% Bibliography database: five entries (two journal articles, three conference papers).
% Layout convention: one field per line, lowercase field names, months as bare
% three-letter macros, page ranges with a double hyphen, acronyms brace-protected
% so that sentence-casing styles keep their capitals.
% The abstractNote field is a non-standard annotation field; standard styles ignore it.

% Journal article (ACM JETC, volume 17, number 4).
@article{dey_baker_schabel_li_franzon_2021,
  author       = {Dey, Sumon and Baker, Lee and Schabel, Joshua and Li, Weifu and Franzon, Paul D.},
  title        = {A Scalable Cluster-based Hierarchical Hardware Accelerator for a Cortically Inspired Algorithm},
  journal      = {{ACM} Journal on Emerging Technologies in Computing Systems},
  volume       = {17},
  number       = {4},
  year         = {2021},
  month        = oct,
  issn         = {1550-4840},
  doi          = {10.1145/3447777},
  abstractNote = {This article describes a scalable, configurable and cluster-based hierarchical hardware accelerator through custom hardware architecture for Sparsey, a cortical learning algorithm. Sparsey is inspired by the operation of the human cortex and uses a Sparse Distributed Representation to enable unsupervised learning and inference in the same algorithm. A distributed on-chip memory organization is designed and implemented in custom hardware to improve memory bandwidth and accelerate the memory read/write operations for synaptic weight matrices. Bit-level data are processed from distributed on-chip memory and custom multiply-accumulate hardware is implemented for binary and fixed-point multiply-accumulation operations. The fixed-point arithmetic and fixed-point storage are also adapted in this implementation. At 16 nm, the custom hardware of Sparsey achieved an overall 24.39× speedup, 353.12× energy efficiency per frame, and 1.43× reduction in silicon area against a state-of-the-art GPU.},
}

% Conference paper (IEDM 2021); the venue is stored in booktitle.
% NOTE(review): the exported key contained a space ("..._dey_et al._2021"), which
% BibTeX cannot parse as a key; the space is replaced by an underscore here.
% Update any citation that used the old spelling.
% The author list is truncated in the export; "and others" is the token that
% styles render as "et al.".
@inproceedings{franzon_davis_rotenberg_stevens_lipa_nigussie_pan_baker_schabel_dey_et_al._2021,
  author       = {Franzon, P. and Davis, W. and Rotenberg, E. and Stevens, J. and Lipa, S. and Nigussie, T. and Pan, H. and Baker, L. and Schabel, J. and Dey, S. and others},
  title        = {Design for {3D} Stacked Circuits},
  booktitle    = {2021 {IEEE} International Electron Devices Meeting ({IEDM})},
  year         = {2021},
  issn         = {2380-9248},
  doi          = {10.1109/IEDM19574.2021.9720553},
  abstractNote = {2.5D and 3D technologies can give rise to a node equivalent of scaling due to improved connectivity. Aggressive exploitation scenarios include functional partitioning, circuit partitioning, logic on DRAM, design obfuscation and modular chiplets. Design issues that need to be addressed in pursuing such exploitations include thermal management, design for test and computer aided design.},
}

% Conference paper (3DIC 2021); the venue is stored in booktitle.
@inproceedings{baker_patti_franzon_2021,
  author       = {Baker, Lee B. and Patti, Robert and Franzon, Paul},
  title        = {{Multi-ANN} embedded system based on a custom {3D-DRAM}},
  booktitle    = {2021 {IEEE} International {3D} Systems Integration Conference ({3DIC})},
  year         = {2021},
  issn         = {2164-0157},
  doi          = {10.1109/3DIC52383.2021.9687617},
  abstractNote = {Machine Learning in the form of Artificial Neural Networks (ANNs) has gained considerable traction in applications such as image recognition and speech recognition. These applications typically employ a subset of ANNs known as Convolutional Neural Networks (CNNs) which re-use parameters and thus reduce main memory bandwidth. However, there are other types of ANN that do not provide reuse opportunities such as autoencoders and Long Short-term memory. Most research has focused on implementing CNNs but because of their extensive use of SRAM have both ANN size restrictions and performance degradation when used in applications that utilize other types of ANN. This work demonstrates how a customized 3D-DRAM with a very wide databus can be combined with application-specific layers to produce a system meeting the requirements of embedded systems employing multiple instances of disparate ANNs.},
}

% Journal article (IEEE Transactions on Smart Grid, volume 10, number 1).
@article{park_baker_franzon_2019,
  author       = {Park, S. W. and Baker, L. B. and Franzon, P. D.},
  title        = {Appliance Identification Algorithm for a Non-Intrusive Home Energy Monitor Using Cogent Confabulation},
  journal      = {{IEEE} Transactions on Smart Grid},
  volume       = {10},
  number       = {1},
  pages        = {714--721},
  year         = {2019},
  month        = jan,
  issn         = {1949-3053},
  doi          = {10.1109/TSG.2017.2751465},
  abstractNote = {This paper presents an appliance identification algorithm for use with a non-intrusive home energy monitor based on a cogent confabulation neural network. As a cogent confabulation neural network does not require multiplications during the identification phase, it is an effective choice for systems with low-computational capability. A non-intrusive home energy monitor needs to learn not only the energy patterns of individual appliances but also those of combinations of appliances. To relieve the burden of learning power patterns of the combinations, this paper proposes a parameter-building scheme based on the parameters of individual appliances. The proposed algorithm is evaluated on datasets prepared by the reference energy disaggregation dataset and the authors. The average success rate was 83.8% for up to eight appliances and showed better performance than the combinatorial optimization and artificial neural network approaches.},
}

% Conference paper (ICRC 2016). The export carried no year field; 2016 is taken
% from the proceedings title. The key is kept exactly as exported.
@inproceedings{schabel_baker_dey_li_franzon,
  author    = {Schabel, J. and Baker, L. and Dey, S. and Li, W. F. and Franzon, P. D.},
  title     = {Processor-in-memory support for artificial neural networks},
  booktitle = {2016 {IEEE} International Conference on Rebooting Computing ({ICRC})},
  year      = {2016},
}