Update on Overleaf.

This commit is contained in:
2024-06-12 14:33:55 +00:00
committed by node
parent 304ff3c48a
commit 382c028c78
6 changed files with 103 additions and 90 deletions

View File

@@ -281,6 +281,7 @@
urldate = {2024-03-20},
langid = {english},
}
@article{kim2016a,
title = {Ramulator: {{A Fast}} and {{Extensible DRAM Simulator}}},
shorttitle = {Ramulator},
@@ -296,11 +297,15 @@
urldate = {2024-03-20},
langid = {english},
}
@misc{rust,
  title   = {The {{Rust Programming Language}}},
  author  = {{Rust Foundation}},
  year    = {2015},
  url     = {https://www.rust-lang.org/},
}
@article{forlin2022,
title = {Sim 2 {{PIM}}: {{A}} Complete Simulation Framework for {{Processing-in-Memory}}},
shorttitle = {Sim 2 {{PIM}}},
author = {Forlin, Bruno E. and others},
@@ -449,3 +454,19 @@
abstract = {Many applications heavily use bitwise operations on large bitvectors as part of their computation. In existing systems, performing such bulk bitwise operations requires the processor to transfer a large amount of data on the memory channel, thereby consuming high latency, memory bandwidth, and energy. In this paper, we describe Ambit, a recently-proposed mechanism to perform bulk bitwise operations completely inside main memory. Ambit exploits the internal organization and analog operation of DRAM-based memory to achieve low cost, high performance, and low energy. Ambit exposes a new bulk bitwise execution model to the host processor. Evaluations show that Ambit significantly improves the performance of several applications that use bulk bitwise operations, including databases.},
archiveprefix = {arxiv},
}
@article{jeong2024,
  title = {{{PipePIM}}: {{Maximizing Computing Unit Utilization}} in {{ML-Oriented Digital PIM}} by {{Pipelining}} and {{Dual Buffering}}},
  shorttitle = {{{PipePIM}}},
  author = {Jeong, Taeyang and Chung, Eui-Young},
  year = {2024},
  journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  pages = {1--1},
  issn = {0278-0070, 1937-4151},
  doi = {10.1109/TCAD.2024.3410842},
  urldate = {2024-06-10},
  abstract = {A digital Processing-in-Memory (PIM) that integrates computing units (CUs) with DRAM banks emerges as a promising technique for accelerating matrix-vector multiplication (MV). However, activating and precharging all banks incur significant overheads in a digital PIM based on conventional DRAM, which is limited to activating only a single subarray in a bank. Moreover, a digital PIM utilizes a vector buffer to store and reuse the input vector. This necessitates repeated buffer writes, incurring substantial overhead for large MV. Consequently, these overheads reduce CU utilization in a digital PIM, degrading the performance. To overcome these issues, we propose PipePIM, which maximizes CU utilization in a digital PIM by pipelining and dual buffering. PipePIM consists of two primary schemes: subarray-level pipelining (SAPI) and a dual vector buffer. They exploit and extend the features of a multitude of activated subarrays (MASA) introduced by subarray-level parallelism (SALP). SAPI enables a digital PIM to perform activation, precharging, and computation on different subarrays in a pipelined manner. Through SAPI, these operations are overlapped, and activation and precharging overheads are hidden. A dual vector buffer employs two vector buffers and manages them as ping-pong buffering, one for computation and another for buffer write simultaneously. To facilitate it, PipePIM proposes a half-division mode (HDM) enabling independent access to two activated subarrays with marginal area increase. We demonstrate the improvements by PipePIM on the state-of-the-art digital PIMs, Newton and HBM-PIM. Our simulation results indicate that the average speedups of Newton and HBM-PIM on MV are 2.16x and 1.74x, respectively.},
  copyright = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
  langid = {english},
  keywords = {PIM},
  internal-note = {pages 1--1 is the IEEE early-access placeholder; update volume/number/pages once the final version is assigned},
}