Update on Overleaf.

This commit is contained in:
2024-06-12 14:33:55 +00:00
committed by node
parent 304ff3c48a
commit 382c028c78
6 changed files with 103 additions and 90 deletions

View File

@@ -281,6 +281,7 @@
urldate = {2024-03-20},
langid = {english},
}
@article{kim2016a,
title = {Ramulator: {{A Fast}} and {{Extensible DRAM Simulator}}},
shorttitle = {Ramulator},
@@ -296,11 +297,15 @@
urldate = {2024-03-20},
langid = {english},
}
@misc{rust,
  title   = {The {{Rust Programming Language}}},
  author  = {{Rust Foundation}},
  year    = {2015},
  url     = {https://www.rust-lang.org/},
}
@article{forlin2022,
title = {Sim 2 {{PIM}}: {{A}} Complete Simulation Framework for {{Processing-in-Memory}}},
shorttitle = {Sim 2 {{PIM}}},
author = {Forlin, Bruno E. and others},
@@ -449,3 +454,19 @@
abstract = {Many applications heavily use bitwise operations on large bitvectors as part of their computation. In existing systems, performing such bulk bitwise operations requires the processor to transfer a large amount of data on the memory channel, thereby consuming high latency, memory bandwidth, and energy. In this paper, we describe Ambit, a recently-proposed mechanism to perform bulk bitwise operations completely inside main memory. Ambit exploits the internal organization and analog operation of DRAM-based memory to achieve low cost, high performance, and low energy. Ambit exposes a new bulk bitwise execution model to the host processor. Evaluations show that Ambit significantly improves the performance of several applications that use bulk bitwise operations, including databases.},
archiveprefix = {arxiv},
}
@article{jeong2024,
  title = {{{PipePIM}}: {{Maximizing Computing Unit Utilization}} in {{ML-Oriented Digital PIM}} by {{Pipelining}} and {{Dual Buffering}}},
  shorttitle = {{{PipePIM}}},
  author = {Jeong, Taeyang and Chung, Eui-Young},
  year = {2024},
  journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  pages = {1--1},
  issn = {0278-0070, 1937-4151},
  doi = {10.1109/TCAD.2024.3410842},
  urldate = {2024-06-10},
  abstract = {A digital Processing-in-Memory (PIM) that integrates computing units (CUs) with DRAM banks emerges as a promising technique for accelerating matrix-vector multiplication (MV). However, activating and precharging all banks incur significant overheads in a digital PIM based on conventional DRAM, which is limited to activating only a single subarray in a bank. Moreover, a digital PIM utilizes a vector buffer to store and reuse the input vector. This necessitates repeated buffer writes, incurring substantial overhead for large MV. Consequently, these overheads reduce CU utilization in a digital PIM, degrading the performance. To overcome these issues, we propose PipePIM, which maximizes CU utilization in a digital PIM by pipelining and dual buffering. PipePIM consists of two primary schemes: subarray-level pipelining (SAPI) and a dual vector buffer. They exploit and extend the features of a multitude of activated subarrays (MASA) introduced by subarray-level parallelism (SALP). SAPI enables a digital PIM to perform activation, precharging, and computation on different subarrays in a pipelined manner. Through SAPI, these operations are overlapped, and activation and precharging overheads are hidden. A dual vector buffer employs two vector buffers and manages them as ping-pong buffering, one for computation and another for buffer write simultaneously. To facilitate it, PipePIM proposes a half-division mode (HDM) enabling independent access to two activated subarrays with marginal area increase. We demonstrate the improvements by PipePIM on the state-of-the-art digital PIMs, Newton and HBM-PIM. Our simulation results indicate that the average speedups of Newton and HBM-PIM on MV are 2.16x and 1.74x, respectively.},
  copyright = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
  langid = {english},
  keywords = {PIM},
  internal-note = {pages 1--1 is the IEEE early-access placeholder; update volume/number/pages once the final version is assigned},
}