Update on Overleaf.
This commit is contained in:
@@ -139,3 +139,93 @@
|
||||
langid = {english},
|
||||
file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/73HULZKB/Sudarshan et al. - 2022 - A Critical Assessment of DRAM-PIM Architectures - .pdf}
|
||||
}
|
||||
|
||||
% Jouppi et al., ISCA 2021 (industrial track): lessons from three TPU generations.
@InProceedings{jouhyu_21,
  author    = {Jouppi, Norman P. and Yoon, Doe Hyun and Ashcraft, Matthew and Gottscho, Mark and Jablin, Thomas B. and Kurian, George and Laudon, James and Li, Sheng and Ma, Peter and Ma, Xiaoyu and Norrie, Thomas and Patil, Nishant and Prasad, Sushma and Young, Cliff and Zhou, Zongwei and Patterson, David},
  booktitle = {2021 ACM/IEEE 48th Annual International Symposium on Computer Architecture (ISCA)},
  title     = {Ten Lessons From Three Generations Shaped {Google's} {TPUv4i}: Industrial Product},
  doi       = {10.1109/ISCA52012.2021.00010},
  pages     = {1--14},
  keywords  = {Training;Program processors;Quantization (signal);Wires;Random access memory;Throughput;Software},
  owner     = {MJ},
  year      = {2021},
}
|
||||
|
||||
% Stone, IEEE Transactions on Computers 1970: early logic-in-memory proposal.
@Article{sto_70,
  author   = {Stone, Harold S.},
  title    = {A Logic-in-Memory Computer},
  doi      = {10.1109/TC.1970.5008902},
  number   = {1},
  pages    = {73--78},
  volume   = {C-19},
  journal  = {IEEE Transactions on Computers},
  keywords = {Computers;Logic arrays;Microelectronics;Memory management;Adders;Magnetic memory;Complexity theory;Cache memories;computer architecture;logic-in-memory;microelectronic memories;unconventional computer systems},
  owner    = {MJ},
  year     = {1970},
}
|
||||
|
||||
% Gomez-Luna et al., arXiv 2105.03814 (record exported from dblp/CoRR).
% Authors are given as "Last, First" so the two-word surname "El Hajj" is not split.
@Article{gomhaj_21,
  author     = {G{\'{o}}mez{-}Luna, Juan and El Hajj, Izzat and Fernandez, Ivan and Giannoula, Christina and Oliveira, Geraldo F. and Mutlu, Onur},
  title      = {Benchmarking a New Paradigm: An Experimental Analysis of a Real Processing-in-Memory Architecture},
  eprint     = {2105.03814},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2105.03814},
  volume     = {abs/2105.03814},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-03814.bib},
  journal    = {CoRR},
  owner      = {MJ},
  timestamp  = {Fri, 14 May 2021 12:13:30 +0200},
  year       = {2021},
}
|
||||
|
||||
% He et al., MICRO 2020: Newton accelerator-in-memory architecture.
@InProceedings{heson_20,
  author    = {M. He and C. Song and I. Kim and C. Jeong and S. Kim and I. Park and M. Thottethodi and T. N. Vijaykumar},
  booktitle = {2020 53rd Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)},
  title     = {{Newton}: A {DRAM}-maker's Accelerator-in-Memory ({AiM}) Architecture for Machine Learning},
  doi       = {10.1109/MICRO50266.2020.00040},
  pages     = {372--385},
  publisher = {IEEE Computer Society},
  url       = {https://doi.ieeecomputersociety.org/10.1109/MICRO50266.2020.00040},
  address   = {Los Alamitos, CA, USA},
  keywords  = {computational modeling;random access memory;graphics processing units;bandwidth;machine learning;acceleration;optimization},
  month     = oct,
  owner     = {MJ},
  year      = {2020},
}
|
||||
|
||||
% Lee et al., ISCA 2021 (industrial track): PIM on commercial DRAM technology.
@InProceedings{leekan_21,
  author    = {Lee, Sukhan and Kang, Shin-haeng and Lee, Jaehoon and Kim, Hyeonsu and Lee, Eojin and Seo, Seungwoo and Yoon, Hosang and Lee, Seungwon and Lim, Kyounghwan and Shin, Hyunsung and Kim, Jinhyun and O, Seongil and Iyer, Anand and Wang, David and Sohn, Kyomin and Kim, Nam Sung},
  booktitle = {2021 ACM/IEEE 48th Annual International Symposium on Computer Architecture (ISCA)},
  title     = {Hardware Architecture and Software Stack for {PIM} Based on Commercial {DRAM} Technology: Industrial Product},
  doi       = {10.1109/ISCA52012.2021.00013},
  pages     = {43--56},
  keywords  = {Program processors;Neural networks;Memory management;Random access memory;Bandwidth;Software;Energy efficiency;processing in memory;neural network;accelerator;DRAM},
  owner     = {MJ},
  year      = {2021},
}
|
||||
|
||||
% Lowe-Power et al., arXiv 2007.03152: gem5 simulator, version 20.0+.
% Title words are protected as whole words; casing under sentence-case styles is unchanged.
@Misc{lowahm_20,
  author        = {Jason Lowe-Power and Abdul Mutaal Ahmad and Ayaz Akram and Mohammad Alian and Rico Amslinger and Matteo Andreozzi and Adrià Armejach and Nils Asmussen and Srikant Bharadwaj and Gabe Black and Gedare Bloom and Bobby R. Bruce and Daniel Rodrigues Carvalho and Jeronimo Castrillon and Lizhong Chen and Nicolas Derumigny and Stephan Diestelhorst and Wendy Elsasser and Marjan Fariborz and Amin Farmahini-Farahani and Pouya Fotouhi and Ryan Gambord and Jayneel Gandhi and Dibakar Gope and Thomas Grass and Bagus Hanindhito and Andreas Hansson and Swapnil Haria and Austin Harris and Timothy Hayes and Adrian Herrera and Matthew Horsnell and Syed Ali Raza Jafri and Radhika Jagtap and Hanhwi Jang and Reiley Jeyapaul and Timothy M. Jones and Matthias Jung and Subash Kannoth and Hamidreza Khaleghzadeh and Yuetsu Kodama and Tushar Krishna and Tommaso Marinelli and Christian Menard and Andrea Mondelli and Tiago Mück and Omar Naji and Krishnendra Nathella and Hoa Nguyen and Nikos Nikoleris and Lena E. Olson and Marc Orr and Binh Pham and Pablo Prieto and Trivikram Reddy and Alec Roelke and Mahyar Samani and Andreas Sandberg and Javier Setoain and Boris Shingarov and Matthew D. Sinclair and Tuan Ta and Rahul Thakur and Giacomo Travaglini and Michael Upton and Nilay Vaish and Ilias Vougioukas and Zhengrong Wang and Norbert Wehn and Christian Weis and David A. Wood and Hongil Yoon and Éder F. Zulian},
  title         = {{The} {gem5} {Simulator}: {Version} 20.0+},
  eprint        = {2007.03152},
  archiveprefix = {arXiv},
  groups        = {MJ:1},
  owner         = {MJ},
  primaryclass  = {cs.AR},
  timestamp     = {2020-07-08},
  year          = {2020},
}
|
||||
|
||||
% Steiner et al., SAMOS 2020: DRAMSys4.0 SystemC/TLM-based DRAM simulator.
% Title words are protected as whole words; casing under sentence-case styles is unchanged.
@InProceedings{stejun_20,
  author    = {Steiner, Lukas and Jung, Matthias and Prado, Felipe S. and Bykov, Kyrill and Wehn, Norbert},
  booktitle = {International Conference on Embedded Computer Systems Architectures Modeling and Simulation (SAMOS)},
  title     = {{DRAMSys4.0}: {A} {Fast} and {Cycle-Accurate} {SystemC/TLM-Based} {DRAM} {Simulator}},
  publisher = {Springer},
  groups    = {MJ:1},
  month     = jul,
  owner     = {MJ},
  timestamp = {2020-07-14},
  year      = {2020},
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
\usepackage{tabularray}
|
||||
\usepackage{pgfplotstable}
|
||||
\usepackage{subfig}
|
||||
\usepackage{csquotes}
|
||||
|
||||
\usepackage{graphicx}
|
||||
% Used for displaying a sample figure. If possible, figure files should
|
||||
@@ -36,53 +37,70 @@
|
||||
|
||||
\begin{document}
|
||||
%
|
||||
\title{Contribution Title\thanks{Supported by organization x.}}
|
||||
\title{PIMSys: A Virtual Prototype for Processing in Memory}
|
||||
%
|
||||
%\titlerunning{Abbreviated paper title}
|
||||
% If the paper title is too long for the running head, you can set
|
||||
% an abbreviated paper title here
|
||||
%
|
||||
\author{%
|
||||
Derek Christ\inst{1}\orcidID{0000-1111-2222-3333} \and
|
||||
Lukas Steiner\inst{2,3}\orcidID{1111-2222-3333-4444} \and
|
||||
Matthias Jung\inst{3}\orcidID{2222--3333-4444-5555} \and
|
||||
Norbert Wehn\inst{3}\orcidID{2222--3333-4444-5555}
|
||||
Derek Christ\inst{1}\orcidID{0000-1111-2222-3333} \and
|
||||
Lukas Steiner\inst{2}\orcidID{1111-2222-3333-4444} \and
|
||||
Matthias Jung\inst{1,3}\orcidID{2222-3333-4444-5555} \and
|
||||
Norbert Wehn\inst{2}\orcidID{2222-3333-4444-5555}
|
||||
}
|
||||
%
|
||||
\authorrunning{F. Author et al.}
|
||||
\authorrunning{D. Christ et al.}
|
||||
% First names are abbreviated in the running head.
|
||||
% If there are more than two authors, 'et al.' is used.
|
||||
%
|
||||
\institute{Princeton University, Princeton NJ 08544, USA \and
|
||||
Springer Heidelberg, Tiergartenstr. 17, 69121 Heidelberg, Germany
|
||||
\email{lncs@springer.com}\\
|
||||
\url{http://www.springer.com/gp/computer-science/lncs} \and
|
||||
ABC Institute, Rupert-Karls-University Heidelberg, Heidelberg, Germany\\
|
||||
\email{\{abc,lncs\}@uni-heidelberg.de}}
|
||||
\institute{
|
||||
Fraunhofer IESE, Germany\\
|
||||
\email{\{firstname.lastname\}@iese.fraunhofer.de}\\
|
||||
\and
|
||||
RPTU Kaiserslautern-Landau, Germany\\
|
||||
\email{\{firstname.lastname\}@rptu.de}\\
|
||||
\and
|
||||
JMU Würzburg, Germany\\
|
||||
\email{m.jung@uni-wuerzburg.de}
|
||||
}
|
||||
%
|
||||
\maketitle
|
||||
%
|
||||
\begin{abstract}
|
||||
The abstract should briefly summarize the contents of the paper in
|
||||
15--250 words.
|
||||
Data-driven applications are increasingly central to our information technology society, propelled by AI techniques reshaping various sectors of our economy and society. Despite their transformative potential, these applications demand immense data processing, leading to significant energy consumption primarily in communication and data storage rather than computation. The concept of \enquote{Processing in Memory} (PIM) offers a solution by processing data within memory, reducing energy overheads associated with data transfer. PIM has been an enduring idea, with recent advancements in DRAM test chips integrating PIM functionality, indicating potential market adoption.
|
||||
|
||||
\keywords{First keyword \and Second keyword \and Another keyword.}
|
||||
This paper introduces a virtual prototype of Samsung's PIM-HBM architecture, leveraging open-source tools like gem5 and DRAMSys, along with a custom Rust software library facilitating easy utilization of PIM functionality. Key contributions include the first full-system simulation of PIM-HBM, experimental validation of the virtual platform with benchmarks, and the development of a Rust library enabling PIM functionality at the software level.
|
||||
TODO: Benchmark results
|
||||
\keywords{DRAM \and PIM \and Virtual Platforms}
|
||||
\end{abstract}
|
||||
%
|
||||
%
|
||||
%
|
||||
\section{Introduction}
|
||||
\label{sec:intro}
|
||||
% TODO Lukas/Matthias
|
||||
Contributions:
|
||||
% TODO Matthias
|
||||
Data-driven applications are increasingly becoming the focal point of our information technology society, with AI techniques fundamentally altering various sectors of our society and economy. A common characteristic of these applications is the vast amount of data they require to be captured, stored, and processed. Consequently, a significant portion of energy is consumed by communication and data storage rather than computation. As demonstrated by Jouppi et al.~\cite{jouhyu_21}, in a \qty{7}{\nano\meter} process, a 32-bit floating-point multiplication requires \qty{1.31}{\pico\joule}, whereas a 64-bit DRAM memory access demands \qty{1300}{\pico\joule}. This energy is expended in transferring data from memory through the network on chip, arbiters, and various levels of caches. Hence, it would be considerably more energy-efficient to process data where it resides, particularly within the memory itself. In other words, rather than transmitting data to computational units, the computational instructions should be sent to the memory housing the data.
|
||||
|
||||
This concept, known as \enquote{Processing in Memory} (PIM), has been around for many years. For instance, Stone already proposed it in the 1970s~\cite{sto_70}. Since then, similar to the field of artificial intelligence, this idea has experienced \enquote{summer} and \enquote{winter} periods in research over the past decades. However, recently, different companies have developed DRAM test chips with integrated PIM functionality, showing promising potential for entry into the commodity market.
|
||||
|
||||
For instance, UPMEM introduced the first publicly available real-world PIM architecture~\cite{gomhaj_21}. UPMEM integrates standard DDR4 DIMM-based DRAM with a series of PIM-enabled UPMEM DIMMs containing multiple PIM chips. Each PIM chip houses eight \enquote{DRAM Processing Units} (DPUs), each with dedicated access to a 64 MiB memory bank, a 24 KiB instruction memory, and a 64 KiB scratchpad memory. These DPUs function as multithreaded 32-bit reduced instruction set computer (RISC) cores, featuring a complete set of general-purpose registers and a 14-stage pipeline~\cite{gomhaj_21}. In 2020, SK Hynix, a leading DRAM manufacturer, unveiled its PIM technology, named Newton, utilizing Graphics Double Data Rate 6 (GDDR6) memory~\cite{heson_20}. Unlike UPMEM, Newton integrates small MAC units and buffers into the bank area to mitigate the space and power overhead of a fully programmable processor core. Following SK Hynix's lead, Samsung, another major DRAM manufacturer, announced its own PIM DRAM implementation named Function-In-Memory DRAM (FIMDRAM or PIM-HBM) one year later~\cite{leekan_21}.
|
||||
|
||||
With these new architectures on the horizon, it becomes crucial for system-level designers to assess whether these promising developments can enhance their applications. Furthermore, these emerging hardware architectures necessitate new software paradigms. It remains unclear whether libraries, compilers, or operating systems will effectively manage these new devices at the software level. Therefore, it is imperative to establish comprehensive virtual platforms for these devices, enabling real applications to be tested within a realistic architectural and software platform context.
|
||||
|
||||
This paper introduces a virtual prototype of Samsung's PIM-HBM, developed using open-source tools such as gem5~\cite{lowahm_20} and the memory simulator DRAMSys~\cite{stejun_20}. Additionally, the virtual prototype is accompanied by a custom Rust software library, simplifying the utilization of PIM functionality at the software level.
|
||||
|
||||
In summary, this paper makes the following contributions:
|
||||
\begin{itemize}
|
||||
\item First time Full System Simulation of SAMSUNG-PIM
|
||||
\item VP consisting of gem5 and DRAMSys
|
||||
\item Experimental verification of VP
|
||||
\item The first full-system simulation of PIM-HBM, using a virtual platform consisting of gem5 and DRAMSys
|
||||
\item Experimental verification of the virtual platform with benchmarks
|
||||
\item A Rust library to provide the PIM functionality up to the software level
|
||||
\end{itemize}
|
||||
|
||||
The paper is structured as follows ...
|
||||
%
|
||||
\section{Related Work}
|
||||
% TODO Derek/Lukas
|
||||
% TODO Derek
|
||||
Onur Ramulator
|
||||
|
||||
With the \textbf{PIMSimulator}~\cite{shin-haengkang2023}, Samsung provides a virtual prototype of \ac{fimdram} based on the DRAMSim2~\cite{rosenfeld2011} cycle-accurate memory simulator.
|
||||
|
||||
Reference in New Issue
Block a user