From eb05bf65073342fbbc632bc3670317f95fb83810 Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Mon, 29 Jan 2024 21:04:17 +0100 Subject: [PATCH] DRAM chapter up to DIMMs --- src/acronyms.tex | 52 +++++++++++++++++++++++++++++++++++++++++-- src/chapters/dram.tex | 45 ++++++++++++++++++++++++++----------- src/chapters/pim.tex | 26 ++++++++++++++++++++++ src/doc.bib | 8 +++++++ src/index.tex | 2 +- 5 files changed, 117 insertions(+), 16 deletions(-) diff --git a/src/acronyms.tex b/src/acronyms.tex index 75f5ff4..b4996c8 100644 --- a/src/acronyms.tex +++ b/src/acronyms.tex @@ -46,6 +46,10 @@ short = PIM, long = processing-in-memory, } +\DeclareAcronym{1t1c}{ + short = 1T1C, + long = {one-transistor, one-capacitor}, +} \DeclareAcronym{subarray}{ short = SA, long = subarray, @@ -55,7 +59,7 @@ long = local wordline, } \DeclareAcronym{lbl}{ - short = LWL, + short = LBL, long = local bitline, } \DeclareAcronym{mwl}{ @@ -63,7 +67,7 @@ long = master wordline, } \DeclareAcronym{mbl}{ - short = MWL, + short = MBL, long = master bitline, } \DeclareAcronym{psa}{ @@ -78,6 +82,50 @@ short = CSL, long = column select line, } +\DeclareAcronym{bl}{ + short = BL, + long = burst length, +} +\DeclareAcronym{we}{ + short = WE, + long = write enable, +} +\DeclareAcronym{io}{ + short = I/O, + long = input/output, +} +\DeclareAcronym{ddr}{ + short = DDR, + long = double data rate, +} +\DeclareAcronym{dimm}{ + short = DIMM, + long = dual in-line memory module, +} +\DeclareAcronym{pcb}{ + short = PCB, + long = printed circuit board, +} +\DeclareAcronym{mpsoc}{ + short = MPSoC, + long = Multiprocessor System on Chip, +} +\DeclareAcronym{act}{ + short = ACT, + long = activate, +} +\DeclareAcronym{pre}{ + short = PRE, + long = precharge, +} +\DeclareAcronym{rd}{ + short = RD, + long = read, +} +\DeclareAcronym{wr}{ + short = WR, + long = write, +} \DeclareAcronym{tlm}{ short = TLM, long = transaction level modeling, diff --git a/src/chapters/dram.tex b/src/chapters/dram.tex index 494eeb0..79f27f9 
100644 --- a/src/chapters/dram.tex +++ b/src/chapters/dram.tex @@ -7,18 +7,19 @@ In particular, the architecture of \ac{hbm} will be discussed, since it is the \ \subsection{DRAM Basics} \label{sec:dram_basics} -A \ac{dram} is a special type of \ac{ram} that uses a single transistor-capacitor pair as a memory cell to encode exactly one bit \cite{jacob2008}. -Since a capacitor holds electrical charge, it is a volatile type of storage and the bit value it represents eventually vanishes over time as the stored charge is leaked. +A \ac{dram} is a special type of \ac{ram} that uses a \ac{1t1c} cell as a memory cell to store a single bit of data \cite{jacob2008}. +Because a capacitor holds electrical charge, it is a volatile form of storage, and the bit value it represents will eventually vanish over time as the stored charge is leaked. To circumvent this, regular \textit{refresh} operations are required, involving reading and rewriting the stored value, making this storage method \textit{dynamic}. -A typical \ac{dram} device consists of several banks, which are themselves composed of a set of \textit{memory arrays}, which in turn are composed of multiple \acp{subarray}. -Banks operate independently of each other, while the memory arrays of each bank operate in lockstep mode to form the per-device data word, with the number of data bits equal to the number of memory arrays per bank. -The \acp{subarray} are grid-like structures composed of \acp{lwl} and \acp{lbl}, with a storage cell at each intersection point. +A typical \ac{dram} device consists of several \textit{banks}, which are themselves composed of a set of \textit{memory arrays}. +The banks can be controlled independently of each other, while the memory arrays of each bank operate in lockstep mode to form the per-device data word, with the number of data bits equal to the number of memory arrays per bank. +Memory arrays, in turn, are composed of multiple \acp{subarray}. 
+\Acp{subarray} are grid-like structures composed of \acp{lwl} and \acp{lbl}, with a storage cell at each intersection point. The \ac{lwl} is connected to the transistor's gate, switching it on and off, while the \ac{lbl} is used to access the stored value. Global \acp{mwl} and \acp{mbl} span over all \acp{subarray}, forming complete \textit{rows} and \textit{columns} of a memory array. -Because the charge stored in each cell is very small, so-called \acp{psa} are needed to amplify the stored voltage of each cell while it is being connected to the shared \ac{lbl} \cite{jacob2008}, illustrated in figure \ref{img:psa}. +Because the charge stored in each cell is very small, so-called \acp{psa} are needed to amplify the voltage of each cell while it is being connected to the shared \ac{lbl} \cite{jacob2008}, the basic structure of which is illustrated in Figure~\ref{img:psa}. -\begin{figure}[!ht] +\begin{figure} \centering \includegraphics{images/psa} \caption[\ac{psa} of an open bitline architecture]{\ac{psa} of an open bitline architecture \cite{jacob2008} \cite{jung2017a}} @@ -26,18 +27,36 @@ Because the charge stored in each cell is very small, so-called \acp{psa} are ne \end{figure} However, before a value can be read, the \ac{psa} needs to \textit{precharge} its bitline to a halfway voltage $\frac{V_{DD}}{2}$ between 0 and $V_{DD}$. -When the capacitor is then connected to the bitline, it pushes the voltage level marginally in one direction, enough for the \ac{psa} to detect the voltage difference to an adjacent bitline in another \ac{subarray} and amplifies the voltage level all the way to high or low. +When the selected wordline is then activated, the charge from the capacitor flows to the bitline and pushes the voltage level slightly in one direction. +The \ac{psa} compares the changed voltage level with an adjacent bitline in another \ac{subarray} and amplifies that difference all the way to a high or low level.
-The process of loading the stored value into the \ac{psa} is done for all columns at the same time and is called \textit{row activation}. -Once a row is activated, it is referred to as \textit{open} and following from a -% \ac{csl} +The process of loading the stored values into the \acp{psa} is done for all columns of a row at once and is called \textit{row activation}. +Once a row is activated, it can be read from or written to with a certain access granularity determined by the \ac{bl} of the memory. +To perform such a burst access, the \acp{csl} of a set of \acp{psa} must be enabled, connecting them to the more powerful \acp{ssa} that drive the actual bank \ac{io}. +Depending on the \ac{we} signal, the \acp{ssa} either sense and amplify the logic value of the \acp{psa}, or they overwrite it using the \textit{write drivers}. +Figure~\ref{img:bank} summarizes the basic architecture of a single storage device consisting of a number of banks, as discussed so far. -\begin{figure}[!ht] +\begin{figure} \centering \includegraphics{images/bank} - \caption[]{\cite{jung2017a}} + \caption[Architecture of a single DRAM device]{Architecture of a single DRAM device \cite{jung2017a}} \label{img:bank} \end{figure} +Since a single \ac{dram} device has only a small data width, for example 8~bits in the case of an x8 \ac{dram}, several devices operate in lockstep mode to form the wider \textit{data bus} of the \textit{memory channel} \cite{jung2017a}. +One kind of \ac{dram} subsystem places these sets of devices on a special \ac{pcb} called a \ac{dimm}. +A \ac{dimm} may also consist of several independent \textit{ranks}, which are complete sets of \ac{dram} devices connected to the same data bus, but accessed in an interleaved manner. + +Besides the data bus, the channel also consists of the \textit{command bus} and the \textit{address bus}.
+Over the command bus, the commands necessary to control the memory are issued by the \textit{memory controller}, which sits between the \ac{dram} and the \ac{mpsoc}. +For example, to read data, the memory controller may first issue a \ac{pre} command to precharge the bitlines in a certain bank, followed by an \ac{act} command to load the contents of a row into the \acp{psa}, and finally a \ac{rd} command to move the data from the \acp{psa} to the \acp{ssa} where it can be exposed to the data bus. +The row, column, bank and rank in question are determined by the address bus. + +% gibt dimms oder auch gddr +% ODER auch hbm -> überleitung zu hbm + \subsection{High Bandwidth Memory} \label{sec:hbm} + +% similar to ranks, pch ... + diff --git a/src/chapters/pim.tex b/src/chapters/pim.tex index 2b84ea3..d3e237a 100644 --- a/src/chapters/pim.tex +++ b/src/chapters/pim.tex @@ -1,2 +1,28 @@ \section{Processing-in-Memory} \label{sec:pim} + +Allgemeiner overview hier... +wird seit 70ern diskutiert... +durch DNNs neuer Aufwind... + +\subsection{Applicable Problems} +\label{sec:pim_problems} + +hier matrixoperationen für dnns beschreiben +memory-boundness +BLAS kernel und so weiter... + +\subsection{PIM Architectures} +\label{sec:pim_architectures} + +kurzer overview über die kategorien von PIM (paper vom lehrstuhl) + +\subsection{UPMEM} +\label{sec:pim_upmem} + +\subsection{Newton AiM} +\label{sec:pim_newton} + +\subsection{FIMDRAM/HBM-PIM} +\label{sec:pim_fim} +unterschiede zu hynix pim diff --git a/src/doc.bib b/src/doc.bib index 6cdd174..996a8ba 100644 --- a/src/doc.bib +++ b/src/doc.bib @@ -142,6 +142,14 @@ file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/BNREUV34/Jacob et al.
- 2008 - Memory systems Cache, DRAM, Disk.pdf} } +@misc{jedec2021b, + title = {{{DDR5 SDRAM}}}, + author = {{JEDEC}}, + year = {2021}, + month = oct, + file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/JKBKSL9D/JESD79-5A_DDR5.pdf} +} + @inproceedings{jouppi2017, title = {In-{{Datacenter Performance Analysis}} of a {{Tensor Processing Unit}}}, booktitle = {Proceedings of the 44th {{Annual International Symposium}} on {{Computer Architecture}}}, diff --git a/src/index.tex b/src/index.tex index d903fb0..a6493a8 100644 --- a/src/index.tex +++ b/src/index.tex @@ -13,7 +13,7 @@ \usepackage{fancyhdr} \usepackage{subfig} \usepackage{url} -\usepackage{hyperref} +\usepackage[hidelinks]{hyperref} \usepackage{acro} \usepackage{lipsum} \usepackage{siunitx}