DRAM chapter up to DIMMs
This commit is contained in:
@@ -46,6 +46,10 @@
|
|||||||
short = PIM,
|
short = PIM,
|
||||||
long = processing-in-memory,
|
long = processing-in-memory,
|
||||||
}
|
}
|
||||||
|
\DeclareAcronym{1t1c}{
|
||||||
|
short = 1T1C,
|
||||||
|
long = {one-transistor, one-capacitor},
|
||||||
|
}
|
||||||
\DeclareAcronym{subarray}{
|
\DeclareAcronym{subarray}{
|
||||||
short = SA,
|
short = SA,
|
||||||
long = subarray,
|
long = subarray,
|
||||||
@@ -55,7 +59,7 @@
|
|||||||
long = local wordline,
|
long = local wordline,
|
||||||
}
|
}
|
||||||
\DeclareAcronym{lbl}{
|
\DeclareAcronym{lbl}{
|
||||||
short = LWL,
|
short = LBL,
|
||||||
long = local bitline,
|
long = local bitline,
|
||||||
}
|
}
|
||||||
\DeclareAcronym{mwl}{
|
\DeclareAcronym{mwl}{
|
||||||
@@ -63,7 +67,7 @@
|
|||||||
long = master wordline,
|
long = master wordline,
|
||||||
}
|
}
|
||||||
\DeclareAcronym{mbl}{
|
\DeclareAcronym{mbl}{
|
||||||
short = MWL,
|
short = MBL,
|
||||||
long = master bitline,
|
long = master bitline,
|
||||||
}
|
}
|
||||||
\DeclareAcronym{psa}{
|
\DeclareAcronym{psa}{
|
||||||
@@ -78,6 +82,50 @@
|
|||||||
short = CSL,
|
short = CSL,
|
||||||
long = column select line,
|
long = column select line,
|
||||||
}
|
}
|
||||||
|
\DeclareAcronym{bl}{
|
||||||
|
short = BL,
|
||||||
|
long = burst length,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{we}{
|
||||||
|
short = WE,
|
||||||
|
long = write enable,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{io}{
|
||||||
|
short = I/O,
|
||||||
|
long = input/output,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{ddr}{
|
||||||
|
short = DDR,
|
||||||
|
long = double data rate,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{dimm}{
|
||||||
|
short = DIMM,
|
||||||
|
long = dual in-line memory module,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{pcb}{
|
||||||
|
short = PCB,
|
||||||
|
long = printed circuit board,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{mpsoc}{
|
||||||
|
short = MPSoC,
|
||||||
|
long = multiprocessor system-on-chip,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{act}{
|
||||||
|
short = ACT,
|
||||||
|
long = activate,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{pre}{
|
||||||
|
short = PRE,
|
||||||
|
long = precharge,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{rd}{
|
||||||
|
short = RD,
|
||||||
|
long = read,
|
||||||
|
}
|
||||||
|
\DeclareAcronym{wr}{
|
||||||
|
short = WR,
|
||||||
|
long = write,
|
||||||
|
}
|
||||||
\DeclareAcronym{tlm}{
|
\DeclareAcronym{tlm}{
|
||||||
short = TLM,
|
short = TLM,
|
||||||
long = transaction level modeling,
|
long = transaction level modeling,
|
||||||
|
|||||||
@@ -7,18 +7,19 @@ In particular, the architecture of \ac{hbm} will be discussed, since it is the \
|
|||||||
\subsection{DRAM Basics}
|
\subsection{DRAM Basics}
|
||||||
\label{sec:dram_basics}
|
\label{sec:dram_basics}
|
||||||
|
|
||||||
A \ac{dram} is a special type of \ac{ram} that uses a single transistor-capacitor pair as a memory cell to encode exactly one bit \cite{jacob2008}.
|
A \ac{dram} is a special type of \ac{ram} that uses a \ac{1t1c} cell as a memory cell to store a single bit of data \cite{jacob2008}.
|
||||||
Since a capacitor holds electrical charge, it is a volatile type of storage and the bit value it represents eventually vanishes over time as the stored charge is leaked.
|
Because a capacitor holds electrical charge, it is a volatile form of storage, and the bit value it represents will eventually vanish over time as the stored charge is leaked.
|
||||||
To circumvent this, regular \textit{refresh} operations are required, involving reading and rewriting the stored value, making this storage method \textit{dynamic}.
|
To circumvent this, regular \textit{refresh} operations are required, involving reading and rewriting the stored value, making this storage method \textit{dynamic}.
|
||||||
A typical \ac{dram} device consists of several banks, which are themselves composed of a set of \textit{memory arrays}, which in turn are composed of multiple \acp{subarray}.
|
A typical \ac{dram} device consists of several \textit{banks}, which are themselves composed of a set of \textit{memory arrays}.
|
||||||
Banks operate independently of each other, while the memory arrays of each bank operate in lockstep mode to form the per-device data word, with the number of data bits equal to the number of memory arrays per bank.
|
The banks can be controlled independently of each other, while the memory arrays of each bank operate in lockstep mode to form the per-device data word, with the number of data bits equal to the number of memory arrays per bank.
|
||||||
The \acp{subarray} are grid-like structures composed of \acp{lwl} and \acp{lbl}, with a storage cell at each intersection point.
|
Memory arrays, in turn, are composed of multiple \acp{subarray}.
|
||||||
|
\Acp{subarray} are grid-like structures composed of \acp{lwl} and \acp{lbl}, with a storage cell at each intersection point.
|
||||||
The \ac{lwl} is connected to the transistor's gate, switching it on and off, while the \ac{lbl} is used to access the stored value.
|
The \ac{lwl} is connected to the transistor's gate, switching it on and off, while the \ac{lbl} is used to access the stored value.
|
||||||
Global \acp{mwl} and \acp{mbl} span over all \acp{subarray}, forming complete \textit{rows} and \textit{columns} of a memory array.
|
Global \acp{mwl} and \acp{mbl} span over all \acp{subarray}, forming complete \textit{rows} and \textit{columns} of a memory array.
|
||||||
|
|
||||||
Because the charge stored in each cell is very small, so-called \acp{psa} are needed to amplify the stored voltage of each cell while it is being connected to the shared \ac{lbl} \cite{jacob2008}, illustrated in figure \ref{img:psa}.
|
Because the charge stored in each cell is very small, so-called \acp{psa} are needed to amplify the voltage of each cell while it is being connected to the shared \ac{lbl} \cite{jacob2008}; their basic structure is illustrated in Figure~\ref{img:psa}.
|
||||||
|
|
||||||
\begin{figure}[!ht]
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includegraphics{images/psa}
|
\includegraphics{images/psa}
|
||||||
\caption[\ac{psa} of an open bitline architecture]{\ac{psa} of an open bitline architecture \cite{jacob2008} \cite{jung2017a}}
|
\caption[\ac{psa} of an open bitline architecture]{\ac{psa} of an open bitline architecture \cite{jacob2008} \cite{jung2017a}}
|
||||||
@@ -26,18 +27,36 @@ Because the charge stored in each cell is very small, so-called \acp{psa} are ne
|
|||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
However, before a value can be read, the \ac{psa} needs to \textit{precharge} its bitline to a halfway voltage $\frac{V_{DD}}{2}$ between 0 and $V_{DD}$.
|
However, before a value can be read, the \ac{psa} needs to \textit{precharge} its bitline to a halfway voltage $\frac{V_{DD}}{2}$ between 0 and $V_{DD}$.
|
||||||
When the capacitor is then connected to the bitline, it pushes the voltage level marginally in one direction, enough for the \ac{psa} to detect the voltage difference to an adjacent bitline in another \ac{subarray} and amplifies the voltage level all the way to high or low.
|
When the selected wordline is then activated, the charge from the capacitor flows to the bitline and pushes the voltage level slightly in one direction.
|
||||||
|
The \ac{psa} compares the changed voltage level with an adjacent bitline in another \ac{subarray} and amplifies that difference all the way to a high or low level.
|
||||||
|
|
||||||
The process of loading the stored value into the \ac{psa} is done for all columns at the same time and is called \textit{row activation}.
|
The process of loading the stored values into the \acp{psa} is done for all columns of a row at once and is called \textit{row activation}.
|
||||||
Once a row is activated, it is referred to as \textit{open} and following from a
|
Once a row is activated, it can be read from or written to with a certain access granularity determined by the \ac{bl} of the memory.
|
||||||
% \ac{csl}
|
To perform such a burst access, the \acp{csl} of a set of \acp{psa} must be enabled, connecting them to the more powerful \acp{ssa} that drive the actual bank \ac{io}.
|
||||||
|
Depending on the \ac{we} signal, the \acp{ssa} either sense and amplify the logic value of the \acp{psa}, or they overwrite it using the \textit{write drivers}.
|
||||||
|
Figure~\ref{img:bank} summarizes the basic architecture, as discussed so far, of a single storage device consisting of a number of banks.
|
||||||
|
|
||||||
\begin{figure}[!ht]
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includegraphics{images/bank}
|
\includegraphics{images/bank}
|
||||||
\caption[]{\cite{jung2017a}}
|
\caption[Architecture of a single DRAM device]{Architecture of a single DRAM device \cite{jung2017a}}
|
||||||
\label{img:bank}
|
\label{img:bank}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
Since a single \ac{dram} device has only a small data width, for example 8~bits in the case of an x8 \ac{dram}, several devices operate in lockstep mode to form the wider \textit{data bus} of the \textit{memory channel} \cite{jung2017a}.
|
||||||
|
One kind of \ac{dram} subsystem places these sets of devices on a special \ac{pcb} called a \ac{dimm}.
|
||||||
|
A \ac{dimm} may also consist of several independent \textit{ranks}, which are complete sets of \ac{dram} devices connected to the same data bus, but accessed in an interleaved manner.
|
||||||
|
|
||||||
|
Besides the data bus, the channel also consists of the \textit{command bus} and the \textit{address bus}.
|
||||||
|
Over the command bus, the commands necessary to control the memory are issued by the \textit{memory controller}, which sits between the \ac{dram} and the \ac{mpsoc}.
|
||||||
|
For example, to read data, the memory controller may first issue a \ac{pre} command to precharge the bitlines in a certain bank, followed by an \ac{act} command to load the contents of a row into the \acp{psa}, and finally a \ac{rd} command to move the data from the \acp{psa} to the \acp{ssa} where it can be exposed to the data bus.
|
||||||
|
The row, column, bank and rank in question are determined by the address bus.
|
||||||
|
|
||||||
|
% gibt dimms oder auch gddr
|
||||||
|
% ODER auch hbm -> überleitung zu hbm
|
||||||
|
|
||||||
\subsection{High Bandwidth Memory}
|
\subsection{High Bandwidth Memory}
|
||||||
\label{sec:hbm}
|
\label{sec:hbm}
|
||||||
|
|
||||||
|
% similar to ranks, pch ...
|
||||||
|
|
||||||
|
|||||||
@@ -1,2 +1,28 @@
|
|||||||
\section{Processing-in-Memory}
|
\section{Processing-in-Memory}
|
||||||
\label{sec:pim}
|
\label{sec:pim}
|
||||||
|
|
||||||
|
Allgemeiner overview hier...
|
||||||
|
wird seit 70ern diskutiert...
|
||||||
|
durch DNNs neuer Aufwind...
|
||||||
|
|
||||||
|
\subsection{Applicable Problems}
|
||||||
|
\label{sec:pim_problems}
|
||||||
|
|
||||||
|
hier matrixoperationen für dnns beschreiben
|
||||||
|
memory-boundness
|
||||||
|
BLAS kernel und so weiter...
|
||||||
|
|
||||||
|
\subsection{PIM Architectures}
|
||||||
|
\label{sec:pim_architectures}
|
||||||
|
|
||||||
|
kurzer overview über die kategorien von PIM (paper vom lehrstuhl)
|
||||||
|
|
||||||
|
\subsection{UPMEM}
|
||||||
|
\label{sec:pim_upmem}
|
||||||
|
|
||||||
|
\subsection{Newton AiM}
|
||||||
|
\label{sec:pim_newton}
|
||||||
|
|
||||||
|
\subsection{FIMDRAM/HBM-PIM}
|
||||||
|
\label{sec:pim_fim}
|
||||||
|
unterschiede zu hynix pim
|
||||||
|
|||||||
@@ -142,6 +142,14 @@
|
|||||||
file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/BNREUV34/Jacob et al. - 2008 - Memory systems Cache, DRAM, Disk.pdf}
|
file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/BNREUV34/Jacob et al. - 2008 - Memory systems Cache, DRAM, Disk.pdf}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@misc{jedec2021b,
|
||||||
|
title = {{{DDR5 SDRAM}}},
|
||||||
|
author = {{JEDEC}},
|
||||||
|
year = {2021},
|
||||||
|
month = oct,
|
||||||
|
file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/JKBKSL9D/JESD79-5A_DDR5.pdf}
|
||||||
|
}
|
||||||
|
|
||||||
@inproceedings{jouppi2017,
|
@inproceedings{jouppi2017,
|
||||||
title = {In-{{Datacenter Performance Analysis}} of a {{Tensor Processing Unit}}},
|
title = {In-{{Datacenter Performance Analysis}} of a {{Tensor Processing Unit}}},
|
||||||
booktitle = {Proceedings of the 44th {{Annual International Symposium}} on {{Computer Architecture}}},
|
booktitle = {Proceedings of the 44th {{Annual International Symposium}} on {{Computer Architecture}}},
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
\usepackage{fancyhdr}
|
\usepackage{fancyhdr}
|
||||||
\usepackage{subfig}
|
\usepackage{subfig}
|
||||||
\usepackage{url}
|
\usepackage{url}
|
||||||
\usepackage{hyperref}
|
\usepackage[hidelinks]{hyperref}
|
||||||
\usepackage{acro}
|
\usepackage{acro}
|
||||||
\usepackage{lipsum}
|
\usepackage{lipsum}
|
||||||
\usepackage{siunitx}
|
\usepackage{siunitx}
|
||||||
|
|||||||
Reference in New Issue
Block a user