Start of PIM chapter
@@ -151,6 +151,18 @@
short = pCH,
long = pseudo channel,
}
\DeclareAcronym{blas}{
short = BLAS,
long = Basic Linear Algebra Subprograms,
}
\DeclareAcronym{gemv}{
short = GEMV,
long = general matrix-vector multiplication,
}
\DeclareAcronym{gemm}{
short = GEMM,
long = general matrix-matrix multiplication,
}
\DeclareAcronym{tlm}{
short = TLM,
long = transaction-level modeling,
}

@@ -21,7 +21,7 @@ Because the charge stored in each cell is very small, so-called \acp{psa} are ne

\begin{figure}
\centering
-\includegraphics{images/psa}
+\includegraphics[width=\linewidth]{images/psa}
\caption[\ac{psa} of an open bitline architecture]{\ac{psa} of an open bitline architecture \cite{jacob2008,jung2017a}.}
\label{img:psa}
\end{figure}
@@ -38,7 +38,7 @@ Figure \ref{img:bank} summarizes the basic architecture of a single storage

\begin{figure}
\centering
-\includegraphics{images/bank}
+\includegraphics[width=\linewidth]{images/bank}
\caption[Architecture of a single DRAM device]{Architecture of a single DRAM device \cite{jung2017a}.}
\label{img:bank}
\end{figure}
@@ -102,7 +102,7 @@ What differentiates \ac{hbm} from other types of memory is its \ac{sip} approach
Several \ac{dram} dies are stacked on top of each other and connected with \acp{tsv} to form a cube of memory dies consisting of many layers and a buffer die at the bottom, as shown in Figure \ref{img:sip}.
\begin{figure}
\centering
-\includegraphics[width=0.7\linewidth]{images/sip}
+\includegraphics[width=0.8\linewidth]{images/sip}
\caption[Cross-section view of an \ac{hbm} \ac{sip}]{Cross-section view of an \ac{hbm} \ac{sip} \cite{lee2021}.}
\label{img:sip}
\end{figure}
@@ -123,7 +123,7 @@ In the center of the die, the \acp{tsv} connect to the next die above or the pre

\begin{figure}
\centering
-\includegraphics[width=0.7\linewidth]{images/hbm}
+\includegraphics[width=0.8\linewidth]{images/hbm}
\caption[\aca{hbm} memory die architecture]{\aca{hbm} memory die architecture \cite{lee2021}.}
\label{img:hbm}
\end{figure}

@@ -5,9 +5,83 @@
% discussed since the 1970s...
% renewed momentum through DNNs...

-\subsection{Applicable Problems}
-\label{sec:pim_problems}
+\subsection{Applicable Workloads}
+\label{sec:pim_workloads}

As already discussed in Section \ref{sec:introduction}, \ac{pim} is a good fit for accelerating memory-bound workloads.
In contrast, compute-bound workloads tend to have high data reuse and can make extensive use of the on-chip caches, and therefore do not need the full memory bandwidth.
For such workloads, \ac{pim} is of only limited use.

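This distinction can be made concrete with the arithmetic intensity, the ratio of arithmetic operations to data elements transferred (a standard roofline-style argument, sketched here as an illustration rather than taken from a specific platform). A matrix-vector multiplication with an $m \times n$ matrix performs about $2mn$ operations while moving roughly $mn + m + n$ elements, so its intensity is bounded by a small constant and the kernel remains memory bound at any size:

\begin{equation*}
I = \frac{\text{arithmetic operations}}{\text{elements transferred}} \approx \frac{2mn}{mn + m + n} < 2
\end{equation*}

A matrix-matrix multiplication, by contrast, performs $2n^3$ operations on only $3n^2$ matrix elements, so with blocking its intensity grows with $n$, which is why it is typically compute bound.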
Many layers of modern \acp{dnn} can be expressed as a matrix-vector multiplication.
|
||||
The layer inputs can be represented as a vector and the model weights can be viewed as a matrix, where the number of columns is equal to the size of the input vector and the number of rows is equal to the size of the output vector.
|
||||
Pairwise multiplication of the input vector and a row of the matrix can be used to calculate an entry of the output vector.
|
||||
This process is illustrated for in Figure \ref{img:dnn}.
|
||||
|
||||
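In the notation of Figure \ref{img:dnn} (a $5 \times 4$ weight matrix, i.e. four inputs and five outputs), each output entry is one such dot product:

\begin{equation*}
o_j = \sum_{k=0}^{3} w_{j,k} \, i_k , \qquad j = 0, \dots, 4
\end{equation*}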
\begin{figure}
\centering
\begin{tikzpicture}
\node[circle,thick,draw=red!60,fill=blue!20,minimum size=5mm,anchor=center] (inode0) at (0,0) {$i_0$};
\node[circle,thick,draw=red!60,fill=blue!30,minimum size=5mm] (inode1) [below of=inode0] {$i_1$};
\node[circle,thick,draw=red!60,fill=blue!40,minimum size=5mm] (inode2) [below of=inode1] {$i_2$};
\node[circle,thick,draw=red!60,fill=blue!50,minimum size=5mm] (inode3) [below of=inode2] {$i_3$};

\node[circle,draw=black,fill=ForestGreen!20,minimum size=5mm,anchor=center] (onode0) at (2cm,0.5cm) {$o_0$};
\node[circle,thick,draw=red!60,fill=ForestGreen!30,minimum size=5mm] (onode1) [below of=onode0] {$o_1$};
\node[circle,draw=black,fill=ForestGreen!40,minimum size=5mm] (onode2) [below of=onode1] {$o_2$};
\node[circle,draw=black,fill=ForestGreen!50,minimum size=5mm] (onode3) [below of=onode2] {$o_3$};
\node[circle,draw=black,fill=ForestGreen!60,minimum size=5mm] (onode4) [below of=onode3] {$o_4$};

\draw (inode0.east) to (onode0.west);
\draw (inode1.east) to (onode0.west);
\draw (inode2.east) to (onode0.west);
\draw (inode3.east) to (onode0.west);

\draw (inode0.east) to (onode2.west);
\draw (inode1.east) to (onode2.west);
\draw (inode2.east) to (onode2.west);
\draw (inode3.east) to (onode2.west);

\draw (inode0.east) to (onode3.west);
\draw (inode1.east) to (onode3.west);
\draw (inode2.east) to (onode3.west);
\draw (inode3.east) to (onode3.west);

\draw (inode0.east) to (onode4.west);
\draw (inode1.east) to (onode4.west);
\draw (inode2.east) to (onode4.west);
\draw (inode3.east) to (onode4.west);

\draw[red!60,thick] (inode0.east) to (onode1.west);
\draw[red!60,thick] (inode1.east) to (onode1.west);
\draw[red!60,thick] (inode2.east) to (onode1.west);
\draw[red!60,thick] (inode3.east) to (onode1.west);

\matrix (matrix) [matrix of nodes,left delimiter=(,right delimiter=),right of=onode2,node distance=4cm] {
$w_{0,0}$ & $w_{0,1}$ & $w_{0,2}$ & $w_{0,3}$ \\
$w_{1,0}$ & $w_{1,1}$ & $w_{1,2}$ & $w_{1,3}$ \\
$w_{2,0}$ & $w_{2,1}$ & $w_{2,2}$ & $w_{2,3}$ \\
$w_{3,0}$ & $w_{3,1}$ & $w_{3,2}$ & $w_{3,3}$ \\
$w_{4,0}$ & $w_{4,1}$ & $w_{4,2}$ & $w_{4,3}$ \\
};

\node[draw,thick,red!60,rounded corners,inner sep=0,fit=(matrix-2-1) (matrix-2-4)] {};

\node (prod) [right of=matrix,node distance=2.6cm] {$*$};

\matrix (vector) [matrix of nodes,left delimiter=(,right delimiter=),right of=prod] {
$i_{0}$ \\
$i_{1}$ \\
$i_{2}$ \\
$i_{3}$ \\
};

\node (eq) [right of=vector,node distance=1.2cm] {$=$};
\end{tikzpicture}
\caption[Matrix-vector view of a \ac{dnn} layer]{A fully connected \ac{dnn} layer interpreted as a matrix-vector multiplication; the highlighted matrix row produces output $o_1$ \cite{he2020}.}
\label{img:dnn}
\end{figure}

Such an operation, defined in the widely used \ac{blas} library \cite{blas1979}, is also known as a \acs{gemv} routine.
% describe the matrix operations for DNNs here
% memory-boundedness
% BLAS kernels and so on...

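For reference, the level-2 \acs{gemv} routine in \ac{blas} computes the slightly more general scaled form below; evaluating a layer as above corresponds to the special case $\alpha = 1$ and $\beta = 0$:

\begin{equation*}
y \leftarrow \alpha A x + \beta y
\end{equation*}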
Binary file not shown.
@@ -23,6 +23,8 @@
\usepackage{bytefield}

% Configurations
+\usetikzlibrary{matrix}
+\usetikzlibrary{fit}
\setlength\textheight{24cm}
\setkomafont{paragraph}{\footnotesize}
\numberwithin{table}{section}
