Do some fine tuning
This commit is contained in:
95
main.tex
95
main.tex
@@ -17,6 +17,7 @@
|
||||
\addbibresource{references.bib}
|
||||
|
||||
\setbeamerfont{footnote}{size=\tiny}
|
||||
\setbeamercolor{alerted text}{fg=uniwuered}
|
||||
|
||||
\newdate{presentationday}{01}{10}{2024}
|
||||
|
||||
@@ -52,47 +53,24 @@
|
||||
|
||||
% \section{Introduction}
|
||||
|
||||
\begin{frame}{Energy Demand of Applications}
|
||||
Total compute energy approaches world’s energy production\autocite{src2021}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.6\textwidth]{images/world_energy}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\note[itemize]{
|
||||
\item compute 2x every two years
|
||||
\item energy production 2\% per year
|
||||
\item to meet future compute demands, drastic improvements in energy efficiency
|
||||
}
|
||||
|
||||
\begin{frame}{Memory Bound Workloads}
|
||||
AI applications become increasingly memory-bound\autocite{ivobolsens2023}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.5\textwidth]{images/gpt}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\note[itemize]{
|
||||
\item Emerging AI applications become increasingly memory-bound
|
||||
\item Roofline model
|
||||
\item Not limited by compute power but by memory
|
||||
\item researchers begin to consider PIM to circumvent memory bottleneck
|
||||
\item (drastically more parameters in GPT-3, operational intensity goes down)
|
||||
}
|
||||
|
||||
% \section{Processing-in-Memory}
|
||||
|
||||
\begin{frame}{Workloads for PIM}
|
||||
Fully connected neural network layers:
|
||||
\begin{itemize}
|
||||
\item Large weight matrix -\alert{does not fit onto cache}
|
||||
\item No data reuse - \alert{cache is useless}
|
||||
\item Large weight matrix - \alert{does not fit onto cache}
|
||||
\item No data reuse - \alert{cache is almost useless}
|
||||
\end{itemize}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.6\textwidth]{images/dnn}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\note[itemize]{
|
||||
\item Let's start by having a look at which workloads can be accelerated by PIM...
|
||||
\item memory-bound: each entry only used once
|
||||
}
|
||||
|
||||
\begin{frame}{Workloads for PIM}
|
||||
Convolutional layers:
|
||||
\begin{itemize}
|
||||
@@ -106,10 +84,17 @@
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\note[itemize]{
|
||||
\item compute-bound: elements of filter matrix used often
|
||||
}
|
||||
|
||||
\begin{frame}{Workloads for PIM}
|
||||
\begin{columns}[T]
|
||||
\begin{column}{0.5\textwidth}
|
||||
\begin{center} \includesvg[height=50px]{images/thumbs-up} \end{center}
|
||||
\begin{center}
|
||||
\includesvg[height=50px]{images/thumbs-up}
|
||||
\end{center}
|
||||
\begin{center}\alert{(memory-bound)}\end{center}
|
||||
\begin{itemize}
|
||||
\item Fully connected layers in multilayer perceptrons (MLPs)
|
||||
\item Layers in recurrent neural networks (RNNs)
|
||||
@@ -117,6 +102,7 @@
|
||||
\end{column}
|
||||
\begin{column}{0.5\textwidth}
|
||||
\begin{center} \includesvg[height=50px]{images/thumbs-unsure} \end{center}
|
||||
\begin{center}\alert{(compute-bound)}\end{center}
|
||||
\begin{itemize}
|
||||
\item Convolutional neural networks (CNNs)
|
||||
\end{itemize}
|
||||
@@ -133,7 +119,7 @@
|
||||
\begin{column}{0.4\textwidth}
|
||||
\begin{itemize}
|
||||
\item<2-> Inside the memory subarray
|
||||
\item<3-> Near the subarray in the PSA output region
|
||||
\item<3-> Near the subarray in the PSA region
|
||||
\item<4-> Near the bank in its peripheral region
|
||||
\item<5-> In the I/O region of the memory
|
||||
\end{itemize}
|
||||
@@ -152,7 +138,7 @@
|
||||
\note[itemize]{
|
||||
\item Architecture space of PIM:
|
||||
\item Inside the memory SA - simple bulk logic
|
||||
\item Near SA in PSA output region - logic gates in the region
|
||||
\item Near SA in PSA region - logic gates in the region
|
||||
\item Near a bank in its peripheral region - computation units with control
|
||||
\item I/O region of memory - limited by memory bus
|
||||
}
|
||||
@@ -162,7 +148,7 @@
|
||||
\begin{frame}{Samsung HBM-PIM/FIMDRAM\autocite{lee2021}}
|
||||
\begin{itemize}
|
||||
\item Real-world PIM implementation based on HBM2
|
||||
\item PIM units embedded at the bank level
|
||||
\item PIM units embedded at the bank level - in parallel
|
||||
\end{itemize}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.8\textwidth]{images/hbm-pim}
|
||||
@@ -314,9 +300,11 @@
|
||||
\begin{column}{0.5\textwidth}
|
||||
\begin{itemize}
|
||||
\item Vector benchmarks (BLAS level 1)
|
||||
\item VADD: $z = x + y$
|
||||
\item VMUL: $z = x \cdot y$
|
||||
\item HAXPY: $z = a \cdot x + y$
|
||||
\begin{itemize}
|
||||
\item VADD: $z = x + y$
|
||||
\item VMUL: $z = x \cdot y$
|
||||
\item HAXPY: $z = a \cdot x + y$
|
||||
\end{itemize}
|
||||
|
||||
\item Vector-Matrix benchmarks (BLAS level 2)
|
||||
\begin{itemize}
|
||||
@@ -397,7 +385,7 @@
|
||||
\item GEMV: 9.0x
|
||||
}
|
||||
|
||||
\begin{frame}{Speedups / Samsung}
|
||||
\begin{frame}{Speedups / Samsung\autocite{lee2021}}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.8\textwidth]{images/samsung.svg}
|
||||
\end{figure}
|
||||
@@ -429,12 +417,13 @@
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Conclusion and Future Work}
|
||||
\textbf{Conclusion}
|
||||
\begin{itemize}
|
||||
\item PIM can accelerate memory-bound workloads
|
||||
\item Special PIM-friendly memory layouts are required
|
||||
\end{itemize}
|
||||
|
||||
Future work:
|
||||
\textbf{Future work}
|
||||
\begin{itemize}
|
||||
\item Implementation of Linux driver
|
||||
\item Comparison with real neural networks
|
||||
@@ -452,6 +441,34 @@
|
||||
|
||||
\appendix
|
||||
|
||||
\begin{frame}{Energy Demand of Applications}
|
||||
Total compute energy approaches world’s energy production\autocite{src2021}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.6\textwidth]{images/world_energy}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\note[itemize]{
|
||||
\item compute 2x every two years
|
||||
\item energy production 2\% per year
|
||||
\item to meet future compute demands, drastic improvements in energy efficiency
|
||||
}
|
||||
|
||||
\begin{frame}{Memory-Bound Workloads}
|
||||
AI applications become increasingly memory-bound\autocite{ivobolsens2023}
|
||||
\begin{figure}
|
||||
\includesvg[width=0.5\textwidth]{images/gpt}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\note[itemize]{
|
||||
\item Emerging AI applications become increasingly memory-bound
|
||||
\item Roofline model
|
||||
\item Not limited by compute power but by memory
|
||||
\item researchers begin to consider PIM to circumvent memory bottleneck
|
||||
\item (drastically more parameters in GPT-3, operational intensity goes down)
|
||||
}
|
||||
|
||||
\begin{frame}{HBM-PIM/FIMDRAM GEMV Operation}
|
||||
\begin{figure}
|
||||
\only<1>{\includesvg[width=0.8\textwidth]{images/gemv_normal}}
|
||||
|
||||
Reference in New Issue
Block a user