% PIMSys --- MEMSYS 2024 presentation
\documentclass[aspectratio=169]{beamer}

\usetheme{UniWue}

% Footnote citations; repeated cites on the same page collapse to "ibid."
\usepackage[style=verbose-ibid]{biblatex}
\usepackage{datetime}
% Render SVG figures through Inkscape (compile with --shell-escape).
\usepackage[inkscapeversion=1]{svg}
\usepackage[T1]{fontenc}% proper hyphenation and copyable PDF text

\addbibresource{references.bib}

\setbeamerfont{footnote}{size=\tiny}

% Day of the talk (datetime package), usable via \displaydate{presentationday}.
\newdate{presentationday}{01}{10}{2024}
||
\title{PIMSys}
\subtitle{A Virtual Prototype for Processing in Memory}

\author{%
  Derek~Christ\inst{1}
  \and
  Lukas~Steiner\inst{2}
  \and
  Matthias~Jung\inst{3}
  \and
  Norbert~Wehn\inst{2}
}

\institute{%
  \inst{1} Fraunhofer IESE
  \quad
  \inst{2} RPTU Kaiserslautern-Landau
  \quad
  \inst{3} University of Würzburg
}

\date{MEMSYS~2024}
||
\begin{document}

\frame{\titlepage}

\section{Introduction}
\begin{frame}{Energy Demand of Applications}
  Total compute energy approaches the world's energy production\autocite{src2021}
  \begin{figure}
    \centering
    \includesvg[width=0.6\textwidth]{images/world_energy}
  \end{figure}
\end{frame}
\begin{frame}{Memory Bound Workloads}
  AI applications become increasingly memory-bound\autocite{ivobolsens2023}
  \begin{figure}
    \centering
    \includesvg[width=0.5\textwidth]{images/gpt}
  \end{figure}
\end{frame}

\section{Processing-in-Memory}
\begin{frame}{Workloads for PIM}
  Fully connected neural network layers:
  \begin{itemize}
    \item Large weight matrix -- does not fit into the cache
    \item No data reuse -- cache is useless
  \end{itemize}
  \begin{figure}
    \centering
    \includesvg[width=0.6\textwidth]{images/dnn}
  \end{figure}
\end{frame}
\begin{frame}{Workloads for PIM}
  Convolutional layers:
  \begin{itemize}
    \item Small filter matrix -- does fit into the cache
    \item Extensive data reuse -- cache is useful
  \end{itemize}
  \begin{figure}
    \centering
    % TODO: replace this placeholder text with a TikZ illustration
    TODO Tikz Image
    % \includesvg[width=0.6\textwidth]{images/dnn}
  \end{figure}
\end{frame}
\begin{frame}{Workloads for PIM}
  \begin{columns}[T]
    % Left column: workloads that benefit from PIM.
    \begin{column}{0.5\textwidth}
      \begin{center}
        % NOTE(review): "px" equals 1bp under pdfTeX; consider a font-relative
        % length (e.g. em) for theme-independent sizing.
        \includesvg[height=50px]{images/thumbs-up}
      \end{center}
      \begin{itemize}
        \item Fully connected layers in multilayer perceptrons (MLPs)
        \item Layers in recurrent neural networks (RNNs)
      \end{itemize}
    \end{column}
    % Right column: workloads with an unclear benefit.
    \begin{column}{0.5\textwidth}
      \begin{center}
        \includesvg[height=50px]{images/thumbs-unsure}
      \end{center}
      \begin{itemize}
        \item Convolutional neural networks (CNNs)
      \end{itemize}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{PIM Architectures}
  \begin{columns}[T]
    \begin{column}{0.4\textwidth}
      % One placement option revealed per overlay step; each step also
      % switches the illustration in the right-hand column.
      \begin{itemize}
        \item<2-> Inside the memory subarray
        \item<3-> Near the subarray in the PSA output region
        \item<4-> Near the bank in its peripheral region
        \item<5-> In the I/O region of the memory
      \end{itemize}
    \end{column}
    \begin{column}{0.6\textwidth}
      % pim_positions_N corresponds to overlay N+1.
      \only<1>{\includesvg[width=\textwidth]{images/pim_positions_0}}
      \only<2>{\includesvg[width=\textwidth]{images/pim_positions_1}}
      \only<3>{\includesvg[width=\textwidth]{images/pim_positions_2}}
      \only<4>{\includesvg[width=\textwidth]{images/pim_positions_3}}
      \only<5>{\includesvg[width=\textwidth]{images/pim_positions_4}}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{Samsung's HBM-PIM/FIMDRAM}
  % TODO: slide content still missing
\end{frame}
\begin{frame}
  \frametitle{Outline}
  % NOTE(review): an outline frame usually directly follows the title page;
  % confirm whether this placement at the end of the deck is intentional.
  \tableofcontents
\end{frame}

\end{document}