% --- Export residue (web file-browser chrome captured with the source; not LaTeX) ---
% Files
% memsys24-slides/main.tex
% 2024-09-17 12:18:07 +02:00
%
% 236 lines
% 6.9 KiB
% TeX
% Raw Blame History
%
% This file contains ambiguous Unicode characters
% This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
% Beamer slide deck for the MEMSYS 2024 talk "PIMSys", 16:9 aspect ratio.
\documentclass[aspectratio=169]{beamer}
% University of Wuerzburg corporate theme (project-local .sty).
\usetheme{UniWue}
% Footnote-style citations: \autocite renders full reference in a footnote.
\usepackage[style=verbose-ibid]{biblatex}
\usepackage{datetime}
% SVG figures converted on the fly via Inkscape (CLI syntax of Inkscape >= 1).
\usepackage[inkscapeversion=1]{svg}
\addbibresource{references.bib}
% Keep citation footnotes unobtrusive on slides.
\setbeamerfont{footnote}{size=\tiny}
% NOTE(review): presentationday is defined but \date below uses a literal
% string instead --- confirm whether \displaydate{presentationday} was intended.
\newdate{presentationday}{01}{10}{2024}
\title{PIMSys}
\subtitle{A Virtual Prototype for Processing in Memory}
\author{
Derek~Christ\inst{1}
\and
Lukas~Steiner\inst{2}
\and
Matthias~Jung\inst{3}
\and
Norbert~Wehn\inst{2}
}
\institute{
\inst{1}
Fraunhofer IESE
\quad
\inst{2}
RPTU Kaiserslautern-Landau
\quad
\inst{3}
University of Würzburg
}
\date{MEMSYS~2024}
\begin{document}
\frame{\titlepage}
\section{Introduction}
% Motivation slide: the global energy footprint of computing.
\begin{frame}{Energy Demand of Applications}
% Fixed typo: "worlds energy production" -> "the world's energy production".
Total compute energy approaches the world's energy production\autocite{src2021}
\begin{figure}
\includesvg[width=0.6\textwidth]{images/world_energy}
\end{figure}
\end{frame}
% Second motivation slide: AI/LLM inference is limited by memory bandwidth.
\begin{frame}{Memory Bound Workloads}
AI applications become increasingly memory-bound\autocite{ivobolsens2023}
\begin{figure}
\includesvg[width=0.5\textwidth]{images/gpt}
\end{figure}
\end{frame}
\section{Processing-in-Memory}
% PIM-friendly workload: fully connected layers stream a large weight matrix
% once, so caches provide no benefit.
\begin{frame}{Workloads for PIM}
Fully connected neural network layers:
\begin{itemize}
% Em-dashes instead of spaced hyphens for the asides.
\item Large weight matrix --- does not fit into the cache
\item No data reuse --- cache is useless
\end{itemize}
\begin{figure}
\includesvg[width=0.6\textwidth]{images/dnn}
\end{figure}
\end{frame}
% Counter-example: convolutional layers reuse a small filter heavily,
% so the cache hierarchy already serves them well.
\begin{frame}{Workloads for PIM}
Convolutional layers:
\begin{itemize}
% Em-dashes instead of spaced hyphens, parallel to the FC-layer slide.
\item Small filter matrix --- does fit into the cache
\item Excessive data reuse --- cache is useful
\end{itemize}
\begin{figure}
% Placeholder: a TikZ illustration is still to be drawn for this slide.
TODO Tikz Image
% \includesvg[width=0.6\textwidth]{images/dnn}
\end{figure}
\end{frame}
% Summary slide: which network types profit from PIM (left) vs. which are
% questionable (right). [T] aligns both columns at the top.
\begin{frame}{Workloads for PIM}
\begin{columns}[T]
\begin{column}{0.5\textwidth}
% NOTE(review): "px" is a pdfTeX-specific unit --- consider a font- or
% page-relative length for portability.
\begin{center} \includesvg[height=50px]{images/thumbs-up} \end{center}
\begin{itemize}
\item Fully connected layers in multilayer perceptrons (MLPs)
\item Layers in recurrent neural networks (RNNs)
\end{itemize}
\end{column}
\begin{column}{0.5\textwidth}
\begin{center} \includesvg[height=50px]{images/thumbs-unsure} \end{center}
\begin{itemize}
\item Convolutional neural network (CNNs)
\end{itemize}
\end{column}
\end{columns}
\end{frame}
% Incremental build: each overlay step (<2->, <3->, ...) reveals one more
% possible placement of PIM logic, with a matching figure on the right.
\begin{frame}{PIM Architectures}
\begin{columns}[T]
\begin{column}{0.4\textwidth}
\begin{itemize}
\item<2-> Inside the memory subarray
\item<3-> Near the subarray in the PSA output region
\item<4-> Near the bank in its peripheral region
\item<5-> In the I/O region of the memory
\end{itemize}
\end{column}
\begin{column}{0.6\textwidth}
% Figure variants _0.._4 correspond one-to-one to the overlay steps above.
\only<1>{\includesvg[height=115px]{images/pim_positions_0}}
\only<2>{\includesvg[height=115px]{images/pim_positions_1}}
\only<3>{\includesvg[height=115px]{images/pim_positions_2}}
\only<4>{\includesvg[height=115px]{images/pim_positions_3}}
\only<5->{\includesvg[height=115px]{images/pim_positions_4}}
\end{column}
\end{columns}
% Take-away box appears only on the final overlay.
\visible<6>{\begin{block}{Remark}The nearer the computation is to the memory cells, the higher the achievable bandwidth!\end{block}}
\end{frame}
\section{Samsung HBM-PIM/FIMDRAM}
% Case study: Samsung's bank-level PIM built on HBM2 (ISSCC 2021 paper).
\begin{frame}{Samsung HBM-PIM/FIMDRAM\autocite{lee2021}}
\begin{itemize}
\item Real-world PIM implementation based on HBM2
\item PIM units embedded at the bank level
\end{itemize}
\begin{figure}
\includesvg[width=0.8\textwidth]{images/hbm-pim}
\end{figure}
\end{frame}
% Architecture detail: the per-bank processing unit and its small ISA.
\begin{frame}{Samsung HBM-PIM/FIMDRAM\autocite{lee2021}}
\begin{columns}
\begin{column}{0.4\textwidth}
Processing units:
\begin{itemize}
\item Two 16-wide 16-bit FPUs
\item Register files and control unit
\end{itemize}
Instructions:
\begin{itemize}
\item Control: NOP, JUMP, EXIT
\item Data: MOV (ReLU), FILL
\item Arithmetic: ADD, MUL, MAC, MAD
\end{itemize}
\end{column}
\begin{column}{0.6\textwidth}
\begin{figure}
\includesvg[width=\textwidth]{images/pu}
\end{figure}
\end{column}
\end{columns}
\end{frame}
% Two-step overlay: naive GEMV data layout vs. the bank-interleaved layout.
\begin{frame}{HBM-PIM/FIMDRAM GEMV Operation}
\begin{figure}
\only<1>{\includesvg[width=0.8\textwidth]{images/gemv_normal}}
\only<2>{\includesvg[width=0.9\textwidth]{images/gemv_interleaved}}
\end{figure}
\end{frame}
% Six-step overlay animation of the GEMV micro-kernel execution.
\begin{frame}{HBM-PIM/FIMDRAM GEMV Operation\autocite{kang2022}}
\begin{figure}
% The .svg extension is omitted, consistent with every other \includesvg
% call in this file; the svg package resolves the extension itself.
\only<1>{\includesvg[width=0.55\textwidth]{images/gemv}}
\only<2>{\includesvg[width=0.55\textwidth]{images/gemv_0}}
\only<3>{\includesvg[width=0.55\textwidth]{images/gemv_1}}
\only<4>{\includesvg[width=0.55\textwidth]{images/gemv_2}}
\only<5>{\includesvg[width=0.55\textwidth]{images/gemv_3}}
\only<6>{\includesvg[width=0.55\textwidth]{images/gemv_4}}
\end{figure}
\end{frame}
% Transition slide: motivates the simulation-based evaluation that follows.
\begin{frame}{HBM-PIM/FIMDRAM}
% \par ends the paragraph inside the group so the size change takes effect
% correctly; the trailing \\ after \end{huge} was a misused line break.
{\huge How fast is it?\par}
Research should \dots
\begin{itemize}
% \dots instead of literal "..." for correct ellipsis spacing.
\item \dots\ conduct simulations to explore \alert{performance gains}
\item \dots\ consider also the programmability to identify \alert{challenges}
\end{itemize}
\end{frame}
\section{Virtual Prototype}
% Tooling slide: the simulation platform couples gem5 (CPU) with DRAMSys
% (memory), which hosts the HBM-PIM model.
\begin{frame}{Virtual Prototype}
\begin{itemize}
\item Coupling of gem5 and DRAMSys
\item Implementation of HBM-PIM in DRAM model
\end{itemize}
\begin{figure}
% Extension omitted for consistency with the other \includesvg calls.
\includesvg[width=0.8\textwidth]{images/dramsys}
\end{figure}
\end{frame}
% The host-side support library: kernel setup on the left, the data
% structures it manages on the right.
\begin{frame}{Software Library}
Software support library
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{itemize}
\item Initialization
\begin{itemize}
\item Assembly and loading of microkernel
\end{itemize}
\item Execution
\begin{itemize}
\item Generation of RD and WR requests
% Fixed typo: "Insetion" -> "Insertion".
\item Insertion of memory barriers
\end{itemize}
\end{itemize}
\end{column}
\begin{column}{0.5\textwidth}
\begin{figure}
% Extension omitted for consistency with the other \includesvg calls.
\includesvg[width=0.8\textwidth]{images/data_structures}
\end{figure}
\end{column}
\end{columns}
\end{frame}
% TODO: slide body is still empty --- a code listing of the GEMV kernel is
% planned here ([fragile] anticipates a verbatim/listings environment).
\begin{frame}[fragile]{Example: GEMV Kernel}
\end{frame}
% NOTE(review): this outline frame sits at the very end of the deck; decks
% usually show \tableofcontents right after the title page --- confirm
% whether this placement is intentional or the frame should be moved.
\begin{frame}
\frametitle{Outline}
\tableofcontents
\end{frame}
\end{document}