First revision of introduction

This commit is contained in:
2024-01-24 19:20:09 +01:00
parent 28c8dc299c
commit 9aa94e4f0f
7 changed files with 203 additions and 55 deletions

View File

@@ -1,12 +1,48 @@
% Acronym definitions (acro package).
% Convention: `short` is the capitalized abbreviation; `long` is the
% lowercase expansion, except where the long form is a proper name
% (e.g. High Bandwidth Memory).
% Note: the stripped diff left duplicate `long` keys for dnn/llm/tlm
% (old + new line both present); only the post-commit value is kept here,
% since a repeated key in one \DeclareAcronym is invalid.
\DeclareAcronym{dnn}{
short = DNN,
long = deep neural network,
}
\DeclareAcronym{cnn}{
short = CNN,
long = convolutional neural network,
}
\DeclareAcronym{mlp}{
short = MLP,
long = multi-layer perceptron,
}
\DeclareAcronym{rnn}{
short = RNN,
long = recurrent neural network,
}
\DeclareAcronym{llm}{
short = LLM,
long = large language model,
}
\DeclareAcronym{ai}{
short = AI,
long = artificial intelligence,
}
\DeclareAcronym{gpu}{
short = GPU,
long = graphics processing unit,
}
\DeclareAcronym{tpu}{
short = TPU,
long = tensor processing unit,
}
\DeclareAcronym{dram}{
short = DRAM,
long = dynamic random-access memory,
}
\DeclareAcronym{hbm}{
short = HBM,
long = High Bandwidth Memory,
}
\DeclareAcronym{pim}{
short = PIM,
long = processing-in-memory,
}
\DeclareAcronym{tlm}{
short = TLM,
long = transaction level modeling,
}

View File

@@ -1,41 +0,0 @@
% This file was created with tikzplotlib v0.10.1.
% Deleted in this commit (hunk header: -1,41 +0,0); superseded by the
% hand-written plots/energy_chart.tex added further below.
% Chart contents: years 2010--2050 on x, energy on a log10 y axis.
% tomato line: roughly flat upper curve; steelblue solid: steeply rising
% curve to 2030; steelblue dashed: flattened continuation 2030--2050.
\begin{tikzpicture}
\definecolor{darkgray176}{RGB}{176,176,176}
\definecolor{steelblue}{RGB}{70,130,180}
\definecolor{tomato}{RGB}{255,99,71}
\begin{axis}[
log basis y={10},
tick align=outside,
tick pos=left,
x grid style={darkgray176},
xmajorgrids,
xmin=2010, xmax=2050,
xtick style={color=black},
y grid style={darkgray176},
ymajorgrids,
ymin=1e+16, ymax=1e+22,
ymode=log,
ytick style={color=black}
]
% upper reference curve (tomato)
\addplot [semithick, tomato]
table {%
2010 5e+20
2050 1e+21
};
% rising curve up to 2030 (steelblue, solid)
\addplot [semithick, steelblue]
table {%
2010 5e+17
2020 5e+18
2030 4e+19
};
% flattened projection 2030--2050 (steelblue, dashed)
\addplot [semithick, steelblue, dashed]
table {%
2030 4e+19
2040 1.1e+20
2050 1.2e+20
};
\end{axis}
\end{tikzpicture}

View File

@@ -6,18 +6,50 @@ An important compound of these models make use of \acp{dnn}, which are a type of
Consequently, \acp{dnn} make it possible to tackle many new classes of problems that were previously beyond the reach of conventional algorithms.
However, the ever-increasing use of these technologies poses new challenges for hardware architectures, as the energy required to train and run these models reaches unprecedented levels.
Recently published numbers approximate that the development and training of Meta's LLaMA model over a period of about 5 months consumed around $\qty{2638}{\mega\watt\hour}$ of electrical energy and caused a total emission of $\qty{1015}{tCO_2eq}$ \cite{touvron2023}.
As these numbers are expected to increase in the future, it is clear that the energy footprint of the current deployment of \ac{ai} applications is not sustainable \cite{blott2023}.
In a more general view, the energy demand of computing for new applications continues to grow exponentially, doubling about every two years, while the world's energy production only grows linearly, at about $\qty{2}{\percent}$ per year \cite{src2021}.
This dramatic increase in energy consumption is due to the fact that while the energy efficiency of compute processor units has continued to improve, the ever-increasing demand for computing is outpacing this progress.
In addition, Moore's Law is slowing down as further device scaling approaches physical limits.
% TODO move in correct directory
\input{chapters/energy_chart}
\begin{figure}[!ht]
\centering
\input{plots/energy_chart}
\caption[Total energy of computing]{Total energy of computing \cite{src2021}}
\label{plt:enery_chart}
\end{figure}
The exponential growth in compute energy will eventually be constrained by market dynamics, flattening the energy curve and making it impossible to meet future computing demands.
It is therefore required to achieve radical improvements in energy efficiency in order to avoid such a scenario.
% -> more energy-efficient systems
% the discussion so far mainly concerns processors
% -> memory must also be considered; see the data movement cost figures
In recent years, domain-specific accelerators, such as \acp{gpu} or \acp{tpu} have become very popular, as they provide orders of magnitude higher performance and energy efficiency for \ac{ai} applications \cite{kwon2021}.
However, research must also consider the off-chip memory---the data movement between the computation unit and the \ac{dram} has a high cost, as fetching operands costs more than doing the computation on them.
While performing a double precision floating point operation on a $\qty{28}{\nano\meter}$ technology might consume an energy of about $\qty{20}{\pico\joule}$, fetching the operands from \ac{dram} consumes almost 3 orders of magnitude more energy at about $\qty{16}{\nano\joule}$ \cite{dally2010}.
Furthermore, many types of \ac{dnn} used for language and speech processing such as \acp{rnn}, \acp{mlp} and some layers of \acp{cnn} are severely limited by the memory bandwidth that the \ac{dram} can provide, in contrast to compute-intensive workloads such as visual processing \cite{he2020}.
Such workloads are referred to as \textit{memory-bound}.
\begin{figure}[!ht]
\centering
\input{plots/roofline}
\caption[Roofline model of GPT revisions]{Roofline model of GPT revisions \cite{ivobolsens2023}}
\label{plt:roofline}
\end{figure}
In the past, specialized types of \ac{dram} such as \ac{hbm} have been able to meet high bandwidth requirements.
However, recent \ac{ai} technologies require even greater bandwidth than \ac{hbm} can provide \cite{kwon2021}.
All things considered, to meet the need for energy-efficient computing systems, which are increasingly becoming memory-bound, new approaches to computing are required.
This has led researchers to reconsider past \ac{pim} architectures and advance them further \cite{lee2021}.
\Ac{pim} integrates computational logic into the \ac{dram} itself, to exploit minimal data movement cost and extensive internal data parallelism \cite{sudarshan2022}.
This work analyzes various \ac{pim} architectures, identifies the challenges of integrating them into state-of-the-art \acp{dram}, examines the changes required in the way applications lay out their data in memory and explores a \ac{pim} implementation from one of the leading \ac{dram} vendors.
The remainder of this work is structured as follows:
Section \ref{sec:dram} gives a brief overview of the architecture of \acp{dram}, in detail that of \acp{hbm}.
In section \ref{sec:pim} various types of \ac{pim} architectures are presented, with some concrete examples discussed in detail.
Section \ref{sec:vp} is an introduction to virtual prototyping and system-level hardware simulation.
After explaining the necessary prerequisites, section \ref{sec:implementation} implements a concrete \ac{pim} architecture in software and provides a development library that applications can use to take advantage of in-memory processing.
Section \ref{sec:results} demonstrates the possible performance enhancement of \ac{pim} by simulating a typical neural-network inference.
Finally, section \ref{sec:conclusion} concludes the findings and identifies future improvements in \ac{pim} architectures.

View File

@@ -122,6 +122,14 @@
file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/7M7QNRVN/He et al. - 2020 - Newton A DRAM-makers Accelerator-in-Memory (AiM).pdf}
}
% Personal name in BibTeX "Last, First" form; the original double-braced
% {{Ivo Bolsens}} made it a corporate author (sorted under "I" and exempt
% from first-name abbreviation).
@inproceedings{ivobolsens2023,
  title = {Scalable {{AI Architectures}} for {{Edge}} and {{Cloud}}},
  booktitle = {{{HiPEAC23}}},
  author = {Bolsens, Ivo},
  year = {2023},
  month = jan
}
@book{jacob2008,
title = {Memory Systems: {{Cache}}, {{DRAM}}, {{Disk}}},
shorttitle = {Memory Systems},
@@ -450,6 +458,5 @@
urldate = {2024-01-23},
abstract = {We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.},
archiveprefix = {arxiv},
keywords = {Computer Science - Computation and Language},
file = {/home/derek/Nextcloud/Verschiedenes/Zotero/storage/MGQYNDPQ/Touvron et al. - 2023 - LLaMA Open and Efficient Foundation Language Mode.pdf;/home/derek/Nextcloud/Verschiedenes/Zotero/storage/YDAT8K7L/2302.html}
}

View File

@@ -2,7 +2,7 @@
\usepackage[small,bf,hang]{caption}
\usepackage[english]{babel}
\usepackage{wrapfig}
% Load xcolor exactly once, with the driver color-name options; the stripped
% diff left both the old plain load and the new one, and a second load with
% different options triggers a LaTeX "Option clash" error.
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{amssymb}
@@ -107,6 +107,7 @@
% \clearpage
% List of Abbreviations
% TODO: place the abbreviation entries closer together and use a serif font for the abbreviations
\begingroup
\phantomsection
\addcontentsline{toc}{section}{List of Abbreviations}

View File

@@ -0,0 +1,50 @@
% "Total energy of computing" chart (after src2021): world energy
% production vs. total compute energy, log10 y axis, years 2010--2050.
\begin{tikzpicture}
\definecolor{darkgray176}{RGB}{176,176,176}
\definecolor{steelblue}{RGB}{70,130,180}
\definecolor{tomato}{RGB}{255,99,71}
\begin{axis}[
width=12cm,
height=8cm,
axis background/.style={fill=gray!8},
axis line style={white},
tick style={draw=none},
grid=both,
grid style={white},
ymode=log,
log basis y={10},
xtick={2010,2020,2030,2040,2050},
ytick={1e16,1e18,1e20,1e22},
xticklabel style={/pgf/number format/1000 sep=},
% \si{\joule\per Year} is invalid siunitx ("Year" is not a declared unit
% and would abort compilation); typeset the unit and the word separately.
ylabel={Compute Energy in $\si{\joule}$ per Year},
xmin=2010,
xmax=2050,
ymin=5e15,
ymax=2e22,
legend pos=south east,
legend style={draw=none, fill=none}
]
% world's energy production: grows roughly linearly (flat on a log axis)
\addplot [very thick, tomato]
table {
2010 5e+20
2050 8e+20
};
\addlegendentry{world's energy production}
% total compute energy: exponential growth up to 2030
\addplot [very thick, steelblue]
table {
2010 5e+18
2020 5e+18
2030 4e+19
};
\addlegendentry{total compute energy}
% projection 2030--2050: growth flattened by market dynamics
\addplot [very thick, steelblue, dashed]
table {
2030 4e+19
2035 8e+19
2040 1.1e+20
2050 1.2e+20
}
% LaTeX curly quotes instead of straight ASCII quotes, which render as
% wrong glyphs (two right quotes) in the output.
node[above,pos=0.5,scale=0.8] {``market dynamics limited'' scenario};
\end{axis}
\end{tikzpicture}

63
src/plots/roofline.tex Normal file
View File

@@ -0,0 +1,63 @@
% Schematic roofline model (after ivobolsens2023): linear axes with
% unlabeled ticks stand in for a log-log plot. Memory-bound slope,
% compute-bound ceiling, and GPT revisions as circle markers.
\begin{tikzpicture}
\begin{axis}[
width=12cm,
height=8cm,
axis lines=middle,
% enlargelimits={abs=5},
ticks=none,
grid=both,
grid style={white},
% "OPS" and "Byte" are not siunitx units; \si{OPS \per Byte} and
% \si{\giga OPS \per\second} abort compilation. Typeset the units as
% upright math text instead.
xlabel={Operational Intensity in $\mathrm{OPS/Byte}$ (log)},
ylabel={Achievable Performance in $\mathrm{GOPS/s}$ (log)},
xlabel near ticks,
ylabel near ticks,
xmin=0,
xmax=100,
ymin=0,
ymax=100
]
% vertical guide at the ridge point (memory-/compute-bound boundary)
\addplot [thick, dashed, gray]
table {
40 0
40 100
};
% the roofline itself: rising bandwidth slope, then flat compute ceiling
\addplot [very thick, BrickRed]
table {
0 20
40 70
100 70
}
node[above,sloped,pos=0.25,scale=0.8] {\textit{memory-bound}}
node[above,pos=0.75,scale=0.8] {\textit{compute-bound}};
% dashed extensions of slope and ceiling beyond the ridge point
\addplot [very thick, dashed, BrickRed]
table {
40 70
60 95
};
\addplot [very thick, dashed, BrickRed]
table {
0 70
40 70
};
% GPT-2 configurations (compute-bound side)
\addplot [only marks, mark=o, mark options={color=NavyBlue}]
table {
60 30
65 45
55 55
50 35
};
\node (gpt2) at (57, 43) {GPT-2};
% GPT-3 (moves into the memory-bound region)
\addplot [only marks, mark=o, mark options={color=NavyBlue}]
table {
28 49
};
\node (gpt3) at (28, 43) {GPT-3};
\draw[-latex](gpt2)--(gpt3);
\end{axis}
\end{tikzpicture}