Real hardware plots
This commit is contained in:
162
src/appendix.tex
162
src/appendix.tex
@@ -3,69 +3,105 @@
|
|||||||
|
|
||||||
\subsection{Simulation Results}
|
\subsection{Simulation Results}
|
||||||
|
|
||||||
\begin{table}[!ht]
|
TODO: update again!
|
||||||
\centering
|
|
||||||
\input{tables/vadd_3GHz}
|
\begin{table}[H]
|
||||||
\caption{Runtime of the VADD benchmark in $\unit{\pico\second}$ on the generic ARM system.}
|
\csvreader[
|
||||||
|
head to column names,
|
||||||
|
centered tabularray = {
|
||||||
|
hlines,
|
||||||
|
vlines,
|
||||||
|
hline{3} = {2}{-}{},
|
||||||
|
column{1,2} = {c},
|
||||||
|
column{3} = {r},
|
||||||
|
column{4} = {r},
|
||||||
|
row{1,2} = {c},
|
||||||
|
cell{1}{1} = {r=2}{},
|
||||||
|
cell{1}{2} = {r=2}{},
|
||||||
|
cell{1}{3} = {c=2}{},
|
||||||
|
cell{1}{5} = {c=2}{},
|
||||||
|
cell{3}{1} = {r=4}{},
|
||||||
|
cell{7}{1} = {r=4}{},
|
||||||
|
cell{11}{1} = {r=4}{},
|
||||||
|
cell{15}{1} = {r=4}{},
|
||||||
|
cell{19}{1} = {r=4}{},
|
||||||
|
},
|
||||||
|
table head = {
|
||||||
|
Workload & Level & Runtime [$\unit{\pico\second}$] & \\
|
||||||
|
& & Non-PIM & PIM \\
|
||||||
|
}
|
||||||
|
]{tables/simulations_3GHz.csv}{}{
|
||||||
|
\csvexpval\workload & \csvexpval\level & \csvexpval\hbm & \csvexpval\pim
|
||||||
|
}
|
||||||
|
\caption{Runtimes of the microbenchmarks on the generic ARM-based system for non-\acs*{pim} and \acs*{pim} mode.}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
\begin{table}[!ht]
|
\begin{table}[H]
|
||||||
\centering
|
\csvreader[
|
||||||
\input{tables/vmul_3GHz}
|
head to column names,
|
||||||
\caption{Runtime of the VMUL benchmark in $\unit{\pico\second}$ on the generic ARM system.}
|
centered tabularray = {
|
||||||
|
hlines,
|
||||||
|
vlines,
|
||||||
|
hline{3} = {2}{-}{},
|
||||||
|
column{1,2} = {c},
|
||||||
|
column{3} = {r},
|
||||||
|
column{4} = {r},
|
||||||
|
row{1,2} = {c},
|
||||||
|
cell{1}{1} = {r=2}{},
|
||||||
|
cell{1}{2} = {r=2}{},
|
||||||
|
cell{1}{3} = {c=2}{},
|
||||||
|
cell{1}{5} = {c=2}{},
|
||||||
|
cell{3}{1} = {r=4}{},
|
||||||
|
cell{7}{1} = {r=4}{},
|
||||||
|
cell{11}{1} = {r=4}{},
|
||||||
|
cell{15}{1} = {r=4}{},
|
||||||
|
cell{19}{1} = {r=4}{},
|
||||||
|
},
|
||||||
|
table head = {
|
||||||
|
Workload & Level & Runtime [$\unit{\pico\second}$] & \\
|
||||||
|
& & Non-PIM & PIM \\
|
||||||
|
}
|
||||||
|
]{tables/simulations_100GHz.csv}{}{
|
||||||
|
\csvexpval\workload & \csvexpval\level & \csvexpval\hbm & \csvexpval\pim
|
||||||
|
}
|
||||||
|
\caption{Runtimes of the microbenchmarks on the infinite compute system for non-\acs*{pim} and \acs*{pim} mode.}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
\begin{table}[!ht]
|
\begin{table}[H]
|
||||||
\centering
|
\csvreader[
|
||||||
\input{tables/haxpy_3GHz}
|
head to column names,
|
||||||
\caption{Runtime of the \ac{haxpy} benchmark in $\unit{\pico\second}$ on the generic ARM system.}
|
centered tabularray = {
|
||||||
\end{table}
|
hlines,
|
||||||
|
vlines,
|
||||||
\begin{table}[!ht]
|
hline{3} = {2}{-}{},
|
||||||
\centering
|
column{1,2} = {c},
|
||||||
\input{tables/gemv_3GHz}
|
column{3} = {r},
|
||||||
\caption{Runtime of the \ac{gemv} benchmark in $\unit{\pico\second}$ on the generic ARM system.}
|
column{4} = {r},
|
||||||
\end{table}
|
row{1,2} = {c},
|
||||||
|
cell{1}{1} = {r=2}{},
|
||||||
\begin{table}[!ht]
|
cell{1}{2} = {r=2}{},
|
||||||
\centering
|
cell{1}{3} = {c=2}{},
|
||||||
\input{tables/gemv_layers_3GHz}
|
cell{1}{5} = {c=2}{},
|
||||||
\caption{Runtime of the \ac{dnn} benchmark in $\unit{\pico\second}$ on the generic ARM system.}
|
cell{3}{1} = {r=4}{},
|
||||||
\end{table}
|
cell{7}{1} = {r=4}{},
|
||||||
|
cell{11}{1} = {r=4}{},
|
||||||
\begin{table}[!ht]
|
cell{15}{1} = {r=4}{},
|
||||||
\centering
|
cell{19}{1} = {r=4}{},
|
||||||
\input{tables/vadd_100GHz}
|
},
|
||||||
\caption{Runtime of the VADD benchmark in $\unit{\pico\second}$ on the infinite compute system.}
|
table head = {
|
||||||
\end{table}
|
Workload & Level & Runtime [$\unit{\pico\second}$] & \\
|
||||||
|
& & Vega & Tesla \\
|
||||||
\begin{table}[!ht]
|
}
|
||||||
\centering
|
]{tables/torch.csv}{}{
|
||||||
\input{tables/vmul_100GHz}
|
\csvexpval\workload & \csvexpval\level & \csvexpval\vega & \csvexpval\tesla
|
||||||
\caption{Runtime of the VMUL benchmark in $\unit{\pico\second}$ on the infinite compute system.}
|
}
|
||||||
\end{table}
|
\caption{Runtimes of the microbenchmarks on the different \acs*{gpu} platforms.}
|
||||||
|
|
||||||
\begin{table}[!ht]
|
|
||||||
\centering
|
|
||||||
\input{tables/haxpy_100GHz}
|
|
||||||
\caption{Runtime of the \ac{haxpy} benchmark in $\unit{\pico\second}$ on the infinite compute system.}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
\begin{table}[!ht]
|
|
||||||
\centering
|
|
||||||
\input{tables/gemv_100GHz}
|
|
||||||
\caption{Runtime of the \ac{gemv} benchmark in $\unit{\pico\second}$ on the infinite compute system.}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
\begin{table}[!ht]
|
|
||||||
\centering
|
|
||||||
\input{tables/gemv_layers_100GHz}
|
|
||||||
\caption{Runtime of the \ac{dnn} benchmark in $\unit{\pico\second}$ on the infinite compute system.}
|
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
|
\newpage
|
||||||
\subsection{Microkernels}
|
\subsection{Microkernels}
|
||||||
|
|
||||||
\begin{listing}[!ht]
|
\begin{listing}[H]
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
MOV GRF_A #0, BANK
|
MOV GRF_A #0, BANK
|
||||||
MOV GRF_A #1, BANK
|
MOV GRF_A #1, BANK
|
||||||
@@ -97,7 +133,7 @@ EXIT
|
|||||||
\label{lst:vadd_bench}
|
\label{lst:vadd_bench}
|
||||||
\end{listing}
|
\end{listing}
|
||||||
|
|
||||||
\begin{listing}[!ht]
|
\begin{listing}[H]
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
MOV GRF_A #0, BANK
|
MOV GRF_A #0, BANK
|
||||||
MOV GRF_A #1, BANK
|
MOV GRF_A #1, BANK
|
||||||
@@ -129,7 +165,7 @@ EXIT
|
|||||||
\label{lst:vmul_bench}
|
\label{lst:vmul_bench}
|
||||||
\end{listing}
|
\end{listing}
|
||||||
|
|
||||||
\begin{listing}[!ht]
|
\begin{listing}[H]
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
MOV SRF_M #0, BANK
|
MOV SRF_M #0, BANK
|
||||||
MOV GRF_A #0, BANK
|
MOV GRF_A #0, BANK
|
||||||
@@ -162,7 +198,7 @@ EXIT
|
|||||||
\label{lst:haxpy_bench}
|
\label{lst:haxpy_bench}
|
||||||
\end{listing}
|
\end{listing}
|
||||||
|
|
||||||
\begin{listing}[!ht]
|
\begin{listing}[H]
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
MOV GRF_A #0, BANK
|
MOV GRF_A #0, BANK
|
||||||
MOV GRF_A #1, BANK
|
MOV GRF_A #1, BANK
|
||||||
@@ -188,28 +224,33 @@ EXIT
|
|||||||
\label{lst:gemv_bench}
|
\label{lst:gemv_bench}
|
||||||
\end{listing}
|
\end{listing}
|
||||||
|
|
||||||
|
\newpage
|
||||||
\subsection{Source Code}
|
\subsection{Source Code}
|
||||||
|
|
||||||
\begin{listing}[!ht]
|
\begin{listing}[H]
|
||||||
\begin{minted}{rust}
|
\begin{minted}[linenos]{rust}
|
||||||
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
||||||
matrix: &Matrix<X16R, X16C>,
|
matrix: &Matrix<X16R, X16C>,
|
||||||
input_vector: &Vector<X16C>,
|
input_vector: &Vector<X16C>,
|
||||||
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||||
dummy: &impl PimOperand,
|
dummy: &impl PimOperand,
|
||||||
) {
|
) {
|
||||||
|
// Load input vector into GRF-A registers
|
||||||
for chunk in input_vector.0.iter() {
|
for chunk in input_vector.0.iter() {
|
||||||
chunk.execute_read();
|
chunk.execute_read();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Execute the MAC instructions without memory barriers
|
||||||
for sub_matrix in matrix.0.iter() {
|
for sub_matrix in matrix.0.iter() {
|
||||||
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
|
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
|
||||||
column_block.execute_read_async();
|
column_block.execute_read_async();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait until all outstanding memory accesses have completed
|
||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
|
// Copy the partial sums into the bank
|
||||||
for chunk in output_partial_sum_vector
|
for chunk in output_partial_sum_vector
|
||||||
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
@@ -217,6 +258,7 @@ pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
|||||||
chunk.execute_write();
|
chunk.execute_write();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Execute the EXIT instruction
|
||||||
dummy.execute_read();
|
dummy.execute_read();
|
||||||
}
|
}
|
||||||
\end{minted}
|
\end{minted}
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ The levels X1-X4 denote the increasing dimensions, with each successive level do
|
|||||||
\begin{table}
|
\begin{table}
|
||||||
\centering
|
\centering
|
||||||
\begin{tblr}{
|
\begin{tblr}{
|
||||||
|
column{1} = {c},
|
||||||
cell{2}{2} = {r},
|
cell{2}{2} = {r},
|
||||||
cell{3}{2} = {r},
|
cell{3}{2} = {r},
|
||||||
cell{4}{2} = {r},
|
cell{4}{2} = {r},
|
||||||
@@ -168,6 +169,7 @@ Again, several different dimensions of the benchmark inputs are used, whose matr
|
|||||||
\begin{table}
|
\begin{table}
|
||||||
\centering
|
\centering
|
||||||
\begin{tblr}{
|
\begin{tblr}{
|
||||||
|
column{1} = {c},
|
||||||
cell{2}{2} = {r},
|
cell{2}{2} = {r},
|
||||||
cell{3}{2} = {r},
|
cell{3}{2} = {r},
|
||||||
cell{4}{2} = {r},
|
cell{4}{2} = {r},
|
||||||
@@ -259,7 +261,7 @@ Since the Samsung \ac{fpga} platform can be assumed to be a highly optimized acc
|
|||||||
\begin{figure}
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=0.8\linewidth]{plots/samsung}
|
\includegraphics[width=0.8\linewidth]{plots/samsung}
|
||||||
\caption{Relative performance of the \ac{gemv} and ADD microbenchmark for different batch sizes on the hardware implementation of Samsung \cite{lee2021}.}
|
\caption[Relative performance of the \ac{gemv} and ADD microbenchmarks for different batch sizes on the hardware implementation of Samsung.]{Relative performance of the \ac{gemv} and ADD microbenchmarks for different batch sizes on the hardware implementation of Samsung~\cite{lee2021}.}
|
||||||
\label{fig:samsung_speedup}
|
\label{fig:samsung_speedup}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
@@ -269,10 +271,29 @@ The \ac{gemv} microbenchmark on the other hand shows a more drastic speedup with
|
|||||||
Although the dimensions used by Samsung are different from the simulations of this thesis, the highest achieved speedup of $\qty{6.1}{\times}$ is well within the reach of the real hardware implementation.
|
Although the dimensions used by Samsung are different from the simulations of this thesis, the highest achieved speedup of $\qty{6.1}{\times}$ is well within the reach of the real hardware implementation.
|
||||||
|
|
||||||
\subsubsection{Comparison to Real Hardware}
|
\subsubsection{Comparison to Real Hardware}
|
||||||
|
TODO: check all ranges
|
||||||
|
|
||||||
In addition to the comparison with Samsung's real hardware implementation, the benchmarks used in the simulations are also run on a [...] with HBM2 [...].
|
In addition to the comparison with Samsung's real hardware implementation, the benchmarks used in the simulations are also run on a [...] with HBM2 [...].
|
||||||
As this system uses a generic \aca{hbm} \ac{dram} and not \aca{fimdram}, the measurements are only intended to serve as a rough estimate of the runtimes in the non-\ac{pim} case.
|
As this system uses a generic \aca{hbm} \ac{dram} and not \aca{fimdram}, the measurements are only intended to serve as a rough estimate of the runtimes in the non-\ac{pim} case.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\resizebox{\linewidth}{!}{%
|
||||||
|
\input{plots/runtimes_vector}
|
||||||
|
}
|
||||||
|
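\caption{Runtimes of the VADD, VMUL and \acs*{haxpy} microbenchmarks for the simulated non-\acs*{pim} and \acs*{pim} systems and the \acs*{gpu} platforms.}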
|
||||||
|
\label{fig:runtimes_vector}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
% \resizebox{\linewidth}{!}{%
|
||||||
|
\input{plots/runtimes_matrix}
|
||||||
|
% }
|
||||||
|
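\caption{Runtimes of the \acs*{gemv} and \acs*{dnn} microbenchmarks for the simulated non-\acs*{pim} and \acs*{pim} systems and the \acs*{gpu} platforms.}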
|
||||||
|
\label{fig:runtimes_matrix}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
% \subsubsection{Initialization Overhead}
|
% \subsubsection{Initialization Overhead}
|
||||||
% estimate the cost of converting the operands relative to the runtime
|
% estimate the cost of converting the operands relative to the runtime
|
||||||
|
|
||||||
|
|||||||
@@ -28,12 +28,15 @@
|
|||||||
\usepackage{makecell}
|
\usepackage{makecell}
|
||||||
\usepackage{minted}
|
\usepackage{minted}
|
||||||
\usepackage{lscape}
|
\usepackage{lscape}
|
||||||
|
\usepackage{float}
|
||||||
|
\usepackage[l3]{csvsimple}
|
||||||
|
|
||||||
% Configurations
|
% Configurations
|
||||||
\usetikzlibrary{matrix}
|
\usetikzlibrary{matrix}
|
||||||
\usetikzlibrary{automata}
|
\usetikzlibrary{automata}
|
||||||
\usetikzlibrary{fit}
|
\usetikzlibrary{fit}
|
||||||
\usetikzlibrary{positioning}
|
\usetikzlibrary{positioning}
|
||||||
|
\usepgfplotslibrary[groupplots]
|
||||||
|
|
||||||
\addbibresource{doc.bib}
|
\addbibresource{doc.bib}
|
||||||
|
|
||||||
@@ -82,13 +85,13 @@
|
|||||||
\setcounter{page}{1}
|
\setcounter{page}{1}
|
||||||
|
|
||||||
% Chapters
|
% Chapters
|
||||||
\include{chapters/introduction}
|
% \include{chapters/introduction}
|
||||||
\include{chapters/dram}
|
% \include{chapters/dram}
|
||||||
\include{chapters/pim}
|
% \include{chapters/pim}
|
||||||
\include{chapters/vp}
|
% \include{chapters/vp}
|
||||||
\include{chapters/implementation}
|
% \include{chapters/implementation}
|
||||||
\include{chapters/results}
|
\include{chapters/results}
|
||||||
\include{chapters/conclusion}
|
% \include{chapters/conclusion}
|
||||||
|
|
||||||
% Appendix
|
% Appendix
|
||||||
\appendix
|
\appendix
|
||||||
|
|||||||
@@ -1,19 +1,16 @@
|
|||||||
\begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/gemv_100GHz.csv}\gemv
|
\pgfplotstableread[col sep=comma]{plots/speedup_tables/matrix_100GHz.csv}\csv
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/gemv_layers_100GHz.csv}\gemvlayers
|
|
||||||
\begin{axis}[
|
\begin{axis}[
|
||||||
width=0.9\textwidth,
|
width=0.9\textwidth,
|
||||||
ybar=1pt,
|
ybar=1pt,
|
||||||
bar width = 15pt,
|
bar width = 15pt,
|
||||||
ymin=0.1,
|
ymin=0.1,
|
||||||
ymax=100,
|
ymax=10,
|
||||||
ymode=log,
|
|
||||||
log origin=infty,
|
|
||||||
ymajorgrids,
|
ymajorgrids,
|
||||||
ylabel={Relative Performance},
|
ylabel={Relative Performance},
|
||||||
tick pos=left,
|
tick pos=left,
|
||||||
xtick=data,
|
xtick=data,
|
||||||
xticklabels from table={\gemv}{level},
|
xticklabels from table={\csv}{level},
|
||||||
enlarge x limits=0.25,
|
enlarge x limits=0.25,
|
||||||
legend style={
|
legend style={
|
||||||
at={(current bounding box.south-|current axis.south)},
|
at={(current bounding box.south-|current axis.south)},
|
||||||
@@ -23,10 +20,10 @@
|
|||||||
/tikz/every even column/.append style={column sep=0.5cm}
|
/tikz/every even column/.append style={column sep=0.5cm}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
\addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\gemv};
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={gemv}]{\csv};
|
||||||
\addlegendentry{GEMV}
|
\addlegendentry{GEMV}
|
||||||
|
|
||||||
\addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\gemvlayers};
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={dnn}]{\csv};
|
||||||
\addlegendentry{DNN}
|
\addlegendentry{DNN}
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
|
|||||||
@@ -1,20 +1,16 @@
|
|||||||
\begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/gemv_3GHz.csv}\gemv
|
\pgfplotstableread[col sep=comma]{plots/speedup_tables/matrix_3GHz.csv}\csv
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/gemv_layers_3GHz.csv}\gemvlayers
|
|
||||||
\begin{axis}[
|
\begin{axis}[
|
||||||
width=0.9\textwidth,
|
width=0.9\textwidth,
|
||||||
ybar=1pt,
|
ybar=1pt,
|
||||||
bar width = 15pt,
|
bar width = 15pt,
|
||||||
ymin=0.1,
|
ymin=0.1,
|
||||||
ymax=100,
|
ymax=75,
|
||||||
ymode=log,
|
|
||||||
log origin=infty,
|
|
||||||
% minor y tick num = 5,
|
|
||||||
ymajorgrids,
|
ymajorgrids,
|
||||||
ylabel={Relative Performance},
|
ylabel={Relative Performance},
|
||||||
tick pos=left,
|
tick pos=left,
|
||||||
xtick=data,
|
xtick=data,
|
||||||
xticklabels from table={\gemv}{level},
|
xticklabels from table={\csv}{level},
|
||||||
enlarge x limits=0.25,
|
enlarge x limits=0.25,
|
||||||
legend style={
|
legend style={
|
||||||
at={(current bounding box.south-|current axis.south)},
|
at={(current bounding box.south-|current axis.south)},
|
||||||
@@ -24,10 +20,10 @@
|
|||||||
/tikz/every even column/.append style={column sep=0.5cm}
|
/tikz/every even column/.append style={column sep=0.5cm}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
\addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\gemv};
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={gemv}]{\csv};
|
||||||
\addlegendentry{GEMV}
|
\addlegendentry{GEMV}
|
||||||
|
|
||||||
\addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\gemvlayers};
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={dnn}]{\csv};
|
||||||
\addlegendentry{DNN}
|
\addlegendentry{DNN}
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
|
|||||||
5
src/plots/runtime_tables/hbm_100GHz.csv
Normal file
5
src/plots/runtime_tables/hbm_100GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy,gemv,dnn
|
||||||
|
X1,11768899990,9758441990,16772264000,7916400980,176584310
|
||||||
|
X2,23071196990,18975123990,33462372990,15800020980,1860990350
|
||||||
|
X3,46873992980,37109877990,65993469990,62587326980,7063630630
|
||||||
|
X4,91264808000,74066441980,134134323970,127514526980,27344749530
|
||||||
|
5
src/plots/runtime_tables/hbm_3GHz.csv
Normal file
5
src/plots/runtime_tables/hbm_3GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy,gemv,dnn
|
||||||
|
X1,21334729581,20112009858,36613117899,72834815610,5504078079
|
||||||
|
X2,43268334354,39990439863,73515923487,151235040573,21478024143
|
||||||
|
X3,85485583179,80576580096,146061622170,614362203486,85912073262
|
||||||
|
X4,166942414809,163020260223,294729236073,1228045754304,322188095061
|
||||||
|
5
src/plots/runtime_tables/pim_100GHz.csv
Normal file
5
src/plots/runtime_tables/pim_100GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy,gemv,dnn
|
||||||
|
X1,911446480,911416480,954454480,951904860,536177760
|
||||||
|
X2,1822806480,1822776480,1908822480,1814530860,738329760
|
||||||
|
X3,3645526480,3645496480,3817558480,6990944860,1547139760
|
||||||
|
X4,7290966480,7290936480,7635030480,13892610860,4782339760
|
||||||
|
5
src/plots/runtime_tables/pim_3GHz.csv
Normal file
5
src/plots/runtime_tables/pim_3GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy,gemv,dnn
|
||||||
|
X1,1475510346,1475512344,1543044078,1377734886,933823908
|
||||||
|
X2,2950962084,2950964082,3085992252,2601142920,1220409702
|
||||||
|
X3,5901852240,5901848244,6171893928,9942655392,2367353610
|
||||||
|
X4,11803639878,11803641876,12343693950,19731271644,6955629408
|
||||||
|
5
src/plots/runtime_tables/tesla.csv
Normal file
5
src/plots/runtime_tables/tesla.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy,gemv,dnn
|
||||||
|
X1,69572650,67408281,69791189,750246152,231093065
|
||||||
|
X2,123217536,103994272,123543145,648714601,431703456
|
||||||
|
X3,207693503,182162140,207947543,2454455479,877622611
|
||||||
|
X4,378089165,350280326,377434890,4968984949,2175751385
|
||||||
|
5
src/plots/runtime_tables/vega.csv
Normal file
5
src/plots/runtime_tables/vega.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy,gemv,dnn
|
||||||
|
X1,69572650,67408281,69791189,750246152,231093065
|
||||||
|
X2,123217536,103994272,123543145,648714601,431703456
|
||||||
|
X3,207693503,182162140,207947543,2454455479,877622611
|
||||||
|
X4,378089165,350280326,377434890,4968984949,2175751385
|
||||||
|
74
src/plots/runtimes_matrix.tex
Normal file
74
src/plots/runtimes_matrix.tex
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
\begin{tikzpicture}
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_3GHz.csv}\hbmarm
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_3GHz.csv}\hbmpim
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_100GHz.csv}\hbminf
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_100GHz.csv}\piminf
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/vega.csv}\vega
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/tesla.csv}\tesla
|
||||||
|
\begin{groupplot}[
|
||||||
|
group style={
|
||||||
|
group size=2 by 1,
|
||||||
|
horizontal sep=0pt,
|
||||||
|
vertical sep=0pt,
|
||||||
|
xticklabels at=edge bottom,
|
||||||
|
yticklabels at=edge left,
|
||||||
|
xlabels at=edge bottom,
|
||||||
|
ylabels at=edge left,
|
||||||
|
},
|
||||||
|
height=8cm,
|
||||||
|
width=0.45\linewidth,
|
||||||
|
ybar=1pt,
|
||||||
|
axis line style={draw=none},
|
||||||
|
tick style={draw=none},
|
||||||
|
ymin=0.1,
|
||||||
|
ymax=1e13,
|
||||||
|
ymode=log,
|
||||||
|
% ymax=25,
|
||||||
|
ymajorgrids,
|
||||||
|
ylabel={Runtime [$\unit{\pico\second}$]},
|
||||||
|
tick pos=left,
|
||||||
|
xtick=data,
|
||||||
|
xticklabels from table={\vega}{level},
|
||||||
|
legend style={
|
||||||
|
at={(1.0,-0.15)},
|
||||||
|
anchor=north,
|
||||||
|
legend columns=-1,
|
||||||
|
draw=none,
|
||||||
|
/tikz/every even column/.append style={column sep=0.5cm}
|
||||||
|
},
|
||||||
|
enlarge x limits=0.2,
|
||||||
|
]
|
||||||
|
\nextgroupplot[
|
||||||
|
xlabel=GEMV,
|
||||||
|
bar width=3pt,
|
||||||
|
% width=3cm,
|
||||||
|
]
|
||||||
|
|
||||||
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={gemv}]{\hbmarm};
|
||||||
|
\addlegendentry{Non-PIM ARM}
|
||||||
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={gemv}]{\hbmpim};
|
||||||
|
\addlegendentry{PIM ARM}
|
||||||
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={gemv}]{\hbminf};
|
||||||
|
\addlegendentry{Non-PIM Inf}
|
||||||
|
\addplot[fill=_green!90] table [x expr=\coordindex, y={gemv}]{\piminf};
|
||||||
|
\addlegendentry{PIM Inf}
|
||||||
|
\addplot[fill=_darkblue!90] table [x expr=\coordindex, y={gemv}]{\vega};
|
||||||
|
\addlegendentry{Vega}
|
||||||
|
\addplot[fill=violet!90] table [x expr=\coordindex, y={gemv}]{\tesla};
|
||||||
|
\addlegendentry{Tesla}
|
||||||
|
|
||||||
|
\nextgroupplot[
|
||||||
|
xlabel=DNN,
|
||||||
|
bar width=3pt,
|
||||||
|
% width=3cm,
|
||||||
|
]
|
||||||
|
|
||||||
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={dnn}]{\hbmarm};
|
||||||
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={dnn}]{\hbmpim};
|
||||||
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={dnn}]{\hbminf};
|
||||||
|
\addplot[fill=_green!90] table [x expr=\coordindex, y={dnn}]{\piminf};
|
||||||
|
\addplot[fill=_darkblue!90] table [x expr=\coordindex, y={dnn}]{\vega};
|
||||||
|
\addplot[fill=violet!90] table [x expr=\coordindex, y={dnn}]{\tesla};
|
||||||
|
|
||||||
|
\end{groupplot}
|
||||||
|
\end{tikzpicture}
|
||||||
87
src/plots/runtimes_vector.tex
Normal file
87
src/plots/runtimes_vector.tex
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
\begin{tikzpicture}
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_3GHz.csv}\hbmarm
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_3GHz.csv}\hbmpim
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_100GHz.csv}\hbminf
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_100GHz.csv}\piminf
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/vega.csv}\vega
|
||||||
|
\pgfplotstableread[col sep=comma]{plots/runtime_tables/tesla.csv}\tesla
|
||||||
|
\begin{groupplot}[
|
||||||
|
group style={
|
||||||
|
group size=3 by 1,
|
||||||
|
horizontal sep=0pt,
|
||||||
|
vertical sep=0pt,
|
||||||
|
xticklabels at=edge bottom,
|
||||||
|
yticklabels at=edge left,
|
||||||
|
xlabels at=edge bottom,
|
||||||
|
ylabels at=edge left,
|
||||||
|
},
|
||||||
|
height=8cm,
|
||||||
|
width=0.45\linewidth,
|
||||||
|
ybar=1pt,
|
||||||
|
axis line style={draw=none},
|
||||||
|
tick style={draw=none},
|
||||||
|
ymin=0.1,
|
||||||
|
ymax=1e12,
|
||||||
|
ymode=log,
|
||||||
|
% ymax=25,
|
||||||
|
ymajorgrids,
|
||||||
|
ylabel={Runtime [$\unit{\pico\second}$]},
|
||||||
|
tick pos=left,
|
||||||
|
xtick=data,
|
||||||
|
xticklabels from table={\vega}{level},
|
||||||
|
legend style={
|
||||||
|
at={(0.5,-0.15)},
|
||||||
|
anchor=north,
|
||||||
|
legend columns=-1,
|
||||||
|
draw=none,
|
||||||
|
/tikz/every even column/.append style={column sep=0.5cm}
|
||||||
|
},
|
||||||
|
enlarge x limits=0.2,
|
||||||
|
]
|
||||||
|
\nextgroupplot[
|
||||||
|
xlabel=VADD,
|
||||||
|
bar width=3pt,
|
||||||
|
% width=3cm,
|
||||||
|
]
|
||||||
|
|
||||||
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={vadd}]{\hbmarm};
|
||||||
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={vadd}]{\hbmpim};
|
||||||
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={vadd}]{\hbminf};
|
||||||
|
\addplot[fill=_green!90] table [x expr=\coordindex, y={vadd}]{\piminf};
|
||||||
|
\addplot[fill=_darkblue!90] table [x expr=\coordindex, y={vadd}]{\vega};
|
||||||
|
\addplot[fill=violet!90] table [x expr=\coordindex, y={vadd}]{\tesla};
|
||||||
|
|
||||||
|
\nextgroupplot[
|
||||||
|
xlabel=VMUL,
|
||||||
|
bar width=3pt,
|
||||||
|
% width=3cm,
|
||||||
|
]
|
||||||
|
|
||||||
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={vmul}]{\hbmarm};
|
||||||
|
\addlegendentry{Non-PIM ARM}
|
||||||
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={vmul}]{\hbmpim};
|
||||||
|
\addlegendentry{PIM ARM}
|
||||||
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={vmul}]{\hbminf};
|
||||||
|
\addlegendentry{Non-PIM Inf}
|
||||||
|
\addplot[fill=_green!90] table [x expr=\coordindex, y={vmul}]{\piminf};
|
||||||
|
\addlegendentry{PIM Inf}
|
||||||
|
\addplot[fill=_darkblue!90] table [x expr=\coordindex, y={vmul}]{\vega};
|
||||||
|
\addlegendentry{Vega}
|
||||||
|
\addplot[fill=violet!90] table [x expr=\coordindex, y={vmul}]{\tesla};
|
||||||
|
\addlegendentry{Tesla}
|
||||||
|
|
||||||
|
\nextgroupplot[
|
||||||
|
xlabel=HAXPY,
|
||||||
|
bar width=3pt,
|
||||||
|
% width=3cm,
|
||||||
|
]
|
||||||
|
|
||||||
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={haxpy}]{\hbmarm};
|
||||||
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={haxpy}]{\hbmpim};
|
||||||
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={haxpy}]{\hbminf};
|
||||||
|
\addplot[fill=_green!90] table [x expr=\coordindex, y={haxpy}]{\piminf};
|
||||||
|
\addplot[fill=_darkblue!90] table [x expr=\coordindex, y={haxpy}]{\vega};
|
||||||
|
\addplot[fill=violet!90] table [x expr=\coordindex, y={haxpy}]{\tesla};
|
||||||
|
|
||||||
|
\end{groupplot}
|
||||||
|
\end{tikzpicture}
|
||||||
5
src/plots/speedup_tables/matrix_100GHz.csv
Normal file
5
src/plots/speedup_tables/matrix_100GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,gemv,dnn
|
||||||
|
X1,8.316378361593825,0.3293391169376365
|
||||||
|
X2,8.707496426927674,2.520540889480061
|
||||||
|
X3,8.952627753954278,4.565606038073768
|
||||||
|
X4,9.178586247394538,5.717860064798073
|
||||||
|
5
src/plots/speedup_tables/matrix_3GHz.csv
Normal file
5
src/plots/speedup_tables/matrix_3GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,gemv,dnn
|
||||||
|
X1,52.86562483836241,5.894128466670185
|
||||||
|
X2,58.14176507187079,17.599027693570402
|
||||||
|
X3,61.79055586904123,36.290342473171975
|
||||||
|
X4,62.23855088820042,46.32048031346181
|
||||||
|
5
src/plots/speedup_tables/vector_100GHz.csv
Normal file
5
src/plots/speedup_tables/vector_100GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy
|
||||||
|
X1,12.912332482758615,10.706896577073085,17.57261802574388
|
||||||
|
X2,12.656964545133722,10.410011429377231,17.530374532261376
|
||||||
|
X3,12.857948841452387,10.179649930700249,17.28682620992881
|
||||||
|
X4,12.517518527941442,10.158700762676236,17.568276160961705
|
||||||
|
5
src/plots/speedup_tables/vector_3GHz.csv
Normal file
5
src/plots/speedup_tables/vector_3GHz.csv
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
level,vadd,vmul,haxpy
|
||||||
|
X1,14.459220593631812,13.63052633194372,23.727849658355645
|
||||||
|
X2,14.66245011706494,13.551652528382078,23.822458866951166
|
||||||
|
X3,14.484534634672588,13.652770583844921,23.665607976080565
|
||||||
|
X4,14.143299569834605,13.81101374775393,23.87690729103017
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
gemv,X1,100GHz,0.2108059965502083
|
|
||||||
gemv,X2,100GHz,0.40509080127411157
|
|
||||||
gemv,X3,100GHz,0.8462958338758609
|
|
||||||
gemv,X4,100GHz,4.7274497979448125
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
gemv,X1,3GHz,3.468782996825547
|
|
||||||
gemv,X2,3GHz,6.723879985176877
|
|
||||||
gemv,X3,3GHz,12.744110856471028
|
|
||||||
gemv,X4,3GHz,23.645526777997713
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
gemv_layers,X1,100GHz,0.17890250001597863
|
|
||||||
gemv_layers,X2,100GHz,0.6097840333112959
|
|
||||||
gemv_layers,X3,100GHz,3.9637284525723304
|
|
||||||
gemv_layers,X4,100GHz,6.088778065749799
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
gemv_layers,X1,3GHz,2.992752194063702
|
|
||||||
gemv_layers,X2,3GHz,11.246371082010572
|
|
||||||
gemv_layers,X3,3GHz,34.94598413478715
|
|
||||||
gemv_layers,X4,3GHz,72.33604077371677
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
haxpy,X1,100GHz,2.0481358611246403
|
|
||||||
haxpy,X2,100GHz,2.3234133539462776
|
|
||||||
haxpy,X3,100GHz,2.272582592673281
|
|
||||||
haxpy,X4,100GHz,2.3895030032424387
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
haxpy,X1,3GHz,19.816741597088416
|
|
||||||
haxpy,X2,3GHz,25.395400082633245
|
|
||||||
haxpy,X3,3GHz,28.676005064893953
|
|
||||||
haxpy,X4,3GHz,31.783592582828017
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
vadd,X1,100GHz,2.398047786583271
|
|
||||||
vadd,X2,100GHz,1.823243660465808
|
|
||||||
vadd,X3,100GHz,1.562017010059411
|
|
||||||
vadd,X4,100GHz,1.7888939829610704
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
vadd,X1,3GHz,12.766775777414075
|
|
||||||
vadd,X2,3GHz,14.19338061465721
|
|
||||||
vadd,X3,3GHz,15.313227057302887
|
|
||||||
vadd,X4,3GHz,16.430379164365913
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
vmul,X1,100GHz,2.4019901321627604
|
|
||||||
vmul,X2,100GHz,2.2189241847884267
|
|
||||||
vmul,X3,100GHz,1.86705278821741
|
|
||||||
vmul,X4,100GHz,1.7484391189395834
|
|
||||||
|
@@ -1,5 +0,0 @@
|
|||||||
workload,level,frequency,speedup
|
|
||||||
vmul,X1,3GHz,14.157521157521158
|
|
||||||
vmul,X2,3GHz,15.915413533834586
|
|
||||||
vmul,X3,3GHz,16.959713823354058
|
|
||||||
vmul,X4,3GHz,18.215465292791755
|
|
||||||
|
@@ -1,20 +1,16 @@
|
|||||||
\begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/vadd_100GHz.csv}\vadd
|
\pgfplotstableread[col sep=comma]{plots/speedup_tables/vector_100GHz.csv}\csv
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/vmul_100GHz.csv}\vmul
|
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/haxpy_100GHz.csv}\haxpy
|
|
||||||
\begin{axis}[
|
\begin{axis}[
|
||||||
width=0.8\textwidth,
|
width=0.8\textwidth,
|
||||||
ybar=1pt,
|
ybar=1pt,
|
||||||
bar width = 15pt,
|
bar width = 15pt,
|
||||||
ymin=0,
|
ymin=0,
|
||||||
ymax=5,
|
ymax=25,
|
||||||
% ymode=log,
|
|
||||||
% log origin=infty,
|
|
||||||
ymajorgrids,
|
ymajorgrids,
|
||||||
ylabel={Relative Performance},
|
ylabel={Relative Performance},
|
||||||
tick pos=left,
|
tick pos=left,
|
||||||
xtick=data,
|
xtick=data,
|
||||||
xticklabels from table={\vadd}{level},
|
xticklabels from table={\csv}{level},
|
||||||
enlarge x limits=0.25,
|
enlarge x limits=0.25,
|
||||||
legend style={
|
legend style={
|
||||||
at={(current bounding box.south-|current axis.south)},
|
at={(current bounding box.south-|current axis.south)},
|
||||||
@@ -24,13 +20,13 @@
|
|||||||
/tikz/every even column/.append style={column sep=0.5cm}
|
/tikz/every even column/.append style={column sep=0.5cm}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
\addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\vadd};
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={vadd}]{\csv};
|
||||||
\addlegendentry{VADD}
|
\addlegendentry{VADD}
|
||||||
|
|
||||||
\addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\vmul};
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={vmul}]{\csv};
|
||||||
\addlegendentry{VMUL}
|
\addlegendentry{VMUL}
|
||||||
|
|
||||||
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={speedup}]{\haxpy};
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={haxpy}]{\csv};
|
||||||
\addlegendentry{HAXPY}
|
\addlegendentry{HAXPY}
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
|
|||||||
@@ -1,21 +1,16 @@
|
|||||||
\begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/vadd_3GHz.csv}\vadd
|
\pgfplotstableread[col sep=comma]{plots/speedup_tables/vector_3GHz.csv}\csv
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/vmul_3GHz.csv}\vmul
|
|
||||||
\pgfplotstableread[col sep=comma]{plots/tables/haxpy_3GHz.csv}\haxpy
|
|
||||||
\begin{axis}[
|
\begin{axis}[
|
||||||
width=0.8\textwidth,
|
width=0.8\textwidth,
|
||||||
ybar=1pt,
|
ybar=1pt,
|
||||||
bar width = 15pt,
|
bar width = 15pt,
|
||||||
ymin=0,
|
ymin=0,
|
||||||
ymax=35,
|
ymax=25,
|
||||||
% ymode=log,
|
|
||||||
% log origin=infty,
|
|
||||||
% minor y tick num = 5,
|
|
||||||
ymajorgrids,
|
ymajorgrids,
|
||||||
ylabel={Relative Performance},
|
ylabel={Relative Performance},
|
||||||
tick pos=left,
|
tick pos=left,
|
||||||
xtick=data,
|
xtick=data,
|
||||||
xticklabels from table={\vadd}{level},
|
xticklabels from table={\csv}{level},
|
||||||
enlarge x limits=0.25,
|
enlarge x limits=0.25,
|
||||||
legend style={
|
legend style={
|
||||||
at={(current bounding box.south-|current axis.south)},
|
at={(current bounding box.south-|current axis.south)},
|
||||||
@@ -25,13 +20,13 @@
|
|||||||
/tikz/every even column/.append style={column sep=0.5cm}
|
/tikz/every even column/.append style={column sep=0.5cm}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
\addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\vadd};
|
\addplot[fill=_blue!90] table [x expr=\coordindex, y={vadd}]{\csv};
|
||||||
\addlegendentry{VADD}
|
\addlegendentry{VADD}
|
||||||
|
|
||||||
\addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\vmul};
|
\addplot[fill=_orange!90] table [x expr=\coordindex, y={vmul}]{\csv};
|
||||||
\addlegendentry{VMUL}
|
\addlegendentry{VMUL}
|
||||||
|
|
||||||
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={speedup}]{\haxpy};
|
\addplot[fill=_yellow!90] table [x expr=\coordindex, y={haxpy}]{\csv};
|
||||||
\addlegendentry{HAXPY}
|
\addlegendentry{HAXPY}
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
|
|||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&96465760&457604440 \\
|
|
||||||
X2&192178090&474407440 \\
|
|
||||||
X3&430015980&508115440 \\
|
|
||||||
X4&2720535980&575476440 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&2734886043&788428116 \\
|
|
||||||
X2&5462015184&812330856 \\
|
|
||||||
X3&10958710653&859903902 \\
|
|
||||||
X4&22594486896&955550160 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&8957090&50066880 \\
|
|
||||||
X2&33647200&55178880 \\
|
|
||||||
X3&299035090&75442880 \\
|
|
||||||
X4&951182090&156218880 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&262765971&87800778 \\
|
|
||||||
X2&1070441487&95181057 \\
|
|
||||||
X3&4332005991&123962913 \\
|
|
||||||
X4&17236314765&238281147 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&2374720&1088170 \\
|
|
||||||
X2&3677220&1604170 \\
|
|
||||||
X3&5875080&2708170 \\
|
|
||||||
X4&11640050&4703250 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&30413223&1555443 \\
|
|
||||||
X2&59499108&2417913 \\
|
|
||||||
X3&117506376&4207788 \\
|
|
||||||
X4&234943821&7578414 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
21
src/tables/simulations_100GHz.csv
Normal file
21
src/tables/simulations_100GHz.csv
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
workload,level,hbm,pim
|
||||||
|
VADD,X1,11768899990,455723240
|
||||||
|
VADD,X2,23071196990,911403240
|
||||||
|
VADD,X3,46873992980,1822763240
|
||||||
|
VADD,X4,91264808000,3645483240
|
||||||
|
VMUL,X1,9758441990,455708240
|
||||||
|
VMUL,X2,18975123990,911388240
|
||||||
|
VMUL,X3,37109877990,1822748240
|
||||||
|
VMUL,X4,74066441980,3645468240
|
||||||
|
HAXPY,X1,16772264000,477227240
|
||||||
|
HAXPY,X2,33462372990,954411240
|
||||||
|
HAXPY,X3,65993469990,1908779240
|
||||||
|
HAXPY,X4,134134323970,3817515240
|
||||||
|
GEMV,X1,7916400980,475952430
|
||||||
|
GEMV,X2,15800020980,907265430
|
||||||
|
GEMV,X3,62587326980,3495472430
|
||||||
|
GEMV,X4,127514526980,6946305430
|
||||||
|
DNN,X1,176584310,268088880
|
||||||
|
DNN,X2,1860990350,369164880
|
||||||
|
DNN,X3,7063630630,773569880
|
||||||
|
DNN,X4,27344749530,2391169880
|
||||||
|
21
src/tables/simulations_3GHz.csv
Normal file
21
src/tables/simulations_3GHz.csv
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
workload,level,hbm,pim
|
||||||
|
VADD,X1,21334729581,737755173
|
||||||
|
VADD,X2,43268334354,1475481042
|
||||||
|
VADD,X3,85485583179,2950926120
|
||||||
|
VADD,X4,166942414809,5901819939
|
||||||
|
VMUL,X1,20112009858,737756172
|
||||||
|
VMUL,X2,39990439863,1475482041
|
||||||
|
VMUL,X3,80576580096,2950924122
|
||||||
|
VMUL,X4,163020260223,5901820938
|
||||||
|
HAXPY,X1,36613117899,771522039
|
||||||
|
HAXPY,X2,73515923487,1542996126
|
||||||
|
HAXPY,X3,146061622170,3085946964
|
||||||
|
HAXPY,X4,294729236073,6171846975
|
||||||
|
GEMV,X1,72834815610,688867443
|
||||||
|
GEMV,X2,151235040573,1300571460
|
||||||
|
GEMV,X3,614362203486,4971327696
|
||||||
|
GEMV,X4,1228045754304,9865635822
|
||||||
|
DNN,X1,5504078079,466911954
|
||||||
|
DNN,X2,21478024143,610204851
|
||||||
|
DNN,X3,85912073262,1183676805
|
||||||
|
DNN,X4,322188095061,3477814704
|
||||||
|
21
src/tables/torch.csv
Normal file
21
src/tables/torch.csv
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
workload,level,vega,tesla
|
||||||
|
VADD,X1,69572650,TODO
|
||||||
|
VADD,X2,123217536,TODO
|
||||||
|
VADD,X3,207693503,TODO
|
||||||
|
VADD,X4,378089165,TODO
|
||||||
|
VMUL,X1,67408281,TODO
|
||||||
|
VMUL,X2,103994272,TODO
|
||||||
|
VMUL,X3,182162140,TODO
|
||||||
|
VMUL,X4,350280326,TODO
|
||||||
|
HAXPY,X1,69791189,TODO
|
||||||
|
HAXPY,X2,123543145,TODO
|
||||||
|
HAXPY,X3,207947543,TODO
|
||||||
|
HAXPY,X4,377434890,TODO
|
||||||
|
GEMV,X1,750246152,TODO
|
||||||
|
GEMV,X2,648714601,TODO
|
||||||
|
GEMV,X3,2454455479,TODO
|
||||||
|
GEMV,X4,4968984949,TODO
|
||||||
|
DNN,X1,231093065,TODO
|
||||||
|
DNN,X2,431703456,TODO
|
||||||
|
DNN,X3,877622611,TODO
|
||||||
|
DNN,X4,2175751385,TODO
|
||||||
|
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&2024360&844170 \\
|
|
||||||
X2&2574730&1412170 \\
|
|
||||||
X3&3899060&2496170 \\
|
|
||||||
X4&8037930&4493240 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&15585399&1220778 \\
|
|
||||||
X2&29988981&2112885 \\
|
|
||||||
X3&59177430&3864465 \\
|
|
||||||
X4&118902645&7236756 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&2030090&845170 \\
|
|
||||||
X2&3109090&1401170 \\
|
|
||||||
X3&4654880&2493170 \\
|
|
||||||
X4&7829930&4478240 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
\begin{tblr}{
|
|
||||||
hlines,
|
|
||||||
vlines,
|
|
||||||
cell{2}{2} = {r},
|
|
||||||
cell{2}{3} = {r},
|
|
||||||
cell{3}{2} = {r},
|
|
||||||
cell{3}{3} = {r},
|
|
||||||
cell{4}{2} = {r},
|
|
||||||
cell{4}{3} = {r},
|
|
||||||
cell{5}{2} = {r},
|
|
||||||
cell{5}{3} = {r},
|
|
||||||
hline{2} = {-}{solid,black},
|
|
||||||
hline{2} = {2}{-}{solid,black},
|
|
||||||
}
|
|
||||||
Level & Non-\ac{pim} & \ac{pim} \\
|
|
||||||
X1&17269047&1219779 \\
|
|
||||||
X2&33834132&2125872 \\
|
|
||||||
X3&66308292&3909753 \\
|
|
||||||
X4&131863338&7239087 \\
|
|
||||||
|
|
||||||
\end{tblr}
|
|
||||||
Reference in New Issue
Block a user