This commit is contained in:
2024-03-01 23:22:30 +01:00
parent 30db51a8de
commit 494662da66
12 changed files with 439 additions and 1 deletions

View File

@@ -2,8 +2,226 @@
\label{sec:appendix} \label{sec:appendix}
\subsection{Simulation Results} \subsection{Simulation Results}
\begin{table}[!ht]
\centering
\input{tables/vadd_3GHz}
\caption{Runtime of the VADD benchmark in $\unit{\pico\second}$ on the generic ARM system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/vmul_3GHz}
\caption{Runtime of the VMUL benchmark in $\unit{\pico\second}$ on the generic ARM system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/haxpy_3GHz}
\caption{Runtime of the \ac{haxpy} benchmark in $\unit{\pico\second}$ on the generic ARM system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/gemv_3GHz}
\caption{Runtime of the \ac{gemv} benchmark in $\unit{\pico\second}$ on the generic ARM system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/gemv_layers_3GHz}
\caption{Runtime of the \ac{dnn} benchmark in $\unit{\pico\second}$ on the generic ARM system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/vadd_100GHz}
\caption{Runtime of the VADD benchmark in $\unit{\pico\second}$ on the infinite compute system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/vmul_100GHz}
\caption{Runtime of the VMUL benchmark in $\unit{\pico\second}$ on the infinite compute system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/haxpy_100GHz}
\caption{Runtime of the \ac{haxpy} benchmark in $\unit{\pico\second}$ on the infinite compute system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/gemv_100GHz}
\caption{Runtime of the \ac{gemv} benchmark in $\unit{\pico\second}$ on the infinite compute system.}
\end{table}
\begin{table}[!ht]
\centering
\input{tables/gemv_layers_100GHz}
\caption{Runtime of the \ac{dnn} benchmark in $\unit{\pico\second}$ on the infinite compute system.}
\end{table}
\subsection{Microkernels} \subsection{Microkernels}
\begin{listing}[!ht]
\begin{verbatim}
MOV GRF_A #0, BANK
MOV GRF_A #1, BANK
MOV GRF_A #2, BANK
MOV GRF_A #3, BANK
MOV GRF_A #4, BANK
MOV GRF_A #5, BANK
MOV GRF_A #6, BANK
MOV GRF_A #7, BANK
ADD GRF_B #0, BANK, GRF_A #0
ADD GRF_B #1, BANK, GRF_A #1
ADD GRF_B #2, BANK, GRF_A #2
ADD GRF_B #3, BANK, GRF_A #3
ADD GRF_B #4, BANK, GRF_A #4
ADD GRF_B #5, BANK, GRF_A #5
ADD GRF_B #6, BANK, GRF_A #6
ADD GRF_B #7, BANK, GRF_A #7
FILL BANK, GRF_B #0
FILL BANK, GRF_B #1
FILL BANK, GRF_B #2
FILL BANK, GRF_B #3
FILL BANK, GRF_B #4
FILL BANK, GRF_B #5
FILL BANK, GRF_B #6
FILL BANK, GRF_B #7
EXIT
\end{verbatim}
\caption{The microkernel used in the VADD benchmark.}
\label{lst:vadd_bench}
\end{listing}
\begin{listing}[!ht]
\begin{verbatim}
MOV GRF_A #0, BANK
MOV GRF_A #1, BANK
MOV GRF_A #2, BANK
MOV GRF_A #3, BANK
MOV GRF_A #4, BANK
MOV GRF_A #5, BANK
MOV GRF_A #6, BANK
MOV GRF_A #7, BANK
MUL GRF_B #0, BANK, GRF_A #0
MUL GRF_B #1, BANK, GRF_A #1
MUL GRF_B #2, BANK, GRF_A #2
MUL GRF_B #3, BANK, GRF_A #3
MUL GRF_B #4, BANK, GRF_A #4
MUL GRF_B #5, BANK, GRF_A #5
MUL GRF_B #6, BANK, GRF_A #6
MUL GRF_B #7, BANK, GRF_A #7
FILL BANK, GRF_B #0
FILL BANK, GRF_B #1
FILL BANK, GRF_B #2
FILL BANK, GRF_B #3
FILL BANK, GRF_B #4
FILL BANK, GRF_B #5
FILL BANK, GRF_B #6
FILL BANK, GRF_B #7
EXIT
\end{verbatim}
\caption{The microkernel used in the VMUL benchmark.}
\label{lst:vmul_bench}
\end{listing}
\begin{listing}[!ht]
\begin{verbatim}
MOV SRF_M #0, BANK
MOV GRF_A #0, BANK
MOV GRF_A #1, BANK
MOV GRF_A #2, BANK
MOV GRF_A #3, BANK
MOV GRF_A #4, BANK
MOV GRF_A #5, BANK
MOV GRF_A #6, BANK
MOV GRF_A #7, BANK
MAD GRF_B #0, BANK, SRF_M #0, GRF_A #0
MAD GRF_B #1, BANK, SRF_M #0, GRF_A #1
MAD GRF_B #2, BANK, SRF_M #0, GRF_A #2
MAD GRF_B #3, BANK, SRF_M #0, GRF_A #3
MAD GRF_B #4, BANK, SRF_M #0, GRF_A #4
MAD GRF_B #5, BANK, SRF_M #0, GRF_A #5
MAD GRF_B #6, BANK, SRF_M #0, GRF_A #6
MAD GRF_B #7, BANK, SRF_M #0, GRF_A #7
FILL BANK, GRF_B #0
FILL BANK, GRF_B #1
FILL BANK, GRF_B #2
FILL BANK, GRF_B #3
FILL BANK, GRF_B #4
FILL BANK, GRF_B #5
FILL BANK, GRF_B #6
FILL BANK, GRF_B #7
EXIT
\end{verbatim}
\caption{The microkernel used in the \ac{haxpy} benchmark.}
\label{lst:haxpy_bench}
\end{listing}
\begin{listing}[!ht]
\begin{verbatim}
MOV GRF_A #0, BANK
MOV GRF_A #1, BANK
MOV GRF_A #2, BANK
MOV GRF_A #3, BANK
MOV GRF_A #4, BANK
MOV GRF_A #5, BANK
MOV GRF_A #6, BANK
MOV GRF_A #7, BANK
MAC(AAM) GRF_B, BANK, GRF_A
JUMP -1, 63
FILL BANK, GRF_B #0
FILL BANK, GRF_B #1
FILL BANK, GRF_B #2
FILL BANK, GRF_B #3
FILL BANK, GRF_B #4
FILL BANK, GRF_B #5
FILL BANK, GRF_B #6
FILL BANK, GRF_B #7
EXIT
\end{verbatim}
\caption{The microkernel used in the \ac{gemv} and \ac{dnn} benchmarks.}
\label{lst:gemv_bench}
\end{listing}
\subsection{Source Code} \subsection{Source Code}
\begin{listing}[!ht]
\begin{minted}{rust}
/// Drives one run of the GEMV kernel from the host processor.
///
/// The body contains no arithmetic of its own; it only issues accesses on the
/// given operands, in this fixed order:
///
/// 1. `execute_read` on every chunk of `input_vector`,
/// 2. `execute_read_async` on every block in the first row of each
///    sub-matrix of `matrix`,
/// 3. a `dsb(SY)` barrier,
/// 4. `execute_write` on the selected chunks of `output_partial_sum_vector`,
/// 5. one final `execute_read` on `dummy`.
///
/// NOTE(review): this order presumably mirrors the MOV / MAC / FILL / EXIT
/// phases of the GEMV microkernel -- confirm before reordering anything.
///
/// # Parameters
/// * `matrix` - its `.0` field is iterated as a sequence of sub-matrices.
/// * `input_vector` - its `.0` field is iterated as a sequence of chunks.
/// * `output_partial_sum_vector` - chunks of it are accessed via `execute_write`.
/// * `dummy` - any `PimOperand`; read exactly once at the very end.
///
/// NOTE(review): `X16R` / `X16C` look like row / column counts in units of
/// 16-element blocks and `R` like the plain row count -- TODO confirm.
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
matrix: &Matrix<X16R, X16C>,
input_vector: &Vector<X16C>,
output_partial_sum_vector: &mut SVector<F16x16, R>,
dummy: &impl PimOperand,
) {
// Phase 1: read every chunk of the input vector.
for chunk in input_vector.0.iter() {
chunk.execute_read();
}
// Phase 2: for each sub-matrix, read only the blocks of its first row
// (`fixed_rows::<1>(0)`), using the asynchronous read variant.
for sub_matrix in matrix.0.iter() {
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
column_block.execute_read_async();
}
}
// Barrier between the asynchronous reads and the writes below.
// NOTE(review): presumably ensures the async reads have completed first -- confirm.
barrier::dsb(barrier::SY);
// Phase 3: write to `X16R` rows of the output, selected from row 0 with a
// step argument of 16.
for chunk in output_partial_sum_vector
.fixed_rows_with_step_mut::<X16R>(0, 16)
.iter_mut()
{
chunk.execute_write();
}
// Final read on the dummy operand.
// NOTE(review): presumably triggers the microkernel's EXIT -- confirm.
dummy.execute_read();
}
\end{minted}
\caption{The \ac{gemv} kernel execution code that runs on the host processor.}
\end{listing}
% some source code, % some source code,
% from the VM % from the VM
% some microkernels % some microkernels

View File

@@ -259,7 +259,7 @@ Since the Samsung \ac{fpga} platform can be assumed to be a highly optimized acc
\begin{figure} \begin{figure}
\centering \centering
\includegraphics[width=0.8\linewidth]{plots/samsung} \includegraphics[width=0.8\linewidth]{plots/samsung}
\caption{Relative performance of the \ac{gemv} and ADD microbenchmark for different batch sizes \cite{lee2021}.} \caption{Relative performance of the \ac{gemv} and ADD microbenchmarks for different batch sizes on Samsung's hardware implementation \cite{lee2021}.}
\label{fig:samsung_speedup} \label{fig:samsung_speedup}
\end{figure} \end{figure}

View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&96465760&457604440 \\
X2&192178090&474407440 \\
X3&430015980&508115440 \\
X4&2720535980&575476440 \\
\end{tblr}

22
src/tables/gemv_3GHz.tex Normal file
View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&2734886043&788428116 \\
X2&5462015184&812330856 \\
X3&10958710653&859903902 \\
X4&22594486896&955550160 \\
\end{tblr}

View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&8957090&50066880 \\
X2&33647200&55178880 \\
X3&299035090&75442880 \\
X4&951182090&156218880 \\
\end{tblr}

View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&262765971&87800778 \\
X2&1070441487&95181057 \\
X3&4332005991&123962913 \\
X4&17236314765&238281147 \\
\end{tblr}

View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&2374720&1088170 \\
X2&3677220&1604170 \\
X3&5875080&2708170 \\
X4&11640050&4703250 \\
\end{tblr}

22
src/tables/haxpy_3GHz.tex Normal file
View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&30413223&1555443 \\
X2&59499108&2417913 \\
X3&117506376&4207788 \\
X4&234943821&7578414 \\
\end{tblr}

View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&2024360&844170 \\
X2&2574730&1412170 \\
X3&3899060&2496170 \\
X4&8037930&4493240 \\
\end{tblr}

22
src/tables/vadd_3GHz.tex Normal file
View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&15585399&1220778 \\
X2&29988981&2112885 \\
X3&59177430&3864465 \\
X4&118902645&7236756 \\
\end{tblr}

View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&2030090&845170 \\
X2&3109090&1401170 \\
X3&4654880&2493170 \\
X4&7829930&4478240 \\
\end{tblr}

22
src/tables/vmul_3GHz.tex Normal file
View File

@@ -0,0 +1,22 @@
\begin{tblr}{
hlines,
vlines,
cell{2}{2} = {r},
cell{2}{3} = {r},
cell{3}{2} = {r},
cell{3}{3} = {r},
cell{4}{2} = {r},
cell{4}{3} = {r},
cell{5}{2} = {r},
cell{5}{3} = {r},
hline{2} = {-}{solid,black},
hline{2} = {2}{-}{solid,black},
}
Level & Non-\ac{pim} & \ac{pim} \\
X1&17269047&1219779 \\
X2&33834132&2125872 \\
X3&66308292&3909753 \\
X4&131863338&7239087 \\
\end{tblr}