From 123c7e0b25d09d5da270a3e0bdcb6c78a3c24264 Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Wed, 6 Mar 2024 23:26:38 +0100 Subject: [PATCH] Real hardware plots --- src/appendix.tex | 162 +++++++++++++-------- src/chapters/results.tex | 23 ++- src/index.tex | 15 +- src/plots/matrix_infinite.tex | 13 +- src/plots/matrix_normal.tex | 14 +- src/plots/runtime_tables/hbm_100GHz.csv | 5 + src/plots/runtime_tables/hbm_3GHz.csv | 5 + src/plots/runtime_tables/pim_100GHz.csv | 5 + src/plots/runtime_tables/pim_3GHz.csv | 5 + src/plots/runtime_tables/tesla.csv | 5 + src/plots/runtime_tables/vega.csv | 5 + src/plots/runtimes_matrix.tex | 74 ++++++++++ src/plots/runtimes_vector.tex | 87 +++++++++++ src/plots/speedup_tables/matrix_100GHz.csv | 5 + src/plots/speedup_tables/matrix_3GHz.csv | 5 + src/plots/speedup_tables/vector_100GHz.csv | 5 + src/plots/speedup_tables/vector_3GHz.csv | 5 + src/plots/tables/gemv_100GHz.csv | 5 - src/plots/tables/gemv_3GHz.csv | 5 - src/plots/tables/gemv_layers_100GHz.csv | 5 - src/plots/tables/gemv_layers_3GHz.csv | 5 - src/plots/tables/haxpy_100GHz.csv | 5 - src/plots/tables/haxpy_3GHz.csv | 5 - src/plots/tables/vadd_100GHz.csv | 5 - src/plots/tables/vadd_3GHz.csv | 5 - src/plots/tables/vmul_100GHz.csv | 5 - src/plots/tables/vmul_3GHz.csv | 5 - src/plots/vector_infinite.tex | 16 +- src/plots/vector_normal.tex | 17 +-- src/tables/gemv_100GHz.tex | 22 --- src/tables/gemv_3GHz.tex | 22 --- src/tables/gemv_layers_100GHz.tex | 22 --- src/tables/gemv_layers_3GHz.tex | 22 --- src/tables/haxpy_100GHz.tex | 22 --- src/tables/haxpy_3GHz.tex | 22 --- src/tables/simulations_100GHz.csv | 21 +++ src/tables/simulations_3GHz.csv | 21 +++ src/tables/torch.csv | 21 +++ src/tables/vadd_100GHz.tex | 22 --- src/tables/vadd_3GHz.tex | 22 --- src/tables/vmul_100GHz.tex | 22 --- src/tables/vmul_3GHz.tex | 22 --- 42 files changed, 429 insertions(+), 375 deletions(-) create mode 100644 src/plots/runtime_tables/hbm_100GHz.csv create mode 100644 
src/plots/runtime_tables/hbm_3GHz.csv create mode 100644 src/plots/runtime_tables/pim_100GHz.csv create mode 100644 src/plots/runtime_tables/pim_3GHz.csv create mode 100644 src/plots/runtime_tables/tesla.csv create mode 100644 src/plots/runtime_tables/vega.csv create mode 100644 src/plots/runtimes_matrix.tex create mode 100644 src/plots/runtimes_vector.tex create mode 100644 src/plots/speedup_tables/matrix_100GHz.csv create mode 100644 src/plots/speedup_tables/matrix_3GHz.csv create mode 100644 src/plots/speedup_tables/vector_100GHz.csv create mode 100644 src/plots/speedup_tables/vector_3GHz.csv delete mode 100644 src/plots/tables/gemv_100GHz.csv delete mode 100644 src/plots/tables/gemv_3GHz.csv delete mode 100644 src/plots/tables/gemv_layers_100GHz.csv delete mode 100644 src/plots/tables/gemv_layers_3GHz.csv delete mode 100644 src/plots/tables/haxpy_100GHz.csv delete mode 100644 src/plots/tables/haxpy_3GHz.csv delete mode 100644 src/plots/tables/vadd_100GHz.csv delete mode 100644 src/plots/tables/vadd_3GHz.csv delete mode 100644 src/plots/tables/vmul_100GHz.csv delete mode 100644 src/plots/tables/vmul_3GHz.csv delete mode 100644 src/tables/gemv_100GHz.tex delete mode 100644 src/tables/gemv_3GHz.tex delete mode 100644 src/tables/gemv_layers_100GHz.tex delete mode 100644 src/tables/gemv_layers_3GHz.tex delete mode 100644 src/tables/haxpy_100GHz.tex delete mode 100644 src/tables/haxpy_3GHz.tex create mode 100644 src/tables/simulations_100GHz.csv create mode 100644 src/tables/simulations_3GHz.csv create mode 100644 src/tables/torch.csv delete mode 100644 src/tables/vadd_100GHz.tex delete mode 100644 src/tables/vadd_3GHz.tex delete mode 100644 src/tables/vmul_100GHz.tex delete mode 100644 src/tables/vmul_3GHz.tex diff --git a/src/appendix.tex b/src/appendix.tex index 2226f82..def883c 100644 --- a/src/appendix.tex +++ b/src/appendix.tex @@ -3,69 +3,105 @@ \subsection{Simulation Results} -\begin{table}[!ht] -\centering -\input{tables/vadd_3GHz} -\caption{Runtime of the 
VADD benchmark in $\unit{\pico\second}$ on the generic ARM system.} + TODO: update again! + +\begin{table}[H] +\csvreader[ + head to column names, + centered tabularray = { + hlines, + vlines, + hline{3} = {2}{-}{}, + column{1,2} = {c}, + column{3} = {r}, + column{4} = {r}, + row{1,2} = {c}, + cell{1}{1} = {r=2}{}, + cell{1}{2} = {r=2}{}, + cell{1}{3} = {c=2}{}, + cell{1}{5} = {c=2}{}, + cell{3}{1} = {r=4}{}, + cell{7}{1} = {r=4}{}, + cell{11}{1} = {r=4}{}, + cell{15}{1} = {r=4}{}, + cell{19}{1} = {r=4}{}, + }, + table head = { + Workload & Level & Runtime [$\unit{\pico\second}$] & \\ + & & Non-PIM & PIM \\ + } + ]{tables/simulations_3GHz.csv}{}{ + \csvexpval\workload & \csvexpval\level & \csvexpval\hbm & \csvexpval\pim + } +\caption{Runtimes of the microbenchmarks on the generic ARM-based system for non-\acs*{pim} and \acs*{pim} mode.} \end{table} -\begin{table}[!ht] -\centering -\input{tables/vmul_3GHz} -\caption{Runtime of the VMUL benchmark in $\unit{\pico\second}$ on the generic ARM system.} +\begin{table}[H] +\csvreader[ + head to column names, + centered tabularray = { + hlines, + vlines, + hline{3} = {2}{-}{}, + column{1,2} = {c}, + column{3} = {r}, + column{4} = {r}, + row{1,2} = {c}, + cell{1}{1} = {r=2}{}, + cell{1}{2} = {r=2}{}, + cell{1}{3} = {c=2}{}, + cell{1}{5} = {c=2}{}, + cell{3}{1} = {r=4}{}, + cell{7}{1} = {r=4}{}, + cell{11}{1} = {r=4}{}, + cell{15}{1} = {r=4}{}, + cell{19}{1} = {r=4}{}, + }, + table head = { + Workload & Level & Runtime [$\unit{\pico\second}$] & \\ + & & Non-PIM & PIM \\ + } + ]{tables/simulations_100GHz.csv}{}{ + \csvexpval\workload & \csvexpval\level & \csvexpval\hbm & \csvexpval\pim + } +\caption{Runtimes of the microbenchmarks on the infinite compute system for non-\acs*{pim} and \acs*{pim} mode.} \end{table} -\begin{table}[!ht] -\centering -\input{tables/haxpy_3GHz} -\caption{Runtime of the \ac{haxpy} benchmark in $\unit{\pico\second}$ on the generic ARM system.} -\end{table} - -\begin{table}[!ht] -\centering 
-\input{tables/gemv_3GHz} -\caption{Runtime of the \ac{gemv} benchmark in $\unit{\pico\second}$ on the generic ARM system.} -\end{table} - -\begin{table}[!ht] -\centering -\input{tables/gemv_layers_3GHz} -\caption{Runtime of the \ac{dnn} benchmark in $\unit{\pico\second}$ on the generic ARM system.} -\end{table} - -\begin{table}[!ht] -\centering -\input{tables/vadd_100GHz} -\caption{Runtime of the VADD benchmark in $\unit{\pico\second}$ on the infinite compute system.} -\end{table} - -\begin{table}[!ht] -\centering -\input{tables/vmul_100GHz} -\caption{Runtime of the VMUL benchmark in $\unit{\pico\second}$ on the infinite compute system.} -\end{table} - -\begin{table}[!ht] -\centering -\input{tables/haxpy_100GHz} -\caption{Runtime of the \ac{haxpy} benchmark in $\unit{\pico\second}$ on the infinite compute system.} -\end{table} - -\begin{table}[!ht] -\centering -\input{tables/gemv_100GHz} -\caption{Runtime of the \ac{gemv} benchmark in $\unit{\pico\second}$ on the infinite compute system.} -\end{table} - -\begin{table}[!ht] -\centering -\input{tables/gemv_layers_100GHz} -\caption{Runtime of the \ac{dnn} benchmark in $\unit{\pico\second}$ on the infinite compute system.} +\begin{table}[H] +\csvreader[ + head to column names, + centered tabularray = { + hlines, + vlines, + hline{3} = {2}{-}{}, + column{1,2} = {c}, + column{3} = {r}, + column{4} = {r}, + row{1,2} = {c}, + cell{1}{1} = {r=2}{}, + cell{1}{2} = {r=2}{}, + cell{1}{3} = {c=2}{}, + cell{1}{5} = {c=2}{}, + cell{3}{1} = {r=4}{}, + cell{7}{1} = {r=4}{}, + cell{11}{1} = {r=4}{}, + cell{15}{1} = {r=4}{}, + cell{19}{1} = {r=4}{}, + }, + table head = { + Workload & Level & Runtime [$\unit{\pico\second}$] & \\ + & & Vega & Tesla \\ + } + ]{tables/torch.csv}{}{ + \csvexpval\workload & \csvexpval\level & \csvexpval\vega & \csvexpval\tesla + } +\caption{Runtimes of the microbenchmarks on the different \acs*{gpu} platforms.} \end{table} +\newpage \subsection{Microkernels} -\begin{listing}[!ht] +\begin{listing}[H] 
\begin{verbatim} MOV GRF_A #0, BANK MOV GRF_A #1, BANK @@ -97,7 +133,7 @@ EXIT \label{lst:vadd_bench} \end{listing} -\begin{listing}[!ht] +\begin{listing}[H] \begin{verbatim} MOV GRF_A #0, BANK MOV GRF_A #1, BANK @@ -129,7 +165,7 @@ EXIT \label{lst:vmul_bench} \end{listing} -\begin{listing}[!ht] +\begin{listing}[H] \begin{verbatim} MOV SRF_M #0, BANK MOV GRF_A #0, BANK @@ -162,7 +198,7 @@ EXIT \label{lst:haxpy_bench} \end{listing} -\begin{listing}[!ht] +\begin{listing}[H] \begin{verbatim} MOV GRF_A #0, BANK MOV GRF_A #1, BANK @@ -188,28 +224,33 @@ EXIT \label{lst:gemv_bench} \end{listing} +\newpage \subsection{Source Code} -\begin{listing}[!ht] -\begin{minted}{rust} +\begin{listing}[H] +\begin{minted}[linenos]{rust} pub fn execute( matrix: &Matrix, input_vector: &Vector, output_partial_sum_vector: &mut SVector, dummy: &impl PimOperand, ) { + // Load input vector into GRF-A registers for chunk in input_vector.0.iter() { chunk.execute_read(); } + // Execute the MAC instructions without memory barriers for sub_matrix in matrix.0.iter() { for column_block in sub_matrix.fixed_rows::<1>(0).iter() { column_block.execute_read_async(); } } + // Verify all memory accesses have finished barrier::dsb(barrier::SY); + // Copy the partial sums into the bank for chunk in output_partial_sum_vector .fixed_rows_with_step_mut::(0, 16) .iter_mut() @@ -217,6 +258,7 @@ pub fn execute( chunk.execute_write(); } + // Execute the EXIT instruction dummy.execute_read(); } \end{minted} diff --git a/src/chapters/results.tex b/src/chapters/results.tex index 63b2fe4..66ebd9a 100644 --- a/src/chapters/results.tex +++ b/src/chapters/results.tex @@ -99,6 +99,7 @@ The levels X1-X4 denote the increasing dimensions, with each successive level do \begin{table} \centering \begin{tblr}{ + column{1} = {c}, cell{2}{2} = {r}, cell{3}{2} = {r}, cell{4}{2} = {r}, @@ -168,6 +169,7 @@ Again, several different dimensions of the benchmark inputs are used, whose matr \begin{table} \centering \begin{tblr}{ + 
column{1} = {c}, cell{2}{2} = {r}, cell{3}{2} = {r}, cell{4}{2} = {r}, @@ -259,7 +261,7 @@ Since the Samsung \ac{fpga} platform can be assumed to be a highly optimized acc \begin{figure} \centering \includegraphics[width=0.8\linewidth]{plots/samsung} - \caption{Relative performance of the \ac{gemv} and ADD microbenchmark for different batch sizes on the hardware implementation of Samsung \cite{lee2021}.} + \caption[Relative performance of the \ac{gemv} and ADD microbenchmarks for different batch sizes on the hardware implementation of Samsung.]{Relative performance of the \ac{gemv} and ADD microbenchmarks for different batch sizes on the hardware implementation of Samsung \cite{lee2021}.} \label{fig:samsung_speedup} \end{figure} @@ -269,10 +271,29 @@ The \ac{gemv} microbenchmark on the other hand shows a more drastic speedup with Although the dimensions used by Samsung are different from the simulations of this thesis, the highest achieved speedup of $\qty{6.1}{\times}$ is well within the reach of the real hardware implementation. \subsubsection{Comparison to Real Hardware} +TODO: check all ranges In addition to the comparison of Samsung's real hardware implementation, the same benchmarks of the performed simulations are run on a [...] with HBM2 [...]. As this system is using a generic \aca{hbm} \ac{dram} and not \aca{fimdram}, the measurements are only intended to serve as a vague estimation of the runtimes in a non-\ac{pim} case. 
+\begin{figure} + \centering + \resizebox{\linewidth}{!}{% + \input{plots/runtimes_vector} + } + \caption{} + \label{fig:runtimes_vector} +\end{figure} + +\begin{figure} + \centering + % \resizebox{\linewidth}{!}{% + \input{plots/runtimes_matrix} + % } + \caption{} + \label{fig:runtimes_matrix} +\end{figure} + % \subsubsection{Initialization Overhead} % conversion der operanden im verhältnis zur laufzeit abschätzen diff --git a/src/index.tex b/src/index.tex index f25fdd1..288973b 100644 --- a/src/index.tex +++ b/src/index.tex @@ -28,12 +28,15 @@ \usepackage{makecell} \usepackage{minted} \usepackage{lscape} +\usepackage{float} +\usepackage[l3]{csvsimple} % Configurations \usetikzlibrary{matrix} \usetikzlibrary{automata} \usetikzlibrary{fit} \usetikzlibrary{positioning} +\usepgfplotslibrary[groupplots] \addbibresource{doc.bib} @@ -82,13 +85,13 @@ \setcounter{page}{1} % Chapters -\include{chapters/introduction} -\include{chapters/dram} -\include{chapters/pim} -\include{chapters/vp} -\include{chapters/implementation} +% \include{chapters/introduction} +% \include{chapters/dram} +% \include{chapters/pim} +% \include{chapters/vp} +% \include{chapters/implementation} \include{chapters/results} -\include{chapters/conclusion} +% \include{chapters/conclusion} % Appendix \appendix diff --git a/src/plots/matrix_infinite.tex b/src/plots/matrix_infinite.tex index fd205af..275a01c 100644 --- a/src/plots/matrix_infinite.tex +++ b/src/plots/matrix_infinite.tex @@ -1,19 +1,16 @@ \begin{tikzpicture} - \pgfplotstableread[col sep=comma]{plots/tables/gemv_100GHz.csv}\gemv - \pgfplotstableread[col sep=comma]{plots/tables/gemv_layers_100GHz.csv}\gemvlayers + \pgfplotstableread[col sep=comma]{plots/speedup_tables/matrix_100GHz.csv}\csv \begin{axis}[ width=0.9\textwidth, ybar=1pt, bar width = 15pt, ymin=0.1, - ymax=100, - ymode=log, - log origin=infty, + ymax=10, ymajorgrids, ylabel={Relative Performance}, tick pos=left, xtick=data, - xticklabels from table={\gemv}{level}, + xticklabels 
from table={\csv}{level}, enlarge x limits=0.25, legend style={ at={(current bounding box.south-|current axis.south)}, @@ -23,10 +20,10 @@ /tikz/every even column/.append style={column sep=0.5cm} }, ] - \addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\gemv}; + \addplot[fill=_blue!90] table [x expr=\coordindex, y={gemv}]{\csv}; \addlegendentry{GEMV} - \addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\gemvlayers}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={dnn}]{\csv}; \addlegendentry{DNN} \end{axis} \end{tikzpicture} diff --git a/src/plots/matrix_normal.tex b/src/plots/matrix_normal.tex index b6a7958..d4056b2 100644 --- a/src/plots/matrix_normal.tex +++ b/src/plots/matrix_normal.tex @@ -1,20 +1,16 @@ \begin{tikzpicture} - \pgfplotstableread[col sep=comma]{plots/tables/gemv_3GHz.csv}\gemv - \pgfplotstableread[col sep=comma]{plots/tables/gemv_layers_3GHz.csv}\gemvlayers + \pgfplotstableread[col sep=comma]{plots/speedup_tables/matrix_3GHz.csv}\csv \begin{axis}[ width=0.9\textwidth, ybar=1pt, bar width = 15pt, ymin=0.1, - ymax=100, - ymode=log, - log origin=infty, - % minor y tick num = 5, + ymax=75, ymajorgrids, ylabel={Relative Performance}, tick pos=left, xtick=data, - xticklabels from table={\gemv}{level}, + xticklabels from table={\csv}{level}, enlarge x limits=0.25, legend style={ at={(current bounding box.south-|current axis.south)}, @@ -24,10 +20,10 @@ /tikz/every even column/.append style={column sep=0.5cm} }, ] - \addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\gemv}; + \addplot[fill=_blue!90] table [x expr=\coordindex, y={gemv}]{\csv}; \addlegendentry{GEMV} - \addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\gemvlayers}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={dnn}]{\csv}; \addlegendentry{DNN} \end{axis} \end{tikzpicture} diff --git a/src/plots/runtime_tables/hbm_100GHz.csv b/src/plots/runtime_tables/hbm_100GHz.csv new file mode 100644 index 0000000..d1e2290 --- 
/dev/null +++ b/src/plots/runtime_tables/hbm_100GHz.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy,gemv,dnn +X1,11768899990,9758441990,16772264000,7916400980,176584310 +X2,23071196990,18975123990,33462372990,15800020980,1860990350 +X3,46873992980,37109877990,65993469990,62587326980,7063630630 +X4,91264808000,74066441980,134134323970,127514526980,27344749530 diff --git a/src/plots/runtime_tables/hbm_3GHz.csv b/src/plots/runtime_tables/hbm_3GHz.csv new file mode 100644 index 0000000..80b454b --- /dev/null +++ b/src/plots/runtime_tables/hbm_3GHz.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy,gemv,dnn +X1,21334729581,20112009858,36613117899,72834815610,5504078079 +X2,43268334354,39990439863,73515923487,151235040573,21478024143 +X3,85485583179,80576580096,146061622170,614362203486,85912073262 +X4,166942414809,163020260223,294729236073,1228045754304,322188095061 diff --git a/src/plots/runtime_tables/pim_100GHz.csv b/src/plots/runtime_tables/pim_100GHz.csv new file mode 100644 index 0000000..4ac0df3 --- /dev/null +++ b/src/plots/runtime_tables/pim_100GHz.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy,gemv,dnn +X1,911446480,911416480,954454480,951904860,536177760 +X2,1822806480,1822776480,1908822480,1814530860,738329760 +X3,3645526480,3645496480,3817558480,6990944860,1547139760 +X4,7290966480,7290936480,7635030480,13892610860,4782339760 diff --git a/src/plots/runtime_tables/pim_3GHz.csv b/src/plots/runtime_tables/pim_3GHz.csv new file mode 100644 index 0000000..7629fa6 --- /dev/null +++ b/src/plots/runtime_tables/pim_3GHz.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy,gemv,dnn +X1,1475510346,1475512344,1543044078,1377734886,933823908 +X2,2950962084,2950964082,3085992252,2601142920,1220409702 +X3,5901852240,5901848244,6171893928,9942655392,2367353610 +X4,11803639878,11803641876,12343693950,19731271644,6955629408 diff --git a/src/plots/runtime_tables/tesla.csv b/src/plots/runtime_tables/tesla.csv new file mode 100644 index 0000000..b0471fa --- /dev/null +++ 
b/src/plots/runtime_tables/tesla.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy,gemv,dnn +X1,69572650,67408281,69791189,750246152,231093065 +X2,123217536,103994272,123543145,648714601,431703456 +X3,207693503,182162140,207947543,2454455479,877622611 +X4,378089165,350280326,377434890,4968984949,2175751385 diff --git a/src/plots/runtime_tables/vega.csv b/src/plots/runtime_tables/vega.csv new file mode 100644 index 0000000..b0471fa --- /dev/null +++ b/src/plots/runtime_tables/vega.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy,gemv,dnn +X1,69572650,67408281,69791189,750246152,231093065 +X2,123217536,103994272,123543145,648714601,431703456 +X3,207693503,182162140,207947543,2454455479,877622611 +X4,378089165,350280326,377434890,4968984949,2175751385 diff --git a/src/plots/runtimes_matrix.tex b/src/plots/runtimes_matrix.tex new file mode 100644 index 0000000..07af808 --- /dev/null +++ b/src/plots/runtimes_matrix.tex @@ -0,0 +1,74 @@ +\begin{tikzpicture} +\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_3GHz.csv}\hbmarm +\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_3GHz.csv}\hbmpim +\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_100GHz.csv}\hbminf +\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_100GHz.csv}\piminf +\pgfplotstableread[col sep=comma]{plots/runtime_tables/vega.csv}\vega +\pgfplotstableread[col sep=comma]{plots/runtime_tables/tesla.csv}\tesla +\begin{groupplot}[ + group style={ + group size=2 by 1, + horizontal sep=0pt, + vertical sep=0pt, + xticklabels at=edge bottom, + yticklabels at=edge left, + xlabels at=edge bottom, + ylabels at=edge left, + }, + height=8cm, + width=0.45\linewidth, + ybar=1pt, + axis line style={draw=none}, + tick style={draw=none}, + ymin=0.1, + ymax=1e13, + ymode=log, + % ymax=25, + ymajorgrids, + ylabel={Runtime [$\unit{\pico\second}$]}, + tick pos=left, + xtick=data, + xticklabels from table={\vega}{level}, + legend style={ + at={(1.0,-0.15)}, + anchor=north, + legend columns=-1, + draw=none, + 
/tikz/every even column/.append style={column sep=0.5cm} + }, + enlarge x limits=0.2, +] + \nextgroupplot[ + xlabel=GEMV, + bar width=3pt, + % width=3cm, + ] + + \addplot[fill=_blue!90] table [x expr=\coordindex, y={gemv}]{\hbmarm}; + \addlegendentry{Non-PIM ARM} + \addplot[fill=_orange!90] table [x expr=\coordindex, y={gemv}]{\hbmpim}; + \addlegendentry{PIM ARM} + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={gemv}]{\hbminf}; + \addlegendentry{Non-PIM Inf} + \addplot[fill=_green!90] table [x expr=\coordindex, y={gemv}]{\piminf}; + \addlegendentry{PIM Inf} + \addplot[fill=_darkblue!90] table [x expr=\coordindex, y={gemv}]{\vega}; + \addlegendentry{Vega} + \addplot[fill=violet!90] table [x expr=\coordindex, y={gemv}]{\tesla}; + \addlegendentry{Tesla} + + \nextgroupplot[ + xlabel=DNN, + bar width=3pt, + % width=3cm, + ] + + \addplot[fill=_blue!90] table [x expr=\coordindex, y={dnn}]{\hbmarm}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={dnn}]{\hbmpim}; + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={dnn}]{\hbminf}; + \addplot[fill=_green!90] table [x expr=\coordindex, y={dnn}]{\piminf}; + \addplot[fill=_darkblue!90] table [x expr=\coordindex, y={dnn}]{\vega}; + \addplot[fill=violet!90] table [x expr=\coordindex, y={dnn}]{\tesla}; + +\end{groupplot} +\end{tikzpicture} diff --git a/src/plots/runtimes_vector.tex b/src/plots/runtimes_vector.tex new file mode 100644 index 0000000..7f0c932 --- /dev/null +++ b/src/plots/runtimes_vector.tex @@ -0,0 +1,87 @@ +\begin{tikzpicture} +\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_3GHz.csv}\hbmarm +\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_3GHz.csv}\hbmpim +\pgfplotstableread[col sep=comma]{plots/runtime_tables/hbm_100GHz.csv}\hbminf +\pgfplotstableread[col sep=comma]{plots/runtime_tables/pim_100GHz.csv}\piminf +\pgfplotstableread[col sep=comma]{plots/runtime_tables/vega.csv}\vega +\pgfplotstableread[col sep=comma]{plots/runtime_tables/tesla.csv}\tesla 
+\begin{groupplot}[ + group style={ + group size=3 by 1, + horizontal sep=0pt, + vertical sep=0pt, + xticklabels at=edge bottom, + yticklabels at=edge left, + xlabels at=edge bottom, + ylabels at=edge left, + }, + height=8cm, + width=0.45\linewidth, + ybar=1pt, + axis line style={draw=none}, + tick style={draw=none}, + ymin=0.1, + ymax=1e12, + ymode=log, + % ymax=25, + ymajorgrids, + ylabel={Runtime [$\unit{\pico\second}$]}, + tick pos=left, + xtick=data, + xticklabels from table={\vega}{level}, + legend style={ + at={(0.5,-0.15)}, + anchor=north, + legend columns=-1, + draw=none, + /tikz/every even column/.append style={column sep=0.5cm} + }, + enlarge x limits=0.2, +] + \nextgroupplot[ + xlabel=VADD, + bar width=3pt, + % width=3cm, + ] + + \addplot[fill=_blue!90] table [x expr=\coordindex, y={vadd}]{\hbmarm}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={vadd}]{\hbmpim}; + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={vadd}]{\hbminf}; + \addplot[fill=_green!90] table [x expr=\coordindex, y={vadd}]{\piminf}; + \addplot[fill=_darkblue!90] table [x expr=\coordindex, y={vadd}]{\vega}; + \addplot[fill=violet!90] table [x expr=\coordindex, y={vadd}]{\tesla}; + + \nextgroupplot[ + xlabel=VMUL, + bar width=3pt, + % width=3cm, + ] + + \addplot[fill=_blue!90] table [x expr=\coordindex, y={vmul}]{\hbmarm}; + \addlegendentry{Non-PIM ARM} + \addplot[fill=_orange!90] table [x expr=\coordindex, y={vmul}]{\hbmpim}; + \addlegendentry{PIM ARM} + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={vmul}]{\hbminf}; + \addlegendentry{Non-PIM Inf} + \addplot[fill=_green!90] table [x expr=\coordindex, y={vmul}]{\piminf}; + \addlegendentry{PIM Inf} + \addplot[fill=_darkblue!90] table [x expr=\coordindex, y={vmul}]{\vega}; + \addlegendentry{Vega} + \addplot[fill=violet!90] table [x expr=\coordindex, y={vmul}]{\tesla}; + \addlegendentry{Tesla} + + \nextgroupplot[ + xlabel=HAXPY, + bar width=3pt, + % width=3cm, + ] + + \addplot[fill=_blue!90] table [x 
expr=\coordindex, y={haxpy}]{\hbmarm}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={haxpy}]{\hbmpim}; + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={haxpy}]{\hbminf}; + \addplot[fill=_green!90] table [x expr=\coordindex, y={haxpy}]{\piminf}; + \addplot[fill=_darkblue!90] table [x expr=\coordindex, y={haxpy}]{\vega}; + \addplot[fill=violet!90] table [x expr=\coordindex, y={haxpy}]{\tesla}; + +\end{groupplot} +\end{tikzpicture} diff --git a/src/plots/speedup_tables/matrix_100GHz.csv b/src/plots/speedup_tables/matrix_100GHz.csv new file mode 100644 index 0000000..905dea6 --- /dev/null +++ b/src/plots/speedup_tables/matrix_100GHz.csv @@ -0,0 +1,5 @@ +level,gemv,dnn +X1,8.316378361593825,0.3293391169376365 +X2,8.707496426927674,2.520540889480061 +X3,8.952627753954278,4.565606038073768 +X4,9.178586247394538,5.717860064798073 diff --git a/src/plots/speedup_tables/matrix_3GHz.csv b/src/plots/speedup_tables/matrix_3GHz.csv new file mode 100644 index 0000000..97fbf68 --- /dev/null +++ b/src/plots/speedup_tables/matrix_3GHz.csv @@ -0,0 +1,5 @@ +level,gemv,dnn +X1,52.86562483836241,5.894128466670185 +X2,58.14176507187079,17.599027693570402 +X3,61.79055586904123,36.290342473171975 +X4,62.23855088820042,46.32048031346181 diff --git a/src/plots/speedup_tables/vector_100GHz.csv b/src/plots/speedup_tables/vector_100GHz.csv new file mode 100644 index 0000000..46cf9a2 --- /dev/null +++ b/src/plots/speedup_tables/vector_100GHz.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy +X1,12.912332482758615,10.706896577073085,17.57261802574388 +X2,12.656964545133722,10.410011429377231,17.530374532261376 +X3,12.857948841452387,10.179649930700249,17.28682620992881 +X4,12.517518527941442,10.158700762676236,17.568276160961705 diff --git a/src/plots/speedup_tables/vector_3GHz.csv b/src/plots/speedup_tables/vector_3GHz.csv new file mode 100644 index 0000000..1a57daf --- /dev/null +++ b/src/plots/speedup_tables/vector_3GHz.csv @@ -0,0 +1,5 @@ +level,vadd,vmul,haxpy 
+X1,14.459220593631812,13.63052633194372,23.727849658355645 +X2,14.66245011706494,13.551652528382078,23.822458866951166 +X3,14.484534634672588,13.652770583844921,23.665607976080565 +X4,14.143299569834605,13.81101374775393,23.87690729103017 diff --git a/src/plots/tables/gemv_100GHz.csv b/src/plots/tables/gemv_100GHz.csv deleted file mode 100644 index 566d659..0000000 --- a/src/plots/tables/gemv_100GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -gemv,X1,100GHz,0.2108059965502083 -gemv,X2,100GHz,0.40509080127411157 -gemv,X3,100GHz,0.8462958338758609 -gemv,X4,100GHz,4.7274497979448125 diff --git a/src/plots/tables/gemv_3GHz.csv b/src/plots/tables/gemv_3GHz.csv deleted file mode 100644 index d5e7712..0000000 --- a/src/plots/tables/gemv_3GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -gemv,X1,3GHz,3.468782996825547 -gemv,X2,3GHz,6.723879985176877 -gemv,X3,3GHz,12.744110856471028 -gemv,X4,3GHz,23.645526777997713 diff --git a/src/plots/tables/gemv_layers_100GHz.csv b/src/plots/tables/gemv_layers_100GHz.csv deleted file mode 100644 index b0c8971..0000000 --- a/src/plots/tables/gemv_layers_100GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -gemv_layers,X1,100GHz,0.17890250001597863 -gemv_layers,X2,100GHz,0.6097840333112959 -gemv_layers,X3,100GHz,3.9637284525723304 -gemv_layers,X4,100GHz,6.088778065749799 diff --git a/src/plots/tables/gemv_layers_3GHz.csv b/src/plots/tables/gemv_layers_3GHz.csv deleted file mode 100644 index cf86411..0000000 --- a/src/plots/tables/gemv_layers_3GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -gemv_layers,X1,3GHz,2.992752194063702 -gemv_layers,X2,3GHz,11.246371082010572 -gemv_layers,X3,3GHz,34.94598413478715 -gemv_layers,X4,3GHz,72.33604077371677 diff --git a/src/plots/tables/haxpy_100GHz.csv b/src/plots/tables/haxpy_100GHz.csv deleted file mode 100644 index 33ab8fd..0000000 --- a/src/plots/tables/haxpy_100GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ 
-workload,level,frequency,speedup -haxpy,X1,100GHz,2.0481358611246403 -haxpy,X2,100GHz,2.3234133539462776 -haxpy,X3,100GHz,2.272582592673281 -haxpy,X4,100GHz,2.3895030032424387 diff --git a/src/plots/tables/haxpy_3GHz.csv b/src/plots/tables/haxpy_3GHz.csv deleted file mode 100644 index 4970b87..0000000 --- a/src/plots/tables/haxpy_3GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -haxpy,X1,3GHz,19.816741597088416 -haxpy,X2,3GHz,25.395400082633245 -haxpy,X3,3GHz,28.676005064893953 -haxpy,X4,3GHz,31.783592582828017 diff --git a/src/plots/tables/vadd_100GHz.csv b/src/plots/tables/vadd_100GHz.csv deleted file mode 100644 index 4ad7277..0000000 --- a/src/plots/tables/vadd_100GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -vadd,X1,100GHz,2.398047786583271 -vadd,X2,100GHz,1.823243660465808 -vadd,X3,100GHz,1.562017010059411 -vadd,X4,100GHz,1.7888939829610704 diff --git a/src/plots/tables/vadd_3GHz.csv b/src/plots/tables/vadd_3GHz.csv deleted file mode 100644 index 71f7e18..0000000 --- a/src/plots/tables/vadd_3GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -vadd,X1,3GHz,12.766775777414075 -vadd,X2,3GHz,14.19338061465721 -vadd,X3,3GHz,15.313227057302887 -vadd,X4,3GHz,16.430379164365913 diff --git a/src/plots/tables/vmul_100GHz.csv b/src/plots/tables/vmul_100GHz.csv deleted file mode 100644 index 2f93e8e..0000000 --- a/src/plots/tables/vmul_100GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -vmul,X1,100GHz,2.4019901321627604 -vmul,X2,100GHz,2.2189241847884267 -vmul,X3,100GHz,1.86705278821741 -vmul,X4,100GHz,1.7484391189395834 diff --git a/src/plots/tables/vmul_3GHz.csv b/src/plots/tables/vmul_3GHz.csv deleted file mode 100644 index d6898d6..0000000 --- a/src/plots/tables/vmul_3GHz.csv +++ /dev/null @@ -1,5 +0,0 @@ -workload,level,frequency,speedup -vmul,X1,3GHz,14.157521157521158 -vmul,X2,3GHz,15.915413533834586 -vmul,X3,3GHz,16.959713823354058 -vmul,X4,3GHz,18.215465292791755 
diff --git a/src/plots/vector_infinite.tex b/src/plots/vector_infinite.tex index 70c2d83..91baebe 100644 --- a/src/plots/vector_infinite.tex +++ b/src/plots/vector_infinite.tex @@ -1,20 +1,16 @@ \begin{tikzpicture} - \pgfplotstableread[col sep=comma]{plots/tables/vadd_100GHz.csv}\vadd - \pgfplotstableread[col sep=comma]{plots/tables/vmul_100GHz.csv}\vmul - \pgfplotstableread[col sep=comma]{plots/tables/haxpy_100GHz.csv}\haxpy + \pgfplotstableread[col sep=comma]{plots/speedup_tables/vector_100GHz.csv}\csv \begin{axis}[ width=0.8\textwidth, ybar=1pt, bar width = 15pt, ymin=0, - ymax=5, - % ymode=log, - % log origin=infty, + ymax=25, ymajorgrids, ylabel={Relative Performance}, tick pos=left, xtick=data, - xticklabels from table={\vadd}{level}, + xticklabels from table={\csv}{level}, enlarge x limits=0.25, legend style={ at={(current bounding box.south-|current axis.south)}, @@ -24,13 +20,13 @@ /tikz/every even column/.append style={column sep=0.5cm} }, ] - \addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\vadd}; + \addplot[fill=_blue!90] table [x expr=\coordindex, y={vadd}]{\csv}; \addlegendentry{VADD} - \addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\vmul}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={vmul}]{\csv}; \addlegendentry{VMUL} - \addplot[fill=_yellow!90] table [x expr=\coordindex, y={speedup}]{\haxpy}; + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={haxpy}]{\csv}; \addlegendentry{HAXPY} \end{axis} \end{tikzpicture} diff --git a/src/plots/vector_normal.tex b/src/plots/vector_normal.tex index 69148b1..a643618 100644 --- a/src/plots/vector_normal.tex +++ b/src/plots/vector_normal.tex @@ -1,21 +1,16 @@ \begin{tikzpicture} - \pgfplotstableread[col sep=comma]{plots/tables/vadd_3GHz.csv}\vadd - \pgfplotstableread[col sep=comma]{plots/tables/vmul_3GHz.csv}\vmul - \pgfplotstableread[col sep=comma]{plots/tables/haxpy_3GHz.csv}\haxpy + \pgfplotstableread[col sep=comma]{plots/speedup_tables/vector_3GHz.csv}\csv 
\begin{axis}[ width=0.8\textwidth, ybar=1pt, bar width = 15pt, ymin=0, - ymax=35, - % ymode=log, - % log origin=infty, - % minor y tick num = 5, + ymax=25, ymajorgrids, ylabel={Relative Performance}, tick pos=left, xtick=data, - xticklabels from table={\vadd}{level}, + xticklabels from table={\csv}{level}, enlarge x limits=0.25, legend style={ at={(current bounding box.south-|current axis.south)}, @@ -25,13 +20,13 @@ /tikz/every even column/.append style={column sep=0.5cm} }, ] - \addplot[fill=_blue!90] table [x expr=\coordindex, y={speedup}]{\vadd}; + \addplot[fill=_blue!90] table [x expr=\coordindex, y={vadd}]{\csv}; \addlegendentry{VADD} - \addplot[fill=_orange!90] table [x expr=\coordindex, y={speedup}]{\vmul}; + \addplot[fill=_orange!90] table [x expr=\coordindex, y={vmul}]{\csv}; \addlegendentry{VMUL} - \addplot[fill=_yellow!90] table [x expr=\coordindex, y={speedup}]{\haxpy}; + \addplot[fill=_yellow!90] table [x expr=\coordindex, y={haxpy}]{\csv}; \addlegendentry{HAXPY} \end{axis} \end{tikzpicture} diff --git a/src/tables/gemv_100GHz.tex b/src/tables/gemv_100GHz.tex deleted file mode 100644 index 1a2d751..0000000 --- a/src/tables/gemv_100GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&96465760&457604440 \\ -X2&192178090&474407440 \\ -X3&430015980&508115440 \\ -X4&2720535980&575476440 \\ - -\end{tblr} diff --git a/src/tables/gemv_3GHz.tex b/src/tables/gemv_3GHz.tex deleted file mode 100644 index 5a0302e..0000000 --- a/src/tables/gemv_3GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - 
hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&2734886043&788428116 \\ -X2&5462015184&812330856 \\ -X3&10958710653&859903902 \\ -X4&22594486896&955550160 \\ - -\end{tblr} diff --git a/src/tables/gemv_layers_100GHz.tex b/src/tables/gemv_layers_100GHz.tex deleted file mode 100644 index de0fd78..0000000 --- a/src/tables/gemv_layers_100GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&8957090&50066880 \\ -X2&33647200&55178880 \\ -X3&299035090&75442880 \\ -X4&951182090&156218880 \\ - -\end{tblr} diff --git a/src/tables/gemv_layers_3GHz.tex b/src/tables/gemv_layers_3GHz.tex deleted file mode 100644 index ddbf54c..0000000 --- a/src/tables/gemv_layers_3GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&262765971&87800778 \\ -X2&1070441487&95181057 \\ -X3&4332005991&123962913 \\ -X4&17236314765&238281147 \\ - -\end{tblr} diff --git a/src/tables/haxpy_100GHz.tex b/src/tables/haxpy_100GHz.tex deleted file mode 100644 index 9181da2..0000000 --- a/src/tables/haxpy_100GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&2374720&1088170 \\ -X2&3677220&1604170 
\\ -X3&5875080&2708170 \\ -X4&11640050&4703250 \\ - -\end{tblr} diff --git a/src/tables/haxpy_3GHz.tex b/src/tables/haxpy_3GHz.tex deleted file mode 100644 index 099ea6b..0000000 --- a/src/tables/haxpy_3GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&30413223&1555443 \\ -X2&59499108&2417913 \\ -X3&117506376&4207788 \\ -X4&234943821&7578414 \\ - -\end{tblr} diff --git a/src/tables/simulations_100GHz.csv b/src/tables/simulations_100GHz.csv new file mode 100644 index 0000000..6e87a4e --- /dev/null +++ b/src/tables/simulations_100GHz.csv @@ -0,0 +1,21 @@ +workload,level,hbm,pim +VADD,X1,11768899990,455723240 +VADD,X2,23071196990,911403240 +VADD,X3,46873992980,1822763240 +VADD,X4,91264808000,3645483240 +VMUL,X1,9758441990,455708240 +VMUL,X2,18975123990,911388240 +VMUL,X3,37109877990,1822748240 +VMUL,X4,74066441980,3645468240 +HAXPY,X1,16772264000,477227240 +HAXPY,X2,33462372990,954411240 +HAXPY,X3,65993469990,1908779240 +HAXPY,X4,134134323970,3817515240 +GEMV,X1,7916400980,475952430 +GEMV,X2,15800020980,907265430 +GEMV,X3,62587326980,3495472430 +GEMV,X4,127514526980,6946305430 +DNN,X1,176584310,268088880 +DNN,X2,1860990350,369164880 +DNN,X3,7063630630,773569880 +DNN,X4,27344749530,2391169880 diff --git a/src/tables/simulations_3GHz.csv b/src/tables/simulations_3GHz.csv new file mode 100644 index 0000000..1afc297 --- /dev/null +++ b/src/tables/simulations_3GHz.csv @@ -0,0 +1,21 @@ +workload,level,hbm,pim +VADD,X1,21334729581,737755173 +VADD,X2,43268334354,1475481042 +VADD,X3,85485583179,2950926120 +VADD,X4,166942414809,5901819939 +VMUL,X1,20112009858,737756172 +VMUL,X2,39990439863,1475482041 +VMUL,X3,80576580096,2950924122 +VMUL,X4,163020260223,5901820938 
+HAXPY,X1,36613117899,771522039 +HAXPY,X2,73515923487,1542996126 +HAXPY,X3,146061622170,3085946964 +HAXPY,X4,294729236073,6171846975 +GEMV,X1,72834815610,688867443 +GEMV,X2,151235040573,1300571460 +GEMV,X3,614362203486,4971327696 +GEMV,X4,1228045754304,9865635822 +DNN,X1,5504078079,466911954 +DNN,X2,21478024143,610204851 +DNN,X3,85912073262,1183676805 +DNN,X4,322188095061,3477814704 diff --git a/src/tables/torch.csv b/src/tables/torch.csv new file mode 100644 index 0000000..02f50b1 --- /dev/null +++ b/src/tables/torch.csv @@ -0,0 +1,21 @@ +workload,level,vega,tesla +VADD,X1,69572650,TODO +VADD,X2,123217536,TODO +VADD,X3,207693503,TODO +VADD,X4,378089165,TODO +VMUL,X1,67408281,TODO +VMUL,X2,103994272,TODO +VMUL,X3,182162140,TODO +VMUL,X4,350280326,TODO +HAXPY,X1,69791189,TODO +HAXPY,X2,123543145,TODO +HAXPY,X3,207947543,TODO +HAXPY,X4,377434890,TODO +GEMV,X1,750246152,TODO +GEMV,X2,648714601,TODO +GEMV,X3,2454455479,TODO +GEMV,X4,4968984949,TODO +DNN,X1,231093065,TODO +DNN,X2,431703456,TODO +DNN,X3,877622611,TODO +DNN,X4,2175751385,TODO diff --git a/src/tables/vadd_100GHz.tex b/src/tables/vadd_100GHz.tex deleted file mode 100644 index 4754e87..0000000 --- a/src/tables/vadd_100GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&2024360&844170 \\ -X2&2574730&1412170 \\ -X3&3899060&2496170 \\ -X4&8037930&4493240 \\ - -\end{tblr} diff --git a/src/tables/vadd_3GHz.tex b/src/tables/vadd_3GHz.tex deleted file mode 100644 index a183fda..0000000 --- a/src/tables/vadd_3GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - 
cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&15585399&1220778 \\ -X2&29988981&2112885 \\ -X3&59177430&3864465 \\ -X4&118902645&7236756 \\ - -\end{tblr} diff --git a/src/tables/vmul_100GHz.tex b/src/tables/vmul_100GHz.tex deleted file mode 100644 index 4e9589c..0000000 --- a/src/tables/vmul_100GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&2030090&845170 \\ -X2&3109090&1401170 \\ -X3&4654880&2493170 \\ -X4&7829930&4478240 \\ - -\end{tblr} diff --git a/src/tables/vmul_3GHz.tex b/src/tables/vmul_3GHz.tex deleted file mode 100644 index 10d76f8..0000000 --- a/src/tables/vmul_3GHz.tex +++ /dev/null @@ -1,22 +0,0 @@ - -\begin{tblr}{ - hlines, - vlines, - cell{2}{2} = {r}, - cell{2}{3} = {r}, - cell{3}{2} = {r}, - cell{3}{3} = {r}, - cell{4}{2} = {r}, - cell{4}{3} = {r}, - cell{5}{2} = {r}, - cell{5}{3} = {r}, - hline{2} = {-}{solid,black}, - hline{2} = {2}{-}{solid,black}, -} -Level & Non-\ac{pim} & \ac{pim} \\ -X1&17269047&1219779 \\ -X2&33834132&2125872 \\ -X3&66308292&3909753 \\ -X4&131863338&7239087 \\ - -\end{tblr}