Some updating of slides
This commit is contained in:
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 74 KiB |
95
main.tex
95
main.tex
@@ -223,7 +223,6 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
\begin{frame}{Software Library}
|
\begin{frame}{Software Library}
|
||||||
Software support library
|
|
||||||
\begin{columns}
|
\begin{columns}
|
||||||
\begin{column}{0.5\textwidth}
|
\begin{column}{0.5\textwidth}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@@ -235,7 +234,6 @@
|
|||||||
\item Execution
|
\item Execution
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Generation of RD and WR requests
|
\item Generation of RD and WR requests
|
||||||
\item Insertion of memory barriers
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{column}
|
\end{column}
|
||||||
@@ -270,7 +268,7 @@
|
|||||||
|
|
||||||
\begin{frame}{Virtual Prototype Platform}
|
\begin{frame}{Virtual Prototype Platform}
|
||||||
\begin{columns}
|
\begin{columns}
|
||||||
\begin{column}{0.5\textwidth}
|
\begin{column}{0.4\textwidth}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item ARM processor model
|
\item ARM processor model
|
||||||
\item Bare-metal kernel
|
\item Bare-metal kernel
|
||||||
@@ -281,7 +279,7 @@
|
|||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{column}
|
\end{column}
|
||||||
\begin{column}{0.5\textwidth}
|
\begin{column}{0.6\textwidth}
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\includesvg[width=0.8\textwidth]{images/bare_metal.svg}
|
\includesvg[width=0.8\textwidth]{images/bare_metal.svg}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@@ -300,11 +298,11 @@
|
|||||||
\begin{column}{0.5\textwidth}
|
\begin{column}{0.5\textwidth}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Vector benchmarks (BLAS level 1)
|
\item Vector benchmarks (BLAS level 1)
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item VADD: $z = x + y$
|
\item VADD: $z = x + y$
|
||||||
\item VMUL: $z = x \cdot y$
|
\item VMUL: $z = x \cdot y$
|
||||||
\item HAXPY: $z = a \cdot x + y$
|
\item HAXPY: $z = a \cdot x + y$
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\item Vector-Matrix benchmarks (BLAS level 2)
|
\item Vector-Matrix benchmarks (BLAS level 2)
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@@ -345,27 +343,27 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
\begin{frame}{System Configuration}
|
\begin{frame}{System Configuration}
|
||||||
\begin{columns}[t]
|
% \begin{columns}[t]
|
||||||
\begin{column}{0.5\textwidth}
|
% \begin{column}{0.5\textwidth}
|
||||||
Two simulated systems:
|
Two simulated systems:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Generic ARM system
|
\item Generic ARM system
|
||||||
\item Infinite compute system
|
\item Infinite compute system
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Unrealistically high frequency of 100 GHz
|
\item Infinite CPU clock frequency
|
||||||
\item Completely memory bound
|
\item Completely memory bound
|
||||||
\item Lower bound of possible speedup
|
\item Lower bound of possible PIM speedup
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{column}
|
% \end{column}
|
||||||
\begin{column}{0.5\textwidth}
|
% \begin{column}{0.5\textwidth}
|
||||||
Two real GPUs using HBM2:
|
% Two real GPUs using HBM2:
|
||||||
\begin{itemize}
|
% \begin{itemize}
|
||||||
\item AMD RX Vega 56
|
% \item AMD RX Vega 56
|
||||||
\item NVIDIA Tesla V100
|
% \item NVIDIA Tesla V100
|
||||||
\end{itemize}
|
% \end{itemize}
|
||||||
\end{column}
|
% \end{column}
|
||||||
\end{columns}
|
% \end{columns}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
\begin{frame}{Speedups / Generic ARM System}
|
\begin{frame}{Speedups / Generic ARM System}
|
||||||
@@ -398,24 +396,6 @@
|
|||||||
\item GPU has no speculative execution
|
\item GPU has no speculative execution
|
||||||
}
|
}
|
||||||
|
|
||||||
\begin{frame}{Runtimes / Vector Benchmarks}
|
|
||||||
\begin{figure}
|
|
||||||
\includesvg[width=0.8\textwidth]{images/runtimes_vector.svg}
|
|
||||||
\end{figure}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\note[itemize]{
|
|
||||||
\item Real GPUs use multiple memory channels
|
|
||||||
\item Memory barriers
|
|
||||||
\item Also architectural differences
|
|
||||||
}
|
|
||||||
|
|
||||||
\begin{frame}{Runtimes / Matrix Benchmarks}
|
|
||||||
\begin{figure}
|
|
||||||
\includesvg[width=0.8\textwidth]{images/runtimes_matrix.svg}
|
|
||||||
\end{figure}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\begin{frame}{Conclusion and Future Work}
|
\begin{frame}{Conclusion and Future Work}
|
||||||
\textbf{Conclusion}
|
\textbf{Conclusion}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@@ -500,9 +480,26 @@
|
|||||||
\end{figure}
|
\end{figure}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
|
|
||||||
\note[itemize]{
|
\note[itemize]{
|
||||||
\item Data layout in program and address mapping must match
|
\item Data layout in program and address mapping must match
|
||||||
}
|
}
|
||||||
|
|
||||||
|
\begin{frame}{Runtimes / Vector Benchmarks}
|
||||||
|
\begin{figure}
|
||||||
|
\includesvg[width=0.8\textwidth]{images/runtimes_vector.svg}
|
||||||
|
\end{figure}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\note[itemize]{
|
||||||
|
\item Real GPUs use multiple memory channels
|
||||||
|
\item Memory barriers
|
||||||
|
\item Also architectural differences
|
||||||
|
}
|
||||||
|
|
||||||
|
\begin{frame}{Runtimes / Matrix Benchmarks}
|
||||||
|
\begin{figure}
|
||||||
|
\includesvg[width=0.8\textwidth]{images/runtimes_matrix.svg}
|
||||||
|
\end{figure}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|||||||
Reference in New Issue
Block a user