DbiPlayer progress
This commit is contained in:
16
.gitignore
vendored
Normal file
16
.gitignore
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
.directory
|
||||
.kile/
|
||||
doc.aux
|
||||
doc.bbl
|
||||
doc.blg
|
||||
doc.ilg
|
||||
doc.lof
|
||||
doc.log
|
||||
doc.lol
|
||||
doc.lot
|
||||
doc.nlo
|
||||
doc.nls
|
||||
doc.out
|
||||
doc.pdf
|
||||
doc.toc
|
||||
tikzit.sty
|
||||
@@ -54,13 +54,13 @@ encoding=UTF-8
|
||||
highlight=LaTeX
|
||||
mode=LaTeX
|
||||
|
||||
[item:inc/appendix.tex]
|
||||
[item:inc/6.implementation.tex]
|
||||
archive=true
|
||||
encoding=UTF-8
|
||||
highlight=LaTeX
|
||||
mode=LaTeX
|
||||
|
||||
[item:inc/x.implementation.tex]
|
||||
[item:inc/appendix.tex]
|
||||
archive=true
|
||||
encoding=UTF-8
|
||||
highlight=LaTeX
|
||||
|
||||
2
doc.tex
2
doc.tex
@@ -34,6 +34,7 @@
|
||||
\usepackage{url}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{tikzit}
|
||||
\usepackage{lscape}
|
||||
%\usepackage{listings}
|
||||
%\input{subsections.sty}
|
||||
\setcounter{secnumdepth}{5}
|
||||
@@ -101,6 +102,7 @@
|
||||
\newminted{cpp}{bgcolor=light-gray, fontsize=\scriptsize}
|
||||
\newminted{tcl}{bgcolor=light-gray, fontsize=\scriptsize}
|
||||
\newminted{sh}{bgcolor=light-gray, fontsize=\scriptsize}
|
||||
\newminted{text}{bgcolor=light-gray, fontsize=\scriptsize}
|
||||
\newminted{basemake}{bgcolor=light-gray, fontsize=\scriptsize}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%}}}%
|
||||
|
||||
@@ -6,13 +6,13 @@
|
||||
% Node styles
|
||||
\tikzstyle{function}=[fill={rgb,255: red,255; green,125; blue,125}, draw=red, shape=rectangle, rounded corners=3pt, thick]
|
||||
\tikzstyle{application}=[fill={rgb,255: red,238; green,238; blue,238}, draw=black, shape=rectangle, minimum width=3cm, minimum height=1cm, thick]
|
||||
\tikzstyle{thread player}=[fill=white, draw=black, shape=rectangle, minimum width=3cm, minimum height=8mm]
|
||||
\tikzstyle{initiator socket}=[fill=black, draw=black, shape=rectangle, minimum width=2mm, minimum height=5mm]
|
||||
\tikzstyle{interconnect thin}=[fill=white, draw=black, shape=rectangle, minimum height=3.5cm, align=center, minimum width=2.5cm]
|
||||
\tikzstyle{thread player}=[fill=white, draw=black, shape=rectangle, minimum width=3.25cm, minimum height=8mm]
|
||||
\tikzstyle{initiator socket}=[fill=black, draw=black, shape=rectangle, minimum width=2mm, minimum height=5mm]
|
||||
\tikzstyle{interconnect thin}=[fill=white, draw=black, shape=rectangle, minimum height=3.5cm, align=center, minimum width=2.75cm]
|
||||
\tikzstyle{interconnect thick}=[fill=white, draw=black, shape=rectangle, minimum height=3.5cm, align=center, minimum width=3.5cm]
|
||||
\tikzstyle{target socket}=[fill=white, draw=black, shape=rectangle, minimum width=2mm, minimum height=5mm]
|
||||
\tikzstyle{cache}=[fill=white, draw=black, shape=rectangle, minimum height=8mm, minimum width=1.75cm]
|
||||
\tikzstyle{l3cache}=[fill=white, draw=black, shape=rectangle, minimum height=2.5cm, minimum width=1.75cm]
|
||||
\tikzstyle{cache}=[fill=white, draw=black, shape=rectangle, minimum height=8mm, minimum width=2cm]
|
||||
\tikzstyle{l3cache}=[fill=white, draw=black, shape=rectangle, minimum height=2.5cm, minimum width=2cm]
|
||||
|
||||
% Edge styles
|
||||
\tikzstyle{dashed line}=[-, dashed]
|
||||
|
||||
@@ -1,66 +1,66 @@
|
||||
\begin{tikzpicture}
|
||||
\begin{pgfonlayer}{nodelayer}
|
||||
\node [style=thread player] (0) at (-16, 0) {DbiThreadPlayer};
|
||||
\node [style=thread player] (0) at (-16.25, 0) {DbiThreadPlayer};
|
||||
\node [style=initiator socket] (1) at (-12.75, 0) {};
|
||||
\node [style=thread player] (2) at (-16, -2) {DbiThreadPlayer};
|
||||
\node [style=thread player] (2) at (-16.25, -2) {DbiThreadPlayer};
|
||||
\node [style=initiator socket] (3) at (-12.75, -2) {};
|
||||
\node [style=thread player] (4) at (-16, -5) {DbiThreadPlayer};
|
||||
\node [style=thread player] (4) at (-16.25, -5) {DbiThreadPlayer};
|
||||
\node [style=initiator socket] (5) at (-12.75, -5) {};
|
||||
\node [style=interconnect thin] (10) at (18, -2.5) {MultiCoupler\\(Interconnect)};
|
||||
\node [style=none] (12) at (-16, -3.25) {\vdots};
|
||||
\node [style=target socket] (13) at (15.25, -2.5) {};
|
||||
\node [style=none] (14) at (15, -2.5) {};
|
||||
\node [style=initiator socket] (15) at (20.75, -2.5) {};
|
||||
\node [style=initiator socket] (16) at (22, -2.5) {};
|
||||
\node [style=initiator socket] (17) at (22.5, -2.5) {};
|
||||
\node [style=none] (18) at (-19.5, 2.5) {};
|
||||
\node [style=none] (19) at (22.25, 2.5) {};
|
||||
\node [style=none] (20) at (-19.5, -7) {};
|
||||
\node [style=none] (21) at (22.25, -7) {};
|
||||
\node [style=none] (22) at (1.5, 1.75) {DbiPlayer};
|
||||
\node [style=none] (24) at (21, -2.5) {};
|
||||
\node [style=none] (25) at (21, -2.5) {};
|
||||
\node [style=none] (28) at (21.75, -2.5) {};
|
||||
\node [style=none] (30) at (25, 2.5) {};
|
||||
\node [style=none] (31) at (25, -7) {};
|
||||
\node [style=target socket] (32) at (24.75, -2.5) {};
|
||||
\node [style=none] (35) at (24.5, -2.5) {};
|
||||
\node [style=none] (38) at (22.75, -2.5) {};
|
||||
\node [style=none] (40) at (30.25, 2.5) {};
|
||||
\node [style=none] (41) at (30.25, -7) {};
|
||||
\node [style=none] (43) at (27.75, -2.5) {DRAMSys};
|
||||
\node [style=interconnect thin] (10) at (16, -2.5) {MultiCoupler\\(Interconnect)};
|
||||
\node [style=none] (12) at (-16.25, -3.25) {\vdots};
|
||||
\node [style=target socket] (13) at (13, -2.5) {};
|
||||
\node [style=none] (14) at (12.75, -2.5) {};
|
||||
\node [style=initiator socket] (15) at (19, -2.5) {};
|
||||
\node [style=initiator socket] (16) at (20, -2.5) {};
|
||||
\node [style=initiator socket] (17) at (20.5, -2.5) {};
|
||||
\node [style=none] (18) at (-20, 2.5) {};
|
||||
\node [style=none] (19) at (20.25, 2.5) {};
|
||||
\node [style=none] (20) at (-20, -7) {};
|
||||
\node [style=none] (21) at (20.25, -7) {};
|
||||
\node [style=none] (22) at (0.25, 1.75) {DbiPlayer};
|
||||
\node [style=none] (24) at (18.75, -2.5) {};
|
||||
\node [style=none] (25) at (19.25, -2.5) {};
|
||||
\node [style=none] (28) at (19.75, -2.5) {};
|
||||
\node [style=none] (30) at (22, 2.5) {};
|
||||
\node [style=none] (31) at (22, -7) {};
|
||||
\node [style=target socket] (32) at (21.75, -2.5) {};
|
||||
\node [style=none] (35) at (21.5, -2.5) {};
|
||||
\node [style=none] (38) at (20.75, -2.5) {};
|
||||
\node [style=none] (40) at (26.75, 2.5) {};
|
||||
\node [style=none] (41) at (26.75, -7) {};
|
||||
\node [style=none] (43) at (24.5, -2.5) {DRAMSys};
|
||||
\node [style=cache] (44) at (-9.5, 0) {L1 Cache};
|
||||
\node [style=initiator socket] (45) at (-7.5, 0) {};
|
||||
\node [style=target socket] (46) at (-11.5, 0) {};
|
||||
\node [style=initiator socket] (45) at (-7.25, 0) {};
|
||||
\node [style=target socket] (46) at (-11.75, 0) {};
|
||||
\node [style=cache] (47) at (-9.5, -2) {L1 Cache};
|
||||
\node [style=initiator socket] (48) at (-7.5, -2) {};
|
||||
\node [style=target socket] (49) at (-11.5, -2) {};
|
||||
\node [style=initiator socket] (48) at (-7.25, -2) {};
|
||||
\node [style=target socket] (49) at (-11.75, -2) {};
|
||||
\node [style=cache] (50) at (-9.5, -5) {L1 Cache};
|
||||
\node [style=initiator socket] (51) at (-7.5, -5) {};
|
||||
\node [style=target socket] (52) at (-11.5, -5) {};
|
||||
\node [style=initiator socket] (51) at (-7.25, -5) {};
|
||||
\node [style=target socket] (52) at (-11.75, -5) {};
|
||||
\node [style=none] (53) at (-9.5, -3.25) {\vdots};
|
||||
\node [style=cache] (54) at (-4, 0) {L2 Cache};
|
||||
\node [style=initiator socket] (55) at (-2, 0) {};
|
||||
\node [style=target socket] (56) at (-6, 0) {};
|
||||
\node [style=initiator socket] (55) at (-1.75, 0) {};
|
||||
\node [style=target socket] (56) at (-6.25, 0) {};
|
||||
\node [style=cache] (57) at (-4, -2) {L2 Cache};
|
||||
\node [style=initiator socket] (58) at (-2, -2) {};
|
||||
\node [style=target socket] (59) at (-6, -2) {};
|
||||
\node [style=initiator socket] (58) at (-1.75, -2) {};
|
||||
\node [style=target socket] (59) at (-6.25, -2) {};
|
||||
\node [style=cache] (60) at (-4, -5) {L2 Cache};
|
||||
\node [style=initiator socket] (61) at (-2, -5) {};
|
||||
\node [style=target socket] (62) at (-6, -5) {};
|
||||
\node [style=initiator socket] (61) at (-1.75, -5) {};
|
||||
\node [style=target socket] (62) at (-6.25, -5) {};
|
||||
\node [style=none] (63) at (-4, -3.25) {\vdots};
|
||||
\node [style=interconnect thick] (64) at (4, -2.5) {MultiSimpleCoupler\\(Interconnect)};
|
||||
\node [style=interconnect thin] (64) at (3.25, -2.5) {MultiSimple-\\Coupler\\(Interconnect)};
|
||||
\node [style=target socket] (65) at (0.25, -2.5) {};
|
||||
\node [style=none] (66) at (0, -2.5) {};
|
||||
\node [style=initiator socket] (67) at (7.75, -2.5) {};
|
||||
\node [style=none] (69) at (8, -2.5) {};
|
||||
\node [style=none] (70) at (-1.75, 0) {};
|
||||
\node [style=none] (71) at (-1.75, -2) {};
|
||||
\node [style=none] (72) at (-1.75, -5) {};
|
||||
\node [style=l3cache] (74) at (11.5, -2.5) {L3 Cache};
|
||||
\node [style=initiator socket] (75) at (13.5, -2.5) {};
|
||||
\node [style=target socket] (76) at (9.5, -2.5) {};
|
||||
\node [style=none] (77) at (13.75, -2.5) {};
|
||||
\node [style=initiator socket] (67) at (6.25, -2.5) {};
|
||||
\node [style=none] (69) at (6.5, -2.5) {};
|
||||
\node [style=none] (70) at (-1.5, 0) {};
|
||||
\node [style=none] (71) at (-1.5, -2) {};
|
||||
\node [style=none] (72) at (-1.5, -5) {};
|
||||
\node [style=l3cache] (74) at (9.5, -2.5) {L3 Cache};
|
||||
\node [style=initiator socket] (75) at (11.75, -2.5) {};
|
||||
\node [style=target socket] (76) at (7.25, -2.5) {};
|
||||
\node [style=none] (77) at (12, -2.5) {};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{edgelayer}
|
||||
\draw [style=block] (18.center)
|
||||
|
||||
@@ -1,33 +1,33 @@
|
||||
\begin{tikzpicture}
|
||||
\begin{pgfonlayer}{nodelayer}
|
||||
\node [style={thread_player}] (0) at (0, 0) {DbiThreadPlayer};
|
||||
\node [style={initiator_socket}] (1) at (3.25, 0) {};
|
||||
\node [style={thread_player}] (2) at (0, -2) {DbiThreadPlayer};
|
||||
\node [style={initiator_socket}] (3) at (3.25, -2) {};
|
||||
\node [style={thread_player}] (4) at (0, -5) {DbiThreadPlayer};
|
||||
\node [style={initiator_socket}] (5) at (3.25, -5) {};
|
||||
\node [style=interconnect] (10) at (11.25, -2.5) {MultiCoupler\\(Interconnect)};
|
||||
\node [style=thread player] (0) at (0, 0) {DbiThreadPlayer};
|
||||
\node [style=initiator socket] (1) at (3.5, 0) {};
|
||||
\node [style=thread player] (2) at (0, -2) {DbiThreadPlayer};
|
||||
\node [style=initiator socket] (3) at (3.5, -2) {};
|
||||
\node [style=thread player] (4) at (0, -5) {DbiThreadPlayer};
|
||||
\node [style=initiator socket] (5) at (3.5, -5) {};
|
||||
\node [style=interconnect thin] (10) at (11.25, -2.5) {MultiCoupler\\(Interconnect)};
|
||||
\node [style=none] (12) at (0, -3.25) {\vdots};
|
||||
\node [style={target_socket}] (13) at (8.5, -2.5) {};
|
||||
\node [style=none] (14) at (8.25, -2.5) {};
|
||||
\node [style={initiator_socket}] (15) at (14, -2.5) {};
|
||||
\node [style={initiator_socket}] (16) at (16.75, -2.5) {};
|
||||
\node [style={initiator_socket}] (17) at (17.25, -2.5) {};
|
||||
\node [style=none] (18) at (-3.5, 2) {};
|
||||
\node [style=target socket] (13) at (8.25, -2.5) {};
|
||||
\node [style=none] (14) at (8, -2.5) {};
|
||||
\node [style=initiator socket] (15) at (14.25, -2.5) {};
|
||||
\node [style=initiator socket] (16) at (16.75, -2.5) {};
|
||||
\node [style=initiator socket] (17) at (17.25, -2.5) {};
|
||||
\node [style=none] (18) at (-4, 2) {};
|
||||
\node [style=none] (19) at (17, 2) {};
|
||||
\node [style=none] (20) at (-3.5, -7) {};
|
||||
\node [style=none] (20) at (-4, -7) {};
|
||||
\node [style=none] (21) at (17, -7) {};
|
||||
\node [style=none] (22) at (6.5, 1.25) {DbiPlayer};
|
||||
\node [style=none] (23) at (14.25, -2.25) {};
|
||||
\node [style=none] (22) at (6.25, 1.25) {DbiPlayer};
|
||||
\node [style=none] (23) at (14.5, -2.25) {};
|
||||
\node [style=none] (24) at (14.25, -2.5) {};
|
||||
\node [style=none] (25) at (14.25, -2.5) {};
|
||||
\node [style=none] (26) at (14.25, -2.75) {};
|
||||
\node [style=none] (25) at (14.5, -2.5) {};
|
||||
\node [style=none] (26) at (14.5, -2.75) {};
|
||||
\node [style=none] (27) at (16.5, -2.25) {};
|
||||
\node [style=none] (28) at (16.5, -2.5) {};
|
||||
\node [style=none] (29) at (16.5, -2.75) {};
|
||||
\node [style=none] (30) at (19.75, 2) {};
|
||||
\node [style=none] (31) at (19.75, -7) {};
|
||||
\node [style={initiator_socket}] (32) at (19.5, -2.5) {};
|
||||
\node [style=target socket] (32) at (19.5, -2.5) {};
|
||||
\node [style=none] (34) at (19.25, -2.25) {};
|
||||
\node [style=none] (35) at (19.25, -2.5) {};
|
||||
\node [style=none] (36) at (19.25, -2.75) {};
|
||||
@@ -37,12 +37,10 @@
|
||||
\node [style=none] (40) at (25, 2) {};
|
||||
\node [style=none] (41) at (25, -7) {};
|
||||
\node [style=none] (43) at (22.5, -2.5) {DRAMSys};
|
||||
\node [style=none] (44) at (6.25, -3.5) {};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{edgelayer}
|
||||
\draw (1) to (14.center);
|
||||
\draw (3) to (14.center);
|
||||
\draw (5) to (14.center);
|
||||
\draw (18.center)
|
||||
\draw [style=block] (18.center)
|
||||
to (19.center)
|
||||
to (21.center)
|
||||
to (20.center)
|
||||
@@ -50,12 +48,16 @@
|
||||
\draw (23.center) to (27.center);
|
||||
\draw (25.center) to (28.center);
|
||||
\draw (26.center) to (29.center);
|
||||
\draw (30.center) to (31.center);
|
||||
\draw [style=block] (30.center)
|
||||
to (40.center)
|
||||
to (41.center)
|
||||
to (31.center)
|
||||
to cycle;
|
||||
\draw (37.center) to (34.center);
|
||||
\draw (38.center) to (35.center);
|
||||
\draw (39.center) to (36.center);
|
||||
\draw (30.center) to (40.center);
|
||||
\draw (40.center) to (41.center);
|
||||
\draw (41.center) to (31.center);
|
||||
\draw (1) to (14.center);
|
||||
\draw (3) to (14.center);
|
||||
\draw (5) to (14.center);
|
||||
\end{pgfonlayer}
|
||||
\end{tikzpicture}
|
||||
|
||||
@@ -6,9 +6,10 @@ At first, the DynamoRIO analyzer tool that produces the memory access traces and
|
||||
Furthermore, the trace player for DRAMSys will receive special attention, as will the mandatory cache model that is used to model the cache filtering in a real system.
|
||||
The last part will concentrate on the special architecture of the new trace player and the challenges the internal interconnection solves.
|
||||
|
||||
\subsection{Analysis tool}
|
||||
\subsection{Analysis Tool}
|
||||
\label{sec:analysis_tool}
|
||||
|
||||
As described in section TODO the dynamic binary instrumentation tool DynamoRIO will be used to trace the memory accesses while the target application is running.
|
||||
As described in section~\ref{sec:dynamorio}, the dynamic binary instrumentation tool DynamoRIO will be used to trace the memory accesses while the target application is running.
|
||||
Instead of writing a DynamoRIO client from the ground up, the DrCacheSim framework is used.
|
||||
|
||||
DrCacheSim is a DynamoRIO client that gathers memory and instruction access traces and forwards them to an analyzer tool.
|
||||
@@ -30,21 +31,23 @@ In case of the online tracing, DrCacheSim consists of two seperate processes:
|
||||
The analyzer-side can contain many analysis tools that operate on this stream of records.
|
||||
\end{itemize}
|
||||
|
||||
The \abbr{inter-process communication}{IPC} between the two parts is achieved through a \textit{named\ pipe}.
|
||||
The \revabbr{inter-process communication}{IPC} between the two parts is achieved through a \textit{named\ pipe}.
|
||||
Figure \ref{fig:drcachesim} illustrates the structure of the individual parts.
|
||||
|
||||
\begin{figure}
|
||||
\input{img/thesis.tikzstyles}
|
||||
\begin{figure}
|
||||
\begin{center}
|
||||
\tikzfig{img/drcachesim}
|
||||
\caption{Structure of the DrCacheSim online tracing.}
|
||||
\label{fig:drcachesim}
|
||||
\end{center}
|
||||
\end{figure}
|
||||
|
||||
A \texttt{memref\_t} can either represent an instruction, a data reference or a metadata event such as a timestamp or a CPU identifier.
|
||||
Besides of the type, the \abbr{process identifier}{PID} and \abbr{thread identifier}{TID} is included in every record to be able to associate them.
|
||||
Besides the type, the \revabbr{process identifier}{PID} and \revabbr{thread identifier}{TID} are included in every record to be able to associate them.
|
||||
For an instruction marker, the size of the instruction as well as the virtual address of the instruction in the memory map is provided.
|
||||
DrCacheSim stores the current mapping of all binary executables and shared libraries in a separate file, so that it is possible to decode named instructions even after the application has exited.
|
||||
For data references, the address and size of the desired access is provided as well the \abbr{program counter}{PC} from which it was initiated.
|
||||
For data references, the address and size of the desired access are provided as well as the \revabbr{program counter}{PC} from which it was initiated.
|
||||
|
||||
Analysis tools implement the \texttt{analysis\_tool\_t} interface as this enables the analyzer to forward a received record to multiple tools in a polymorphic manner.
|
||||
In particular, the \texttt{process\_memref()} method of a tool is called for every incoming record.
|
||||
@@ -54,10 +57,77 @@ As it is not known how many threads an application will spawn, the tool will lis
|
||||
For every data reference, a new entry in the corresponding trace file is made which contains the size and the address of the access, whether it was a read or write, and also a count of (computational) instructions that have been executed since the last reference.
|
||||
This instruction count is used to approximate the delay between the memory accesses when the trace is replayed by DRAMSys as described in section TODO.
|
||||
|
||||
\begin{listing}
|
||||
\begin{textcode}
|
||||
# instruction count,read/write,data size,data address
|
||||
# <timestamp>
|
||||
<13295366593324052>
|
||||
4,r,8,1774ef30
|
||||
0,r,8,1774ef38
|
||||
1,w,8,1774ef28
|
||||
2,w,8,1774ee88
|
||||
0,r,8,17744728
|
||||
1,r,8,238c3fb0
|
||||
\end{textcode}
|
||||
\caption{Example of a memory access trace with a timestamp.}
|
||||
\label{list:memtrace}
|
||||
\end{listing}
|
||||
|
||||
At the time of writing this thesis, there is no application binary interface for analysis tools defined in the DrCacheSim framework.
|
||||
Therefore, it is not possible to load the DRAMTracer tool as a shared library; instead, the DynamoRIO source code has to be modified to integrate the tool.
|
||||
|
||||
\subsection{DbiPlayer architecture}
|
||||
\subsection{DbiPlayer Architecture}
|
||||
\label{sec:dbiplayer_architecture}
|
||||
|
||||
This section covers the general architecture of the DbiPlayer, the new trace player for DRAMSys that replays the captured trace files.
|
||||
|
||||
For every recorded thread, a new so-called DbiThreadPlayer is spawned, which is a standalone initiator for transactions.
|
||||
Because those threads need to be synchronized to approximate the real behavior, they need to communicate with each other.
|
||||
The detailed mechanism behind this synchronization will be further explained in section \ref{sec:dbiplayer_functionality}.
|
||||
This communication, however, brings up the necessity to containerize the thread players into a single module that can directly be connected to DRAMSys.
|
||||
To achieve this, a new generic initiator interface was developed that makes it possible to connect components to DRAMSys whose internal architecture can be arbitrary.
|
||||
In the case of the DbiPlayer, an additional interconnect module will bundle up all \texttt{simple\_initiator\_sockets} to a single \texttt{multi\_passthrough\_initiator\_socket} as presented in Figure \ref{fig:dbiplayer_without_caches}.
|
||||
|
||||
\begin{figure}
|
||||
\begin{center}
|
||||
\tikzfig{img/without_caching}
|
||||
\caption{Architecture of the DbiPlayer without caches.}
|
||||
\label{fig:dbiplayer_without_caches}
|
||||
\end{center}
|
||||
\end{figure}
|
||||
|
||||
As the memory accesses are directly extracted from the executed instructions, simply sending a transaction to the DRAM subsystem for every data reference would completely neglect the caches of today's processors.
|
||||
Therefore, a cache model is also required, whose implementation will be explained in more detail in section~\ref{sec:cache}.
|
||||
Modern cache hierarchies are composed of three cache levels: two caches for every processor core, the L1 and L2 cache, and one cache that is shared across all cores, the L3 cache.
|
||||
% TODO: possibly add a literature reference here.
|
||||
This hierarchy is also reflected in the DbiPlayer as shown in Figure \ref{fig:dbiplayer_with_caches}.
|
||||
|
||||
\begin{landscape}
|
||||
\begin{figure}
|
||||
\begin{center}
|
||||
\tikzfig{img/with_caching}
|
||||
\caption{Architecture of the DbiPlayer with caches.}
|
||||
\label{fig:dbiplayer_with_caches}
|
||||
\end{center}
|
||||
\end{figure}
|
||||
\end{landscape}
|
||||
|
||||
\subsection{DbiPlayer Functionality}
|
||||
\label{sec:dbiplayer_functionality}
|
||||
|
||||
With the overall architecture of the initiator introduced, this section explains the internal functionality of the DbiPlayer and its threads.
|
||||
As mentioned previously, the threads cannot run by themselves; rather, they require synchronization to ensure the simulated system replicates the real running application as closely as possible.
|
||||
The analysis tool inserts timestamps into the memory access traces that will be used to pause the execution of a thread when the global time has not yet reached this far, or to advance the global time when the thread is allowed to run.
|
||||
It should be noted that the term global time in this context does not correspond to the SystemC simulation time but denotes a loose time variable that the DbiPlayer uses to schedule its threads.
|
||||
|
||||
A set of rules determines whether a thread is allowed to make progress beyond a timestamp that is further than the current global time:
|
||||
\begin{enumerate}
|
||||
\item The main thread at the start of the program is always allowed to run.
|
||||
\item Threads do not go to sleep when this would produce a deadlock. This is the case when they are the only thread currently running.
|
||||
\item When a previously running thread exits and all other threads are sleeping, they will be woken up.
|
||||
\item As a fallback, when currently all threads are waiting, one thread will be woken up.
|
||||
\end{enumerate}
|
||||
|
||||
These rules ensure that at least one thread is always running and the simulation does not come to a premature halt.
|
||||
|
||||
% TODO: explain the relation between the instruction count and the clock (clk).
|
||||
|
||||
Reference in New Issue
Block a user