gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model

Change-Id: Ibe46970f3ba25d62ca2ade5cbc2054ad746b2254
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29912
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tony Gutierrez
2018-05-01 16:59:35 -04:00
committed by Anthony Gutierrez
parent b0eac7857a
commit b8da9abba7
86 changed files with 10299 additions and 3734 deletions

View File

@@ -36,7 +36,6 @@
#include <string>
#include <utility>
#include <vector>
#include "arch/gpu_decoder.hh"
#include "base/statistics.hh"
@@ -58,9 +57,170 @@ class FetchUnit
// begin a fetch for the given wavefront (fetch-stage entry point)
void initiateFetch(Wavefront *wavefront);
// issue the fetch packet for the wavefront — presumably called once
// the fetch address is ready; TODO confirm against the .cc file
void fetch(PacketPtr pkt, Wavefront *wavefront);
// handle a returning fetch response packet
void processFetchReturn(PacketPtr pkt);
// discard buffered fetch data for the wave occupying the given slot
void flushBuf(int wfSlotId);
// NOTE(review): appears to be a counter shared across all fetch unit
// instances; confirm where it is assigned/incremented
static uint32_t globalFetchUnitID;
private:
/**
 * Fetch buffer descriptor. Holds buffered cache line data for one
 * wavefront's instruction stream, plus the bookkeeping needed to
 * reserve, fill, decode, and release those lines.
 */
class FetchBufDesc
{
  public:
    FetchBufDesc() : bufStart(nullptr), bufEnd(nullptr),
        readPtr(nullptr), fetchDepth(0), maxIbSize(0), maxFbSize(0),
        cacheLineSize(0), restartFromBranch(false), wavefront(nullptr),
        _decoder(nullptr)
    {
    }

    ~FetchBufDesc()
    {
        // delete[] on nullptr is a no-op, so this is safe even if
        // allocateBuf() was never called.
        delete[] bufStart;
    }

    /**
     * allocate the fetch buffer space, and set the fetch depth
     * (number of lines that may be buffered), fetch size
     * (cache line size), and parent WF for this fetch buffer.
     */
    void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf);

    // lines holding valid data plus lines with a fetch outstanding
    int
    bufferedAndReservedLines() const
    {
        return bufferedLines() + reservedLines();
    }

    // number of lines currently holding valid fetch data
    int bufferedLines() const { return bufferedPCs.size(); }
    // number of valid bytes currently buffered
    int bufferedBytes() const { return bufferedLines() * cacheLineSize; }
    // number of lines reserved for outstanding fetches
    int reservedLines() const { return reservedPCs.size(); }
    // true if at least one free line remains in this buffer
    bool hasFreeSpace() const { return !freeList.empty(); }
    void flushBuf();
    Addr nextFetchAddr();

    /**
     * reserve an entry in the fetch buffer for PC = vaddr.
     */
    void reserveBuf(Addr vaddr);

    /**
     * return a pointer to the raw fetch buffer data.
     * this allows the fetch pkt to use this data directly
     * to avoid unnecessary memcpy and malloc/new.
     */
    uint8_t*
    reservedBuf(Addr vaddr) const
    {
        auto reserved_pc = reservedPCs.find(vaddr);
        assert(reserved_pc != reservedPCs.end());
        // NOTE(review): begin() of a std::map is the *lowest* reserved
        // vaddr, not necessarily the oldest reservation — this assert
        // therefore requires fills to arrive lowest-address first;
        // confirm that assumption holds in the fill path.
        assert(reserved_pc == reservedPCs.begin());

        return reserved_pc->second;
    }

    void fetchDone(Addr vaddr);

    /**
     * checks if the buffer contains valid data. this essentially
     * tells fetch when there is data remaining that needs to be
     * decoded into the WF's IB.
     */
    bool hasFetchDataToProcess() const;

    /**
     * each time the fetch stage is ticked, we check if there
     * are any data in the fetch buffer that may be decoded and
     * sent to the IB. because we are modeling the fetch buffer
     * as a circular buffer, it is possible that an instruction
     * can straddle the end/beginning of the fetch buffer, so
     * decodeSplitInsts() handles that case.
     */
    void decodeInsts();

    /**
     * checks if the wavefront can release any of its fetch
     * buffer entries. this will occur when the WF's PC goes
     * beyond any of the currently buffered cache lines.
     */
    void checkWaveReleaseBuf();

    // set the decoder this fetch buffer uses
    void
    decoder(TheGpuISA::Decoder *dec)
    {
        _decoder = dec;
    }

    /**
     * returns true if a line for the given PC is already present in
     * this buffer — either holding valid data (bufferedPCs) or with
     * a fetch outstanding (reservedPCs).
     *
     * Fix: the original used &&, requiring the PC to be in both maps
     * at once. Reserved entries are lines still waiting for data and
     * buffered entries are completed lines, so requiring membership
     * in both made the predicate effectively always false; membership
     * in either map means the line need not be fetched again.
     */
    bool
    pcBuffered(Addr pc) const
    {
        bool buffered = bufferedPCs.find(pc) != bufferedPCs.end()
            || reservedPCs.find(pc) != reservedPCs.end();

        return buffered;
    }

    /**
     * calculates the number of fetched bytes that have yet
     * to be decoded.
     */
    int fetchBytesRemaining() const;

  private:
    void decodeSplitInst();

    /**
     * check if the next instruction to be processed out of
     * the fetch buffer is split across the end/beginning of
     * the fetch buffer.
     */
    bool splitDecode() const;

    /**
     * the set of PCs (fetch addresses) that are currently
     * buffered. bufferedPCs are valid, reservedPCs are
     * waiting for their buffers to be filled with valid
     * fetch data.
     */
    std::map<Addr, uint8_t*> bufferedPCs;
    std::map<Addr, uint8_t*> reservedPCs;

    /**
     * represents the fetch buffer free list. holds buffer space
     * that is currently free. each pointer in this array must
     * have enough space to hold a cache line. in reality we
     * have one actual fetch buffer: 'bufStart', these pointers
     * point to addresses within bufStart that are aligned to the
     * cache line size.
     */
    std::deque<uint8_t*> freeList;

    /**
     * raw instruction buffer. holds cache line data associated with
     * the set of PCs (fetch addresses) that are buffered here.
     * owned by this object and released in the destructor.
     * NOTE(review): ownership is a raw pointer with default copy
     * semantics, so copying a live FetchBufDesc would double-delete;
     * these descriptors live in std::vector<FetchUnit::fetchBuf>, so
     * allocateBuf() must only run once the vector has reached its
     * final size — consider rule-of-five treatment.
     */
    uint8_t *bufStart;
    uint8_t *bufEnd;

    /**
     * pointer that points to the next chunk of inst data to be
     * decoded.
     */
    uint8_t *readPtr;

    // how many lines the fetch unit may buffer
    int fetchDepth;
    // maximum size (in number of insts) of the WF's IB
    int maxIbSize;
    // maximum size (in bytes) of this fetch buffer
    int maxFbSize;
    int cacheLineSize;
    int cacheLineBits;
    bool restartFromBranch;
    // wavefront whose IB is serviced by this fetch buffer
    Wavefront *wavefront;
    TheGpuISA::Decoder *_decoder;
};
// true when running the detailed timing model rather than the
// functional one — TODO confirm against the constructor in the .cc
bool timingSim;
// compute unit that owns this fetch unit
ComputeUnit *computeUnit;
// decoder held by this fetch unit; each FetchBufDesc also keeps a
// decoder pointer, installed via FetchBufDesc::decoder()
TheGpuISA::Decoder decoder;
@@ -82,6 +242,15 @@ class FetchUnit
// Pointer to list of waves dispatched on to this SIMD unit
std::vector<Wavefront*> *waveList;
// holds the fetch buffers. each wave has 1 entry.
// NOTE(review): FetchBufDesc owns raw buffer memory (deleted in its
// dtor) but is copyable; reallocating/copying this vector after
// FetchBufDesc::allocateBuf() would double-free — verify sizing order
std::vector<FetchBufDesc> fetchBuf;
/**
 * number of cache lines we can fetch and buffer.
 * this includes the currently fetched line (i.e., the
 * line that corresponds to the WF's current PC), as
 * well as any lines that may be prefetched.
 */
int fetchDepth;
};
#endif // __FETCH_UNIT_HH__