gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model
Change-Id: Ibe46970f3ba25d62ca2ade5cbc2054ad746b2254 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29912 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
b0eac7857a
commit
b8da9abba7
@@ -45,11 +45,9 @@
|
||||
|
||||
uint32_t FetchUnit::globalFetchUnitID;
|
||||
|
||||
FetchUnit::FetchUnit(const ComputeUnitParams* params) :
|
||||
timingSim(true),
|
||||
computeUnit(nullptr),
|
||||
fetchScheduler(params),
|
||||
waveList(nullptr)
|
||||
FetchUnit::FetchUnit(const ComputeUnitParams* params)
|
||||
: timingSim(true), computeUnit(nullptr), fetchScheduler(params),
|
||||
waveList(nullptr), fetchDepth(params->fetch_depth)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -66,9 +64,14 @@ FetchUnit::init(ComputeUnit *cu)
|
||||
timingSim = computeUnit->shader->timingSim;
|
||||
fetchQueue.clear();
|
||||
fetchStatusQueue.resize(computeUnit->shader->n_wf);
|
||||
fetchBuf.resize(computeUnit->shader->n_wf, FetchBufDesc());
|
||||
|
||||
for (int j = 0; j < computeUnit->shader->n_wf; ++j) {
|
||||
fetchStatusQueue[j] = std::make_pair(waveList->at(j), false);
|
||||
for (int i = 0; i < computeUnit->shader->n_wf; ++i) {
|
||||
Wavefront *wf = waveList->at(i);
|
||||
assert(wf->wfSlotId == i);
|
||||
fetchStatusQueue[i] = std::make_pair(wf, false);
|
||||
fetchBuf[i].allocateBuf(fetchDepth, computeUnit->cacheLineSize(), wf);
|
||||
fetchBuf[i].decoder(&decoder);
|
||||
}
|
||||
|
||||
fetchScheduler.bindList(&fetchQueue);
|
||||
@@ -77,6 +80,23 @@ FetchUnit::init(ComputeUnit *cu)
|
||||
void
|
||||
FetchUnit::exec()
|
||||
{
|
||||
/**
|
||||
* now we check if any of the fetch buffers have
|
||||
* buffered instruction data that can be decoded
|
||||
* and sent to its wavefront's instruction buffer.
|
||||
* then we check if any of the fetch buffer entries
|
||||
* can be released. we only check if we can
|
||||
* release a buffer
|
||||
*/
|
||||
for (auto &fetch_buf : fetchBuf) {
|
||||
if (!fetch_buf.hasFreeSpace()) {
|
||||
fetch_buf.checkWaveReleaseBuf();
|
||||
}
|
||||
if (fetch_buf.hasFetchDataToProcess()) {
|
||||
fetch_buf.decodeInsts();
|
||||
}
|
||||
}
|
||||
|
||||
// re-evaluate waves which are marked as not ready for fetch
|
||||
for (int j = 0; j < computeUnit->shader->n_wf; ++j) {
|
||||
// Following code assumes 64-bit opertaion and all insts are
|
||||
@@ -88,9 +108,10 @@ FetchUnit::exec()
|
||||
// 4 or less instructions and it can not have any branches to
|
||||
// prevent speculative instruction fetches
|
||||
if (!fetchStatusQueue[j].second) {
|
||||
if (curWave->status == Wavefront::S_RUNNING &&
|
||||
curWave->instructionBuffer.size() <= 4 &&
|
||||
!curWave->instructionBufferHasBranch() &&
|
||||
if ((curWave->getStatus() == Wavefront::S_RUNNING ||
|
||||
curWave->getStatus() == Wavefront::S_WAITCNT) &&
|
||||
fetchBuf[j].hasFreeSpace() &&
|
||||
!curWave->stopFetch() &&
|
||||
!curWave->pendingFetch) {
|
||||
fetchQueue.push_back(curWave);
|
||||
fetchStatusQueue[j].second = true;
|
||||
@@ -111,45 +132,38 @@ FetchUnit::exec()
|
||||
void
|
||||
FetchUnit::initiateFetch(Wavefront *wavefront)
|
||||
{
|
||||
// calculate the virtual address to fetch from the SQC
|
||||
Addr vaddr = wavefront->pc();
|
||||
assert(fetchBuf.at(wavefront->wfSlotId).hasFreeSpace());
|
||||
|
||||
/**
|
||||
* the instruction buffer holds one instruction per entry, regardless
|
||||
* of the underlying instruction's size. the PC, however, addresses
|
||||
* instrutions on a 32b granularity so we must account for that here.
|
||||
*/
|
||||
for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
|
||||
vaddr +=
|
||||
wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
|
||||
}
|
||||
vaddr = wavefront->basePtr + vaddr;
|
||||
* calculate the virtual address to fetch from the SQC. the fetch
|
||||
* buffer holds a configurable number of cache lines. we start
|
||||
* fetching at the address of the cache line immediately following
|
||||
* the buffered line(s).
|
||||
*/
|
||||
Addr vaddr = fetchBuf.at(wavefront->wfSlotId).nextFetchAddr();
|
||||
|
||||
// this should already be aligned to a cache line
|
||||
assert(vaddr == makeLineAddress(vaddr,
|
||||
computeUnit->getCacheLineBits()));
|
||||
|
||||
// shouldn't be fetching a line that is already buffered
|
||||
assert(!fetchBuf.at(wavefront->wfSlotId).pcBuffered(vaddr));
|
||||
|
||||
fetchBuf.at(wavefront->wfSlotId).reserveBuf(vaddr);
|
||||
|
||||
DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Id%d: Initiate fetch "
|
||||
"from pc: %d %#x\n", computeUnit->cu_id, wavefront->simdId,
|
||||
wavefront->wfSlotId, wavefront->wfDynId, wavefront->pc(), vaddr);
|
||||
|
||||
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
|
||||
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
|
||||
|
||||
// Since this is an instruction prefetch, if you're split then just finish
|
||||
// out the current line.
|
||||
int block_size = computeUnit->cacheLineSize();
|
||||
// check for split accesses
|
||||
Addr split_addr = roundDown(vaddr + block_size - 1, block_size);
|
||||
int size = block_size;
|
||||
|
||||
if (split_addr > vaddr) {
|
||||
// misaligned access, just grab the rest of the line
|
||||
size = split_addr - vaddr;
|
||||
}
|
||||
|
||||
// set up virtual request
|
||||
RequestPtr req = std::make_shared<Request>(
|
||||
vaddr, size, Request::INST_FETCH,
|
||||
vaddr, computeUnit->cacheLineSize(), Request::INST_FETCH,
|
||||
computeUnit->masterId(), 0, 0, nullptr);
|
||||
|
||||
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
|
||||
// This fetchBlock is kind of faux right now - because the translations so
|
||||
// far don't actually return Data
|
||||
uint64_t fetchBlock;
|
||||
pkt->dataStatic(&fetchBlock);
|
||||
|
||||
if (timingSim) {
|
||||
// SenderState needed on Return
|
||||
@@ -210,19 +224,23 @@ FetchUnit::fetch(PacketPtr pkt, Wavefront *wavefront)
|
||||
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId,
|
||||
pkt->req->getPaddr());
|
||||
|
||||
// this is necessary because the GPU TLB receives packets instead of
|
||||
// requests. when the translation is complete, all relevent fields in the
|
||||
// request will be populated, but not in the packet. here we create the
|
||||
// new packet so we can set the size, addr, and proper flags.
|
||||
/**
|
||||
* this is necessary because the GPU TLB receives packets instead of
|
||||
* requests. when the translation is complete, all relevent fields in
|
||||
* the request will be populated, but not in the packet. here we create
|
||||
* the new packet so we can set the size, addr, and proper flags.
|
||||
*/
|
||||
PacketPtr oldPkt = pkt;
|
||||
pkt = new Packet(oldPkt->req, oldPkt->cmd);
|
||||
delete oldPkt;
|
||||
|
||||
TheGpuISA::RawMachInst *data =
|
||||
new TheGpuISA::RawMachInst[pkt->req->getSize() /
|
||||
sizeof(TheGpuISA::RawMachInst)];
|
||||
|
||||
pkt->dataDynamic<TheGpuISA::RawMachInst>(data);
|
||||
/**
|
||||
* we should have reserved an entry in the fetch buffer
|
||||
* for this cache line. here we get the pointer to the
|
||||
* entry used to buffer this request's line data.
|
||||
*/
|
||||
pkt->dataStatic(fetchBuf.at(wavefront->wfSlotId)
|
||||
.reservedBuf(pkt->req->getVaddr()));
|
||||
|
||||
// New SenderState for the memory access
|
||||
pkt->senderState = new ComputeUnit::SQCPort::SenderState(wavefront);
|
||||
@@ -257,47 +275,15 @@ FetchUnit::processFetchReturn(PacketPtr pkt)
|
||||
Wavefront *wavefront = sender_state->wavefront;
|
||||
|
||||
DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch addr %#x returned "
|
||||
"%d bytes, %d instructions!\n", computeUnit->cu_id,
|
||||
wavefront->simdId, wavefront->wfSlotId, pkt->req->getPaddr(),
|
||||
pkt->req->getSize(), pkt->req->getSize() /
|
||||
sizeof(TheGpuISA::RawMachInst));
|
||||
"%d bytes!\n", computeUnit->cu_id, wavefront->simdId,
|
||||
wavefront->wfSlotId, pkt->req->getPaddr(), pkt->req->getSize());
|
||||
|
||||
if (wavefront->dropFetch) {
|
||||
assert(wavefront->instructionBuffer.empty());
|
||||
assert(!fetchBuf.at(wavefront->wfSlotId).hasFetchDataToProcess());
|
||||
wavefront->dropFetch = false;
|
||||
} else {
|
||||
TheGpuISA::RawMachInst *inst_index_ptr =
|
||||
(TheGpuISA::RawMachInst*)pkt->getPtr<uint8_t>();
|
||||
|
||||
assert(wavefront->instructionBuffer.size() <= 4);
|
||||
|
||||
for (int i = 0; i < pkt->req->getSize() /
|
||||
sizeof(TheGpuISA::RawMachInst); ++i) {
|
||||
GPUStaticInst *inst_ptr = decoder.decode(inst_index_ptr[i]);
|
||||
|
||||
assert(inst_ptr);
|
||||
|
||||
if (inst_ptr->instSize() == 8) {
|
||||
/**
|
||||
* this instruction occupies 2 consecutive
|
||||
* entries in the instruction array, the
|
||||
* second of which contains a nullptr. so if
|
||||
* this inst is 8 bytes we advance two entries
|
||||
* instead of 1
|
||||
*/
|
||||
++i;
|
||||
}
|
||||
|
||||
DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: added %s\n",
|
||||
computeUnit->cu_id, wavefront->simdId,
|
||||
wavefront->wfSlotId, inst_ptr->disassemble());
|
||||
|
||||
GPUDynInstPtr gpuDynInst =
|
||||
std::make_shared<GPUDynInst>(computeUnit, wavefront, inst_ptr,
|
||||
computeUnit->getAndIncSeqNum());
|
||||
|
||||
wavefront->instructionBuffer.push_back(gpuDynInst);
|
||||
}
|
||||
fetchBuf.at(wavefront->wfSlotId).fetchDone(pkt->req->getVaddr());
|
||||
}
|
||||
|
||||
wavefront->pendingFetch = false;
|
||||
@@ -306,8 +292,337 @@ FetchUnit::processFetchReturn(PacketPtr pkt)
|
||||
delete pkt;
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::flushBuf(int wfSlotId)
|
||||
{
|
||||
fetchBuf.at(wfSlotId).flushBuf();
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::bindWaveList(std::vector<Wavefront*> *wave_list)
|
||||
{
|
||||
waveList = wave_list;
|
||||
}
|
||||
|
||||
/** FetchBufDesc */
|
||||
void
|
||||
FetchUnit::FetchBufDesc::allocateBuf(int fetch_depth, int cache_line_size,
|
||||
Wavefront *wf)
|
||||
{
|
||||
wavefront = wf;
|
||||
fetchDepth = fetch_depth;
|
||||
maxIbSize = wavefront->maxIbSize;
|
||||
cacheLineSize = cache_line_size;
|
||||
maxFbSize = cacheLineSize * fetchDepth;
|
||||
|
||||
// Calculate the number of bits to address a cache line
|
||||
panic_if(!isPowerOf2(cacheLineSize),
|
||||
"Cache line size should be a power of two.");
|
||||
cacheLineBits = floorLog2(cacheLineSize);
|
||||
|
||||
bufStart = new uint8_t[maxFbSize];
|
||||
readPtr = bufStart;
|
||||
bufEnd = bufStart + maxFbSize;
|
||||
|
||||
for (int i = 0; i < fetchDepth; ++i) {
|
||||
freeList.emplace_back(readPtr + i * cacheLineSize);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::flushBuf()
|
||||
{
|
||||
restartFromBranch = true;
|
||||
/**
|
||||
* free list may have some entries
|
||||
* so we clear it here to avoid duplicates
|
||||
*/
|
||||
freeList.clear();
|
||||
bufferedPCs.clear();
|
||||
reservedPCs.clear();
|
||||
readPtr = bufStart;
|
||||
|
||||
for (int i = 0; i < fetchDepth; ++i) {
|
||||
freeList.push_back(bufStart + i * cacheLineSize);
|
||||
}
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
|
||||
"buffer\n", wavefront->simdId, wavefront->wfSlotId,
|
||||
wavefront->wfDynId);
|
||||
}
|
||||
|
||||
Addr
|
||||
FetchUnit::FetchBufDesc::nextFetchAddr()
|
||||
{
|
||||
Addr next_line = 0;
|
||||
|
||||
if (bufferedAndReservedLines()) {
|
||||
Addr last_line_fetched = 0;
|
||||
if (!reservedLines()) {
|
||||
/**
|
||||
* get the PC of the most recently fetched cache line,
|
||||
* then return the address of the next line.
|
||||
*/
|
||||
last_line_fetched = bufferedPCs.rbegin()->first;
|
||||
} else {
|
||||
last_line_fetched = reservedPCs.rbegin()->first;
|
||||
}
|
||||
|
||||
next_line = last_line_fetched + cacheLineSize;
|
||||
|
||||
/**
|
||||
* should not be trying to fetch a line that has already
|
||||
* been fetched.
|
||||
*/
|
||||
assert(bufferedPCs.find(next_line) == bufferedPCs.end());
|
||||
assert(reservedPCs.find(next_line) == reservedPCs.end());
|
||||
} else {
|
||||
/**
|
||||
* we do not have any buffered cache lines yet, so we
|
||||
* assume this is the initial fetch, or the first fetch
|
||||
* after a branch, and get the PC directly from the WF.
|
||||
* in the case of a branch, we may not start at the
|
||||
* beginning of a cache line, so we adjust the readPtr by
|
||||
* the current PC's offset from the start of the line.
|
||||
*/
|
||||
next_line = makeLineAddress(wavefront->pc(), cacheLineBits);
|
||||
readPtr = bufStart;
|
||||
|
||||
/**
|
||||
* if we are here we have no buffered lines. in the case we flushed
|
||||
* the buffer due to a branch, we may need to start fetching from
|
||||
* some offset from the start of the fetch buffer, so we adjust for
|
||||
* that here.
|
||||
*/
|
||||
if (restartFromBranch) {
|
||||
restartFromBranch = false;
|
||||
int byte_offset
|
||||
= wavefront->pc() - makeLineAddress(wavefront->pc(),
|
||||
cacheLineBits);
|
||||
readPtr += byte_offset;
|
||||
}
|
||||
}
|
||||
|
||||
return next_line;
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
|
||||
{
|
||||
// we should have free buffer space, and the line
|
||||
// at vaddr should not already be cached.
|
||||
assert(hasFreeSpace());
|
||||
assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
|
||||
assert(reservedPCs.find(vaddr) == reservedPCs.end());
|
||||
assert(bufferedAndReservedLines() < fetchDepth);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d reserved fetch buffer entry "
|
||||
"for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
|
||||
wavefront->wfDynId, vaddr);
|
||||
|
||||
/**
|
||||
* we reserve buffer space, by moving it out of the
|
||||
* free list, however we do not mark the buffered
|
||||
* line as valid until the fetch unit for this buffer
|
||||
* has receieved the response from the memory system.
|
||||
*/
|
||||
uint8_t *inst_buf = freeList.front();
|
||||
reservedPCs.emplace(vaddr, inst_buf);
|
||||
freeList.pop_front();
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
|
||||
{
|
||||
assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
|
||||
wavefront->simdId, wavefront->wfSlotId,
|
||||
wavefront->wfDynId, vaddr);
|
||||
|
||||
/**
|
||||
* this address should have an entry reserved in the
|
||||
* fetch buffer already, however it should be invalid
|
||||
* until the fetch completes.
|
||||
*/
|
||||
auto reserved_pc = reservedPCs.find(vaddr);
|
||||
assert(reserved_pc != reservedPCs.end());
|
||||
bufferedPCs.emplace(vaddr, reserved_pc->second);
|
||||
|
||||
if (readPtr == bufEnd) {
|
||||
readPtr = bufStart;
|
||||
}
|
||||
|
||||
reserved_pc->second = nullptr;
|
||||
reservedPCs.erase(reserved_pc);
|
||||
}
|
||||
|
||||
bool
|
||||
FetchUnit::FetchBufDesc::hasFetchDataToProcess() const
|
||||
{
|
||||
return fetchBytesRemaining() >= sizeof(TheGpuISA::RawMachInst);
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::checkWaveReleaseBuf()
|
||||
{
|
||||
Addr cur_wave_pc = roundDown(wavefront->pc(),
|
||||
wavefront->computeUnit->cacheLineSize());
|
||||
if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d current wave PC(%#x) still "
|
||||
"being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
|
||||
wavefront->wfDynId, cur_wave_pc);
|
||||
|
||||
// should be reserved, but not buffered yet
|
||||
assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
|
||||
auto oldest_buffered_pc = bufferedPCs.begin();
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d checking if PC block addr = %#x"
|
||||
"(PC = %#x) can be released.\n", wavefront->simdId,
|
||||
wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
|
||||
wavefront->pc());
|
||||
|
||||
#ifdef DEBUG
|
||||
int idx = 0;
|
||||
for (const auto &buf_pc : bufferedPCs) {
|
||||
DPRINTF(GPUFetch, "PC[%d] = %#x\n", idx, buf_pc.first);
|
||||
++idx;
|
||||
}
|
||||
#endif
|
||||
|
||||
// if we haven't buffered data for this PC, we shouldn't
|
||||
// be fetching from it.
|
||||
assert(current_buffered_pc != bufferedPCs.end());
|
||||
|
||||
/**
|
||||
* we're using a std::map so the addresses are sorted. if this
|
||||
* PC is not the oldest one in the map, we must be fetching from
|
||||
* a newer block, and we can release the oldest PC's fetch buffer
|
||||
* entry back to the free list.
|
||||
*/
|
||||
if (current_buffered_pc != oldest_buffered_pc) {
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for PC = %#x, "
|
||||
"removing it from the fetch buffer.\n", wavefront->simdId,
|
||||
wavefront->wfSlotId, wavefront->wfDynId,
|
||||
oldest_buffered_pc->first);
|
||||
|
||||
freeList.emplace_back(oldest_buffered_pc->second);
|
||||
oldest_buffered_pc->second = nullptr;
|
||||
bufferedPCs.erase(oldest_buffered_pc);
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d has %d lines buffered.\n",
|
||||
wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
|
||||
bufferedLines());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::decodeInsts()
|
||||
{
|
||||
assert(readPtr);
|
||||
|
||||
if (splitDecode()) {
|
||||
decodeSplitInst();
|
||||
}
|
||||
|
||||
while (wavefront->instructionBuffer.size() < maxIbSize
|
||||
&& hasFetchDataToProcess()) {
|
||||
if (splitDecode()) {
|
||||
decodeSplitInst();
|
||||
} else {
|
||||
TheGpuISA::MachInst mach_inst
|
||||
= reinterpret_cast<TheGpuISA::MachInst>(readPtr);
|
||||
GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
|
||||
readPtr += gpu_static_inst->instSize();
|
||||
|
||||
assert(readPtr <= bufEnd);
|
||||
|
||||
GPUDynInstPtr gpu_dyn_inst
|
||||
= std::make_shared<GPUDynInst>(wavefront->computeUnit,
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
|
||||
"%d bytes remain.\n", wavefront->simdId,
|
||||
wavefront->wfSlotId, wavefront->wfDynId,
|
||||
gpu_static_inst->disassemble(),
|
||||
gpu_static_inst->instSize(),
|
||||
fetchBytesRemaining());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::decodeSplitInst()
|
||||
{
|
||||
TheGpuISA::RawMachInst split_inst = 0;
|
||||
int dword_size = sizeof(uint32_t);
|
||||
int num_dwords = sizeof(TheGpuISA::RawMachInst) / dword_size;
|
||||
|
||||
for (int i = 0; i < num_dwords; ++i) {
|
||||
((uint32_t*)(&split_inst))[i] = *reinterpret_cast<uint32_t*>(readPtr);
|
||||
if (readPtr + dword_size >= bufEnd) {
|
||||
readPtr = bufStart;
|
||||
}
|
||||
}
|
||||
|
||||
assert(readPtr == bufStart);
|
||||
|
||||
TheGpuISA::MachInst mach_inst
|
||||
= reinterpret_cast<TheGpuISA::MachInst>(&split_inst);
|
||||
GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
|
||||
readPtr += (gpu_static_inst->instSize() - dword_size);
|
||||
assert(readPtr < bufEnd);
|
||||
|
||||
GPUDynInstPtr gpu_dyn_inst
|
||||
= std::make_shared<GPUDynInst>(wavefront->computeUnit,
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
|
||||
"(%d bytes). %d bytes remain in %d buffered lines.\n",
|
||||
wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
|
||||
gpu_static_inst->disassemble(), split_inst,
|
||||
gpu_static_inst->instSize(), fetchBytesRemaining(),
|
||||
bufferedLines());
|
||||
}
|
||||
|
||||
bool
|
||||
FetchUnit::FetchBufDesc::splitDecode() const
|
||||
{
|
||||
/**
|
||||
* if a read of a raw instruction would go beyond the end
|
||||
* of the fetch buffer, then we must perform a split decode.
|
||||
*/
|
||||
bool is_split = (readPtr + sizeof(TheGpuISA::RawMachInst)) > bufEnd;
|
||||
|
||||
return is_split;
|
||||
}
|
||||
|
||||
int
|
||||
FetchUnit::FetchBufDesc::fetchBytesRemaining() const
|
||||
{
|
||||
int bytes_remaining = 0;
|
||||
|
||||
if (bufferedLines() && readPtr != bufEnd) {
|
||||
auto last_buf_pc = bufferedPCs.rbegin();
|
||||
uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
|
||||
int byte_diff = end_ptr - readPtr;
|
||||
|
||||
if (end_ptr > readPtr) {
|
||||
bytes_remaining = byte_diff;
|
||||
} else if (end_ptr < readPtr) {
|
||||
bytes_remaining = bufferedBytes() + byte_diff;
|
||||
}
|
||||
}
|
||||
|
||||
assert(bytes_remaining <= bufferedBytes());
|
||||
return bytes_remaining;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user