arch-gcn3: Implement s_buffer_load_dwordx16

Change-Id: I25382dcae9bb55eaf035385fa925157f25d39c20
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29957
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Tony Gutierrez
2018-12-06 19:24:19 -05:00
committed by Anthony Gutierrez
parent 0c3b84fd33
commit 513e75d99a
2 changed files with 90 additions and 31 deletions

View File

@@ -4857,17 +4857,45 @@ namespace Gcn3ISA
/**
 * Issue the scalar buffer load: read the resource descriptor named by
 * SBASE, obtain the offset (from the OFFSET field directly, or from the
 * scalar operand it names), compute the access address with calcAddr()
 * and push the instruction onto the scalar memory pipeline's request
 * FIFO.
 */
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
{
// NOTE(review): the hunk header (17 old lines -> 45 new lines) suggests
// that the stub call and closing brace below are the pre-change body,
// shown together with the new body because the diff markers were lost.
panicUnimplemented();
}
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
// The latency is set to one clock period of the owning compute unit.
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
// SBASE names the 128-bit operand holding the buffer resource descriptor.
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
rsrcDesc.read();
if (instData.IMM) {
// IMM set: the OFFSET field itself is used as the offset value.
offset = extData.OFFSET;
} else {
// IMM clear: OFFSET identifies a 32-bit scalar operand that is read
// to obtain the offset value.
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
// Selects the ConstScalarOperandU128 (buffer descriptor) overload.
calcAddr(gpuDynInst, rsrcDesc, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.getGMReqFIFO().push(gpuDynInst);
// Request accounting: one fewer scalar read "in pipe", one more
// outstanding; the wavefront then checks its counters.
wf->scalarRdGmReqsInPipe--;
wf->scalarOutstandingReqsRdGm++;
gpuDynInst->wavefront()->outstandingReqs++;
gpuDynInst->wavefront()->validateRequestCounters();
} // execute
/**
 * Start the memory access for this instruction by delegating to the
 * initMemRead helper, instantiated with 16 (matching the X16 suffix of
 * the opcode name; presumably the number of dwords read).
 */
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    constexpr int NumDwords = 16;
    initMemRead<NumDwords>(gpuDynInst);
} // initiateAcc
/**
 * Finish the load: bind a ScalarOperandU512 destination to the register
 * named by the SDATA field and write it back.
 */
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
{
    ScalarOperandU512 dst_sgprs(gpuDynInst, instData.SDATA);
    dst_sgprs.write();
} // completeAcc
Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)

View File

@@ -46,6 +46,29 @@
namespace Gcn3ISA
{
/**
 * Bit-field view of the buffer resource descriptor consumed by the
 * buffer address calculation (calcAddr() fills it with std::memcpy from
 * a 128-bit scalar operand).
 *
 * The field widths add up to exactly 128 bits. The bit ranges noted
 * below assume the compiler packs the fields contiguously, LSB first,
 * in declaration order; the static_assert after the struct rejects
 * ABIs where the struct is not 16 bytes, since the raw memcpy would
 * then read past the operand. Field names follow AMD's GCN3 ISA
 * documentation; consult it for the exact semantics of each field.
 */
struct BufferRsrcDescriptor
{
    uint64_t baseAddr : 48;     // [47:0]    base address added to the offset
    uint32_t stride : 14;       // [61:48]   stride used for offset clamping
    uint32_t cacheSwizzle : 1;  // [62]
    uint32_t swizzleEn : 1;     // [63]
    uint32_t numRecords : 32;   // [95:64]   record count used for clamping
    uint32_t dstSelX : 3;       // [98:96]
    uint32_t dstSelY : 3;       // [101:99]
    uint32_t dstSelZ : 3;       // [104:102]
    uint32_t dstSelW : 3;       // [107:105]
    uint32_t numFmt : 3;        // [110:108]
    uint32_t dataFmt : 4;       // [114:111]
    uint32_t elemSize : 2;      // [116:115]
    uint32_t idxStride : 2;     // [118:117]
    uint32_t addTidEn : 1;      // [119]
    uint32_t atc : 1;           // [120]
    uint32_t hashEn : 1;        // [121]
    uint32_t heap : 1;          // [122]
    uint32_t mType : 3;         // [125:123]
    uint32_t type : 2;          // [127:126]
};

static_assert(sizeof(BufferRsrcDescriptor) == 16,
              "BufferRsrcDescriptor must be exactly 128 bits: it is "
              "filled by a raw memcpy from a 128-bit scalar operand");
// --- purely virtual instruction classes ---
class Inst_SOP2 : public GCN3GPUStaticInst
@@ -197,14 +220,45 @@ namespace Gcn3ISA
MemCmd::WriteReq);
}
/**
* For normal s_load_dword/s_store_dword instruction addresses.
*
* Adds the offset to the raw value of addr, clears the two low-order
* bits of the sum (dword alignment) and stores the result in the
* dynamic instruction's scalarAddr.
*/
void
// NOTE(review): the hunk header (14 old lines -> 45 new lines) indicates
// that two versions of this function are shown together here: the
// pre-change one using gpuDynInst and the post-change one using
// gpu_dyn_inst. Both compute the same address.
calcAddr(GPUDynInstPtr gpuDynInst, ConstScalarOperandU64 &addr,
ScalarRegU32 offset)
calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
ScalarRegU32 offset)
{
Addr vaddr = addr.rawData();
vaddr += offset;
// Clear bits [1:0] so the address is a multiple of four bytes.
vaddr &= ~0x3;
gpuDynInst->scalarAddr = vaddr;
// Single-expression form of the same computation: add, then align.
Addr vaddr = ((addr.rawData() + offset) & ~0x3);
gpu_dyn_inst->scalarAddr = vaddr;
}
/**
 * For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
 * The s_buffer instructions use the same buffer resource descriptor
 * as the MUBUF instructions.
 *
 * The offset is clamped against the buffer extent described by the
 * descriptor, added to the descriptor's base address, and the sum is
 * forced to dword alignment before being stored in scalarAddr.
 *
 * @param gpu_dyn_inst dynamic instruction whose scalarAddr is set
 * @param s_rsrc_desc operand holding the raw resource descriptor
 * @param offset unclamped offset into the buffer
 */
void
calcAddr(GPUDynInstPtr gpu_dyn_inst,
         ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
{
    // Reinterpret the raw descriptor bits through the bit-field struct.
    BufferRsrcDescriptor rsrc_desc;
    std::memcpy(&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(BufferRsrcDescriptor));

    /**
     * The address is clamped if:
     *     Stride is zero: clamp if offset >= num_records
     *     Stride is non-zero: clamp if offset > (stride * num_records)
     */
    ScalarRegU32 clamped_offset(offset);
    if (!rsrc_desc.stride) {
        if (offset >= rsrc_desc.numRecords) {
            clamped_offset = rsrc_desc.numRecords;
        }
    } else {
        // Compute the extent in 64 bits. After integral promotion the
        // product of the 14-bit stride and the 32-bit record count is
        // evaluated in 32-bit unsigned arithmetic and can wrap, which
        // would clamp offsets that are actually inside the buffer.
        const uint64_t buf_extent =
            static_cast<uint64_t>(rsrc_desc.stride) * rsrc_desc.numRecords;
        if (offset > buf_extent) {
            // buf_extent < offset here, so it fits in 32 bits.
            clamped_offset = static_cast<ScalarRegU32>(buf_extent);
        }
    }

    // Clear bits [1:0] so the address is a multiple of four bytes.
    Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
    gpu_dyn_inst->scalarAddr = vaddr;
}
// first instruction DWORD
@@ -469,29 +523,6 @@ namespace Gcn3ISA
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
protected:
/**
 * Bit-field view of the buffer resource descriptor; the field widths
 * add up to 128 bits.
 *
 * NOTE(review): the hunk header (29 old lines -> 6 new lines) indicates
 * this class-scope copy is the pre-change definition; an identical
 * struct appears at namespace scope earlier in the same diff.
 */
struct BufferRsrcDescriptor
{
uint64_t baseAddr : 48; // base address of the buffer
uint32_t stride : 14;
uint32_t cacheSwizzle : 1;
uint32_t swizzleEn : 1;
uint32_t numRecords : 32;
// Fields below also total 32 bits (3+3+3+3+3+4+2+2+1+1+1+1+3+2).
uint32_t dstSelX : 3;
uint32_t dstSelY : 3;
uint32_t dstSelZ : 3;
uint32_t dstSelW : 3;
uint32_t numFmt : 3;
uint32_t dataFmt : 4;
uint32_t elemSize : 2;
uint32_t idxStride : 2;
uint32_t addTidEn : 1;
uint32_t atc : 1;
uint32_t hashEn : 1;
uint32_t heap : 1;
uint32_t mType : 3;
uint32_t type : 2;
};
template<typename T>
void
initMemRead(GPUDynInstPtr gpuDynInst)