arch-gcn3: Implement s_buffer_load_dwordx16
Change-Id: I25382dcae9bb55eaf035385fa925157f25d39c20
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29957
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
0c3b84fd33
commit
513e75d99a
@@ -4857,17 +4857,45 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
panicUnimplemented();
|
||||
}
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
|
||||
ScalarRegU32 offset(0);
|
||||
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
|
||||
|
||||
rsrcDesc.read();
|
||||
|
||||
if (instData.IMM) {
|
||||
offset = extData.OFFSET;
|
||||
} else {
|
||||
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
|
||||
off_sgpr.read();
|
||||
offset = off_sgpr.rawData();
|
||||
}
|
||||
|
||||
calcAddr(gpuDynInst, rsrcDesc, offset);
|
||||
|
||||
gpuDynInst->computeUnit()->scalarMemoryPipe
|
||||
.getGMReqFIFO().push(gpuDynInst);
|
||||
|
||||
wf->scalarRdGmReqsInPipe--;
|
||||
wf->scalarOutstandingReqsRdGm++;
|
||||
gpuDynInst->wavefront()->outstandingReqs++;
|
||||
gpuDynInst->wavefront()->validateRequestCounters();
|
||||
} // execute
|
||||
|
||||
void
|
||||
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemRead<16>(gpuDynInst);
|
||||
} // initiateAcc
|
||||
|
||||
void
|
||||
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
|
||||
sdst.write();
|
||||
} // completeAcc
|
||||
|
||||
Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
|
||||
|
||||
@@ -46,6 +46,29 @@
|
||||
|
||||
namespace Gcn3ISA
|
||||
{
|
||||
/**
 * Bit-field view of a buffer resource descriptor. The declared field
 * widths add up to 128 bits (48 + 14 + 1 + 1 in the first 64 bits,
 * 32 for numRecords, and 32 for the remaining fields).
 */
struct BufferRsrcDescriptor
{
    uint64_t baseAddr : 48;
    uint32_t stride : 14;
    uint32_t cacheSwizzle : 1;
    uint32_t swizzleEn : 1;
    uint32_t numRecords : 32;
    uint32_t dstSelX : 3;
    uint32_t dstSelY : 3;
    uint32_t dstSelZ : 3;
    uint32_t dstSelW : 3;
    uint32_t numFmt : 3;
    uint32_t dataFmt : 4;
    uint32_t elemSize : 2;
    uint32_t idxStride : 2;
    uint32_t addTidEn : 1;
    uint32_t atc : 1;
    uint32_t hashEn : 1;
    uint32_t heap : 1;
    uint32_t mType : 3;
    uint32_t type : 2;
};

// Bit-field packing is implementation-defined, and this struct is filled
// by copying sizeof(BufferRsrcDescriptor) bytes out of a 128-bit scalar
// operand, so fail the build if the compiler does not pack it into
// exactly 16 bytes.
static_assert(sizeof(BufferRsrcDescriptor) == 16,
              "BufferRsrcDescriptor must be exactly 128 bits");
|
||||
|
||||
// --- purely virtual instruction classes ---
|
||||
|
||||
class Inst_SOP2 : public GCN3GPUStaticInst
|
||||
@@ -197,14 +220,45 @@ namespace Gcn3ISA
|
||||
MemCmd::WriteReq);
|
||||
}
|
||||
|
||||
/**
|
||||
* For normal s_load_dword/s_store_dword instruction addresses.
|
||||
*/
|
||||
void
|
||||
calcAddr(GPUDynInstPtr gpuDynInst, ConstScalarOperandU64 &addr,
|
||||
ScalarRegU32 offset)
|
||||
calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
|
||||
ScalarRegU32 offset)
|
||||
{
|
||||
Addr vaddr = addr.rawData();
|
||||
vaddr += offset;
|
||||
vaddr &= ~0x3;
|
||||
gpuDynInst->scalarAddr = vaddr;
|
||||
Addr vaddr = ((addr.rawData() + offset) & ~0x3);
|
||||
gpu_dyn_inst->scalarAddr = vaddr;
|
||||
}
|
||||
|
||||
/**
|
||||
* For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
|
||||
* The s_buffer instructions use the same buffer resource descriptor
|
||||
* as the MUBUF instructions.
|
||||
*/
|
||||
void
|
||||
calcAddr(GPUDynInstPtr gpu_dyn_inst,
|
||||
ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
|
||||
{
|
||||
BufferRsrcDescriptor rsrc_desc;
|
||||
ScalarRegU32 clamped_offset(offset);
|
||||
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
|
||||
sizeof(BufferRsrcDescriptor));
|
||||
|
||||
/**
|
||||
* The address is clamped if:
|
||||
* Stride is zero: clamp if offset >= num_records
|
||||
* Stride is non-zero: clamp if offset > (stride * num_records)
|
||||
*/
|
||||
if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
|
||||
clamped_offset = rsrc_desc.numRecords;
|
||||
} else if (rsrc_desc.stride && offset
|
||||
> (rsrc_desc.stride * rsrc_desc.numRecords)) {
|
||||
clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
|
||||
}
|
||||
|
||||
Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
|
||||
gpu_dyn_inst->scalarAddr = vaddr;
|
||||
}
|
||||
|
||||
// first instruction DWORD
|
||||
@@ -469,29 +523,6 @@ namespace Gcn3ISA
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
|
||||
protected:
|
||||
/**
 * Bit-field view of a buffer resource descriptor. The declared field
 * widths add up to 128 bits (48 + 14 + 1 + 1 in the first 64 bits,
 * 32 for numRecords, and 32 for the remaining fields).
 */
struct BufferRsrcDescriptor
{
    uint64_t baseAddr : 48;
    uint32_t stride : 14;
    uint32_t cacheSwizzle : 1;
    uint32_t swizzleEn : 1;
    uint32_t numRecords : 32;
    uint32_t dstSelX : 3;
    uint32_t dstSelY : 3;
    uint32_t dstSelZ : 3;
    uint32_t dstSelW : 3;
    uint32_t numFmt : 3;
    uint32_t dataFmt : 4;
    uint32_t elemSize : 2;
    uint32_t idxStride : 2;
    uint32_t addTidEn : 1;
    uint32_t atc : 1;
    uint32_t hashEn : 1;
    uint32_t heap : 1;
    uint32_t mType : 3;
    uint32_t type : 2;
};

// Bit-field packing is implementation-defined; fail the build if the
// compiler does not pack the declared 128 bits into exactly 16 bytes.
static_assert(sizeof(BufferRsrcDescriptor) == 16,
              "BufferRsrcDescriptor must be exactly 128 bits");
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initMemRead(GPUDynInstPtr gpuDynInst)
|
||||
|
||||
Reference in New Issue
Block a user