arch-vega: Rework flat instructions to support global

Global instructions are new in Vega and are essentially FLAT
instructions from GCN3, but they are guaranteed to go to global memory,
whereas flat instructions can go to global or local memory.

This reworks the flat instruction classes so that the initiateAcc /
execute / completeAcc logic can be reused for flat, global, and later
scratch subtypes of flat instructions. The decoder creates a flat
instruction class which sets instruction flags based on the flat
instruction's SEG field. There are new initOperandInfo and
generateDisassembly methods for flat and global. The number of operands
and operand index getters are modified to check the flags and return the
correct value for the subtype.

Change-Id: I1db4a3742aeec62424189e54c38c59d6b1a8d3c1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47106
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Kyle Roarty <kyleroarty1716@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2021-06-21 13:51:32 -05:00
parent 2b86278a86
commit c15e472199
11 changed files with 477 additions and 220 deletions

View File

@@ -8465,64 +8465,55 @@ namespace VegaISA
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_UBYTE(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_UBYTE(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_SBYTE(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_SBYTE(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_USHORT(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_USHORT(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_SSHORT(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_SSHORT(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_DWORD(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_DWORD(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_DWORDX2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_DWORDX2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_DWORDX3(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_DWORDX3(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_LOAD_DWORDX4(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_LOAD_DWORDX4(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_BYTE(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_STORE_BYTE(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
@@ -8535,8 +8526,7 @@ namespace VegaISA
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_SHORT(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_STORE_SHORT(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
@@ -8549,29 +8539,26 @@ namespace VegaISA
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORD(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT);
return nullptr;
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORDX2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_STORE_DWORDX2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORDX3(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_STORE_DWORDX3(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORDX4(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_STORE_DWORDX4(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
@@ -8619,183 +8606,157 @@ namespace VegaISA
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SWAP(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SWAP(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_CMPSWAP(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_CMPSWAP(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_ADD(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SUB(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SUB(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SMIN(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SMIN(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_UMIN(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_UMIN(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SMAX(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SMAX(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_UMAX(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_UMAX(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_AND(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_AND(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_OR(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_OR(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_XOR(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_XOR(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_INC(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_INC(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_DEC(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_DEC(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SWAP_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SWAP_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_CMPSWAP_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_ADD_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SUB_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SUB_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SMIN_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SMIN_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_UMIN_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_UMIN_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SMAX_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_SMAX_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_UMAX_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_UMAX_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_AND_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_AND_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_OR_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_OR_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_XOR_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_XOR_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_INC_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_INC_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*
Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_DEC_X2(MachInst iFmt)
{
fatal("Trying to decode instruction without a class\n");
return nullptr;
return new Inst_FLAT__FLAT_ATOMIC_DEC_X2(&iFmt->iFmt_FLAT);
}
GPUStaticInst*

View File

@@ -42799,7 +42799,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -42813,7 +42813,7 @@ namespace VegaISA
addr.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
@@ -42887,7 +42887,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -42901,7 +42901,7 @@ namespace VegaISA
addr.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
@@ -42976,7 +42976,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -42990,7 +42990,7 @@ namespace VegaISA
addr.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
@@ -43035,7 +43035,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -43049,7 +43049,7 @@ namespace VegaISA
addr.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
@@ -43094,7 +43094,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -43108,7 +43108,7 @@ namespace VegaISA
addr.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
@@ -43162,7 +43162,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -43176,7 +43176,7 @@ namespace VegaISA
addr.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
@@ -43233,7 +43233,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
@@ -43250,7 +43250,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43292,7 +43292,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
@@ -43309,7 +43309,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43351,7 +43351,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
@@ -43368,7 +43368,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43411,7 +43411,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
@@ -43428,7 +43428,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43471,7 +43471,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
@@ -43492,7 +43492,7 @@ namespace VegaISA
data1.read();
data2.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43539,7 +43539,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
@@ -43562,7 +43562,7 @@ namespace VegaISA
data2.read();
data3.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43618,7 +43618,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -43634,7 +43634,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43700,7 +43700,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -43718,7 +43718,7 @@ namespace VegaISA
data.read();
cmp.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -43782,7 +43782,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -43798,7 +43798,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -44172,7 +44172,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -44190,7 +44190,7 @@ namespace VegaISA
data.read();
cmp.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
@@ -44255,7 +44255,7 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
@@ -44271,7 +44271,7 @@ namespace VegaISA
addr.read();
data.read();
calcAddr(gpuDynInst, addr, instData.OFFSET);
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {

View File

@@ -41445,7 +41445,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41453,7 +41453,10 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
case 1: //vgpr_dst or saddr
return isFlat() ? 1 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 1;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41479,7 +41482,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41487,8 +41490,11 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
return 32;
case 1: //vgpr_dst or saddr
return isFlat() ? 1 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 1;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41513,7 +41519,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41521,7 +41527,10 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
case 1: //vgpr_dst or saddr
return isFlat() ? 2 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 2;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41547,7 +41556,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41555,8 +41564,11 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
return 32;
case 1: //vgpr_dst or saddr
return isFlat() ? 2 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 2;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41581,7 +41593,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41589,7 +41601,10 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
case 1: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41615,7 +41630,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41623,7 +41638,10 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
case 1: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41649,7 +41667,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41657,7 +41675,10 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
case 1: //vgpr_dst or saddr
return isFlat() ? 12 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 12;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41683,7 +41704,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 1; }
int numSrcRegOperands() override { return isFlat() ? 1 : 2; }
int
getOperandSize(int opIdx) override
@@ -41691,7 +41712,10 @@ namespace VegaISA
switch (opIdx) {
case 0: //vgpr_addr
return 8;
case 1: //vgpr_dst
case 1: //vgpr_dst or saddr
return isFlat() ? 16 : 8;
case 2: //vgpr_dst
assert(!isFlat());
return 16;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41717,7 +41741,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41727,6 +41751,9 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 1;
case 2: //saddr
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41751,7 +41778,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41761,6 +41788,9 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 2;
case 2: //saddr
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41785,7 +41815,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41795,6 +41825,9 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 4;
case 2: //saddr
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41819,7 +41852,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41829,6 +41862,9 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //saddr
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41853,7 +41889,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41863,6 +41899,9 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 12;
case 2: //saddr
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41887,7 +41926,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41897,6 +41936,9 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 16;
case 2: //saddr
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -41921,7 +41963,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41931,7 +41973,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 4;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41957,7 +42002,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -41967,7 +42012,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -41993,7 +42041,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42003,7 +42051,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 4;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42029,7 +42080,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42038,9 +42089,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42063,7 +42117,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42072,9 +42126,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42097,7 +42154,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42106,9 +42163,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42131,7 +42191,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42140,9 +42200,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42165,7 +42228,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42174,9 +42237,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42199,7 +42265,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42208,9 +42274,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42233,7 +42302,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42242,9 +42311,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42267,7 +42339,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42276,9 +42348,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42301,7 +42376,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42310,9 +42385,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42335,7 +42413,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42344,9 +42422,12 @@ namespace VegaISA
case 0: //vgpr_addr
return 8;
case 1: //vgpr_src
return 32;
case 2: //vgpr_dst
return 32;
return 4;
case 2: //vgpr_dst or saddr
return isFlat() ? 4 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 4;
default:
fatal("op idx %i out of bounds\n", opIdx);
return -1;
@@ -42369,7 +42450,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42379,7 +42460,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42403,7 +42487,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42413,7 +42497,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 16;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42439,7 +42526,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42449,7 +42536,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42475,7 +42565,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42485,7 +42575,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42509,7 +42602,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42519,7 +42612,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42543,7 +42639,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42553,7 +42649,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42577,7 +42676,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42587,7 +42686,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42611,7 +42713,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42621,7 +42723,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42645,7 +42750,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42655,7 +42760,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42679,7 +42787,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42689,7 +42797,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42713,7 +42824,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42723,7 +42834,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42747,7 +42861,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42757,7 +42871,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);
@@ -42781,7 +42898,7 @@ namespace VegaISA
} // getNumOperands
int numDstRegOperands() override { return 1; }
int numSrcRegOperands() override { return 2; }
int numSrcRegOperands() override { return isFlat() ? 2 : 3; }
int
getOperandSize(int opIdx) override
@@ -42791,7 +42908,10 @@ namespace VegaISA
return 8;
case 1: //vgpr_src
return 8;
case 2: //vgpr_dst
case 2: //vgpr_dst or saddr
return isFlat() ? 8 : 8;
case 3: //vgpr_dst
assert(!isFlat());
return 8;
default:
fatal("op idx %i out of bounds\n", opIdx);

View File

@@ -1512,7 +1512,15 @@ namespace VegaISA
Inst_FLAT::Inst_FLAT(InFmt_FLAT *iFmt, const std::string &opcode)
: VEGAGPUStaticInst(opcode)
{
setFlag(Flat);
// The SEG field specifies FLAT(0) SCRATCH(1) or GLOBAL(2)
if (iFmt->SEG == 0) {
setFlag(Flat);
} else if (iFmt->SEG == 2) {
setFlag(FlatGlobal);
} else {
panic("Unknown flat segment: %d\n", iFmt->SEG);
}
// copy first instruction DWORD
instData = iFmt[0];
// copy second instruction DWORD
@@ -1532,6 +1540,21 @@ namespace VegaISA
void
Inst_FLAT::initOperandInfo()
{
    // Exactly one flat subtype flag is expected to be set on this
    // instruction; each subtype has its own operand-info builder.
    assert(isFlat() ^ isFlatGlobal());

    if (isFlat()) {
        initFlatOperandInfo();
        return;
    }

    if (isFlatGlobal()) {
        initGlobalOperandInfo();
        return;
    }

    // Neither flag set: nothing sensible can be built.
    panic("Unknown flat subtype!\n");
}
void
Inst_FLAT::initFlatOperandInfo()
{
//3 formats:
// 1 dst + 1 src (load)
@@ -1567,6 +1590,66 @@ namespace VegaISA
assert(dstOps.size() == numDstRegOperands());
}
void
Inst_FLAT::initGlobalOperandInfo()
{
    // Operand layouts built here:
    //   load:   1 dst + 2 src (vector addr, scalar addr)
    //   store:  0 dst + 3 src (vector addr, data, scalar addr)
    //   atomic: 1 dst + 3 src (vector addr, data, scalar addr)
    const int numSrcs = numSrcRegOperands();
    const int numDsts = numDstRegOperands();

    // SADDR value meaning "off": no scalar register is used.
    const int saddrOff = 0x7f;

    int opIdx = 0;

    // Bitfields cannot be bound by reference, so every register index is
    // staged through this plain int before being handed to emplace_back.
    int regIdx = 0;

    if (getNumOperands() > 3) {
        assert(isAtomic());
    }

    // The vector address register is always the first source.
    regIdx = extData.ADDR;
    srcOps.emplace_back(regIdx, getOperandSize(opIdx), true,
                        false, true, false);
    ++opIdx;

    // With three sources, the data register sits between the vector
    // address and the scalar address.
    if (numSrcs == 3) {
        regIdx = extData.DATA;
        srcOps.emplace_back(regIdx, getOperandSize(opIdx), true,
                            false, true, false);
        ++opIdx;
    }

    // The scalar address takes an operand slot in both layouts, but is
    // only recorded as a source when it is actually in use.
    if (numSrcs == 2 || numSrcs == 3) {
        regIdx = extData.SADDR;
        if (regIdx != saddrOff) {
            srcOps.emplace_back(regIdx, getOperandSize(opIdx), true,
                                true, false, false);
        }
        ++opIdx;
    }

    if (numDsts) {
        regIdx = extData.VDST;
        dstOps.emplace_back(regIdx, getOperandSize(opIdx), false,
                            false, true, false);
    }

    // One fewer source is recorded when the scalar address is "off".
    assert(srcOps.size() ==
           (extData.SADDR != saddrOff ? numSrcs : numSrcs - 1));
    assert(dstOps.size() == numDsts);
}
int
Inst_FLAT::instSize() const
{
@@ -1575,6 +1658,21 @@ namespace VegaISA
void
Inst_FLAT::generateDisassembly()
{
    // Exactly one flat subtype flag is expected to be set; each subtype
    // has its own disassembly routine.
    assert(isFlat() ^ isFlatGlobal());

    if (isFlat()) {
        generateFlatDisassembly();
        return;
    }

    if (isFlatGlobal()) {
        generateGlobalDisassembly();
        return;
    }

    // Neither flag set: there is no format to print.
    panic("Unknown flat subtype!\n");
}
void
Inst_FLAT::generateFlatDisassembly()
{
std::stringstream dis_stream;
dis_stream << _opcode << " ";
@@ -1589,5 +1687,31 @@ namespace VegaISA
disassembly = dis_stream.str();
}
void
Inst_FLAT::generateGlobalDisassembly()
{
    // Build the disassembly string for the global subtype and store it
    // in `disassembly`.
    //
    // The opcode string is expected to begin with "flat"; its first four
    // characters are replaced so the mnemonic reads global_*.
    std::string global_opcode = _opcode;
    global_opcode.replace(0, 4, "global");

    std::stringstream dis_stream;
    dis_stream << global_opcode << " ";

    if (isLoad())
        dis_stream << "v" << extData.VDST << ", ";

    // The vector address is printed as a two-register range.
    dis_stream << "v[" << extData.ADDR << ":" << extData.ADDR + 1 << "]";

    if (isStore())
        dis_stream << ", v" << extData.DATA;

    if (extData.SADDR == 0x7f) {
        // 0x7f means no scalar base register is used.
        dis_stream << ", off";
    } else {
        // Print the scalar base as a register range rather than a bare
        // number, which would read like an immediate. The operand is
        // 8 bytes wide for global instructions, hence two registers.
        dis_stream << ", s[" << extData.SADDR << ":"
                   << extData.SADDR + 1 << "]";
    }

    disassembly = dis_stream.str();
}
} // namespace VegaISA
} // namespace gem5

View File

@@ -793,24 +793,35 @@ namespace VegaISA
}
void
calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr,
ScalarRegU32 offset)
calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
ScalarRegU32 saddr, ScalarRegU32 offset)
{
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
gpuDynInst->addr.at(lane) = addr[lane] + offset;
}
// If saddr = 0x7f there is no scalar reg to read and address will
// be a 64-bit address. Otherwise, saddr is the reg index for a
// scalar reg used as the base address for a 32-bit address.
if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) {
calcAddr64(gpuDynInst, vaddr, offset);
} else {
ConstScalarOperandU32 sbase(gpuDynInst, saddr);
sbase.read();
calcAddr32(gpuDynInst, vaddr, sbase, offset);
}
if (isFlat()) {
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
}
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
}
void
issueRequestHelper(GPUDynInstPtr gpuDynInst)
{
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
|| isFlatGlobal()) {
gpuDynInst->computeUnit()->globalMemoryPipe
.issueRequest(gpuDynInst);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
assert(isFlat());
gpuDynInst->computeUnit()->localMemoryPipe
.issueRequest(gpuDynInst);
} else {
@@ -822,6 +833,36 @@ namespace VegaISA
InFmt_FLAT instData;
// second instruction DWORD
InFmt_FLAT_1 extData;
private:
void initFlatOperandInfo();
void initGlobalOperandInfo();
void generateFlatDisassembly();
void generateGlobalDisassembly();
/**
 * Per-lane address computation when a scalar base register is used.
 * For every lane enabled in the exec mask, the address is the sum of
 * the lane's vector address, the scalar register's raw value and the
 * offset, truncated to the low 32 bits.
 */
void
calcAddr32(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
           ConstScalarOperandU32 &saddr, ScalarRegU32 offset)
{
    for (int laneId = 0; laneId < NumVecElemPerVecReg; ++laneId) {
        // Lanes that are masked off keep whatever address they had.
        if (!gpuDynInst->exec_mask[laneId]) {
            continue;
        }

        gpuDynInst->addr.at(laneId) =
            (vaddr[laneId] + saddr.rawData() + offset) & 0xffffffff;
    }
}
/**
 * Per-lane address computation without a scalar base register.
 * For every lane enabled in the exec mask, the address is the lane's
 * vector address plus the offset.
 */
void
calcAddr64(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr,
           ScalarRegU32 offset)
{
    for (int laneId = 0; laneId < NumVecElemPerVecReg; ++laneId) {
        // Lanes that are masked off keep whatever address they had.
        if (!gpuDynInst->exec_mask[laneId]) {
            continue;
        }

        gpuDynInst->addr.at(laneId) = addr[laneId] + offset;
    }
}
}; // Inst_FLAT
} // namespace VegaISA
} // namespace gem5

View File

@@ -55,6 +55,7 @@ class GPUStaticInstFlags(Enum):
'MemSync', # Synchronizing instruction
'MemoryRef', # References memory (load, store, or atomic)
'Flat', # Flat memory op
'FlatGlobal', # Global memory op
'Load', # Reads from memory
'Store', # Writes to memory

View File

@@ -1829,6 +1829,8 @@ ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst)
} else {
stats.flatVMemInsts++;
}
} else if (gpuDynInst->isFlatGlobal()) {
stats.flatVMemInsts++;
} else if (gpuDynInst->isLocalMem()) {
stats.ldsNoFlatInsts++;
} else if (gpuDynInst->isLoad()) {

View File

@@ -428,6 +428,12 @@ GPUDynInst::isFlat() const
return _staticInst->isFlat();
}
bool
GPUDynInst::isFlatGlobal() const
{
    // Forward the query to the static instruction behind this dynamic one.
    const bool flat_global = _staticInst->isFlatGlobal();
    return flat_global;
}
bool
GPUDynInst::isLoad() const
{

View File

@@ -235,6 +235,7 @@ class GPUDynInst : public GPUExecContext
bool isMemSync() const;
bool isMemRef() const;
bool isFlat() const;
bool isFlatGlobal() const;
bool isLoad() const;
bool isStore() const;

View File

@@ -131,6 +131,7 @@ class GPUStaticInst : public GPUStaticInstFlags
bool isMemSync() const { return _flags[MemSync]; }
bool isMemRef() const { return _flags[MemoryRef]; }
bool isFlat() const { return _flags[Flat]; }
bool isFlatGlobal() const { return _flags[FlatGlobal]; }
bool isLoad() const { return _flags[Load]; }
bool isStore() const { return _flags[Store]; }
@@ -179,7 +180,7 @@ class GPUStaticInst : public GPUStaticInstFlags
{
return _flags[MemoryRef] && (_flags[GlobalSegment] ||
_flags[PrivateSegment] || _flags[ReadOnlySegment] ||
_flags[SpillSegment]);
_flags[SpillSegment] || _flags[FlatGlobal]);
}
bool

View File

@@ -156,7 +156,7 @@ ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus,
if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() ||
ii->isSleep())) {
ii->isFlatGlobal() || ii->isSleep())) {
panic("next instruction: %s is of unknown type\n", ii->disassemble());
}