arch-vega: Implement new VOP2 using VOP3 insts

Vega adds three new VOP2 instructions that are not part of the GCN3 ISA
and that may also use the VOP3 encoding: v_add_u32, v_sub_u32, and
v_subrev_u32. This changeset implements those three new instructions to
fix the "invalid encoding" errors reported when they are encountered.

Tested using srad from Rodinia 3.0 HIP port which compiles a v_add_u32
instruction with VOP3 encoding.

Change-Id: I409a9f72f5c37895c3a0ab7ceb14a4dd121874a4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/61330
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Matthew Poremba
2022-07-14 14:47:54 -07:00
parent 152ffb0d43
commit 3a73aa3ac1
4 changed files with 255 additions and 3 deletions

View File

@@ -877,9 +877,9 @@ namespace VegaISA
&Decoder::decode_OPU_VOP3__V_MIN_U16,
&Decoder::decode_OPU_VOP3__V_MIN_I16,
&Decoder::decode_OPU_VOP3__V_LDEXP_F16,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
&Decoder::decode_OPU_VOP3__V_ADD_U32,
&Decoder::decode_OPU_VOP3__V_SUB_U32,
&Decoder::decode_OPU_VOP3__V_SUBREV_U32,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
@@ -6105,6 +6105,24 @@ namespace VegaISA
return new Inst_VOP3__V_LDEXP_F16(&iFmt->iFmt_VOP3A);
} // decode_OPU_VOP3__V_LDEXP_F16
/**
 * Decode entry for the VOP3-encoded v_add_u32.
 *
 * Allocates an Inst_VOP3__V_ADD_U32 from the iFmt_VOP3A member of the
 * machine instruction and returns it as a GPUStaticInst pointer.
 */
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_U32(MachInst iFmt)
{
    GPUStaticInst *decoded = new Inst_VOP3__V_ADD_U32(&iFmt->iFmt_VOP3A);
    return decoded;
} // decode_OPU_VOP3__V_ADD_U32
/**
 * Decode entry for the VOP3-encoded v_sub_u32.
 *
 * Allocates an Inst_VOP3__V_SUB_U32 from the iFmt_VOP3A member of the
 * machine instruction and returns it as a GPUStaticInst pointer.
 */
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUB_U32(MachInst iFmt)
{
    GPUStaticInst *decoded = new Inst_VOP3__V_SUB_U32(&iFmt->iFmt_VOP3A);
    return decoded;
} // decode_OPU_VOP3__V_SUB_U32
/**
 * Decode entry for the VOP3-encoded v_subrev_u32.
 *
 * Allocates an Inst_VOP3__V_SUBREV_U32 from the iFmt_VOP3A member of the
 * machine instruction and returns it as a GPUStaticInst pointer.
 */
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBREV_U32(MachInst iFmt)
{
    GPUStaticInst *decoded =
        new Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3A);
    return decoded;
} // decode_OPU_VOP3__V_SUBREV_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_NOP(MachInst iFmt)
{

View File

@@ -322,6 +322,9 @@ namespace VegaISA
// VOP3 decode entry points: each takes a MachInst and returns a
// GPUStaticInst pointer.
GPUStaticInst* decode_OPU_VOP3__V_MIN_U16(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_MIN_I16(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_LDEXP_F16(MachInst);
// v_add_u32 / v_sub_u32 / v_subrev_u32: Vega instructions that are not
// part of the GCN3 ISA, decoded here in their VOP3 form.
GPUStaticInst* decode_OPU_VOP3__V_ADD_U32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_SUB_U32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_NOP(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_MOV_B32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_CVT_I32_F64(MachInst);

View File

@@ -27400,6 +27400,135 @@ namespace VegaISA
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_ADD_U32 class methods ---

/**
 * Construct the VOP3-encoded v_add_u32 instruction.
 *
 * Hands the InFmt_VOP3A pointer and the "v_add_u32" name string to the
 * Inst_VOP3A base and then marks the instruction with the ALU flag.
 * NOTE(review): the trailing `false` base argument presumably says the
 * destination is not a scalar register -- confirm against Inst_VOP3A.
 */
Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_add_u32", false)
{
    this->setFlag(ALU);
} // Inst_VOP3__V_ADD_U32
// The destructor has no work of its own to do.
Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32() = default;
// ~Inst_VOP3__V_ADD_U32
// --- description from .arch file ---
// D.u32 = S0.u32 + S1.u32.
/**
 * Execute v_add_u32 (VOP3 encoding): for every lane enabled in the
 * wavefront's exec mask, write src0 + src1 into the destination vector
 * register. Lanes that are masked off are not assigned.
 */
void
Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wave = gpuDynInst->wavefront();

    // Source selectors are taken from extData, the destination selector
    // from instData.
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int laneId = 0; laneId < NumVecElemPerVecReg; ++laneId) {
        // Skip lanes that are disabled in the exec mask.
        if (!wave->execMask(laneId)) {
            continue;
        }

        vdst[laneId] = src0[laneId] + src1[laneId];
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_SUB_U32 class methods ---

/**
 * Construct the VOP3-encoded v_sub_u32 instruction.
 *
 * Hands the InFmt_VOP3A pointer and the "v_sub_u32" name string to the
 * Inst_VOP3A base and then marks the instruction with the ALU flag.
 * NOTE(review): the trailing `false` base argument presumably says the
 * destination is not a scalar register -- confirm against Inst_VOP3A.
 */
Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_sub_u32", false)
{
    this->setFlag(ALU);
} // Inst_VOP3__V_SUB_U32
// The destructor has no work of its own to do.
Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32() = default;
// ~Inst_VOP3__V_SUB_U32
// --- description from .arch file ---
// D.u32 = S0.u32 - S1.u32.
/**
 * Execute v_sub_u32 (VOP3 encoding): for every lane enabled in the
 * wavefront's exec mask, write src0 - src1 into the destination vector
 * register. Lanes that are masked off are not assigned.
 */
void
Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wave = gpuDynInst->wavefront();

    // Source selectors are taken from extData, the destination selector
    // from instData.
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int laneId = 0; laneId < NumVecElemPerVecReg; ++laneId) {
        // Skip lanes that are disabled in the exec mask.
        if (!wave->execMask(laneId)) {
            continue;
        }

        vdst[laneId] = src0[laneId] - src1[laneId];
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_SUBREV_U32 class methods ---

/**
 * Construct the VOP3-encoded v_subrev_u32 instruction.
 *
 * Hands the InFmt_VOP3A pointer and the "v_subrev_u32" name string to
 * the Inst_VOP3A base and then marks the instruction with the ALU flag.
 * NOTE(review): the trailing `false` base argument presumably says the
 * destination is not a scalar register -- confirm against Inst_VOP3A.
 */
Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_subrev_u32", false)
{
    this->setFlag(ALU);
} // Inst_VOP3__V_SUBREV_U32
// The destructor has no work of its own to do.
Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32() = default;
// ~Inst_VOP3__V_SUBREV_U32
// --- description from .arch file ---
// D.u32 = S1.u32 - S0.u32.
/**
 * Execute v_subrev_u32 (VOP3 encoding): for every lane enabled in the
 * wavefront's exec mask, write src1 - src0 (operands reversed relative
 * to v_sub_u32) into the destination vector register. Lanes that are
 * masked off are not assigned.
 */
void
Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wave = gpuDynInst->wavefront();

    // Source selectors are taken from extData, the destination selector
    // from instData.
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int laneId = 0; laneId < NumVecElemPerVecReg; ++laneId) {
        // Skip lanes that are disabled in the exec mask.
        if (!wave->execMask(laneId)) {
            continue;
        }

        vdst[laneId] = src1[laneId] - src0[laneId];
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_NOP class methods ---
Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3A *iFmt)

View File

@@ -25712,6 +25712,108 @@ namespace VegaISA
void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_LDEXP_F16
/**
 * VOP3-encoded form of v_add_u32 (D.u32 = S0.u32 + S1.u32).
 *
 * Reports one destination and two source register operands.
 */
class Inst_VOP3__V_ADD_U32 : public Inst_VOP3A
{
  public:
    Inst_VOP3__V_ADD_U32(InFmt_VOP3A*);
    ~Inst_VOP3__V_ADD_U32();

    /** Total operand count: destinations plus sources. */
    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    /**
     * Size of operand opIdx. Indices 0-2 all report 4 (presumably
     * bytes, matching the u32 operands); any other index is fatal.
     */
    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
          case 1: //src_1
          case 2: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_ADD_U32
/**
 * VOP3-encoded form of v_sub_u32 (D.u32 = S0.u32 - S1.u32).
 *
 * Reports one destination and two source register operands.
 */
class Inst_VOP3__V_SUB_U32 : public Inst_VOP3A
{
  public:
    Inst_VOP3__V_SUB_U32(InFmt_VOP3A*);
    ~Inst_VOP3__V_SUB_U32();

    /** Total operand count: destinations plus sources. */
    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    /**
     * Size of operand opIdx. Indices 0-2 all report 4 (presumably
     * bytes, matching the u32 operands); any other index is fatal.
     */
    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
          case 1: //src_1
          case 2: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_SUB_U32
/**
 * VOP3-encoded form of v_subrev_u32 (D.u32 = S1.u32 - S0.u32).
 *
 * Reports one destination and two source register operands.
 */
class Inst_VOP3__V_SUBREV_U32 : public Inst_VOP3A
{
  public:
    Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A*);
    ~Inst_VOP3__V_SUBREV_U32();

    /** Total operand count: destinations plus sources. */
    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    /**
     * Size of operand opIdx. Indices 0-2 all report 4 (presumably
     * bytes, matching the u32 operands); any other index is fatal.
     */
    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
          case 1: //src_1
          case 2: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_SUBREV_U32
class Inst_VOP3__V_NOP : public Inst_VOP3A
{
public: