arch-vega: Implement V_LSHL_ADD_U64
This is a new instruction in MI300 and operates similarly to V_LSHL_ADD_U32, but on 64-bit values. Change-Id: Ia4ac65160bdad748fccdcb28286ba03157cc4046
This commit is contained in:
@@ -1091,7 +1091,7 @@ namespace VegaISA
|
||||
&Decoder::decode_OPU_VOP3__V_MAD_I16,
|
||||
&Decoder::decode_OPU_VOP3__V_FMA_F16,
|
||||
&Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F16,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_OPU_VOP3__V_LSHL_ADD_U64,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
@@ -7054,6 +7054,12 @@ namespace VegaISA
|
||||
return new Inst_VOP3__V_DIV_FIXUP_F16(&iFmt->iFmt_VOP3A);
|
||||
}
|
||||
|
||||
// Decode entry for V_LSHL_ADD_U64: builds the instruction object from the
// VOP3A view of the machine instruction and hands it back to the caller.
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LSHL_ADD_U64(MachInst iFmt)
{
    auto *vop3aFmt = &iFmt->iFmt_VOP3A;
    return new Inst_VOP3__V_LSHL_ADD_U64(vop3aFmt);
}
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_INTERP_P1_F32(MachInst iFmt)
|
||||
{
|
||||
|
||||
@@ -470,6 +470,7 @@ namespace VegaISA
|
||||
GPUStaticInst* decode_OPU_VOP3__V_MAD_I16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_FMA_F16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_DIV_FIXUP_F16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_LSHL_ADD_U64(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_INTERP_P1_F32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_INTERP_P2_F32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_INTERP_MOV_F32(MachInst);
|
||||
|
||||
@@ -30158,6 +30158,42 @@ namespace VegaISA
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_DIV_FIXUP_F16
|
||||
|
||||
class Inst_VOP3__V_LSHL_ADD_U64 : public Inst_VOP3A
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A*);
|
||||
~Inst_VOP3__V_LSHL_ADD_U64();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
{
|
||||
return numDstRegOperands() + numSrcRegOperands();
|
||||
} // getNumOperands
|
||||
|
||||
int numDstRegOperands() override { return 1; }
|
||||
int numSrcRegOperands() override { return 3; }
|
||||
|
||||
int
|
||||
getOperandSize(int opIdx) override
|
||||
{
|
||||
switch (opIdx) {
|
||||
case 0: //src_0
|
||||
return 8;
|
||||
case 1: //src_1
|
||||
return 4;
|
||||
case 2: //src_2
|
||||
return 8;
|
||||
case 3: //vdst
|
||||
return 8;
|
||||
default:
|
||||
fatal("op idx %i out of bounds\n", opIdx);
|
||||
return -1;
|
||||
}
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_LSHL_ADD_U64
|
||||
|
||||
class Inst_VOP3__V_CVT_PKACCUM_U8_F32 : public Inst_VOP3A
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -7630,6 +7630,54 @@ namespace VegaISA
|
||||
{
|
||||
panicUnimplemented();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_LSHL_ADD_U64 class methods ---
|
||||
|
||||
// Forwards the VOP3A format and the mnemonic "v_lshl_add_u64" to the
// Inst_VOP3A base, then flags the instruction as an ALU operation.
Inst_VOP3__V_LSHL_ADD_U64::Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_lshl_add_u64", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_LSHL_ADD_U64
|
||||
|
||||
// Empty body: no explicit cleanup is performed here.
Inst_VOP3__V_LSHL_ADD_U64::~Inst_VOP3__V_LSHL_ADD_U64()
{
} // ~Inst_VOP3__V_LSHL_ADD_U64
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = (S0.u << S1.u[4:0]) + S2.u.
|
||||
void
|
||||
Inst_VOP3__V_LSHL_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
|
||||
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
|
||||
ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
|
||||
VecOperandU64 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0.readSrc();
|
||||
src1.readSrc();
|
||||
src2.readSrc();
|
||||
|
||||
/**
|
||||
* input modifiers are supported by FP operations only
|
||||
*/
|
||||
assert(!(instData.ABS & 0x1));
|
||||
assert(!(instData.ABS & 0x2));
|
||||
assert(!(instData.ABS & 0x4));
|
||||
assert(!(extData.NEG & 0x1));
|
||||
assert(!(extData.NEG & 0x2));
|
||||
assert(!(extData.NEG & 0x4));
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
int shift_amount = bits(src1[lane], 2, 0);
|
||||
shift_amount = shift_amount > 4 ? 0 : shift_amount;
|
||||
vdst[lane] = (src0[lane] << shift_amount)
|
||||
+ src2[lane];
|
||||
}
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_CVT_PKACCUM_U8_F32 class methods ---
|
||||
|
||||
Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
|
||||
|
||||
Reference in New Issue
Block a user