arch-vega: Implement v_mov_b64

This instruction is new in MI300 and appears in some of the example
applications used to test MFMA (matrix fused multiply-add) instructions.

Change-Id: I739f8ab2be6a93ee3b6bdc4120d0117724edb0d4
This commit is contained in:
Matthew Poremba
2024-05-14 08:12:41 -07:00
parent 91955ae879
commit a062229ac3
4 changed files with 75 additions and 1 deletions

View File

@@ -3118,7 +3118,7 @@ namespace VegaISA
&Decoder::decode_OP_VOP1__V_CLREXCP,
&Decoder::decode_invalid,
&Decoder::decode_OP_VOP1__V_SCREEN_PARTITION_4SE_B32,
&Decoder::decode_invalid,
&Decoder::decode_OP_VOP1__V_MOV_B64,
&Decoder::decode_OP_VOP1__V_CVT_F16_U16,
&Decoder::decode_OP_VOP1__V_CVT_F16_I16,
&Decoder::decode_OP_VOP1__V_CVT_U16_F16,
@@ -11634,6 +11634,12 @@ namespace VegaISA
return nullptr;
}
/**
 * Decode handler for the VOP1 V_MOV_B64 opcode.
 *
 * Takes the VOP1 view of the machine instruction and wraps it in a newly
 * allocated Inst_VOP1__V_MOV_B64.
 *
 * @param iFmt machine instruction being decoded.
 * @return pointer to the heap-allocated static instruction.
 *         NOTE(review): ownership presumably passes to the caller -- confirm.
 */
GPUStaticInst*
Decoder::decode_OP_VOP1__V_MOV_B64(MachInst iFmt)
{
    auto *vop1Fmt = &iFmt->iFmt_VOP1;
    return new Inst_VOP1__V_MOV_B64(vop1Fmt);
} // decode_OP_VOP1__V_MOV_B64
GPUStaticInst*
Decoder::decode_OP_VOP1__V_CVT_F16_U16(MachInst iFmt)
{

View File

@@ -1289,6 +1289,7 @@ namespace VegaISA
GPUStaticInst* decode_OP_VOP1__V_FREXP_MANT_F32(MachInst);
GPUStaticInst* decode_OP_VOP1__V_CLREXCP(MachInst);
GPUStaticInst* decode_OP_VOP1__V_SCREEN_PARTITION_4SE_B32(MachInst);
// Decode handler for the VOP1 V_MOV_B64 opcode (64-bit vector move);
// takes the machine instruction and returns the static instruction object.
GPUStaticInst* decode_OP_VOP1__V_MOV_B64(MachInst);
GPUStaticInst* decode_OP_VOP1__V_CVT_F16_U16(MachInst);
GPUStaticInst* decode_OP_VOP1__V_CVT_F16_I16(MachInst);
GPUStaticInst* decode_OP_VOP1__V_CVT_U16_F16(MachInst);

View File

@@ -9890,6 +9890,38 @@ namespace VegaISA
void execute(GPUDynInstPtr) override;
}; // Inst_VOP1__V_CLREXCP
/**
 * Static instruction class for the VOP1-encoded v_mov_b64 instruction.
 *
 * The instruction has one source operand (index 0) and one vector
 * destination operand (index 1); both are 8 bytes wide.
 */
class Inst_VOP1__V_MOV_B64 : public Inst_VOP1
{
  public:
    Inst_VOP1__V_MOV_B64(InFmt_VOP1*);
    ~Inst_VOP1__V_MOV_B64();

    /** Number of destination register operands (vdst only). */
    int numDstRegOperands() override { return 1; }

    /** Number of source register operands (src only). */
    int numSrcRegOperands() override { return 1; }

    /** Total operand count: sources plus destinations. */
    int
    getNumOperands() override
    {
        return numSrcRegOperands() + numDstRegOperands();
    } // getNumOperands

    /**
     * Size in bytes of the operand at the given index.
     *
     * @param opIdx 0 selects src, 1 selects vdst.
     * @return 8 for either valid index; any other index is reported
     *         through fatal().
     */
    int
    getOperandSize(int opIdx) override
    {
        // Index 0 is src and index 1 is vdst; both are 64-bit operands.
        if (opIdx == 0 || opIdx == 1) {
            return 8;
        }

        fatal("op idx %i out of bounds\n", opIdx);
        return -1;
    } // getOperandSize

    /** Perform the move; defined out of line. */
    void execute(GPUDynInstPtr) override;
}; // Inst_VOP1__V_MOV_B64
class Inst_VOP1__V_CVT_F16_U16 : public Inst_VOP1
{
public:

View File

@@ -1874,6 +1874,41 @@ namespace VegaISA
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_MOV_B64 class methods ---

/**
 * Construct the instruction from its VOP1 encoding, registering it under
 * the mnemonic "v_mov_b64" and flagging it as an ALU operation.
 */
Inst_VOP1__V_MOV_B64::Inst_VOP1__V_MOV_B64(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_mov_b64")
{
    this->setFlag(ALU);
} // Inst_VOP1__V_MOV_B64
/** Nothing to release beyond what the base class handles. */
Inst_VOP1__V_MOV_B64::~Inst_VOP1__V_MOV_B64() = default;
// ~Inst_VOP1__V_MOV_B64
// --- description from .arch file ---
// D.u = S0.u.
// Input and output modifiers not supported; this is an untyped operation.
//
// Implementation notes: SRC0 is read as a 64-bit vector operand and copied
// to VDST for every lane whose exec-mask bit is set. Lanes with a clear
// exec-mask bit are not assigned here. DPP and SDWA forms are not
// implemented and trigger a panic.
void
Inst_VOP1__V_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wave = gpuDynInst->wavefront();
    ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
    VecOperandU64 dst(gpuDynInst, instData.VDST);

    src0.readSrc();

    // Only the plain VOP1 encoding is handled.
    panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
    panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");

    for (int laneId = 0; laneId < NumVecElemPerVecReg; ++laneId) {
        if (!wave->execMask(laneId)) {
            continue;
        }
        dst[laneId] = src0[laneId];
    }

    dst.write();
} // execute
// --- Inst_VOP1__V_CVT_F16_U16 class methods ---
Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)