arch-vega: Implement V_ACCVGPR_MOV_B32 instruction

This instruction is a simple move from one accumulation register to
another. It is essentially a move with the accumulation offset added
to both the source and destination register indices.

Change-Id: Ic93ae72599b75c91213f56ebafe5bbd7b2867089
This commit is contained in:
Matthew Poremba
2024-05-15 12:05:54 -07:00
parent 7cdb69bf21
commit 6c8caf83c6
4 changed files with 72 additions and 1 deletions

View File

@@ -3144,7 +3144,7 @@ namespace VegaISA
&Decoder::decode_OP_VOP1__V_SAT_PK_U8_I16,
&Decoder::decode_invalid,
&Decoder::decode_OP_VOP1__V_SWAP_B32,
&Decoder::decode_invalid,
&Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
@@ -11777,6 +11777,12 @@ namespace VegaISA
return nullptr;
}
// Decode handler for V_ACCVGPR_MOV_B32: wraps the VOP1 view of the
// machine instruction in a newly allocated static-instruction object.
GPUStaticInst*
Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst iFmt)
{
    // Select the VOP1 layout of the encoded instruction.
    auto *vop1_fields = &iFmt->iFmt_VOP1;

    return new Inst_VOP1__V_ACCVGPR_MOV_B32(vop1_fields);
}
GPUStaticInst*
Decoder::decode_OP_VOPC__V_CMP_CLASS_F32(MachInst iFmt)
{

View File

@@ -1314,6 +1314,7 @@ namespace VegaISA
GPUStaticInst* decode_OP_VOP1__V_CVT_NORM_U16_F16(MachInst);
GPUStaticInst* decode_OP_VOP1__V_SAT_PK_U8_I16(MachInst);
GPUStaticInst* decode_OP_VOP1__V_SWAP_B32(MachInst);
// Decode handler for the VOP1 V_ACCVGPR_MOV_B32 opcode.
GPUStaticInst* decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst);
GPUStaticInst* decode_OP_VOP2__V_CNDMASK_B32(MachInst);
GPUStaticInst* decode_OP_VOP2__V_ADD_F32(MachInst);
GPUStaticInst* decode_OP_VOP2__V_SUB_F32(MachInst);

View File

@@ -10562,6 +10562,38 @@ namespace VegaISA
void execute(GPUDynInstPtr) override;
}; // Inst_VOP1__V_LOG_LEGACY_F32
/**
 * VOP1 instruction class for V_ACCVGPR_MOV_B32.
 *
 * Reports one source and one destination register operand, each of
 * size 4 (presumably bytes -- confirm against the base class contract).
 * The constructor, destructor and execute() are defined out of line.
 */
class Inst_VOP1__V_ACCVGPR_MOV_B32 : public Inst_VOP1
{
  public:
    Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1*);
    ~Inst_VOP1__V_ACCVGPR_MOV_B32();

    // One destination register (vdst) and one source register (src).
    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    // Total operand count: destinations plus sources.
    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    // Operand 0 is the source and operand 1 the destination; both
    // report the same size. Any other index is a fatal error.
    int
    getOperandSize(int opIdx) override
    {
        const bool known_operand = (opIdx == 0 || opIdx == 1);
        if (known_operand) {
            return 4;
        }

        fatal("op idx %i out of bounds\n", opIdx);
        return -1;
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP1__V_ACCVGPR_MOV_B32
class Inst_VOPC__V_CMP_CLASS_F32 : public Inst_VOPC
{
public:

View File

@@ -2397,6 +2397,38 @@ namespace VegaISA
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods ---

/**
 * Construct the instruction from its VOP1 encoding fields. The fields
 * and the name string "v_accvgpr_mov_b32" are forwarded to the
 * Inst_VOP1 base, and the ALU flag is set.
 */
Inst_VOP1__V_ACCVGPR_MOV_B32::Inst_VOP1__V_ACCVGPR_MOV_B32(
    InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_accvgpr_mov_b32")
{
    setFlag(ALU);
} // Inst_VOP1__V_ACCVGPR_MOV_B32
// The destructor performs no work of its own, so the compiler-generated
// body is used.
Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32()
    = default; // ~Inst_VOP1__V_ACCVGPR_MOV_B32
/**
 * Copy one vector register operand to another, with the wavefront's
 * accumulation offset added to both the SRC0 and VDST register indices.
 *
 * Only lanes whose bit is set in the wavefront's exec mask are assigned;
 * the destination operand is written back once after the loop.
 *
 * @param gpuDynInst dynamic instruction providing the wavefront and the
 *                   operand context.
 */
void
Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wave = gpuDynInst->wavefront();

    // Both operand indices are shifted by the same per-wavefront offset.
    const unsigned acc_base = wave->accumOffset;

    ConstVecOperandU32 acc_src(gpuDynInst, instData.SRC0 + acc_base);
    VecOperandU32 acc_dst(gpuDynInst, instData.VDST + acc_base);

    acc_src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        // Skip lanes that are masked off for this wavefront.
        if (!wave->execMask(lane)) {
            continue;
        }

        acc_dst[lane] = acc_src[lane];
    }

    acc_dst.write();
} // execute
} // namespace VegaISA