arch-vega: More scratch, accvgpr instructions (#1190)
- Implements the remaining scratch instructions that have corresponding flat implementations. - Implements the remaining v_accvgpr instructions.
This commit is contained in:
@@ -3144,7 +3144,7 @@ namespace VegaISA
|
||||
&Decoder::decode_OP_VOP1__V_SAT_PK_U8_I16,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_OP_VOP1__V_SWAP_B32,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
@@ -8622,7 +8622,6 @@ namespace VegaISA
|
||||
Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORD(MachInst iFmt)
|
||||
{
|
||||
return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GPUStaticInst*
|
||||
@@ -9898,29 +9897,25 @@ namespace VegaISA
|
||||
/**
 * Decode a SCRATCH_LOAD_UBYTE instruction.
 *
 * The scratch opcode is served by the corresponding FLAT instruction class,
 * constructed from the FLAT view of the encoding. The previous placeholder
 * (a fatal() call followed by `return nullptr`) executed first and left the
 * instantiation unreachable, so it has been removed.
 *
 * @param iFmt Machine instruction being decoded.
 * @return A newly allocated Inst_FLAT__FLAT_LOAD_UBYTE.
 */
GPUStaticInst*
Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_UBYTE(MachInst iFmt)
{
    return new Inst_FLAT__FLAT_LOAD_UBYTE(&iFmt->iFmt_FLAT);
}
|
||||
|
||||
/**
 * Decode a SCRATCH_LOAD_SBYTE instruction.
 *
 * The scratch opcode is served by the corresponding FLAT instruction class,
 * constructed from the FLAT view of the encoding. The previous placeholder
 * (a fatal() call followed by `return nullptr`) executed first and left the
 * instantiation unreachable, so it has been removed.
 *
 * @param iFmt Machine instruction being decoded.
 * @return A newly allocated Inst_FLAT__FLAT_LOAD_SBYTE.
 */
GPUStaticInst*
Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SBYTE(MachInst iFmt)
{
    return new Inst_FLAT__FLAT_LOAD_SBYTE(&iFmt->iFmt_FLAT);
}
|
||||
|
||||
/**
 * Decode a SCRATCH_LOAD_USHORT instruction.
 *
 * The scratch opcode is served by the corresponding FLAT instruction class,
 * constructed from the FLAT view of the encoding. The previous placeholder
 * (a fatal() call followed by `return nullptr`) executed first and left the
 * instantiation unreachable, so it has been removed.
 *
 * @param iFmt Machine instruction being decoded.
 * @return A newly allocated Inst_FLAT__FLAT_LOAD_USHORT.
 */
GPUStaticInst*
Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_USHORT(MachInst iFmt)
{
    return new Inst_FLAT__FLAT_LOAD_USHORT(&iFmt->iFmt_FLAT);
}
|
||||
|
||||
/**
 * Decode a SCRATCH_LOAD_SSHORT instruction.
 *
 * The scratch opcode is served by the corresponding FLAT instruction class,
 * constructed from the FLAT view of the encoding. The previous placeholder
 * (a fatal() call followed by `return nullptr`) executed first and left the
 * instantiation unreachable, so it has been removed.
 *
 * @param iFmt Machine instruction being decoded.
 * @return A newly allocated Inst_FLAT__FLAT_LOAD_SSHORT.
 */
GPUStaticInst*
Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SSHORT(MachInst iFmt)
{
    return new Inst_FLAT__FLAT_LOAD_SSHORT(&iFmt->iFmt_FLAT);
}
|
||||
|
||||
GPUStaticInst*
|
||||
@@ -9950,8 +9945,7 @@ namespace VegaISA
|
||||
/**
 * Decode a SCRATCH_STORE_BYTE instruction.
 *
 * The scratch opcode is served by the corresponding FLAT instruction class,
 * constructed from the FLAT view of the encoding. The previous placeholder
 * (a fatal() call followed by `return nullptr`) executed first and left the
 * instantiation unreachable, so it has been removed.
 *
 * @param iFmt Machine instruction being decoded.
 * @return A newly allocated Inst_FLAT__FLAT_STORE_BYTE.
 */
GPUStaticInst*
Decoder::decode_OP_SCRATCH__SCRATCH_STORE_BYTE(MachInst iFmt)
{
    return new Inst_FLAT__FLAT_STORE_BYTE(&iFmt->iFmt_FLAT);
}
|
||||
|
||||
GPUStaticInst*
|
||||
@@ -9964,8 +9958,7 @@ namespace VegaISA
|
||||
/**
 * Decode a SCRATCH_STORE_SHORT instruction.
 *
 * The scratch opcode is served by the corresponding FLAT instruction class,
 * constructed from the FLAT view of the encoding. The previous placeholder
 * (a fatal() call followed by `return nullptr`) executed first and left the
 * instantiation unreachable, so it has been removed.
 *
 * @param iFmt Machine instruction being decoded.
 * @return A newly allocated Inst_FLAT__FLAT_STORE_SHORT.
 */
GPUStaticInst*
Decoder::decode_OP_SCRATCH__SCRATCH_STORE_SHORT(MachInst iFmt)
{
    return new Inst_FLAT__FLAT_STORE_SHORT(&iFmt->iFmt_FLAT);
}
|
||||
|
||||
GPUStaticInst*
|
||||
@@ -11784,6 +11777,12 @@ namespace VegaISA
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * Decode a V_ACCVGPR_MOV_B32 instruction.
 *
 * @param iFmt Machine instruction being decoded; its VOP1 view is handed to
 *             the instruction constructor.
 * @return A newly allocated Inst_VOP1__V_ACCVGPR_MOV_B32.
 */
GPUStaticInst*
Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst iFmt)
{
    auto *vop1_fmt = &iFmt->iFmt_VOP1;
    return new Inst_VOP1__V_ACCVGPR_MOV_B32(vop1_fmt);
}
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOPC__V_CMP_CLASS_F32(MachInst iFmt)
|
||||
{
|
||||
|
||||
@@ -1314,6 +1314,7 @@ namespace VegaISA
|
||||
GPUStaticInst* decode_OP_VOP1__V_CVT_NORM_U16_F16(MachInst);
|
||||
GPUStaticInst* decode_OP_VOP1__V_SAT_PK_U8_I16(MachInst);
|
||||
GPUStaticInst* decode_OP_VOP1__V_SWAP_B32(MachInst);
|
||||
GPUStaticInst* decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst);
|
||||
GPUStaticInst* decode_OP_VOP2__V_CNDMASK_B32(MachInst);
|
||||
GPUStaticInst* decode_OP_VOP2__V_ADD_F32(MachInst);
|
||||
GPUStaticInst* decode_OP_VOP2__V_SUB_F32(MachInst);
|
||||
|
||||
@@ -10562,6 +10562,38 @@ namespace VegaISA
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP1__V_LOG_LEGACY_F32
|
||||
|
||||
class Inst_VOP1__V_ACCVGPR_MOV_B32 : public Inst_VOP1
|
||||
{
|
||||
public:
|
||||
Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1*);
|
||||
~Inst_VOP1__V_ACCVGPR_MOV_B32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
{
|
||||
return numDstRegOperands() + numSrcRegOperands();
|
||||
} // getNumOperands
|
||||
|
||||
int numDstRegOperands() override { return 1; }
|
||||
int numSrcRegOperands() override { return 1; }
|
||||
|
||||
int
|
||||
getOperandSize(int opIdx) override
|
||||
{
|
||||
switch (opIdx) {
|
||||
case 0: //src
|
||||
return 4;
|
||||
case 1: //vdst
|
||||
return 4;
|
||||
default:
|
||||
fatal("op idx %i out of bounds\n", opIdx);
|
||||
return -1;
|
||||
}
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP1__V_ACCVGPR_MOV_B32
|
||||
|
||||
class Inst_VOPC__V_CMP_CLASS_F32 : public Inst_VOPC
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -2397,6 +2397,38 @@ namespace VegaISA
|
||||
}
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods ---
|
||||
|
||||
/**
 * Construct the instruction from its VOP1 encoding, registering it under the
 * mnemonic "v_accvgpr_mov_b32" and flagging it as an ALU instruction.
 *
 * @param iFmt VOP1 encoding passed through to the Inst_VOP1 base.
 */
Inst_VOP1__V_ACCVGPR_MOV_B32::Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_accvgpr_mov_b32")
{
    this->setFlag(ALU);
} // Inst_VOP1__V_ACCVGPR_MOV_B32
|
||||
|
||||
/** Destructor; the class has nothing of its own to release. */
Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32() {}
// ~Inst_VOP1__V_ACCVGPR_MOV_B32
|
||||
|
||||
void
|
||||
Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
unsigned accum_offset = wf->accumOffset;
|
||||
|
||||
ConstVecOperandU32 src(gpuDynInst, instData.SRC0+accum_offset);
|
||||
VecOperandU32 vdst(gpuDynInst, instData.VDST+accum_offset);
|
||||
|
||||
src.readSrc();
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src[lane];
|
||||
}
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
} // namespace VegaISA
|
||||
|
||||
@@ -579,8 +579,30 @@ namespace VegaISA
|
||||
case REG_SRC_SWDA:
|
||||
case REG_SRC_DPP:
|
||||
case REG_SRC_LITERAL:
|
||||
assert(NumDwords == 1);
|
||||
/**
|
||||
* From the Vega specification:
|
||||
* When a literal constant is used with a 64 bit instruction,
|
||||
* the literal is expanded to 64 bits by: padding the LSBs
|
||||
* with zeros for floats, padding the MSBs with zeros for
|
||||
* unsigned ints, and by sign-extending signed ints.
|
||||
*/
|
||||
srfData[0] = _gpuDynInst->srcLiteral();
|
||||
if constexpr (NumDwords == 2) {
|
||||
if constexpr (std::is_integral_v<DataType>) {
|
||||
if constexpr (std::is_signed_v<DataType>) {
|
||||
if (bits(srfData[0], 31, 31) == 1) {
|
||||
srfData[1] = 0xffffffff;
|
||||
} else {
|
||||
srfData[1] = 0;
|
||||
}
|
||||
} else {
|
||||
srfData[1] = 0;
|
||||
}
|
||||
} else {
|
||||
srfData[1] = _gpuDynInst->srcLiteral();
|
||||
srfData[0] = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REG_SHARED_BASE:
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user