arch-vega: Implement v_cvt_pk_fp8_f32
This instruction serves as a test for the MXFP8 type.

Change-Id: I2ce30bf7f3a3ecc850a445aebdf971c37c39a79e
This commit is contained in:
@@ -1245,7 +1245,7 @@ namespace VegaISA
|
||||
&Decoder::decode_OPU_VOP3__V_SUB_I16,
|
||||
&Decoder::decode_OPU_VOP3__V_PACK_B32_F16,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_OPU_VOP3__V_CVT_PK_FP8_F32,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
@@ -7295,6 +7295,12 @@ namespace VegaISA
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * Decode hook for v_cvt_pk_fp8_f32.
 *
 * Allocates a new Inst_VOP3__V_CVT_PK_FP8_F32 built from the VOP3A view
 * of the machine instruction and returns it to the caller.
 */
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_PK_FP8_F32(MachInst iFmt)
{
    // Select the VOP3A interpretation of the encoded instruction.
    auto *vop3a_fmt = &iFmt->iFmt_VOP3A;

    return new Inst_VOP3__V_CVT_PK_FP8_F32(vop3a_fmt);
}
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_DS__DS_ADD_U32(MachInst iFmt)
|
||||
{
|
||||
|
||||
@@ -509,6 +509,7 @@ namespace VegaISA
|
||||
GPUStaticInst* decode_OPU_VOP3__V_ADD_I16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUB_I16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_PACK_B32_F16(MachInst);
|
||||
// Decode hook for v_cvt_pk_fp8_f32 (VOP3 encoding).
GPUStaticInst* decode_OPU_VOP3__V_CVT_PK_FP8_F32(MachInst);
|
||||
GPUStaticInst* decode_OP_DS__DS_ADD_U32(MachInst);
|
||||
GPUStaticInst* decode_OP_DS__DS_SUB_U32(MachInst);
|
||||
GPUStaticInst* decode_OP_DS__DS_RSUB_U32(MachInst);
|
||||
|
||||
@@ -44145,6 +44145,39 @@ namespace VegaISA
|
||||
Inst_VOP3P_MAI__V_MFMA<2, 16, 16, 4, 1, ConstVecOperandF64,
|
||||
VecOperandF64>;
|
||||
|
||||
class Inst_VOP3__V_CVT_PK_FP8_F32 : public Inst_VOP3A
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A*);
|
||||
~Inst_VOP3__V_CVT_PK_FP8_F32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
{
|
||||
return numDstRegOperands() + numSrcRegOperands();
|
||||
} // getNumOperands
|
||||
|
||||
int numDstRegOperands() override { return 1; }
|
||||
int numSrcRegOperands() override { return 2; }
|
||||
|
||||
int
|
||||
getOperandSize(int opIdx) override
|
||||
{
|
||||
switch (opIdx) {
|
||||
case 0: //src_0
|
||||
return 4;
|
||||
case 1: //src_1
|
||||
return 4;
|
||||
case 2: //vdst
|
||||
return 4;
|
||||
default:
|
||||
fatal("op idx %i out of bounds\n", opIdx);
|
||||
return -1;
|
||||
}
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_CVT_PK_FP8_F32
|
||||
} // namespace VegaISA
|
||||
} // namespace gem5
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "arch/amdgpu/common/dtype/mxfp_types.hh"
|
||||
#include "arch/amdgpu/vega/insts/inst_util.hh"
|
||||
#include "arch/amdgpu/vega/insts/instructions.hh"
|
||||
|
||||
@@ -8920,5 +8921,60 @@ namespace VegaISA
|
||||
{
|
||||
panicUnimplemented();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_CVT_PK_FP8_F32 class methods ---
|
||||
|
||||
/**
 * Construct the instruction from its VOP3A encoding.
 *
 * Forwards the encoding and the mnemonic "v_cvt_pk_fp8_f32" to the
 * Inst_VOP3A base (third base argument is false; its meaning is defined
 * by Inst_VOP3A, which is not shown here) and marks the instruction with
 * the ALU flag.
 */
Inst_VOP3__V_CVT_PK_FP8_F32::Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_cvt_pk_fp8_f32", false)
{
    this->setFlag(ALU);
} // Inst_VOP3__V_CVT_PK_FP8_F32
|
||||
|
||||
/** Destructor; this class has nothing of its own to tear down. */
Inst_VOP3__V_CVT_PK_FP8_F32::~Inst_VOP3__V_CVT_PK_FP8_F32()
{
    // Intentionally empty.
} // ~Inst_VOP3__V_CVT_PK_FP8_F32
|
||||
|
||||
void
|
||||
Inst_VOP3__V_CVT_PK_FP8_F32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
|
||||
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
|
||||
VecOperandU32 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0.readSrc();
|
||||
src1.readSrc();
|
||||
vdst.read(); // Preserve bits
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
panic_if(instData.CLAMP, "CLAMP not supported for %s", _opcode);
|
||||
panic_if(extData.OMOD, "OMOD not supported for %s", _opcode);
|
||||
|
||||
unsigned opsel = instData.OPSEL;
|
||||
unsigned abs = instData.ABS;
|
||||
unsigned neg = extData.NEG;
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
AMDGPU::mxfloat8 tmp0(src0[lane]), tmp1(src1[lane]);
|
||||
|
||||
if ((abs & 1) && (tmp0 < 0)) tmp0 = -tmp0;
|
||||
if ((abs & 2) && (tmp1 < 0)) tmp1 = -tmp1;
|
||||
if (neg & 1) tmp0 = -tmp0;
|
||||
if (neg & 2) tmp1 = -tmp1;
|
||||
|
||||
uint16_t packed_data = (bits(tmp0.data, 31, 24) << 8)
|
||||
| bits(tmp1.data, 31, 24);
|
||||
|
||||
if (opsel & 8) {
|
||||
replaceBits(vdst[lane], 31, 16, packed_data);
|
||||
} else {
|
||||
replaceBits(vdst[lane], 15, 0, packed_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
} // namespace VegaISA
|
||||
} // namespace gem5
|
||||
|
||||
Reference in New Issue
Block a user