arch-vega: Implement non-carry-out VEGA add, sub, and subrev
In GCN3, the v_add_u32, v_sub_u32, and v_subrev_u32 instructions write the carry-out value to VCC. VEGA introduces explicit carry-out versions of these instructions (v_add_co_u32, v_sub_co_u32, and v_subrev_co_u32), and modifies the behavior of the baseline, non-carry-out versions to not write to VCC. Previously both the carry-out and non-carry-out versions shared a single implementation that wrote to VCC. This patch correctly implements the non-carry-out versions to avoid the VCC write.
This patch also makes the following substitutions for GCN3 instructions that no longer exist in VEGA (this renaming has no functional impact):
v_addc_u32 -> v_addc_co_u32
v_subb_u32 -> v_subb_co_u32
v_subbrev_u32 -> v_subbrev_co_u32
Change-Id: I002fa6e9316d38fd4cc3554daff047523cfc12c9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47240
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -849,12 +849,12 @@ namespace VegaISA
|
||||
&Decoder::decode_OPU_VOP3__V_MAC_F32,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_invalid,
|
||||
&Decoder::decode_OPU_VOP3__V_ADD_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUB_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBREV_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_ADDC_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBB_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBBREV_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_ADD_CO_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUB_CO_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_ADDC_CO_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBB_CO_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32,
|
||||
&Decoder::decode_OPU_VOP3__V_ADD_F16,
|
||||
&Decoder::decode_OPU_VOP3__V_SUB_F16,
|
||||
&Decoder::decode_OPU_VOP3__V_SUBREV_F16,
|
||||
@@ -3993,37 +3993,37 @@ namespace VegaISA
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP2__V_ADD_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP2__V_ADD_U32(&iFmt->iFmt_VOP2);
|
||||
return new Inst_VOP2__V_ADD_CO_U32(&iFmt->iFmt_VOP2);
|
||||
} // decode_OP_VOP2__V_ADD_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP2__V_SUB_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP2__V_SUB_U32(&iFmt->iFmt_VOP2);
|
||||
return new Inst_VOP2__V_SUB_CO_U32(&iFmt->iFmt_VOP2);
|
||||
} // decode_OP_VOP2__V_SUB_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP2__V_SUBREV_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2);
|
||||
return new Inst_VOP2__V_SUBREV_CO_U32(&iFmt->iFmt_VOP2);
|
||||
} // decode_OP_VOP2__V_SUBREV_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP2__V_ADDC_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP2__V_ADDC_U32(&iFmt->iFmt_VOP2);
|
||||
return new Inst_VOP2__V_ADDC_CO_U32(&iFmt->iFmt_VOP2);
|
||||
} // decode_OP_VOP2__V_ADDC_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP2__V_SUBB_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP2__V_SUBB_U32(&iFmt->iFmt_VOP2);
|
||||
return new Inst_VOP2__V_SUBB_CO_U32(&iFmt->iFmt_VOP2);
|
||||
} // decode_OP_VOP2__V_SUBB_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP2__V_SUBBREV_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP2__V_SUBBREV_U32(&iFmt->iFmt_VOP2);
|
||||
return new Inst_VOP2__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP2);
|
||||
} // decode_OP_VOP2__V_SUBBREV_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
@@ -5947,40 +5947,40 @@ namespace VegaISA
|
||||
} // decode_OPU_VOP3__V_MAC_F32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_ADD_U32(MachInst iFmt)
|
||||
Decoder::decode_OPU_VOP3__V_ADD_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP3__V_ADD_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_ADD_U32
|
||||
return new Inst_VOP3__V_ADD_CO_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_ADD_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_SUB_U32(MachInst iFmt)
|
||||
Decoder::decode_OPU_VOP3__V_SUB_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP3__V_SUB_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUB_U32
|
||||
return new Inst_VOP3__V_SUB_CO_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUB_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_SUBREV_U32(MachInst iFmt)
|
||||
Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUBREV_U32
|
||||
return new Inst_VOP3__V_SUBREV_CO_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUBREV_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_ADDC_U32(MachInst iFmt)
|
||||
Decoder::decode_OPU_VOP3__V_ADDC_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP3__V_ADDC_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_ADDC_U32
|
||||
return new Inst_VOP3__V_ADDC_CO_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_ADDC_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_SUBB_U32(MachInst iFmt)
|
||||
Decoder::decode_OPU_VOP3__V_SUBB_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP3__V_SUBB_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUBB_U32
|
||||
return new Inst_VOP3__V_SUBB_CO_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUBB_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_SUBBREV_U32(MachInst iFmt)
|
||||
Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst iFmt)
|
||||
{
|
||||
return new Inst_VOP3__V_SUBBREV_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUBBREV_U32
|
||||
return new Inst_VOP3__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP3B);
|
||||
} // decode_OPU_VOP3__V_SUBBREV_CO_U32
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OPU_VOP3__V_ADD_F16(MachInst iFmt)
|
||||
|
||||
@@ -296,12 +296,12 @@ namespace VegaISA
|
||||
GPUStaticInst* decode_OPU_VOP3__V_OR_B32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_XOR_B32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_MAC_F32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_ADD_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUB_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_ADDC_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBB_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_ADD_CO_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUB_CO_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_ADDC_CO_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBB_CO_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_ADD_F16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUB_F16(MachInst);
|
||||
GPUStaticInst* decode_OPU_VOP3__V_SUBREV_F16(MachInst);
|
||||
|
||||
@@ -6988,19 +6988,19 @@ namespace VegaISA
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_ADD_U32 class methods ---
|
||||
// --- Inst_VOP2__V_ADD_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_add_u32")
|
||||
Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_add_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_ADD_U32
|
||||
} // Inst_VOP2__V_ADD_CO_U32
|
||||
|
||||
Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
|
||||
Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_ADD_U32
|
||||
} // ~Inst_VOP2__V_ADD_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u + S1.u;
|
||||
@@ -7008,7 +7008,7 @@ namespace VegaISA
|
||||
// --- overflow or carry-out for V_ADDC_U32.
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
|
||||
void
|
||||
Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
@@ -7031,8 +7031,8 @@ namespace VegaISA
|
||||
origSrc0_sdwa.read();
|
||||
origSrc1.read();
|
||||
|
||||
DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
|
||||
"DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
|
||||
DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
|
||||
"v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
|
||||
"SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
|
||||
"SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
|
||||
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
|
||||
@@ -7073,19 +7073,19 @@ namespace VegaISA
|
||||
vcc.write();
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_SUB_U32 class methods ---
|
||||
// --- Inst_VOP2__V_SUB_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_sub_u32")
|
||||
Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_sub_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_SUB_U32
|
||||
} // Inst_VOP2__V_SUB_CO_U32
|
||||
|
||||
Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
|
||||
Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_SUB_U32
|
||||
} // ~Inst_VOP2__V_SUB_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u - S1.u;
|
||||
@@ -7093,7 +7093,7 @@ namespace VegaISA
|
||||
// carry-out for V_SUBB_U32.
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
|
||||
void
|
||||
Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
@@ -7114,28 +7114,27 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_SUBREV_U32 class methods ---
|
||||
// --- Inst_VOP2__V_SUBREV_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subrev_u32")
|
||||
Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subrev_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_SUBREV_U32
|
||||
} // Inst_VOP2__V_SUBREV_CO_U32
|
||||
|
||||
Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
|
||||
Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_SUBREV_U32
|
||||
} // ~Inst_VOP2__V_SUBREV_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S1.u - S0.u;
|
||||
// VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
|
||||
// carry-out for V_SUBB_U32.
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
|
||||
// SQ translates this to V_SUB_U32 with reversed operands.
|
||||
void
|
||||
Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
@@ -7156,20 +7155,20 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_ADDC_U32 class methods ---
|
||||
// --- Inst_VOP2__V_ADDC_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_addc_u32")
|
||||
Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_addc_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ReadsVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_ADDC_U32
|
||||
} // Inst_VOP2__V_ADDC_CO_U32
|
||||
|
||||
Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
|
||||
Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_ADDC_U32
|
||||
} // ~Inst_VOP2__V_ADDC_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u + S1.u + VCC[threadId];
|
||||
@@ -7178,7 +7177,7 @@ namespace VegaISA
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
|
||||
// source comes from the SGPR-pair at S2.u.
|
||||
void
|
||||
Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
@@ -7204,20 +7203,20 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_SUBB_U32 class methods ---
|
||||
// --- Inst_VOP2__V_SUBB_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subb_u32")
|
||||
Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subb_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ReadsVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_SUBB_U32
|
||||
} // Inst_VOP2__V_SUBB_CO_U32
|
||||
|
||||
Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
|
||||
Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_SUBB_U32
|
||||
} // ~Inst_VOP2__V_SUBB_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u - S1.u - VCC[threadId];
|
||||
@@ -7226,7 +7225,7 @@ namespace VegaISA
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
|
||||
// --- source comes from the SGPR-pair at S2.u.
|
||||
void
|
||||
Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
@@ -7250,20 +7249,20 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_SUBBREV_U32 class methods ---
|
||||
// --- Inst_VOP2__V_SUBBREV_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subbrev_u32")
|
||||
Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subbrev_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ReadsVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_SUBBREV_U32
|
||||
} // Inst_VOP2__V_SUBBREV_CO_U32
|
||||
|
||||
Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
|
||||
Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_SUBBREV_U32
|
||||
} // ~Inst_VOP2__V_SUBBREV_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S1.u - S0.u - VCC[threadId];
|
||||
@@ -7273,7 +7272,7 @@ namespace VegaISA
|
||||
// source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
|
||||
// SQ translates this to V_SUBREV_U32 with reversed operands.
|
||||
void
|
||||
Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
@@ -7893,6 +7892,149 @@ namespace VegaISA
|
||||
{
|
||||
panicUnimplemented();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_ADD_U32 class methods ---
|
||||
|
||||
// Constructor for the non-carry-out VOP2 add. Forwards iFmt and the
// mnemonic "v_add_u32" to the Inst_VOP2 base, then sets the ALU and
// ValuCacGrp2 flags. Note that WritesVCC is NOT set here, in contrast to
// the V_ADD_CO_U32 constructor elsewhere in this file.
Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_add_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_ADD_U32
|
||||
|
||||
// Destructor; the body is empty, so no instruction-specific cleanup is done.
Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_ADD_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u + S1.u;
|
||||
// Executes v_add_u32: for every lane whose bit is set in the wavefront's
// exec mask, vdst[lane] = src0[lane] + src1[lane] on 32-bit unsigned
// operands. Only vdst is written back; this function neither declares nor
// writes a VCC operand.
//
// Two paths:
//   - SDWA encoding (isSDWAInst() true): SRC0 is taken from the SDWA
//     extension word, sources are passed through processSDWA_src(), and
//     the result through processSDWA_dst().
//   - Otherwise: a plain per-lane add of src0 and src1.
void
|
||||
Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
// src1 is a non-const operand (unlike src0) because it is handed to
// processSDWA_src() below -- presumably that helper rewrites it in place;
// confirm against processSDWA_src.
VecOperandU32 src1(gpuDynInst, instData.VSRC1);
|
||||
VecOperandU32 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
if (isSDWAInst()) {
|
||||
// In the SDWA path the first source comes from the SDWA extension
// word's SRC0 field rather than instData.SRC0.
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
|
||||
// use copies of original src0, src1, and dest during selecting
|
||||
VecOperandU32 origSrc0_sdwa(gpuDynInst,
|
||||
extData.iFmt_VOP_SDWA.SRC0);
|
||||
VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
|
||||
VecOperandU32 origVdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0_sdwa.read();
|
||||
origSrc0_sdwa.read();
|
||||
origSrc1.read();
|
||||
|
||||
// Debug trace of every SDWA control field used for this instruction.
DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
|
||||
"DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
|
||||
"SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
|
||||
"SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
|
||||
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
|
||||
extData.iFmt_VOP_SDWA.DST_U,
|
||||
extData.iFmt_VOP_SDWA.CLMP,
|
||||
extData.iFmt_VOP_SDWA.SRC0_SEL,
|
||||
extData.iFmt_VOP_SDWA.SRC0_SEXT,
|
||||
extData.iFmt_VOP_SDWA.SRC0_NEG,
|
||||
extData.iFmt_VOP_SDWA.SRC0_ABS,
|
||||
extData.iFmt_VOP_SDWA.SRC1_SEL,
|
||||
extData.iFmt_VOP_SDWA.SRC1_SEXT,
|
||||
extData.iFmt_VOP_SDWA.SRC1_NEG,
|
||||
extData.iFmt_VOP_SDWA.SRC1_ABS);
|
||||
|
||||
// Source-side SDWA processing must run before the add so that the
// loop below consumes the processed src0_sdwa / src1 values.
processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
|
||||
src1, origSrc1);
|
||||
|
||||
// Per-lane add for active lanes only; inactive lanes of vdst are not
// assigned in this loop.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0_sdwa[lane] + src1[lane];
|
||||
origVdst[lane] = vdst[lane]; // keep copy consistent
|
||||
}
|
||||
}
|
||||
|
||||
// Destination-side SDWA processing runs on the computed result before
// the final write-back.
processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
|
||||
} else {
|
||||
// Non-SDWA path: plain per-lane add for active lanes.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] + src1[lane];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Single write-back shared by both paths; there is no vcc.write() here.
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_SUB_U32 class methods ---
|
||||
|
||||
// Constructor for the non-carry-out VOP2 subtract. Forwards iFmt and the
// mnemonic "v_sub_u32" to the Inst_VOP2 base, then sets the ALU and
// ValuCacGrp2 flags. WritesVCC is not set.
Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_sub_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_SUB_U32
|
||||
|
||||
// Destructor; the body is empty, so no instruction-specific cleanup is done.
Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_SUB_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u - S1.u;
|
||||
// Executes v_sub_u32: for every lane whose bit is set in the wavefront's
// exec mask, vdst[lane] = src0[lane] - src1[lane] on 32-bit unsigned
// operands. Only vdst is written back; no VCC operand is declared or
// written.
//
// NOTE(review): unlike Inst_VOP2__V_ADD_U32::execute, this function has no
// isSDWAInst() branch -- confirm that SDWA-encoded v_sub_u32 is not expected
// to reach here.
void
|
||||
Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
|
||||
VecOperandU32 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
// Per-lane subtract for active lanes only; inactive lanes of vdst are not
// assigned in this loop.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] - src1[lane];
|
||||
}
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_SUBREV_U32 class methods ---
|
||||
|
||||
// Constructor for the non-carry-out VOP2 reversed subtract. Forwards iFmt
// and the mnemonic "v_subrev_u32" to the Inst_VOP2 base, then sets the ALU
// and ValuCacGrp2 flags. WritesVCC is not set.
Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
|
||||
: Inst_VOP2(iFmt, "v_subrev_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP2__V_SUBREV_U32
|
||||
|
||||
// Destructor; the body is empty, so no instruction-specific cleanup is done.
Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
|
||||
{
|
||||
} // ~Inst_VOP2__V_SUBREV_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S1.u - S0.u;
|
||||
// Executes v_subrev_u32: for every lane whose bit is set in the wavefront's
// exec mask, vdst[lane] = src1[lane] - src0[lane] (operands reversed
// relative to v_sub_u32) on 32-bit unsigned operands. Only vdst is written
// back; no VCC operand is declared or written.
//
// NOTE(review): unlike Inst_VOP2__V_ADD_U32::execute, this function has no
// isSDWAInst() branch -- confirm that SDWA-encoded v_subrev_u32 is not
// expected to reach here.
void
|
||||
Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
|
||||
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
|
||||
VecOperandU32 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
// Per-lane reversed subtract for active lanes only; inactive lanes of vdst
// are not assigned in this loop.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] - src0[lane];
|
||||
}
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP1__V_NOP class methods ---
|
||||
|
||||
Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
|
||||
@@ -26157,19 +26299,19 @@ namespace VegaISA
|
||||
|
||||
vdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_ADD_U32 class methods ---
|
||||
// --- Inst_VOP3__V_ADD_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_add_u32")
|
||||
Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_add_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP3__V_ADD_U32
|
||||
} // Inst_VOP3__V_ADD_CO_U32
|
||||
|
||||
Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
|
||||
Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP3__V_ADD_U32
|
||||
} // ~Inst_VOP3__V_ADD_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u + S1.u;
|
||||
@@ -26177,7 +26319,7 @@ namespace VegaISA
|
||||
// --- overflow or carry-out for V_ADDC_U32.
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
|
||||
void
|
||||
Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
|
||||
@@ -26206,19 +26348,19 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_SUB_U32 class methods ---
|
||||
// --- Inst_VOP3__V_SUB_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_sub_u32")
|
||||
Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_sub_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP3__V_SUB_U32
|
||||
} // Inst_VOP3__V_SUB_CO_U32
|
||||
|
||||
Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
|
||||
Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP3__V_SUB_U32
|
||||
} // ~Inst_VOP3__V_SUB_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u - S1.u;
|
||||
@@ -26226,7 +26368,7 @@ namespace VegaISA
|
||||
// carry-out for V_SUBB_U32.
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
|
||||
void
|
||||
Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
|
||||
@@ -26254,20 +26396,20 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_SUBREV_U32 class methods ---
|
||||
// --- Inst_VOP3__V_SUBREV_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
|
||||
Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32(
|
||||
InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_subrev_u32")
|
||||
: Inst_VOP3B(iFmt, "v_subrev_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP3__V_SUBREV_U32
|
||||
} // Inst_VOP3__V_SUBREV_CO_U32
|
||||
|
||||
Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
|
||||
Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP3__V_SUBREV_U32
|
||||
} // ~Inst_VOP3__V_SUBREV_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S1.u - S0.u;
|
||||
@@ -26276,7 +26418,7 @@ namespace VegaISA
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
|
||||
// SQ translates this to V_SUB_U32 with reversed operands.
|
||||
void
|
||||
Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
|
||||
@@ -26304,20 +26446,20 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
vcc.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_ADDC_U32 class methods ---
|
||||
// --- Inst_VOP3__V_ADDC_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_addc_u32")
|
||||
Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_addc_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ReadsVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP3__V_ADDC_U32
|
||||
} // Inst_VOP3__V_ADDC_CO_U32
|
||||
|
||||
Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
|
||||
Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP3__V_ADDC_U32
|
||||
} // ~Inst_VOP3__V_ADDC_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u + S1.u + VCC[threadId];
|
||||
@@ -26326,7 +26468,7 @@ namespace VegaISA
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
|
||||
// source comes from the SGPR-pair at S2.u.
|
||||
void
|
||||
Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
|
||||
@@ -26360,20 +26502,20 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
sdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_SUBB_U32 class methods ---
|
||||
// --- Inst_VOP3__V_SUBB_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_subb_u32")
|
||||
Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_subb_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ReadsVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP3__V_SUBB_U32
|
||||
} // Inst_VOP3__V_SUBB_CO_U32
|
||||
|
||||
Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
|
||||
Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP3__V_SUBB_U32
|
||||
} // ~Inst_VOP3__V_SUBB_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S0.u - S1.u - VCC[threadId];
|
||||
@@ -26382,7 +26524,7 @@ namespace VegaISA
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
|
||||
// --- source comes from the SGPR-pair at S2.u.
|
||||
void
|
||||
Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
|
||||
@@ -26414,21 +26556,21 @@ namespace VegaISA
|
||||
vdst.write();
|
||||
sdst.write();
|
||||
} // execute
|
||||
// --- Inst_VOP3__V_SUBBREV_U32 class methods ---
|
||||
// --- Inst_VOP3__V_SUBBREV_CO_U32 class methods ---
|
||||
|
||||
Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
|
||||
Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32(
|
||||
InFmt_VOP3B *iFmt)
|
||||
: Inst_VOP3B(iFmt, "v_subbrev_u32")
|
||||
: Inst_VOP3B(iFmt, "v_subbrev_co_u32")
|
||||
{
|
||||
setFlag(ALU);
|
||||
setFlag(WritesVCC);
|
||||
setFlag(ReadsVCC);
|
||||
setFlag(ValuCacGrp2);
|
||||
} // Inst_VOP3__V_SUBBREV_U32
|
||||
} // Inst_VOP3__V_SUBBREV_CO_U32
|
||||
|
||||
Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
|
||||
Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32()
|
||||
{
|
||||
} // ~Inst_VOP3__V_SUBBREV_U32
|
||||
} // ~Inst_VOP3__V_SUBBREV_CO_U32
|
||||
|
||||
// --- description from .arch file ---
|
||||
// D.u = S1.u - S0.u - VCC[threadId];
|
||||
@@ -26436,9 +26578,8 @@ namespace VegaISA
|
||||
// overflow.
|
||||
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
|
||||
// source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
|
||||
// SQ translates this to V_SUBREV_U32 with reversed operands.
|
||||
void
|
||||
Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
|
||||
|
||||
@@ -6987,11 +6987,11 @@ namespace VegaISA
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_MADAK_F32
|
||||
|
||||
class Inst_VOP2__V_ADD_U32 : public Inst_VOP2
|
||||
class Inst_VOP2__V_ADD_CO_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_ADD_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_ADD_U32();
|
||||
Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_ADD_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -7021,13 +7021,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_ADD_U32
|
||||
}; // Inst_VOP2__V_ADD_CO_U32
|
||||
|
||||
class Inst_VOP2__V_SUB_U32 : public Inst_VOP2
|
||||
class Inst_VOP2__V_SUB_CO_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_SUB_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUB_U32();
|
||||
Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUB_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -7057,13 +7057,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_SUB_U32
|
||||
}; // Inst_VOP2__V_SUB_CO_U32
|
||||
|
||||
class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2
|
||||
class Inst_VOP2__V_SUBREV_CO_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBREV_U32();
|
||||
Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBREV_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -7093,13 +7093,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_SUBREV_U32
|
||||
}; // Inst_VOP2__V_SUBREV_CO_U32
|
||||
|
||||
class Inst_VOP2__V_ADDC_U32 : public Inst_VOP2
|
||||
class Inst_VOP2__V_ADDC_CO_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_ADDC_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_ADDC_U32();
|
||||
Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_ADDC_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -7131,13 +7131,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_ADDC_U32
|
||||
}; // Inst_VOP2__V_ADDC_CO_U32
|
||||
|
||||
class Inst_VOP2__V_SUBB_U32 : public Inst_VOP2
|
||||
class Inst_VOP2__V_SUBB_CO_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_SUBB_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBB_U32();
|
||||
Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBB_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -7169,13 +7169,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_SUBB_U32
|
||||
}; // Inst_VOP2__V_SUBB_CO_U32
|
||||
|
||||
class Inst_VOP2__V_SUBBREV_U32 : public Inst_VOP2
|
||||
class Inst_VOP2__V_SUBBREV_CO_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBBREV_U32();
|
||||
Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBBREV_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -7207,7 +7207,7 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_SUBBREV_U32
|
||||
}; // Inst_VOP2__V_SUBBREV_CO_U32
|
||||
|
||||
class Inst_VOP2__V_ADD_F16 : public Inst_VOP2
|
||||
{
|
||||
@@ -7927,6 +7927,108 @@ namespace VegaISA
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_LDEXP_F16
|
||||
|
||||
class Inst_VOP2__V_ADD_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_ADD_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_ADD_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
{
|
||||
return numDstRegOperands() + numSrcRegOperands();
|
||||
} // getNumOperands
|
||||
|
||||
int numDstRegOperands() override { return 1; }
|
||||
int numSrcRegOperands() override { return 2; }
|
||||
|
||||
int
|
||||
getOperandSize(int opIdx) override
|
||||
{
|
||||
switch (opIdx) {
|
||||
case 0: //src_0
|
||||
return 4;
|
||||
case 1: //src_1
|
||||
return 4;
|
||||
case 2: //vdst
|
||||
return 4;
|
||||
default:
|
||||
fatal("op idx %i out of bounds\n", opIdx);
|
||||
return -1;
|
||||
}
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_ADD_U32
|
||||
|
||||
class Inst_VOP2__V_SUB_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_SUB_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUB_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
{
|
||||
return numDstRegOperands() + numSrcRegOperands();
|
||||
} // getNumOperands
|
||||
|
||||
int numDstRegOperands() override { return 1; }
|
||||
int numSrcRegOperands() override { return 2; }
|
||||
|
||||
int
|
||||
getOperandSize(int opIdx) override
|
||||
{
|
||||
switch (opIdx) {
|
||||
case 0: //src_0
|
||||
return 4;
|
||||
case 1: //src_1
|
||||
return 4;
|
||||
case 2: //vdst
|
||||
return 4;
|
||||
default:
|
||||
fatal("op idx %i out of bounds\n", opIdx);
|
||||
return -1;
|
||||
}
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_SUB_U32
|
||||
|
||||
class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2
|
||||
{
|
||||
public:
|
||||
Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*);
|
||||
~Inst_VOP2__V_SUBREV_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
{
|
||||
return numDstRegOperands() + numSrcRegOperands();
|
||||
} // getNumOperands
|
||||
|
||||
int numDstRegOperands() override { return 1; }
|
||||
int numSrcRegOperands() override { return 2; }
|
||||
|
||||
int
|
||||
getOperandSize(int opIdx) override
|
||||
{
|
||||
switch (opIdx) {
|
||||
case 0: //src_0
|
||||
return 4;
|
||||
case 1: //src_1
|
||||
return 4;
|
||||
case 2: //vdst
|
||||
return 4;
|
||||
default:
|
||||
fatal("op idx %i out of bounds\n", opIdx);
|
||||
return -1;
|
||||
}
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP2__V_SUBREV_U32
|
||||
|
||||
class Inst_VOP1__V_NOP : public Inst_VOP1
|
||||
{
|
||||
public:
|
||||
@@ -24637,11 +24739,11 @@ namespace VegaISA
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_MAC_F32
|
||||
|
||||
class Inst_VOP3__V_ADD_U32 : public Inst_VOP3B
|
||||
class Inst_VOP3__V_ADD_CO_U32 : public Inst_VOP3B
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_ADD_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_ADD_U32();
|
||||
Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_ADD_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -24671,13 +24773,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_ADD_U32
|
||||
}; // Inst_VOP3__V_ADD_CO_U32
|
||||
|
||||
class Inst_VOP3__V_SUB_U32 : public Inst_VOP3B
|
||||
class Inst_VOP3__V_SUB_CO_U32 : public Inst_VOP3B
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_SUB_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUB_U32();
|
||||
Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUB_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -24707,13 +24809,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_SUB_U32
|
||||
}; // Inst_VOP3__V_SUB_CO_U32
|
||||
|
||||
class Inst_VOP3__V_SUBREV_U32 : public Inst_VOP3B
|
||||
class Inst_VOP3__V_SUBREV_CO_U32 : public Inst_VOP3B
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_SUBREV_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUBREV_U32();
|
||||
Inst_VOP3__V_SUBREV_CO_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUBREV_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -24743,13 +24845,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_SUBREV_U32
|
||||
}; // Inst_VOP3__V_SUBREV_CO_U32
|
||||
|
||||
class Inst_VOP3__V_ADDC_U32 : public Inst_VOP3B
|
||||
class Inst_VOP3__V_ADDC_CO_U32 : public Inst_VOP3B
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_ADDC_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_ADDC_U32();
|
||||
Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_ADDC_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -24781,13 +24883,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_ADDC_U32
|
||||
}; // Inst_VOP3__V_ADDC_CO_U32
|
||||
|
||||
class Inst_VOP3__V_SUBB_U32 : public Inst_VOP3B
|
||||
class Inst_VOP3__V_SUBB_CO_U32 : public Inst_VOP3B
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_SUBB_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUBB_U32();
|
||||
Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUBB_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -24819,13 +24921,13 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_SUBB_U32
|
||||
}; // Inst_VOP3__V_SUBB_CO_U32
|
||||
|
||||
class Inst_VOP3__V_SUBBREV_U32 : public Inst_VOP3B
|
||||
class Inst_VOP3__V_SUBBREV_CO_U32 : public Inst_VOP3B
|
||||
{
|
||||
public:
|
||||
Inst_VOP3__V_SUBBREV_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUBBREV_U32();
|
||||
Inst_VOP3__V_SUBBREV_CO_U32(InFmt_VOP3B*);
|
||||
~Inst_VOP3__V_SUBBREV_CO_U32();
|
||||
|
||||
int
|
||||
getNumOperands() override
|
||||
@@ -24857,7 +24959,7 @@ namespace VegaISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
}; // Inst_VOP3__V_SUBBREV_U32
|
||||
}; // Inst_VOP3__V_SUBBREV_CO_U32
|
||||
|
||||
class Inst_VOP3__V_ADD_F16 : public Inst_VOP3A
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user