From 3f5120e01f1cf5c11448dff600bf1b5ffeb4e063 Mon Sep 17 00:00:00 2001 From: Michael Boyer Date: Wed, 16 Jun 2021 18:14:22 -0500 Subject: [PATCH] arch-vega: Implement non-carry-out VEGA add, sub, and subrev In GCN3, the v_add_u32, v_sub_u32, and v_subrev_u32 instructions write the carry-out value to VCC. VEGA introduces explicit carry-out versions of these instructions (v_add_co_u32, v_sub_co_u32, and v_subrev_co_u32), and modifies the behavior of the baseline, non-carry-out versions to not write to VCC. Previously both the carry-out and non-carry-out versions shared a single implementation that wrote to VCC. This patch correctly implements the non-carry-out versions to avoid the VCC write. This patch also makes the following substitutions for GCN3 instructions that no longer exist in VEGA (this renaming has no functional impact): v_addc_u32 -> v_addc_co_u32 v_subb_u32 -> v_subb_co_u32 v_subbrev_u32 -> v_subbrev_co_u32 Change-Id: I002fa6e9316d38fd4cc3554daff047523cfc12c9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47240 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/decoder.cc | 60 ++-- src/arch/amdgpu/vega/gpu_decoder.hh | 12 +- src/arch/amdgpu/vega/insts/instructions.cc | 317 +++++++++++++++------ src/arch/amdgpu/vega/insts/instructions.hh | 198 +++++++++---- 4 files changed, 415 insertions(+), 172 deletions(-) diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc index 5be0d3d205..363f7e1d99 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/decoder.cc @@ -849,12 +849,12 @@ namespace VegaISA &Decoder::decode_OPU_VOP3__V_MAC_F32, &Decoder::decode_invalid, &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_ADD_U32, - &Decoder::decode_OPU_VOP3__V_SUB_U32, - &Decoder::decode_OPU_VOP3__V_SUBREV_U32, - &Decoder::decode_OPU_VOP3__V_ADDC_U32, - &Decoder::decode_OPU_VOP3__V_SUBB_U32, - &Decoder::decode_OPU_VOP3__V_SUBBREV_U32, + &Decoder::decode_OPU_VOP3__V_ADD_CO_U32, + &Decoder::decode_OPU_VOP3__V_SUB_CO_U32, + &Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32, + &Decoder::decode_OPU_VOP3__V_ADDC_CO_U32, + &Decoder::decode_OPU_VOP3__V_SUBB_CO_U32, + &Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32, &Decoder::decode_OPU_VOP3__V_ADD_F16, &Decoder::decode_OPU_VOP3__V_SUB_F16, &Decoder::decode_OPU_VOP3__V_SUBREV_F16, @@ -3993,37 +3993,37 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_VOP2__V_ADD_CO_U32(MachInst iFmt) { - return new Inst_VOP2__V_ADD_U32(&iFmt->iFmt_VOP2); + return new Inst_VOP2__V_ADD_CO_U32(&iFmt->iFmt_VOP2); } // decode_OP_VOP2__V_ADD_CO_U32 GPUStaticInst* Decoder::decode_OP_VOP2__V_SUB_CO_U32(MachInst iFmt) { - return new Inst_VOP2__V_SUB_U32(&iFmt->iFmt_VOP2); + return new Inst_VOP2__V_SUB_CO_U32(&iFmt->iFmt_VOP2); } // decode_OP_VOP2__V_SUB_CO_U32 GPUStaticInst* Decoder::decode_OP_VOP2__V_SUBREV_CO_U32(MachInst iFmt) { - return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2); + return new Inst_VOP2__V_SUBREV_CO_U32(&iFmt->iFmt_VOP2); } // decode_OP_VOP2__V_SUBREV_CO_U32 GPUStaticInst* Decoder::decode_OP_VOP2__V_ADDC_CO_U32(MachInst iFmt) { - return new Inst_VOP2__V_ADDC_U32(&iFmt->iFmt_VOP2); + return new Inst_VOP2__V_ADDC_CO_U32(&iFmt->iFmt_VOP2); } // decode_OP_VOP2__V_ADDC_CO_U32 GPUStaticInst* Decoder::decode_OP_VOP2__V_SUBB_CO_U32(MachInst iFmt) { - return new Inst_VOP2__V_SUBB_U32(&iFmt->iFmt_VOP2); + return new Inst_VOP2__V_SUBB_CO_U32(&iFmt->iFmt_VOP2); } // decode_OP_VOP2__V_SUBB_CO_U32 GPUStaticInst* Decoder::decode_OP_VOP2__V_SUBBREV_CO_U32(MachInst iFmt) { - return new Inst_VOP2__V_SUBBREV_U32(&iFmt->iFmt_VOP2); + return new Inst_VOP2__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP2); } // decode_OP_VOP2__V_SUBBREV_CO_U32 GPUStaticInst* @@ -5947,40 +5947,40 @@ namespace VegaISA } // decode_OPU_VOP3__V_MAC_F32 GPUStaticInst* - Decoder::decode_OPU_VOP3__V_ADD_U32(MachInst iFmt) + Decoder::decode_OPU_VOP3__V_ADD_CO_U32(MachInst iFmt) { - return new Inst_VOP3__V_ADD_U32(&iFmt->iFmt_VOP3B); - } // decode_OPU_VOP3__V_ADD_U32 + return new Inst_VOP3__V_ADD_CO_U32(&iFmt->iFmt_VOP3B); + } // decode_OPU_VOP3__V_ADD_CO_U32 GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SUB_U32(MachInst iFmt) + Decoder::decode_OPU_VOP3__V_SUB_CO_U32(MachInst iFmt) { - return new Inst_VOP3__V_SUB_U32(&iFmt->iFmt_VOP3B); - } // decode_OPU_VOP3__V_SUB_U32 + return new Inst_VOP3__V_SUB_CO_U32(&iFmt->iFmt_VOP3B); + } // decode_OPU_VOP3__V_SUB_CO_U32 GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SUBREV_U32(MachInst iFmt) + Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst iFmt) { - return new Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3B); - } // decode_OPU_VOP3__V_SUBREV_U32 + return new Inst_VOP3__V_SUBREV_CO_U32(&iFmt->iFmt_VOP3B); + } // decode_OPU_VOP3__V_SUBREV_CO_U32 GPUStaticInst* - Decoder::decode_OPU_VOP3__V_ADDC_U32(MachInst iFmt) + Decoder::decode_OPU_VOP3__V_ADDC_CO_U32(MachInst iFmt) { - return new Inst_VOP3__V_ADDC_U32(&iFmt->iFmt_VOP3B); - } // decode_OPU_VOP3__V_ADDC_U32 + return new Inst_VOP3__V_ADDC_CO_U32(&iFmt->iFmt_VOP3B); + } // decode_OPU_VOP3__V_ADDC_CO_U32 GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SUBB_U32(MachInst iFmt) + Decoder::decode_OPU_VOP3__V_SUBB_CO_U32(MachInst iFmt) { - return new Inst_VOP3__V_SUBB_U32(&iFmt->iFmt_VOP3B); - } // decode_OPU_VOP3__V_SUBB_U32 + return new Inst_VOP3__V_SUBB_CO_U32(&iFmt->iFmt_VOP3B); + } // decode_OPU_VOP3__V_SUBB_CO_U32 GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SUBBREV_U32(MachInst iFmt) + Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst iFmt) { - return new Inst_VOP3__V_SUBBREV_U32(&iFmt->iFmt_VOP3B); - } // decode_OPU_VOP3__V_SUBBREV_U32 + return new Inst_VOP3__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP3B); + } // decode_OPU_VOP3__V_SUBBREV_CO_U32 GPUStaticInst* Decoder::decode_OPU_VOP3__V_ADD_F16(MachInst iFmt) diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh index 69954f8e6b..0159589d9c 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.hh +++ b/src/arch/amdgpu/vega/gpu_decoder.hh @@ -296,12 +296,12 @@ namespace VegaISA GPUStaticInst* decode_OPU_VOP3__V_OR_B32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_XOR_B32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_MAC_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ADD_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUB_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ADDC_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBB_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_ADD_CO_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_SUB_CO_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_ADDC_CO_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_SUBB_CO_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_ADD_F16(MachInst); GPUStaticInst* decode_OPU_VOP3__V_SUB_F16(MachInst); GPUStaticInst* decode_OPU_VOP3__V_SUBREV_F16(MachInst); diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 5db3e54398..b0a6cb0151 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -6988,19 +6988,19 @@ namespace VegaISA vdst.write(); } // execute - // --- Inst_VOP2__V_ADD_U32 class methods --- + // --- Inst_VOP2__V_ADD_CO_U32 class methods --- - Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u32") + Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ValuCacGrp2); - } // Inst_VOP2__V_ADD_U32 + } // Inst_VOP2__V_ADD_CO_U32 - Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() + Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32() { - } // ~Inst_VOP2__V_ADD_U32 + } // ~Inst_VOP2__V_ADD_CO_U32 // --- description from .arch file --- // D.u = S0.u + S1.u; @@ -7008,7 +7008,7 @@ namespace VegaISA // --- overflow or carry-out for V_ADDC_U32. // In VOP3 the VCC destination may be an arbitrary SGPR-pair. void - Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); @@ -7031,8 +7031,8 @@ namespace VegaISA origSrc0_sdwa.read(); origSrc1.read(); - DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], " - "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " + DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register " + "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, @@ -7073,19 +7073,19 @@ namespace VegaISA vcc.write(); vdst.write(); } // execute - // --- Inst_VOP2__V_SUB_U32 class methods --- + // --- Inst_VOP2__V_SUB_CO_U32 class methods --- - Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u32") + Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ValuCacGrp2); - } // Inst_VOP2__V_SUB_U32 + } // Inst_VOP2__V_SUB_CO_U32 - Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() + Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32() { - } // ~Inst_VOP2__V_SUB_U32 + } // ~Inst_VOP2__V_SUB_CO_U32 // --- description from .arch file --- // D.u = S0.u - S1.u; @@ -7093,7 +7093,7 @@ namespace VegaISA // carry-out for V_SUBB_U32. // In VOP3 the VCC destination may be an arbitrary SGPR-pair. void - Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); @@ -7114,28 +7114,27 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP2__V_SUBREV_U32 class methods --- + // --- Inst_VOP2__V_SUBREV_CO_U32 class methods --- - Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u32") + Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ValuCacGrp2); - } // Inst_VOP2__V_SUBREV_U32 + } // Inst_VOP2__V_SUBREV_CO_U32 - Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() + Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32() { - } // ~Inst_VOP2__V_SUBREV_U32 + } // ~Inst_VOP2__V_SUBREV_CO_U32 // --- description from .arch file --- // D.u = S1.u - S0.u; // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or // carry-out for V_SUBB_U32. // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - // SQ translates this to V_SUB_U32 with reversed operands. void - Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); @@ -7156,20 +7155,20 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP2__V_ADDC_U32 class methods --- + // --- Inst_VOP2__V_ADDC_CO_U32 class methods --- - Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_addc_u32") + Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_addc_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ReadsVCC); setFlag(ValuCacGrp2); - } // Inst_VOP2__V_ADDC_U32 + } // Inst_VOP2__V_ADDC_CO_U32 - Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32() + Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32() { - } // ~Inst_VOP2__V_ADDC_U32 + } // ~Inst_VOP2__V_ADDC_CO_U32 // --- description from .arch file --- // D.u = S0.u + S1.u + VCC[threadId]; @@ -7178,7 +7177,7 @@ namespace VegaISA // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC // source comes from the SGPR-pair at S2.u. void - Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); @@ -7204,20 +7203,20 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP2__V_SUBB_U32 class methods --- + // --- Inst_VOP2__V_SUBB_CO_U32 class methods --- - Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subb_u32") + Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subb_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ReadsVCC); setFlag(ValuCacGrp2); - } // Inst_VOP2__V_SUBB_U32 + } // Inst_VOP2__V_SUBB_CO_U32 - Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32() + Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32() { - } // ~Inst_VOP2__V_SUBB_U32 + } // ~Inst_VOP2__V_SUBB_CO_U32 // --- description from .arch file --- // D.u = S0.u - S1.u - VCC[threadId]; @@ -7226,7 +7225,7 @@ namespace VegaISA // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC // --- source comes from the SGPR-pair at S2.u. void - Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); @@ -7250,20 +7249,20 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP2__V_SUBBREV_U32 class methods --- + // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods --- - Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subbrev_u32") + Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subbrev_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ReadsVCC); setFlag(ValuCacGrp2); - } // Inst_VOP2__V_SUBBREV_U32 + } // Inst_VOP2__V_SUBBREV_CO_U32 - Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32() + Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32() { - } // ~Inst_VOP2__V_SUBBREV_U32 + } // ~Inst_VOP2__V_SUBBREV_CO_U32 // --- description from .arch file --- // D.u = S1.u - S0.u - VCC[threadId]; @@ -7273,7 +7272,7 @@ namespace VegaISA // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. // SQ translates this to V_SUBREV_U32 with reversed operands. void - Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); @@ -7893,6 +7892,149 @@ namespace VegaISA { panicUnimplemented(); } // execute + // --- Inst_VOP2__V_ADD_U32 class methods --- + + Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_u32") + { + setFlag(ALU); + setFlag(ValuCacGrp2); + } // Inst_VOP2__V_ADD_U32 + + Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() + { + } // ~Inst_VOP2__V_ADD_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + void + Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isSDWAInst()) { + VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); + // use copies of original src0, src1, and dest during selecting + VecOperandU32 origSrc0_sdwa(gpuDynInst, + extData.iFmt_VOP_SDWA.SRC0); + VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); + VecOperandU32 origVdst(gpuDynInst, instData.VDST); + + src0_sdwa.read(); + origSrc0_sdwa.read(); + origSrc1.read(); + + DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], " + "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " + "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " + "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", + extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, + extData.iFmt_VOP_SDWA.DST_U, + extData.iFmt_VOP_SDWA.CLMP, + extData.iFmt_VOP_SDWA.SRC0_SEL, + extData.iFmt_VOP_SDWA.SRC0_SEXT, + extData.iFmt_VOP_SDWA.SRC0_NEG, + extData.iFmt_VOP_SDWA.SRC0_ABS, + extData.iFmt_VOP_SDWA.SRC1_SEL, + extData.iFmt_VOP_SDWA.SRC1_SEXT, + extData.iFmt_VOP_SDWA.SRC1_NEG, + extData.iFmt_VOP_SDWA.SRC1_ABS); + + processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, + src1, origSrc1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_sdwa[lane] + src1[lane]; + origVdst[lane] = vdst[lane]; // keep copy consistent + } + } + + processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_U32 class methods --- + + Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_u32") + { + setFlag(ALU); + setFlag(ValuCacGrp2); + } // Inst_VOP2__V_SUB_U32 + + Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() + { + } // ~Inst_VOP2__V_SUB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + void + Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_U32 class methods --- + + Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_u32") + { + setFlag(ALU); + setFlag(ValuCacGrp2); + } // Inst_VOP2__V_SUBREV_U32 + + Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() + { + } // ~Inst_VOP2__V_SUBREV_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u; + void + Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute // --- Inst_VOP1__V_NOP class methods --- Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) @@ -26157,19 +26299,19 @@ namespace VegaISA vdst.write(); } // execute - // --- Inst_VOP3__V_ADD_U32 class methods --- + // --- Inst_VOP3__V_ADD_CO_U32 class methods --- - Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_add_u32") + Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_add_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ValuCacGrp2); - } // Inst_VOP3__V_ADD_U32 + } // Inst_VOP3__V_ADD_CO_U32 - Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32() + Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32() { - } // ~Inst_VOP3__V_ADD_U32 + } // ~Inst_VOP3__V_ADD_CO_U32 // --- description from .arch file --- // D.u = S0.u + S1.u; @@ -26177,7 +26319,7 @@ namespace VegaISA // --- overflow or carry-out for V_ADDC_U32. // In VOP3 the VCC destination may be an arbitrary SGPR-pair. void - Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); @@ -26206,19 +26348,19 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP3__V_SUB_U32 class methods --- + // --- Inst_VOP3__V_SUB_CO_U32 class methods --- - Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_sub_u32") + Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_sub_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ValuCacGrp2); - } // Inst_VOP3__V_SUB_U32 + } // Inst_VOP3__V_SUB_CO_U32 - Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32() + Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32() { - } // ~Inst_VOP3__V_SUB_U32 + } // ~Inst_VOP3__V_SUB_CO_U32 // --- description from .arch file --- // D.u = S0.u - S1.u; @@ -26226,7 +26368,7 @@ namespace VegaISA // carry-out for V_SUBB_U32. // In VOP3 the VCC destination may be an arbitrary SGPR-pair. void - Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); @@ -26254,20 +26396,20 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP3__V_SUBREV_U32 class methods --- + // --- Inst_VOP3__V_SUBREV_CO_U32 class methods --- - Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32( + Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32( InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subrev_u32") + : Inst_VOP3B(iFmt, "v_subrev_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ValuCacGrp2); - } // Inst_VOP3__V_SUBREV_U32 + } // Inst_VOP3__V_SUBREV_CO_U32 - Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32() + Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32() { - } // ~Inst_VOP3__V_SUBREV_U32 + } // ~Inst_VOP3__V_SUBREV_CO_U32 // --- description from .arch file --- // D.u = S1.u - S0.u; @@ -26276,7 +26418,7 @@ namespace VegaISA // In VOP3 the VCC destination may be an arbitrary SGPR-pair. // SQ translates this to V_SUB_U32 with reversed operands. void - Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); @@ -26304,20 +26446,20 @@ namespace VegaISA vdst.write(); vcc.write(); } // execute - // --- Inst_VOP3__V_ADDC_U32 class methods --- + // --- Inst_VOP3__V_ADDC_CO_U32 class methods --- - Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_addc_u32") + Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_addc_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ReadsVCC); setFlag(ValuCacGrp2); - } // Inst_VOP3__V_ADDC_U32 + } // Inst_VOP3__V_ADDC_CO_U32 - Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32() + Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32() { - } // ~Inst_VOP3__V_ADDC_U32 + } // ~Inst_VOP3__V_ADDC_CO_U32 // --- description from .arch file --- // D.u = S0.u + S1.u + VCC[threadId]; @@ -26326,7 +26468,7 @@ namespace VegaISA // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC // source comes from the SGPR-pair at S2.u. void - Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); @@ -26360,20 +26502,20 @@ namespace VegaISA vdst.write(); sdst.write(); } // execute - // --- Inst_VOP3__V_SUBB_U32 class methods --- + // --- Inst_VOP3__V_SUBB_CO_U32 class methods --- - Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subb_u32") + Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_subb_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ReadsVCC); setFlag(ValuCacGrp2); - } // Inst_VOP3__V_SUBB_U32 + } // Inst_VOP3__V_SUBB_CO_U32 - Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32() + Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32() { - } // ~Inst_VOP3__V_SUBB_U32 + } // ~Inst_VOP3__V_SUBB_CO_U32 // --- description from .arch file --- // D.u = S0.u - S1.u - VCC[threadId]; @@ -26382,7 +26524,7 @@ namespace VegaISA // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC // --- source comes from the SGPR-pair at S2.u. void - Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); @@ -26414,21 +26556,21 @@ namespace VegaISA vdst.write(); sdst.write(); } // execute - // --- Inst_VOP3__V_SUBBREV_U32 class methods --- + // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods --- - Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32( + Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32( InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subbrev_u32") + : Inst_VOP3B(iFmt, "v_subbrev_co_u32") { setFlag(ALU); setFlag(WritesVCC); setFlag(ReadsVCC); setFlag(ValuCacGrp2); - } // Inst_VOP3__V_SUBBREV_U32 + } // Inst_VOP3__V_SUBBREV_CO_U32 - Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32() + Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32() { - } // ~Inst_VOP3__V_SUBBREV_U32 + } // ~Inst_VOP3__V_SUBBREV_CO_U32 // --- description from .arch file --- // D.u = S1.u - S0.u - VCC[threadId]; @@ -26436,9 +26578,8 @@ namespace VegaISA // overflow. // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. - // SQ translates this to V_SUBREV_U32 with reversed operands. void - Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst) + Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 5c0ea8c806..b815d3ea64 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -6987,11 +6987,11 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP2__V_MADAK_F32 - class Inst_VOP2__V_ADD_U32 : public Inst_VOP2 + class Inst_VOP2__V_ADD_CO_U32 : public Inst_VOP2 { public: - Inst_VOP2__V_ADD_U32(InFmt_VOP2*); - ~Inst_VOP2__V_ADD_U32(); + Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2*); + ~Inst_VOP2__V_ADD_CO_U32(); int getNumOperands() override @@ -7021,13 +7021,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ADD_U32 + }; // Inst_VOP2__V_ADD_CO_U32 - class Inst_VOP2__V_SUB_U32 : public Inst_VOP2 + class Inst_VOP2__V_SUB_CO_U32 : public Inst_VOP2 { public: - Inst_VOP2__V_SUB_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUB_U32(); + Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2*); + ~Inst_VOP2__V_SUB_CO_U32(); int getNumOperands() override @@ -7057,13 +7057,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUB_U32 + }; // Inst_VOP2__V_SUB_CO_U32 - class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2 + class Inst_VOP2__V_SUBREV_CO_U32 : public Inst_VOP2 { public: - Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUBREV_U32(); + Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2*); + ~Inst_VOP2__V_SUBREV_CO_U32(); int getNumOperands() override @@ -7093,13 +7093,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBREV_U32 + }; // Inst_VOP2__V_SUBREV_CO_U32 - class Inst_VOP2__V_ADDC_U32 : public Inst_VOP2 + class Inst_VOP2__V_ADDC_CO_U32 : public Inst_VOP2 { public: - Inst_VOP2__V_ADDC_U32(InFmt_VOP2*); - ~Inst_VOP2__V_ADDC_U32(); + Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2*); + ~Inst_VOP2__V_ADDC_CO_U32(); int getNumOperands() override @@ -7131,13 +7131,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ADDC_U32 + }; // Inst_VOP2__V_ADDC_CO_U32 - class Inst_VOP2__V_SUBB_U32 : public Inst_VOP2 + class Inst_VOP2__V_SUBB_CO_U32 : public Inst_VOP2 { public: - Inst_VOP2__V_SUBB_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUBB_U32(); + Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2*); + ~Inst_VOP2__V_SUBB_CO_U32(); int getNumOperands() override @@ -7169,13 +7169,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBB_U32 + }; // Inst_VOP2__V_SUBB_CO_U32 - class Inst_VOP2__V_SUBBREV_U32 : public Inst_VOP2 + class Inst_VOP2__V_SUBBREV_CO_U32 : public Inst_VOP2 { public: - Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUBBREV_U32(); + Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2*); + ~Inst_VOP2__V_SUBBREV_CO_U32(); int getNumOperands() override @@ -7207,7 +7207,7 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBBREV_U32 + }; // Inst_VOP2__V_SUBBREV_CO_U32 class Inst_VOP2__V_ADD_F16 : public Inst_VOP2 { @@ -7927,6 +7927,108 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP2__V_LDEXP_F16 + class Inst_VOP2__V_ADD_U32 : public Inst_VOP2 + { + public: + Inst_VOP2__V_ADD_U32(InFmt_VOP2*); + ~Inst_VOP2__V_ADD_U32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP2__V_ADD_U32 + + class Inst_VOP2__V_SUB_U32 : public Inst_VOP2 + { + public: + Inst_VOP2__V_SUB_U32(InFmt_VOP2*); + ~Inst_VOP2__V_SUB_U32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP2__V_SUB_U32 + + class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2 + { + public: + Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*); + ~Inst_VOP2__V_SUBREV_U32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP2__V_SUBREV_U32 + class Inst_VOP1__V_NOP : public Inst_VOP1 { public: @@ -24637,11 +24739,11 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP3__V_MAC_F32 - class Inst_VOP3__V_ADD_U32 : public Inst_VOP3B + class Inst_VOP3__V_ADD_CO_U32 : public Inst_VOP3B { public: - Inst_VOP3__V_ADD_U32(InFmt_VOP3B*); - ~Inst_VOP3__V_ADD_U32(); + Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B*); + ~Inst_VOP3__V_ADD_CO_U32(); int getNumOperands() override @@ -24671,13 +24773,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADD_U32 + }; // Inst_VOP3__V_ADD_CO_U32 - class Inst_VOP3__V_SUB_U32 : public Inst_VOP3B + class Inst_VOP3__V_SUB_CO_U32 : public Inst_VOP3B { public: - Inst_VOP3__V_SUB_U32(InFmt_VOP3B*); - ~Inst_VOP3__V_SUB_U32(); + Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B*); + ~Inst_VOP3__V_SUB_CO_U32(); int getNumOperands() override @@ -24707,13 +24809,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUB_U32 + }; // Inst_VOP3__V_SUB_CO_U32 - class Inst_VOP3__V_SUBREV_U32 : public Inst_VOP3B + class Inst_VOP3__V_SUBREV_CO_U32 : public Inst_VOP3B { public: - Inst_VOP3__V_SUBREV_U32(InFmt_VOP3B*); - ~Inst_VOP3__V_SUBREV_U32(); + Inst_VOP3__V_SUBREV_CO_U32(InFmt_VOP3B*); + ~Inst_VOP3__V_SUBREV_CO_U32(); int getNumOperands() override @@ -24743,13 +24845,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBREV_U32 + }; // Inst_VOP3__V_SUBREV_CO_U32 - class Inst_VOP3__V_ADDC_U32 : public Inst_VOP3B + class Inst_VOP3__V_ADDC_CO_U32 : public Inst_VOP3B { public: - Inst_VOP3__V_ADDC_U32(InFmt_VOP3B*); - ~Inst_VOP3__V_ADDC_U32(); + Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B*); + ~Inst_VOP3__V_ADDC_CO_U32(); int getNumOperands() override @@ -24781,13 +24883,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADDC_U32 + }; // Inst_VOP3__V_ADDC_CO_U32 - class Inst_VOP3__V_SUBB_U32 : public Inst_VOP3B + class Inst_VOP3__V_SUBB_CO_U32 : public Inst_VOP3B { public: - Inst_VOP3__V_SUBB_U32(InFmt_VOP3B*); - ~Inst_VOP3__V_SUBB_U32(); + Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B*); + ~Inst_VOP3__V_SUBB_CO_U32(); int getNumOperands() override @@ -24819,13 +24921,13 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBB_U32 + }; // Inst_VOP3__V_SUBB_CO_U32 - class Inst_VOP3__V_SUBBREV_U32 : public Inst_VOP3B + class Inst_VOP3__V_SUBBREV_CO_U32 : public Inst_VOP3B { public: - Inst_VOP3__V_SUBBREV_U32(InFmt_VOP3B*); - ~Inst_VOP3__V_SUBBREV_U32(); + Inst_VOP3__V_SUBBREV_CO_U32(InFmt_VOP3B*); + ~Inst_VOP3__V_SUBBREV_CO_U32(); int getNumOperands() override @@ -24857,7 +24959,7 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBBREV_U32 + }; // Inst_VOP3__V_SUBBREV_CO_U32 class Inst_VOP3__V_ADD_F16 : public Inst_VOP3A {