arch-vega: Implement non-carry-out VEGA add, sub, and subrev

In GCN3, the v_add_u32, v_sub_u32, and v_subrev_u32 instructions write
the carry-out value to VCC. VEGA introduces explicit carry-out versions
of these instructions (v_add_co_u32, v_sub_co_u32, and v_subrev_co_u32),
and modifies the behavior of the baseline, non-carry-out versions to not
write to VCC. Previously, gem5's VEGA model used a single
implementation for both the carry-out and non-carry-out versions, and
that implementation wrote to VCC. This patch implements the
non-carry-out versions correctly so that they no longer write to VCC.

This patch also renames the following GCN3 instructions, which no longer
exist in VEGA, to their VEGA equivalents (this renaming has no
functional impact):
v_addc_u32 -> v_addc_co_u32
v_subb_u32 -> v_subb_co_u32
v_subbrev_u32 -> v_subbrev_co_u32

Change-Id: I002fa6e9316d38fd4cc3554daff047523cfc12c9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47240
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Michael Boyer
2021-06-16 18:14:22 -05:00
parent bb46264d40
commit 3f5120e01f
4 changed files with 415 additions and 172 deletions

View File

@@ -849,12 +849,12 @@ namespace VegaISA
&Decoder::decode_OPU_VOP3__V_MAC_F32,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
&Decoder::decode_OPU_VOP3__V_ADD_U32,
&Decoder::decode_OPU_VOP3__V_SUB_U32,
&Decoder::decode_OPU_VOP3__V_SUBREV_U32,
&Decoder::decode_OPU_VOP3__V_ADDC_U32,
&Decoder::decode_OPU_VOP3__V_SUBB_U32,
&Decoder::decode_OPU_VOP3__V_SUBBREV_U32,
&Decoder::decode_OPU_VOP3__V_ADD_CO_U32,
&Decoder::decode_OPU_VOP3__V_SUB_CO_U32,
&Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32,
&Decoder::decode_OPU_VOP3__V_ADDC_CO_U32,
&Decoder::decode_OPU_VOP3__V_SUBB_CO_U32,
&Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32,
&Decoder::decode_OPU_VOP3__V_ADD_F16,
&Decoder::decode_OPU_VOP3__V_SUB_F16,
&Decoder::decode_OPU_VOP3__V_SUBREV_F16,
@@ -3993,37 +3993,37 @@ namespace VegaISA
GPUStaticInst*
Decoder::decode_OP_VOP2__V_ADD_CO_U32(MachInst iFmt)
{
return new Inst_VOP2__V_ADD_U32(&iFmt->iFmt_VOP2);
return new Inst_VOP2__V_ADD_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_ADD_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUB_CO_U32(MachInst iFmt)
{
return new Inst_VOP2__V_SUB_U32(&iFmt->iFmt_VOP2);
return new Inst_VOP2__V_SUB_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUB_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUBREV_CO_U32(MachInst iFmt)
{
return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2);
return new Inst_VOP2__V_SUBREV_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUBREV_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_ADDC_CO_U32(MachInst iFmt)
{
return new Inst_VOP2__V_ADDC_U32(&iFmt->iFmt_VOP2);
return new Inst_VOP2__V_ADDC_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_ADDC_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUBB_CO_U32(MachInst iFmt)
{
return new Inst_VOP2__V_SUBB_U32(&iFmt->iFmt_VOP2);
return new Inst_VOP2__V_SUBB_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUBB_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUBBREV_CO_U32(MachInst iFmt)
{
return new Inst_VOP2__V_SUBBREV_U32(&iFmt->iFmt_VOP2);
return new Inst_VOP2__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUBBREV_CO_U32
GPUStaticInst*
@@ -5947,40 +5947,40 @@ namespace VegaISA
} // decode_OPU_VOP3__V_MAC_F32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_U32(MachInst iFmt)
Decoder::decode_OPU_VOP3__V_ADD_CO_U32(MachInst iFmt)
{
return new Inst_VOP3__V_ADD_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_ADD_U32
return new Inst_VOP3__V_ADD_CO_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_ADD_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUB_U32(MachInst iFmt)
Decoder::decode_OPU_VOP3__V_SUB_CO_U32(MachInst iFmt)
{
return new Inst_VOP3__V_SUB_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUB_U32
return new Inst_VOP3__V_SUB_CO_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUB_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBREV_U32(MachInst iFmt)
Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst iFmt)
{
return new Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUBREV_U32
return new Inst_VOP3__V_SUBREV_CO_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUBREV_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADDC_U32(MachInst iFmt)
Decoder::decode_OPU_VOP3__V_ADDC_CO_U32(MachInst iFmt)
{
return new Inst_VOP3__V_ADDC_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_ADDC_U32
return new Inst_VOP3__V_ADDC_CO_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_ADDC_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBB_U32(MachInst iFmt)
Decoder::decode_OPU_VOP3__V_SUBB_CO_U32(MachInst iFmt)
{
return new Inst_VOP3__V_SUBB_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUBB_U32
return new Inst_VOP3__V_SUBB_CO_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUBB_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBBREV_U32(MachInst iFmt)
Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst iFmt)
{
return new Inst_VOP3__V_SUBBREV_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUBBREV_U32
return new Inst_VOP3__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP3B);
} // decode_OPU_VOP3__V_SUBBREV_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_F16(MachInst iFmt)