arch-gcn3: Explicitly sign-extend simm16
In some instructions, simm16 needs to be sign-extended. The previous code simply cast simm16 to a 32-bit or 64-bit datatype; however, this did not actually sign-extend the value. This patch explicitly calls sext<16> on simm16 wherever it is supposed to be sign-extended.

Change-Id: I32f02e51fbab220d1a73dc7e68c7410937db21c7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37495
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -1369,7 +1369,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
|
||||
|
||||
sdst = simm16;
|
||||
@@ -1393,7 +1393,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
|
||||
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1419,7 +1419,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
|
||||
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1444,7 +1444,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
|
||||
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1469,7 +1469,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
|
||||
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1494,7 +1494,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
|
||||
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1519,7 +1519,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
|
||||
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1544,7 +1544,7 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
|
||||
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
|
||||
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
|
||||
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
|
||||
|
||||
@@ -1727,7 +1727,7 @@ namespace Gcn3ISA
|
||||
|
||||
src.read();
|
||||
|
||||
sdst = src.rawData() + (ScalarRegI32)simm16;
|
||||
sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
|
||||
scc = (bits(src.rawData(), 31) == bits(simm16, 15)
|
||||
&& bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
|
||||
|
||||
@@ -1754,7 +1754,7 @@ namespace Gcn3ISA
|
||||
|
||||
sdst.read();
|
||||
|
||||
sdst = sdst.rawData() * (ScalarRegI32)simm16;
|
||||
sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16);
|
||||
|
||||
sdst.write();
|
||||
}
|
||||
@@ -3902,7 +3902,7 @@ namespace Gcn3ISA
|
||||
Addr pc = wf->pc();
|
||||
ScalarRegI16 simm16 = instData.SIMM16;
|
||||
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
|
||||
wf->pc(pc);
|
||||
}
|
||||
@@ -3948,7 +3948,7 @@ namespace Gcn3ISA
|
||||
scc.read();
|
||||
|
||||
if (!scc.rawData()) {
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
}
|
||||
|
||||
wf->pc(pc);
|
||||
@@ -3977,7 +3977,7 @@ namespace Gcn3ISA
|
||||
scc.read();
|
||||
|
||||
if (scc.rawData()) {
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
}
|
||||
|
||||
wf->pc(pc);
|
||||
@@ -4007,7 +4007,7 @@ namespace Gcn3ISA
|
||||
vcc.read();
|
||||
|
||||
if (!vcc.rawData()) {
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
}
|
||||
|
||||
wf->pc(pc);
|
||||
@@ -4037,7 +4037,7 @@ namespace Gcn3ISA
|
||||
if (vcc.rawData()) {
|
||||
Addr pc = wf->pc();
|
||||
ScalarRegI16 simm16 = instData.SIMM16;
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
wf->pc(pc);
|
||||
}
|
||||
}
|
||||
@@ -4062,7 +4062,7 @@ namespace Gcn3ISA
|
||||
if (wf->execMask().none()) {
|
||||
Addr pc = wf->pc();
|
||||
ScalarRegI16 simm16 = instData.SIMM16;
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
wf->pc(pc);
|
||||
}
|
||||
}
|
||||
@@ -4087,7 +4087,7 @@ namespace Gcn3ISA
|
||||
if (wf->execMask().any()) {
|
||||
Addr pc = wf->pc();
|
||||
ScalarRegI16 simm16 = instData.SIMM16;
|
||||
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
|
||||
pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL;
|
||||
wf->pc(pc);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user