arch-gcn3,arch-vega: Fix ds_read2st64_b32

This instruction has two issues. The first is that it should write two
consecutive registers, starting with vdst, because it is writing two
dwords. The second is that the data assignment to the lanes from the
dynamic instruction should cast to a U32 type; otherwise, the array index
goes out of bounds and the wrong data is returned.

The first issue was fixed in GCN3 a few years ago in this review:
https://gem5-review.googlesource.com/c/public/gem5/+/32236. This
changeset makes the same change for Vega and applies the U32 cast in
both ISAs.

Tested with a rocPRIM unit test. The test was failing before this
changeset and now passes.

Change-Id: Ifb110fc9a36ad198da7eaf86b1e3e37eccd3bb10
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70577
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2023-05-12 18:28:00 -05:00
parent 8dac7f572b
commit ae7476bcdc
2 changed files with 5 additions and 5 deletions

View File

@@ -32123,9 +32123,9 @@ namespace Gcn3ISA
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
-vdst0[lane] = (reinterpret_cast<VecElemU64*>(
+vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2];
-vdst1[lane] = (reinterpret_cast<VecElemU64*>(
+vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1];
}
}

View File

@@ -35665,13 +35665,13 @@ namespace VegaISA
Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
-VecOperandU32 vdst1(gpuDynInst, extData.VDST + 2);
+VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
-vdst0[lane] = (reinterpret_cast<VecElemU64*>(
+vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2];
-vdst1[lane] = (reinterpret_cast<VecElemU64*>(
+vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1];
}
}