arch-gcn3: Remove invalid assert when reading EXEC_LO

This assert assumed that every read of EXEC_LO would be 64 bits wide, that is, that we would always read the entire EXEC mask. This assumption is invalid because some kernels read only the low 32 bits of EXEC.

The write to EXEC_LO is also updated to handle 32-bit writes.

Change-Id: Ifeb167578515bf112b1eab70bbf2201a5e936358
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29960
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
2019-02-14 13:27:55 -05:00
parent 1d816250f8
commit 550f0203aa
1 changed files with 3 additions and 3 deletions
--- a/src/arch/gcn3/operand.hh
+++ b/src/arch/gcn3/operand.hh
@@ -435,9 +435,10 @@ namespace Gcn3ISA

            if (!isScalarReg(_opIdx)) {
                if (_opIdx == REG_EXEC_LO) {
-                    ScalarRegU64 new_exec_mask_val(0);
+                    ScalarRegU64 new_exec_mask_val
+                        = wf->execMask().to_ullong();
                    std::memcpy((void*)&new_exec_mask_val,
-                        (void*)srfData.data(), sizeof(new_exec_mask_val));
+                        (void*)srfData.data(), sizeof(srfData));
                    VectorMask new_exec_mask(new_exec_mask_val);
                    wf->execMask() = new_exec_mask;
                    DPRINTF(GPUSRF, "Write EXEC\n");
@@ -513,7 +514,6 @@ namespace Gcn3ISA
            switch(_opIdx) {
              case REG_EXEC_LO:
                {
-                    assert(NumDwords == 2);
                    ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
                        execMask().to_ullong();
                    std::memcpy((void*)srfData.data(), (void*)&exec_mask,