arch-gcn3: Remove invalid assert when reading EXEC_LO
This assert assumed that all reads of EXEC_LO would be 64 bits wide, that is, that we would always read the entire EXEC mask. This assumption is invalid, as some kernels read only the low 32 bits of EXEC. The write to EXEC_LO is also updated to handle 32-bit writes.

Change-Id: Ifeb167578515bf112b1eab70bbf2201a5e936358
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29960
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
1d816250f8
commit
550f0203aa
@@ -435,9 +435,10 @@ namespace Gcn3ISA
|
||||
|
||||
if (!isScalarReg(_opIdx)) {
|
||||
if (_opIdx == REG_EXEC_LO) {
|
||||
ScalarRegU64 new_exec_mask_val(0);
|
||||
ScalarRegU64 new_exec_mask_val
|
||||
= wf->execMask().to_ullong();
|
||||
std::memcpy((void*)&new_exec_mask_val,
|
||||
(void*)srfData.data(), sizeof(new_exec_mask_val));
|
||||
(void*)srfData.data(), sizeof(srfData));
|
||||
VectorMask new_exec_mask(new_exec_mask_val);
|
||||
wf->execMask() = new_exec_mask;
|
||||
DPRINTF(GPUSRF, "Write EXEC\n");
|
||||
@@ -513,7 +514,6 @@ namespace Gcn3ISA
|
||||
switch(_opIdx) {
|
||||
case REG_EXEC_LO:
|
||||
{
|
||||
assert(NumDwords == 2);
|
||||
ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
|
||||
execMask().to_ullong();
|
||||
std::memcpy((void*)srfData.data(), (void*)&exec_mask,
|
||||
|
||||
Reference in New Issue
Block a user