From deb8f983a1cb8bd94885efa290f1f293eea63711 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 14 Oct 2024 10:19:52 -0700 Subject: [PATCH] arch-vega: Fix multi-dword setElem in PackedReg (#1664) There are two issues related to setting an element in PackedReg where the element spans multiple dwords. First, the element mask is applied without being inverted, so it clears every bit except the element's own and clobbers both dwords. Second, the value is shifted in its narrower input type, so a portion of it is shifted out. Fix this by inverting the mask so that only the bits where the value will be placed are cleared, and by promoting the value to a 64-bit type before shifting it into place. --- src/arch/amdgpu/vega/operand.hh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/vega/operand.hh b/src/arch/amdgpu/vega/operand.hh index 1bb9b43d1f..8e76405562 100644 --- a/src/arch/amdgpu/vega/operand.hh +++ b/src/arch/amdgpu/vega/operand.hh @@ -960,11 +960,14 @@ class PackedReg uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1; value &= elem_mask; + // Clear the bits where the value goes so that operator| can be used. elem_mask <<= qw_lbit; - qword &= elem_mask; + qword &= ~elem_mask; - value <<= qw_lbit; - qword |= value; + // Promote to 64-bit to prevent shifting out of range + uint64_t value64 = value; + value64 <<= qw_lbit; + qword |= value64; dwords[udw] = uint32_t(qword >> 32); dwords[ldw] = uint32_t(qword & mask(32));