From e3c2a322a1f940c03bb15d32a0c7fc6fb49d22b4 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 10 May 2024 08:49:13 -0700 Subject: [PATCH] arch-vega: Fix SDWA dst select (#1120) The destination select should take a value of the selection size (dword, word, or byte) starting at bit 0, move that to the selected destination, and then apply the unused constraint (DST_U) to the remaining word or bytes. Currently the code is selecting the word/byte currently being iterated over, rather than the least significant word/byte. As a result, any selection that is not word 0 or byte 0 will be replaced with the original destination value at those bits. This results in the wrong value. This commit changes the orig bits to be the original dest value at the lowest word / byte location. Tested with the mfma_i32_16x16x16i8 example which uses an SDWA V_OR_B32 to pack i8 values into VGPRs for the MFMA. Change-Id: I54ed819479a25fa9276d29a8f14f0fea7fd71afe --- src/arch/amdgpu/vega/insts/inst_util.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/inst_util.hh b/src/arch/amdgpu/vega/insts/inst_util.hh index bc64ff88da..ac8c572d77 100644 --- a/src/arch/amdgpu/vega/insts/inst_util.hh +++ b/src/arch/amdgpu/vega/insts/inst_util.hh @@ -701,7 +701,7 @@ namespace VegaISA if (sel < SDWA_WORD_0) { // we are selecting 1 byte // if we sign extended depends on upper-most bit of byte 0 signExt = (signExt && - (bits(currDstVal, VegaISA::MSB_PER_WORD, 0) & 0x80)); + (bits(currDstVal, VegaISA::MSB_PER_BYTE, 0) & 0x80)); for (int byte = 0; byte < 4; ++byte) { low_bit = byte * VegaISA::BITS_PER_BYTE; @@ -714,7 +714,7 @@ namespace VegaISA 3. byte > sel && signExt: we're sign extending and this byte is one of the bytes we need to sign extend */ - origBits_thisByte = bits(origDstVal, high_bit, low_bit); + origBits_thisByte = bits(origDstVal, VegaISA::MSB_PER_BYTE, 0); currBits_thisByte = bits(currDstVal, high_bit, low_bit); newBits = ((byte == sel) ? origBits_thisByte : ((preserve) ? currBits_thisByte : @@ -739,7 +739,7 @@ namespace VegaISA 3. word > (sel & 1) && signExt: we're sign extending and this word is one of the words we need to sign extend */ - origBits_thisWord = bits(origDstVal, high_bit, low_bit); + origBits_thisWord = bits(origDstVal, VegaISA::MSB_PER_WORD, 0); currBits_thisWord = bits(currDstVal, high_bit, low_bit); newBits = ((word == (sel & 0x1)) ? origBits_thisWord : ((preserve) ? currBits_thisWord :