arch-arm: Fix SMM* instructions

A recent cleanup patch [1] introduced bugs in the implementation of the
following instructions:

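SMMLA, SMMLAR, SMMLS, SMMLSR, SMMUL, SMMULR

The SMLAL* and SMLSLD* instructions are also touched: PInt1.uw is now cast
to uint64_t before being shifted left by 32 when the 64-bit accumulator
value is assembled.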

[1]: https://gem5-review.googlesource.com/c/public/gem5/+/42387

Change-Id: I459fe99bd2711e00027e9ef0c7796af7a374a509
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/44945
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Giacomo Travaglini
2021-04-28 12:45:52 +01:00
parent 924fb60aad
commit 35b6961dcb

View File

@@ -158,43 +158,43 @@ let {{
''', "overflow")
buildMult4Inst("smlal", '''
resTemp = PInt2.sw * PInt3.sw +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''', "llbit")
buildMult4InstUnCc("smlalbb", '''
resTemp = PInt2.sh0 * PInt3.sh0 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smlalbt", '''
resTemp = PInt2.sh0 * PInt3.sh1 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smlaltb", '''
resTemp = PInt2.sh1 * PInt3.sh0 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smlaltt", '''
resTemp = PInt2.sh1 * PInt3.sh1 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smlald", '''
resTemp = PInt2.sh1 * PInt3.sh1 + PInt2.sh0 * PInt3.sh0 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smlaldx", '''
resTemp = PInt2.sh1 * PInt3.sh0 + PInt2.sh0 * PInt3.sh1 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
@@ -220,33 +220,37 @@ let {{
''', "overflow")
buildMult4InstUnCc("smlsld", '''
resTemp = PInt2.sh0 * PInt3.sh0 - PInt2.sh1 * PInt3.sh1 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smlsldx", '''
resTemp = PInt2.sh0 * PInt3.sh1 - PInt2.sh1 * PInt3.sh0 +
(int64_t)((PInt1.uw << 32) | PInt0.uw);
(int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw);
PInt0 = (uint32_t)resTemp;
PInt1 = (uint32_t)(resTemp >> 32);
''')
buildMult4InstUnCc("smmla", '''
PInt0 = PInt3.sw + ((PInt1.sw * PInt2.sw) >> 32);
PInt0 = (((int64_t)PInt3.sw << 32) + (PInt1.sw * PInt2.sw)) >> 32;
''')
buildMult4InstUnCc("smmlar", '''
PInt0 = PInt3.sw + ((PInt1.sw * PInt2.sw + (0x1ULL << 31)) >> 32);
PInt0 = (((int64_t)PInt3.sw << 32) +
(PInt1.sw * PInt2.sw) +
(0x1ULL << 31)) >> 32;
''')
buildMult4InstUnCc("smmls", '''
PInt0 = PInt3.sw - ((PInt1.sw * PInt2.sw) >> 32);
PInt0 = (((int64_t)PInt3.sw << 32) - (PInt1.sw * PInt2.sw)) >> 32;
''')
buildMult4InstUnCc("smmlsr", '''
PInt0 = PInt3.sw - ((PInt1.sw * PInt2.sw + (0x1ULL << 31)) >> 32);
PInt0 = (((int64_t)PInt3.sw << 32) -
(PInt1.sw * PInt2.sw) +
(0x1ULL << 31)) >> 32;
''')
buildMult3InstUnCc("smmul", '''
PInt0 = (PInt1.sw * PInt2.sw) >> 32;
PInt0 = ((int64_t)PInt1.sw * PInt2.sw) >> 32;
''')
buildMult3InstUnCc("smmulr", '''
PInt0 = (PInt1.sw * PInt2.sw + (0x1ULL << 31)) >> 32;
PInt0 = (((int64_t)PInt1.sw * PInt2.sw) + (0x1ULL << 31)) >> 32;
''')
buildMult3InstCc("smuad", '''
PInt0 = resTemp = PInt1.sh0 * PInt2.sh0 + PInt1.sh1 * PInt2.sh1;