From 35b6961dcb907407dcf28820eeed71038b1c2fa8 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 28 Apr 2021 12:45:52 +0100 Subject: [PATCH] arch-arm: Fix SMM* instructions A recent clean up patch [1] introduced some bugs in the instruction implementation of: SMMLA, SMMLAR, SMMLS, SMMLSR, SMMUL, SMMULR [1]: https://gem5-review.googlesource.com/c/public/gem5/+/42387 Change-Id: I459fe99bd2711e00027e9ef0c7796af7a374a509 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/44945 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/arch/arm/isa/insts/mult.isa | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/arch/arm/isa/insts/mult.isa b/src/arch/arm/isa/insts/mult.isa index 528aa669ed..7f37099d60 100644 --- a/src/arch/arm/isa/insts/mult.isa +++ b/src/arch/arm/isa/insts/mult.isa @@ -158,43 +158,43 @@ let {{ ''', "overflow") buildMult4Inst("smlal", ''' resTemp = PInt2.sw * PInt3.sw + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''', "llbit") buildMult4InstUnCc("smlalbb", ''' resTemp = PInt2.sh0 * PInt3.sh0 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smlalbt", ''' resTemp = PInt2.sh0 * PInt3.sh1 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smlaltb", ''' resTemp = PInt2.sh1 * PInt3.sh0 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smlaltt", ''' resTemp = PInt2.sh1 * PInt3.sh1 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smlald", ''' resTemp = PInt2.sh1 * PInt3.sh1 + PInt2.sh0 * PInt3.sh0 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smlaldx", ''' resTemp = PInt2.sh1 * PInt3.sh0 + PInt2.sh0 * PInt3.sh1 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') @@ -220,33 +220,37 @@ let {{ ''', "overflow") buildMult4InstUnCc("smlsld", ''' resTemp = PInt2.sh0 * PInt3.sh0 - PInt2.sh1 * PInt3.sh1 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smlsldx", ''' resTemp = PInt2.sh0 * PInt3.sh1 - PInt2.sh1 * PInt3.sh0 + - (int64_t)((PInt1.uw << 32) | PInt0.uw); + (int64_t)(((uint64_t)PInt1.uw << 32) | PInt0.uw); PInt0 = (uint32_t)resTemp; PInt1 = (uint32_t)(resTemp >> 32); ''') buildMult4InstUnCc("smmla", ''' - PInt0 = PInt3.sw + ((PInt1.sw * PInt2.sw) >> 32); + PInt0 = (((int64_t)PInt3.sw << 32) + (PInt1.sw * PInt2.sw)) >> 32; ''') buildMult4InstUnCc("smmlar", ''' - PInt0 = PInt3.sw + ((PInt1.sw * PInt2.sw + (0x1ULL << 31)) >> 32); + PInt0 = (((int64_t)PInt3.sw << 32) + + (PInt1.sw * PInt2.sw) + + (0x1ULL << 31)) >> 32; ''') buildMult4InstUnCc("smmls", ''' - PInt0 = PInt3.sw - ((PInt1.sw * PInt2.sw) >> 32); + PInt0 = (((int64_t)PInt3.sw << 32) - (PInt1.sw * PInt2.sw)) >> 32; ''') buildMult4InstUnCc("smmlsr", ''' - PInt0 = PInt3.sw - ((PInt1.sw * PInt2.sw + (0x1ULL << 31)) >> 32); + PInt0 = (((int64_t)PInt3.sw << 32) - + (PInt1.sw * PInt2.sw) + + (0x1ULL << 31)) >> 32; ''') buildMult3InstUnCc("smmul", ''' - PInt0 = (PInt1.sw * PInt2.sw) >> 32; + PInt0 = ((int64_t)PInt1.sw * PInt2.sw) >> 32; ''') buildMult3InstUnCc("smmulr", ''' - PInt0 = (PInt1.sw * PInt2.sw + (0x1ULL << 31)) >> 32; + PInt0 = (((int64_t)PInt1.sw * PInt2.sw) + (0x1ULL << 31)) >> 32; ''') buildMult3InstCc("smuad", ''' PInt0 = resTemp = PInt1.sh0 * PInt2.sh0 + PInt1.sh1 * PInt2.sh1;