From 8dde32d2dca1eb7bf9618cf2f1d5bfa62cf5585b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl?= <33020671+saul44203@users.noreply.github.com> Date: Thu, 11 Jul 2024 07:08:49 +0200 Subject: [PATCH] arch-riscv: fix initialization for some vector reduction insts (#1340) Vector reduce float (widening and non-widening) and integer (widening) instructions initialize the reduce loop operation with the first element of the destination register (i.e. `Vd[0]`). Since all reductions per spec seem to be `Vd[0] = Vs1[0] + Vs2[*]` (where `+` is an arbitrary binary op and `*` indicates all active elements) gem5 will calculate this incorrectly if `Vd[0]` and/or `Vs1[0]` are non-neutral for the operation (the latter case being because it's not taken into account at all). To solve this we just have to initialize the reduction loop to `Vs1[0]` (the non-widening integer reduction already does this). --- src/arch/riscv/isa/templates/vector_arith.isa | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 31d2c26147..33e383b8bf 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -1826,7 +1826,7 @@ Fault auto reduce_loop = [&, this](const auto& f, const auto* _, const auto* vs2) { - vu tmp_val = Vd[0]; + vu tmp_val = Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx; @@ -1876,7 +1876,7 @@ Fault auto reduce_loop = [&, this](const auto& f, const auto* _, const auto* vs2) { - vwu tmp_val = Vd[0]; + vwu tmp_val = Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx; @@ -2230,7 +2230,7 @@ Fault auto reduce_loop = [&, this](const auto& f, const auto* _, const auto* vs2) { - vwu tmp_val = Vd[0]; + vwu tmp_val = Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i 
+ vtype_VLMAX(vtype, vlen, true) * this->microIdx;