From c54132bdd9e459ac05eecd5b951fdf0e93bb631a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl?= <33020671+saul44203@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:51:26 +0100 Subject: [PATCH] arch-riscv: fix reg dep autoref on vslide with vcpy micro (#1782) Vector slide instructions can have the same register group as source and destination. Because we are pinning the destination, this will cause a self-reference in the dependency graph. The solution is to use the `vcpy` micro-op. This way we use the `vtmp` register group as the source and pin the destination without issues. --- src/arch/riscv/isa/formats/vector_arith.isa | 2 +- src/arch/riscv/isa/templates/vector_arith.isa | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa index 3f10691cbc..dc0b902e43 100644 --- a/src/arch/riscv/isa/formats/vector_arith.isa +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -1475,7 +1475,7 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" - src2_reg_id = "vecRegClass[_machInst.vs2 + vs2Idx]" + src2_reg_id = "vecRegClass[VecMemInternalReg0 + vs2Idx]" src1_ireg_id = "intRegClass[_machInst.rs1]" src1_freg_id = "floatRegClass[_machInst.rs1]" diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index f11581094b..11c6d36e1e 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -2309,6 +2309,10 @@ template } for (uint32_t i = 0; i < ceil((float) this->vl/micro_vlmax); i++) { + microop = new VCpyVsMicroInst(machInst, i, machInst.vs2, elen, vlen); + microop->setFlag(IsDelayedCommit); + this->microops.push_back(microop); + microop = new VPinVdMicroInst(machInst, i, i+1, elen, vlen, true); microop->setFlag(IsDelayedCommit); 
this->microops.push_back(microop); @@ -2354,6 +2358,10 @@ template } for (uint32_t i = 0; i < ceil((float) this->vl / micro_vlmax); i++) { + microop = new VCpyVsMicroInst(machInst, i, machInst.vs2, elen, vlen); + microop->setFlag(IsDelayedCommit); + this->microops.push_back(microop); + microop = new VPinVdMicroInst(machInst, i, num_microops-i, elen, vlen, false); microop->setFlag(IsDelayedCommit);