From 605ec6899e6b2983e4ec9ea8e257447b19eb9a7a Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 29 Aug 2023 19:53:26 +0800 Subject: [PATCH 1/3] arch-riscv: Move VMem implementation from header to source Move the VMem implementation from header_output to decoder_output and exec_output respectively. Change-Id: I699e197f37f22a59ecb9f92a64b5e296d2e9f5fa --- src/arch/riscv/isa/formats/vector_mem.isa | 28 +- src/arch/riscv/isa/includes.isa | 2 - src/arch/riscv/isa/templates/vector_mem.isa | 322 ++++++++++++-------- 3 files changed, 216 insertions(+), 136 deletions(-) diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa index da53d80d0a..3b3309797c 100644 --- a/src/arch/riscv/isa/formats/vector_mem.isa +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -34,6 +34,14 @@ def setVlen(): def setVlenb(): return "uint32_t vlenb = VlenbBits;\n" +def declareVMemTemplate(class_name): + return f''' + template class {class_name}; + template class {class_name}; + template class {class_name}; + template class {class_name}; + ''' + def VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, base_class, postacc_code='', declare_template_base=VMemMacroDeclare, @@ -47,22 +55,20 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, iop = InstObjParams(name, Name, base_class, {'ea_code': ea_code, 'memacc_code': memacc_code, - 'postacc_code': postacc_code }, + 'postacc_code': postacc_code, + 'declare_vmem_template': declareVMemTemplate(Name)}, inst_flags) constructTemplate = eval(exec_template_base + 'Constructor') header_output = declare_template_base.subst(iop) - decoder_output = '' - if declare_template_base is not VMemTemplateMacroDeclare: - decoder_output += constructTemplate.subst(iop) - else: - header_output += constructTemplate.subst(iop) + decoder_output = constructTemplate.subst(iop) decode_block = decode_template.subst(iop) exec_output = '' if not is_macroop: return (header_output, decoder_output, decode_block, 
exec_output) + micro_class_name = exec_template_base + 'MicroInst' microiop = InstObjParams(name + '_micro', Name + 'Micro', exec_template_base + 'MicroInst', @@ -70,7 +76,8 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, 'memacc_code': memacc_code, 'postacc_code': postacc_code, 'set_vlenb': setVlenb(), - 'set_vlen': setVlen()}, + 'set_vlen': setVlen(), + 'declare_vmem_template': declareVMemTemplate(Name + 'Micro')}, inst_flags) if mem_flags: @@ -79,17 +86,16 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, microiop.constructor += s microDeclTemplate = eval(exec_template_base + 'Micro' + 'Declare') + microConsTemplate = eval(exec_template_base + 'Micro' + 'Constructor') microExecTemplate = eval(exec_template_base + 'Micro' + 'Execute') microInitTemplate = eval(exec_template_base + 'Micro' + 'InitiateAcc') microCompTemplate = eval(exec_template_base + 'Micro' + 'CompleteAcc') header_output = microDeclTemplate.subst(microiop) + header_output + decoder_output = microConsTemplate.subst(microiop) + decoder_output micro_exec_output = (microExecTemplate.subst(microiop) + microInitTemplate.subst(microiop) + microCompTemplate.subst(microiop)) - if declare_template_base is not VMemTemplateMacroDeclare: - exec_output += micro_exec_output - else: - header_output += micro_exec_output + exec_output += micro_exec_output return (header_output, decoder_output, decode_block, exec_output) diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index 76f2388faf..b37e62bca8 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -46,8 +46,6 @@ output header {{ #include #include -#include "arch/generic/memhelpers.hh" -#include "arch/riscv/decoder.hh" #include "arch/riscv/insts/amo.hh" #include "arch/riscv/insts/bs.hh" #include "arch/riscv/insts/compressed.hh" diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index 8cbab044ec..84cee9af73 100644 --- 
a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -96,22 +96,8 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, - uint8_t _microIdx, uint32_t _vlen) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx, _vlen) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); - _numTypedDestRegs[VecRegClass]++; - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]); - if (!_machInst.vm) { - setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); - } - } + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, uint32_t _vlen); Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; @@ -123,6 +109,27 @@ public: }}; +def template VleMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s( + "%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } +} + +}}; + def template VleMicroExecute {{ Fault @@ -293,21 +300,7 @@ private: RegId destRegIdxArr[0]; public: %(class_name)s(ExtMachInst _machInst, - uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx, _vlen) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - 
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); - if (!_machInst.vm) { - setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); - } - this->flags[IsVector] = true; - this->flags[IsStore] = true; - } + uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen); Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; @@ -318,6 +311,27 @@ public: }}; +def template VseMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s( + "%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsVector] = true; + this->flags[IsStore] = true; +} + +}}; + def template VseMicroExecute {{ Fault @@ -518,18 +532,8 @@ private: RegId srcRegIdxArr[2]; public: %(class_name)s(ExtMachInst _machInst, - uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) - : %(base_class)s("%(mnemonic)s", _machInst, - %(op_class)s, _microVl, _microIdx, _vlen) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); - this->flags[IsVector] = true; - this->flags[IsStore] = true; - } + uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen); + Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; Fault completeAcc(PacketPtr, ExecContext *, @@ -539,6 +543,24 @@ public: }}; +def template VsWholeMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + 
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s( + "%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + this->flags[IsVector] = true; + this->flags[IsStore] = true; +} + +}}; + def template VsWholeMicroExecute {{ Fault @@ -644,19 +666,8 @@ private: RegId srcRegIdxArr[1]; public: %(class_name)s(ExtMachInst _machInst, - uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) - : %(base_class)s("%(mnemonic)s_micro", _machInst, - %(op_class)s, _microVl, _microIdx, _vlen) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); - _numTypedDestRegs[VecRegClass]++; - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - this->flags[IsVector] = true; - this->flags[IsLoad] = true; - } + uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen); + Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; Fault completeAcc(PacketPtr, ExecContext *, @@ -666,6 +677,25 @@ public: }}; +def template VlWholeMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, _microVl, + _microIdx, _vlen) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + this->flags[IsVector] = true; + this->flags[IsLoad] = true; +} + +}}; + def template VlWholeMicroExecute {{ Fault @@ -803,24 +833,7 @@ private: RegId destRegIdxArr[1]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t 
_microIdx, - uint32_t _microVl) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _regIdx, _microIdx, _microVl) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]); - _numTypedDestRegs[VecRegClass]++; - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); - // We treat agnostic as undistrubed - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]); - if (!_machInst.vm) { - setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); - } - this->flags[IsLoad] = true; - } + uint32_t _microVl); Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; @@ -831,6 +844,31 @@ public: }}; +def template VlStrideMicroConstructor {{ + +%(class_name)s::%(class_name)s( + ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint32_t _microVl) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; +} + +}}; + def template VlStrideMicroExecute {{ Fault @@ -1019,21 +1057,7 @@ private: RegId destRegIdxArr[0]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint32_t _microVl) - : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, - _regIdx, _microIdx, _microVl) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setSrcRegIdx(_numSrcRegs++, 
intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]); - if (!_machInst.vm) { - setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); - } - this->flags[IsStore] = true; - } + uint32_t _microVl); Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; @@ -1044,6 +1068,28 @@ public: }}; +def template VsStrideMicroConstructor {{ + +%(class_name)s::%(class_name)s( + ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint32_t _microVl) + : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; +} + +}}; + def template VsStrideMicroExecute {{ Fault @@ -1185,6 +1231,8 @@ template this->flags[IsVector] = true; } +%(declare_vmem_template)s; + }}; def template VlIndexMicroDeclare {{ @@ -1199,24 +1247,7 @@ private: public: %(class_name)s(ExtMachInst _machInst, uint8_t _vdRegIdx, uint8_t _vdElemIdx, - uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); - _numTypedDestRegs[VecRegClass]++; - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); - // We treat agnostic as undistrubed - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); - if (!_machInst.vm) { - setSrcRegIdx(_numSrcRegs++, 
vecRegClass[0]); - } - this->flags[IsLoad] = true; - } + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx); Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; @@ -1227,6 +1258,34 @@ public: }}; +def template VlIndexMicroConstructor {{ + +template +%(class_name)s::%(class_name)s( + ExtMachInst _machInst,uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; +} + +%(declare_vmem_template)s; + +}}; + def template VlIndexMicroExecute {{ template @@ -1364,6 +1423,8 @@ Fault return NoFault; } +%(declare_vmem_template)s; + }}; def template VsIndexConstructor {{ @@ -1410,6 +1471,8 @@ template this->flags[IsVector] = true; } +%(declare_vmem_template)s; + }}; def template VsIndexMicroDeclare {{ @@ -1424,22 +1487,7 @@ private: public: %(class_name)s(ExtMachInst _machInst, uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, - uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx) - { - %(set_reg_idx_arr)s; - _numSrcRegs = 0; - _numDestRegs = 0; - setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); - // We treat agnostic as undistrubed - setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + 
_vs3RegIdx]); - if (!_machInst.vm) { - setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); - } - this->flags[IsStore] = true; - } + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx); Fault execute(ExecContext *, trace::InstRecord *) const override; Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; @@ -1450,6 +1498,32 @@ public: }}; +def template VsIndexMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; +} + +%(declare_vmem_template)s; + +}}; + def template VsIndexMicroExecute {{ template @@ -1548,6 +1622,8 @@ Fault return NoFault; } +%(declare_vmem_template)s; + }}; def template VMemBaseDecodeBlock {{ From 62af678d5c51b1fa15ec40c5d4ff4e36971e34f1 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 24 Oct 2023 14:47:04 +0800 Subject: [PATCH 2/3] arch-riscv: Move VArith implementations from header to source Move VArith implementations from header_output to decoder_output and exec_output respectively Change-Id: I406eedbd9dd625aa939ec0e20aa29ef4f18ba79c --- src/arch/riscv/isa/formats/vector_arith.isa | 564 +++++++++++------- src/arch/riscv/isa/templates/vector_arith.isa | 145 ++++- 2 files changed, 487 insertions(+), 222 deletions(-) diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa index 0d5055ea8f..1ddf323f04 100644 --- a/src/arch/riscv/isa/formats/vector_arith.isa +++
b/src/arch/riscv/isa/formats/vector_arith.isa @@ -121,6 +121,28 @@ let {{ softfloat_exceptionFlags = 0; xc->setMiscReg(MISCREG_FFLAGS, FFLAGS); ''' + + def declareVArithTemplate( + class_name, type_name='uint', min_size=8, max_size=64): + sizes = [8, 16, 32, 64] + code = '' + for size in sizes: + if size < min_size or size > max_size: + continue + code += f'template class {class_name}<{type_name}{size}_t>;\n' + return code + + def declareGatherTemplate(class_name, index_type): + sizes = [8, 16, 32, 64] + code = '' + for size in sizes: + if index_type == 'elem_type': + idx_type = f'uint{size}_t' + else: + idx_type = index_type + code += ('template class' + f' {class_name};\n') + return code }}; @@ -132,8 +154,14 @@ def format VectorIntFormat(code, category, *flags) {{ macroop_class_name = 'VectorVMUNARY0MacroInst' microp_class_name = 'VectorVMUNARY0MicroInst' - iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + macroop_class_name, + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) v0_required = inst_name not in ["vmv"] mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim']) @@ -192,25 +220,30 @@ def format VectorIntFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb' : set_vlenb, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': declareVArithTemplate(Name + "Micro")}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntMicroDeclare.subst(microiop) + \ + VectorIntMacroDeclare.subst(iop) + decoder_output = \ VectorIntMicroConstructor.subst(microiop) + \ - VectorIntMicroExecute.subst(microiop) + \ - VectorIntMacroDeclare.subst(iop) + \ VectorIntMacroConstructor.subst(iop) - + exec_output = VectorIntMicroExecute.subst(microiop) decode_block = VectorIntDecodeBlock.subst(iop) }}; def format VectorIntExtFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) ext_div = int(inst_suffix[-1]) @@ -245,24 +278,31 @@ def format VectorIntExtFormat(code, category, *flags) {{ 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), - 'ext_div': ext_div}, + 'ext_div': ext_div, + 'declare_varith_template': declareVArithTemplate(Name + "Micro")}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntExtMicroDeclare.subst(microiop) + \ + VectorIntExtMacroDeclare.subst(iop) + decoder_output = \ VectorIntMicroConstructor.subst(microiop) + \ - VectorIntExtMicroExecute.subst(microiop) + \ - VectorIntExtMacroDeclare.subst(iop) + \ VectorIntMacroConstructor.subst(iop) - + exec_output = \ + VectorIntExtMicroExecute.subst(microiop) + \ + VectorIntExtMacroExecute.subst(iop) decode_block = VectorIntDecodeBlock.subst(iop) }}; def format VectorIntWideningFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, max_size=32)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) v0_required = True mask_cond = v0_required @@ -308,6 +348,7 @@ def format VectorIntWideningFormat(code, category, *flags) {{ set_vlenb = setVlenb(); set_vlen = setVlen(); + varith_micro_declare = declareVArithTemplate(Name + "Micro", max_size=32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -317,24 +358,29 @@ def format VectorIntWideningFormat(code, category, *flags) {{ 'set_vlenb': set_vlenb, 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + decoder_output = \ VectorIntWideningMicroConstructor.subst(microiop) + \ - VectorIntWideningMicroExecute.subst(microiop) + \ - VectorIntWideningMacroDeclare.subst(iop) + \ VectorIntWideningMacroConstructor.subst(iop) - + exec_output = VectorIntWideningMicroExecute.subst(microiop) decode_block = VectorIntWideningDecodeBlock.subst(iop) }}; def format VectorIntNarrowingFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, max_size=32)}, + flags + ) mask_cond = True need_elem_idx = True @@ -368,6 +414,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ set_vlenb = setVlenb(); set_vlen = setVlen(); + varith_micro_declare = declareVArithTemplate(Name + "Micro", max_size=32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -378,18 +425,17 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), - }, + 'declare_varith_template': varith_micro_declare + }, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + decoder_output = \ VectorIntWideningMicroConstructor.subst(microiop) + \ - VectorIntNarrowingMicroExecute.subst(microiop) + \ - VectorIntWideningMacroDeclare.subst(iop) + \ VectorIntWideningMacroConstructor.subst(iop) - + exec_output = VectorIntNarrowingMicroExecute.subst(microiop) decode_block = VectorIntWideningDecodeBlock.subst(iop) }}; @@ -397,7 +443,8 @@ def format VectorIntMaskFormat(code, category, *flags) {{ iop = InstObjParams(name, Name, 'VectorArithMacroInst', - {'code': code}, + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name)}, flags) inst_name, inst_suffix = name.split("_", maxsplit=1) v0_required = not (inst_name in ["vmadc", "vmsbc"] \ @@ -448,17 +495,17 @@ def format VectorIntMaskFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': declareVArithTemplate(Name + "Micro")}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntMaskMicroDeclare.subst(microiop) + \ + VectorIntMaskMacroDeclare.subst(iop) + decoder_output = \ VectorIntMaskMicroConstructor.subst(microiop) + \ - VectorIntMaskMicroExecute.subst(microiop) + \ - VectorIntMaskMacroDeclare.subst(iop) + \ VectorIntMaskMacroConstructor.subst(iop) + exec_output = VectorIntMaskMicroExecute.subst(microiop) decode_block = VectorIntDecodeBlock.subst(iop) }}; @@ -470,7 +517,8 @@ def format VectorGatherFormat(code, category, *flags) {{ idx_type = "elem_type" iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'idx_type': idx_type, - 'code': code}, + 'code': code, + 'declare_varith_template': declareGatherTemplate(Name, idx_type)}, flags) old_vd_idx = 2 dest_reg_id = "vecRegClass[_machInst.vd + vd_idx]" @@ -502,6 +550,7 @@ def format VectorGatherFormat(code, category, *flags) {{ set_vlenb = setVlenb(); set_vlen = setVlen(); + varith_micro_declare = declareGatherTemplate(Name + "Micro", idx_type) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -512,25 +561,30 @@ def format VectorGatherFormat(code, category, *flags) {{ 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), - 'idx_type': idx_type}, + 'idx_type': idx_type, + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorGatherMicroDeclare.subst(microiop) + \ + VectorGatherMacroDeclare.subst(iop) + decoder_output = \ VectorGatherMicroConstructor.subst(microiop) + \ - VectorGatherMicroExecute.subst(microiop) + \ - VectorGatherMacroDeclare.subst(iop) + \ VectorGatherMacroConstructor.subst(iop) - + exec_output = VectorGatherMicroExecute.subst(microiop) decode_block = VectorGatherDecodeBlock.subst(iop) }}; def format VectorFloatFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, 'float', 32)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) v0_required = inst_name not in ["vfmv"] mask_cond = v0_required and (inst_suffix not in ['vvm', 'vfm']) @@ -569,6 +623,7 @@ def format VectorFloatFormat(code, category, *flags) {{ set_vlenb = setVlenb(); + varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -577,24 +632,29 @@ def format VectorFloatFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorFloatMicroDeclare.subst(microiop) + \ + VectorFloatMacroDeclare.subst(iop) + decoder_output = \ VectorFloatMicroConstructor.subst(microiop) + \ - VectorFloatMicroExecute.subst(microiop) + \ - VectorFloatMacroDeclare.subst(iop) + \ VectorFloatMacroConstructor.subst(iop) - + exec_output = VectorFloatMicroExecute.subst(microiop) decode_block = VectorFloatDecodeBlock.subst(iop) }}; def format VectorFloatCvtFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, 'float', 32)}, + flags + ) old_vd_idx = 1 dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" @@ -616,6 +676,7 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ set_vlenb = setVlenb(); + varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -624,24 +685,30 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + decoder_output = \ VectorFloatMicroConstructor.subst(microiop) + \ - VectorFloatMicroExecute.subst(microiop) + \ - VectorFloatCvtMacroDeclare.subst(iop) + \ VectorFloatMacroConstructor.subst(iop) - + exec_output = VectorFloatMicroExecute.subst(microiop) decode_block = VectorFloatDecodeBlock.subst(iop) }}; def format VectorFloatWideningFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': varith_macro_declare}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) v0_required = True mask_cond = v0_required @@ -688,6 +755,8 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ set_vlenb = setVlenb(); set_vlen = setVlen(); + varith_micro_declare = declareVArithTemplate( + Name + "Micro", 'float', 32, 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -697,24 +766,30 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ 'set_vlenb': set_vlenb, 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + decoder_output = \ VectorIntWideningMicroConstructor.subst(microiop) + \ - VectorFloatWideningMicroExecute.subst(microiop) + \ - VectorIntWideningMacroDeclare.subst(iop) + \ VectorIntWideningMacroConstructor.subst(iop) - + exec_output = VectorFloatWideningMicroExecute.subst(microiop) decode_block = VectorFloatWideningDecodeBlock.subst(iop) }}; def format VectorFloatWideningCvtFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': varith_macro_declare}, + flags + ) old_vd_idx = 1 dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" @@ -737,6 +812,8 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{ set_vlenb = setVlenb(); set_vlen = setVlen(); + varith_micro_declare = declareVArithTemplate( + Name + "Micro", 'float', 32, 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -746,24 +823,30 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{ 'set_vlenb': set_vlenb, 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + decoder_output = \ VectorFloatMicroConstructor.subst(microiop) + \ - VectorFloatWideningMicroExecute.subst(microiop) + \ - VectorFloatCvtMacroDeclare.subst(iop) + \ VectorIntWideningMacroConstructor.subst(iop) - + exec_output = VectorFloatWideningMicroExecute.subst(microiop) decode_block = VectorFloatWideningDecodeBlock.subst(iop) }}; def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': varith_macro_declare}, + flags + ) old_vd_idx = 1 dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" @@ -787,6 +870,8 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ set_vlenb = setVlenb(); set_vlen = setVlen(); + varith_micro_declare = declareVArithTemplate( + Name + "Micro", 'float', 32, 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -796,18 +881,17 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ 'set_vlenb': set_vlenb, 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + decoder_output = \ VectorFloatMicroConstructor.subst(microiop) + \ - VectorFloatNarrowingMicroExecute.subst(microiop) + \ - VectorFloatCvtMacroDeclare.subst(iop) + \ VectorIntWideningMacroConstructor.subst(iop) - + exec_output = VectorFloatNarrowingMicroExecute.subst(microiop) decode_block = VectorFloatWideningDecodeBlock.subst(iop) }}; @@ -815,8 +899,10 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ iop = InstObjParams(name, Name, 'VectorArithMacroInst', - {'code': code}, - flags) + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, 'float', 32)}, + flags + ) dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" src1_reg_id = "" if category == "OPFVV": @@ -841,6 +927,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ code = loopWrapper(code) code = fflags_wrapper(code) + varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -849,17 +936,17 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorFloatMaskMicroDeclare.subst(microiop) + \ + VectorFloatMaskMacroDeclare.subst(iop) + decoder_output = \ VectorFloatMaskMicroConstructor.subst(microiop) + \ - VectorFloatMaskMicroExecute.subst(microiop) + \ - VectorFloatMaskMacroDeclare.subst(iop) + \ VectorFloatMaskMacroConstructor.subst(iop) + exec_output = VectorFloatMaskMicroExecute.subst(microiop) decode_block = VectorFloatDecodeBlock.subst(iop) }}; @@ -884,8 +971,14 @@ def format VMvWholeFormat(code, category, *flags) {{ }}; def format ViotaFormat(code, category, *flags){{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" @@ -912,17 +1005,17 @@ def format ViotaFormat(code, category, *flags){{ 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, - 'copy_old_vd': copyOldVd(1)}, + 'copy_old_vd': copyOldVd(1), + 'declare_varith_template': declareVArithTemplate(Name + "Micro")}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
+ header_output = \ ViotaMicroDeclare.subst(microiop) + \ + ViotaMacroDeclare.subst(iop) + decoder_output = \ ViotaMicroConstructor.subst(microiop) + \ - ViotaMicroExecute.subst(microiop)+\ - ViotaMacroDeclare.subst(iop) + \ ViotaMacroConstructor.subst(iop) - + exec_output = ViotaMicroExecute.subst(microiop) decode_block = VectorIntDecodeBlock.subst(iop) }}; @@ -951,15 +1044,14 @@ def format Vector1Vs1VdMaskFormat(code, category, *flags){{ 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, - 'copy_old_vd': copyOldVd(1)}, + 'copy_old_vd': copyOldVd(1), + 'declare_varith_template': declareVArithTemplate(Name, 'uint', 8, 8), + }, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. - header_output = \ - Vector1Vs1RdMaskDeclare.subst(iop) + \ - Vector1Vs1VdMaskConstructor.subst(iop) + \ - Vector1Vs1VdMaskExecute.subst(iop) + header_output = Vector1Vs1RdMaskDeclare.subst(iop) + decoder_output = Vector1Vs1VdMaskConstructor.subst(iop) + exec_output = Vector1Vs1VdMaskExecute.subst(iop) decode_block = VectorMaskDecodeBlock.subst(iop) }}; @@ -972,15 +1064,14 @@ def format Vector1Vs1RdMaskFormat(code, category, *flags){{ 'VectorNonSplitInst', {'code': code, 'vm_decl_rd': vm_decl_rd, - 'set_vm_idx': set_vm_idx}, + 'set_vm_idx': set_vm_idx, + 'declare_varith_template': declareVArithTemplate(Name, 'uint', 8, 8) + }, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
- header_output = \ - Vector1Vs1RdMaskDeclare.subst(iop) + \ - Vector1Vs1RdMaskConstructor.subst(iop) + \ - Vector1Vs1RdMaskExecute.subst(iop) + header_output = Vector1Vs1RdMaskDeclare.subst(iop) + decoder_output = Vector1Vs1RdMaskConstructor.subst(iop) + exec_output = Vector1Vs1RdMaskExecute.subst(iop) decode_block = VectorMaskDecodeBlock.subst(iop) }}; @@ -993,31 +1084,36 @@ def format VectorNonSplitFormat(code, category, *flags) {{ if inst_name == "vfmv" : code = fflags_wrapper(code) - iop = InstObjParams(name, - Name, - 'VectorNonSplitInst', - {'code': code, - 'vm_decl_rd': vm_decl_rd, - 'set_vm_idx': set_vm_idx}, - flags) - - if inst_name == "vfmv" : - execute_block = VectorFloatNonSplitExecute.subst(iop) + varith_template = declareVArithTemplate(Name, 'float', 32) + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'declare_varith_template': varith_template}, + flags) + header_output = VectorNonSplitDeclare.subst(iop) + decoder_output = VectorNonSplitConstructor.subst(iop) + exec_output = VectorFloatNonSplitExecute.subst(iop) decode_block = VectorFloatNonSplitDecodeBlock.subst(iop) elif inst_name == "vmv" : - execute_block = VectorIntNonSplitExecute.subst(iop) + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'declare_varith_template': declareVArithTemplate(Name)}, + flags) + header_output = VectorNonSplitDeclare.subst(iop) + decoder_output = VectorNonSplitConstructor.subst(iop) + exec_output = VectorIntNonSplitExecute.subst(iop) decode_block = VectorIntNonSplitDecodeBlock.subst(iop) else : error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
- header_output = \ - VectorNonSplitDeclare.subst(iop) + \ - VectorNonSplitConstructor.subst(iop) + \ - execute_block - }}; def format VectorMaskFormat(code, category, *flags) {{ @@ -1053,21 +1149,26 @@ def format VectorMaskFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb': set_vlenb, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': declareVArithTemplate(Name, 'uint', 8, 8) + }, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. - header_output = \ - VectorMaskDeclare.subst(iop) + \ - VectorMaskConstructor.subst(iop) + \ - VectorMaskExecute.subst(iop) + header_output = VectorMaskDeclare.subst(iop) + decoder_output = VectorMaskConstructor.subst(iop) + exec_output = VectorMaskExecute.subst(iop) decode_block = VectorMaskDecodeBlock.subst(iop) }}; def format VectorReduceIntFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd]" src1_reg_id = "vecRegClass[_machInst.vs1]" @@ -1098,23 +1199,29 @@ def format VectorReduceIntFormat(code, category, *flags) {{ 'set_vlen' : set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': declareVArithTemplate(Name + "Micro")}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + decoder_output = \ VectorReduceMicroConstructor.subst(microiop) + \ - VectorReduceIntMicroExecute.subst(microiop) + \ - VectorReduceMacroDeclare.subst(iop) + \ VectorReduceMacroConstructor.subst(iop) + exec_output = VectorReduceIntMicroExecute.subst(microiop) decode_block = VectorIntDecodeBlock.subst(iop) }}; def format VectorReduceFloatFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, 'float', 32)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd]" src1_reg_id = "vecRegClass[_machInst.vs1]" @@ -1138,6 +1245,7 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ code = fflags_wrapper(code) + varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -1148,23 +1256,30 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + decoder_output = \ VectorReduceMicroConstructor.subst(microiop) + \ - VectorReduceFloatMicroExecute.subst(microiop) + \ - VectorReduceMacroDeclare.subst(iop) + \ VectorReduceMacroConstructor.subst(iop) + exec_output = VectorReduceFloatMicroExecute.subst(microiop) decode_block = VectorFloatDecodeBlock.subst(iop) }}; def format VectorReduceFloatWideningFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': varith_macro_declare}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd]" src1_reg_id = "vecRegClass[_machInst.vs1]" @@ -1186,6 +1301,9 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ using ewt = typename double_width::type; using vwu = decltype(ewt::v); ''' + + varith_micro_declare = declareVArithTemplate( + Name + "Micro", 'float', 32, 32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -1196,23 +1314,29 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + decoder_output = \ VectorReduceMicroConstructor.subst(microiop) + \ - VectorReduceFloatWideningMicroExecute.subst(microiop) + \ - VectorReduceMacroDeclare.subst(iop) + \ VectorReduceMacroConstructor.subst(iop) + exec_output = VectorReduceFloatWideningMicroExecute.subst(microiop) decode_block = VectorFloatWideningDecodeBlock.subst(iop) }}; def format VectorIntVxsatFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) old_vd_idx = 2 dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" @@ -1251,24 +1375,29 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'declare_varith_template': declareVArithTemplate(Name + "Micro")}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorIntVxsatMicroDeclare.subst(microiop) + \ + VectorIntVxsatMacroDeclare.subst(iop) + decoder_output = \ VectorIntVxsatMicroConstructor.subst(microiop) + \ - VectorIntMicroExecute.subst(microiop) + \ - VectorIntVxsatMacroDeclare.subst(iop) + \ VectorIntVxsatMacroConstructor.subst(iop) - + exec_output = VectorIntMicroExecute.subst(microiop) decode_block = VectorIntDecodeBlock.subst(iop) }}; def format VectorReduceIntWideningFormat(code, category, *flags) {{ - iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, - flags) + iop = InstObjParams( + name, + Name, + 'VectorArithMacroInst', + {'code': code, + 'declare_varith_template': declareVArithTemplate(Name, max_size=32)}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd]" src1_reg_id = "vecRegClass[_machInst.vs1]" @@ -1284,6 +1413,8 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() set_vlenb = setVlenb() set_vlen = setVlen() + + varith_micro_declare = declareVArithTemplate(Name + "Micro", max_size=32) microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', @@ -1293,17 +1424,17 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ 'set_vlenb': set_vlenb, 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. 
header_output = \ VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + decoder_output = \ VectorReduceMicroConstructor.subst(microiop) + \ - VectorReduceIntWideningMicroExecute.subst(microiop) + \ - VectorReduceMacroDeclare.subst(iop) + \ VectorReduceMacroConstructor.subst(iop) + exec_output = VectorReduceIntWideningMicroExecute.subst(microiop) decode_block = VectorIntWideningDecodeBlock.subst(iop) }}; @@ -1315,8 +1446,20 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, microop_class_name = 'VectorSlideMicroInst' # Make sure flags are in lists (convert to lists if not). flags = makeList(flags) - iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, - flags) + + if decode_template is VectorIntDecodeBlock: + varith_macro_declare = declareVArithTemplate(Name) + elif decode_template is VectorFloatDecodeBlock: + varith_macro_declare = declareVArithTemplate(Name, 'float', 32) + + iop = InstObjParams( + name, + Name, + macroop_class_name, + {'code': code, + 'declare_varith_template': varith_macro_declare}, + flags + ) inst_name, inst_suffix = name.split("_", maxsplit=1) dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" src2_reg_id = "vecRegClass[_machInst.vs2 + vs2Idx]" @@ -1345,6 +1488,13 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, set_src_reg_idx += setSrcVm() set_vlenb = setVlenb() set_vlen = setVlen() + + if decode_template is VectorIntDecodeBlock: + varith_micro_declare = declareVArithTemplate(Name + "Micro") + elif decode_template is VectorFloatDecodeBlock: + varith_micro_declare = declareVArithTemplate( + Name + "Micro", 'float', 32) + microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, @@ -1354,52 +1504,54 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, 'set_vlenb': set_vlenb, 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 
'declare_varith_template': varith_micro_declare}, flags) - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. - # Because of the use of templates, we had to put all parts in header to - # keep the compiler happy. + header_output = \ VectorSlideMicroDeclare.subst(microiop) + \ + VectorSlideMacroDeclare.subst(iop) + decoder_output = \ VectorSlideMicroConstructor.subst(microiop) + \ - micro_execute_template.subst(microiop) + \ - VectorSlideMacroDeclare.subst(iop) + \ macro_construtor.subst(iop) - + exec_output = micro_execute_template.subst(microiop) decode_block = decode_template.subst(iop) - return (header_output, decode_block) + return (header_output, decoder_output, decode_block, exec_output) }}; def format VectorSlideUpFormat(code, category, *flags) {{ - (header_output, decode_block) = VectorSlideBase(name, Name, category, code, - flags, - macro_construtor = VectorSlideUpMacroConstructor, - decode_template = VectorIntDecodeBlock, - micro_execute_template = VectorSlideMicroExecute) + (header_output, decoder_output, decode_block, exec_output) = \ + VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) }}; def format VectorSlideDownFormat(code, category, *flags) {{ - (header_output, decode_block) = VectorSlideBase(name, Name, category, code, - flags, - macro_construtor = VectorSlideDownMacroConstructor, - decode_template = VectorIntDecodeBlock, - micro_execute_template = VectorSlideMicroExecute) + (header_output, decoder_output, decode_block, exec_output) = \ + VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) }}; def format VectorFloatSlideUpFormat(code, category, *flags) {{ - (header_output, decode_block) = 
VectorSlideBase(name, Name, category, code, - flags, - macro_construtor = VectorSlideUpMacroConstructor, - decode_template = VectorFloatDecodeBlock, - micro_execute_template = VectorFloatSlideMicroExecute) + (header_output, decoder_output, decode_block, exec_output) = \ + VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) }}; def format VectorFloatSlideDownFormat(code, category, *flags) {{ - (header_output, decode_block) = VectorSlideBase(name, Name, category, code, - flags, - macro_construtor = VectorSlideDownMacroConstructor, - decode_template = VectorFloatDecodeBlock, - micro_execute_template = VectorFloatSlideMicroExecute) + (header_output, decoder_output, decode_block, exec_output) = \ + VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) }}; diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 306b1c53f1..3a528f1198 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -107,6 +107,9 @@ template this->microops.front()->setFirstMicroop(); this->microops.back()->setLastMicroop(); } + +%(declare_varith_template)s; + }}; def template VectorIntMicroDeclare {{ @@ -145,6 +148,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorIntMicroExecute {{ @@ -182,6 +187,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorIntExtMacroDeclare {{ @@ -193,14 +200,7 @@ private: public: %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, - const loader::SymbolTable *symtab) const override - { - std::stringstream ss; - ss << mnemonic << ' ' << 
registerName(destRegIdx(0)) << ", " - << registerName(srcRegIdx(0)); - if (machInst.vm == 0) ss << ", v0.t"; - return ss.str(); - } + const loader::SymbolTable *symtab) const override; }; }}; @@ -219,14 +219,7 @@ public: uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, - const loader::SymbolTable *symtab) const override - { - std::stringstream ss; - ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << registerName(srcRegIdx(0)); - if (machInst.vm == 0) ss << ", v0.t"; - return ss.str(); - } + const loader::SymbolTable *symtab) const override; }; }}; @@ -303,6 +296,38 @@ Fault return NoFault; } +template +std::string +%(class_name)s::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +%(declare_varith_template)s; + +}}; + +def template VectorIntExtMacroExecute {{ + +template +std::string +%(class_name)s::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +%(declare_varith_template)s; + }}; def template VectorIntDecodeBlock {{ @@ -365,6 +390,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorIntWideningMicroDeclare {{ @@ -402,6 +429,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorIntWideningMicroExecute {{ @@ -447,6 +476,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorIntNarrowingMicroExecute {{ @@ -493,6 +524,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorIntWideningDecodeBlock {{ @@ 
-546,6 +579,9 @@ template this->microops.front()->setFirstMicroop(); this->microops.back()->setLastMicroop(); } + +%(declare_varith_template)s; + }}; def template VectorFloatMicroDeclare {{ @@ -582,6 +618,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorFloatMicroExecute {{ @@ -620,6 +658,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorFloatDecodeBlock {{ @@ -725,6 +765,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorFloatNarrowingMicroExecute {{ @@ -772,6 +814,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorFloatWideningDecodeBlock {{ @@ -826,6 +870,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template ViotaMicroDeclare {{ @@ -865,6 +911,8 @@ template setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2]); } +%(declare_varith_template)s; + }}; def template ViotaMicroExecute {{ @@ -899,6 +947,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; @@ -915,6 +965,8 @@ template %(set_vm_idx)s; } +%(declare_varith_template)s; + }}; def template Vector1Vs1VdMaskExecute {{ @@ -948,6 +1000,8 @@ Fault return NoFault; }; +%(declare_varith_template)s; + }}; def template Vector1Vs1RdMaskDeclare {{ @@ -978,6 +1032,8 @@ template %(set_vm_idx)s; } +%(declare_varith_template)s; + }}; def template Vector1Vs1RdMaskExecute {{ @@ -1010,6 +1066,8 @@ Fault return NoFault; }; +%(declare_varith_template)s; + }}; def template VectorIntMaskMacroDeclare {{ @@ -1057,6 +1115,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorIntMaskMicroDeclare {{ @@ -1095,6 +1155,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorIntMaskMicroExecute {{ @@ -1133,6 +1195,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorFloatMaskMacroDeclare {{ @@ -1180,6 +1244,8 @@ 
template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorFloatMaskMicroDeclare {{ @@ -1217,6 +1283,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorFloatMaskMicroExecute {{ @@ -1255,6 +1323,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VMvWholeMacroDeclare {{ @@ -1381,6 +1451,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorMaskExecute {{ @@ -1415,6 +1487,8 @@ Fault return NoFault; }; +%(declare_varith_template)s; + }}; def template VectorMaskDecodeBlock {{ @@ -1449,6 +1523,8 @@ template %(set_vm_idx)s; } +%(declare_varith_template)s; + }}; def template VectorIntNonSplitExecute {{ @@ -1481,6 +1557,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorFloatNonSplitExecute {{ @@ -1513,6 +1591,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorFloatNonSplitDecodeBlock {{ @@ -1578,6 +1658,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorReduceMicroDeclare {{ @@ -1615,6 +1697,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorReduceIntMicroExecute {{ @@ -1664,6 +1748,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorReduceFloatMicroExecute {{ @@ -1715,6 +1801,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorReduceFloatWideningMicroExecute {{ @@ -1765,6 +1853,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorGatherMacroDeclare {{ @@ -1824,6 +1914,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorGatherMicroDeclare {{ @@ -1873,6 +1965,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorGatherMicroExecute {{ @@ -1930,6 +2024,8 @@ Fault return 
NoFault; } +%(declare_varith_template)s; + }}; def template VectorGatherDecodeBlock {{ @@ -2003,6 +2099,9 @@ template this->microops.front()->setFirstMicroop(); this->microops.back()->setLastMicroop(); } + +%(declare_varith_template)s; + }}; def template VectorIntVxsatMicroDeclare {{ @@ -2041,6 +2140,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorReduceIntWideningMicroExecute {{ @@ -2094,6 +2195,8 @@ Fault return NoFault; } +%(declare_varith_template)s; + }}; def template VectorSlideMacroDeclare {{ @@ -2142,6 +2245,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorSlideDownMacroConstructor {{ @@ -2177,6 +2282,8 @@ template this->microops.back()->setLastMicroop(); } +%(declare_varith_template)s; + }}; def template VectorSlideMicroDeclare {{ @@ -2215,6 +2322,8 @@ template %(set_src_reg_idx)s; } +%(declare_varith_template)s; + }}; def template VectorSlideMicroExecute {{ @@ -2254,6 +2363,8 @@ Fault return NoFault; }; +%(declare_varith_template)s; + }}; def template VectorFloatSlideMicroExecute {{ @@ -2293,4 +2404,6 @@ Fault return NoFault; }; +%(declare_varith_template)s; + }}; From e561f3b6f149f45f291c8ad7b310f157bfcba154 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 24 Oct 2023 15:29:40 +0800 Subject: [PATCH 3/3] arch-riscv: Move insts/vector from header to source Move the implementation of the following classes - VMaskMergeMicroInst - VxsatMicroInst Change-Id: I42ec45681064a0f599c3b2313c2125da7cfc849b --- src/arch/riscv/insts/vector.cc | 93 ++++++++++++++++++ src/arch/riscv/insts/vector.hh | 94 ++----------------- src/arch/riscv/isa/templates/vector_arith.isa | 8 +- 3 files changed, 106 insertions(+), 89 deletions(-) diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index c99e806e9b..7f17bb055e 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -32,6 +32,9 @@ #include #include 
"arch/riscv/insts/static_inst.hh" +#include "arch/riscv/isa.hh" +#include "arch/riscv/regs/misc.hh" +#include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" #include "cpu/static_inst.hh" @@ -408,5 +411,95 @@ VMvWholeMicroInst::generateDisassembly(Addr pc, return ss.str(); } +VMaskMergeMicroInst::VMaskMergeMicroInst(ExtMachInst extMachInst, + uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen, size_t _elemSize) + : VectorArithMicroInst("vmask_mv_micro", extMachInst, + VectorIntegerArithOp, 0, 0), + vlen(_vlen), + elemSize(_elemSize) +{ + setRegIdxArrays( + reinterpret_cast( + &std::remove_pointer_t::srcRegIdxArr), + reinterpret_cast( + &std::remove_pointer_t::destRegIdxArr)); + + _numSrcRegs = 0; + _numDestRegs = 0; + + setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]); + _numTypedDestRegs[VecRegClass]++; + for (uint8_t i=0; i<_numSrcs; i++) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]); + } +} + +Fault +VMaskMergeMicroInst::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + PCStateBase *pc_ptr = xc->tcBase()->pcState().clone(); + auto Vd = tmp_d0.as(); + uint32_t vlenb = pc_ptr->as().vlenb(); + const uint32_t elems_per_vreg = vlenb / elemSize; + size_t bit_cnt = elems_per_vreg; + vreg_t tmp_s; + xc->getRegOperand(this, 0, &tmp_s); + auto s = tmp_s.as(); + // cp the first result and tail + memcpy(Vd, s, vlenb); + for (uint8_t i = 1; i < this->_numSrcRegs; i++) { + xc->getRegOperand(this, i, &tmp_s); + s = tmp_s.as(); + if (elems_per_vreg < 8) { + const uint32_t m = (1 << elems_per_vreg) - 1; + const uint32_t mask = m << (i * elems_per_vreg % 8); + // clr & ext bits + Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; + Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; + bit_cnt += elems_per_vreg; + } else { + const uint32_t byte_offset = elems_per_vreg / 8; + memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); + } + } + if (traceData) + 
traceData->setData(vecRegClass, &tmp_d0); + return NoFault; +} + +std::string +VMaskMergeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + for (uint8_t i = 0; i < this->_numSrcRegs; i++) { + ss << ", " << registerName(srcRegIdx(i)); + } + unsigned vlenb = vlen >> 3; + ss << ", offset:" << vlenb / elemSize; + return ss.str(); +} + +Fault +VxsatMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const +{ + xc->setMiscReg(MISCREG_VXSAT, *vxsat); + auto vcsr = xc->readMiscReg(MISCREG_VCSR); + xc->setMiscReg(MISCREG_VCSR, ((vcsr&~1)|*vxsat)); + return NoFault; +} + +std::string +VxsatMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? "0x1" : "0x0"); + return ss.str(); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index c986c99c72..4127060e4a 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -34,7 +34,6 @@ #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/isa.hh" #include "arch/riscv/regs/misc.hh" -#include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" #include "cpu/exec_context.hh" #include "cpu/static_inst.hh" @@ -539,7 +538,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst Addr pc, const loader::SymbolTable *symtab) const override; }; -template + class VMaskMergeMicroInst : public VectorArithMicroInst { private: @@ -548,75 +547,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst public: uint32_t vlen; + size_t elemSize; VMaskMergeMicroInst(ExtMachInst extMachInst, - uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen) - : VectorArithMicroInst("vmask_mv_micro", extMachInst, - VectorIntegerArithOp, 0, 0), - vlen(_vlen) - { - setRegIdxArrays( - reinterpret_cast( - 
&std::remove_pointer_t<decltype(this)>::srcRegIdxArr), - reinterpret_cast<RegIdArrayPtr>( - &std::remove_pointer_t<decltype(this)>::destRegIdxArr)); - - _numSrcRegs = 0; - _numDestRegs = 0; - - setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]); - _numTypedDestRegs[VecRegClass]++; - for (uint8_t i=0; i<_numSrcs; i++) { - setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]); - } - } - - Fault - execute(ExecContext* xc, trace::InstRecord* traceData) const override - { - vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); - PCStateBase *pc_ptr = xc->tcBase()->pcState().clone(); - auto Vd = tmp_d0.as<uint8_t>(); - uint32_t vlenb = pc_ptr->as<PCState>().vlenb(); - const uint32_t elems_per_vreg = vlenb / sizeof(ElemType); - size_t bit_cnt = elems_per_vreg; - vreg_t tmp_s; - xc->getRegOperand(this, 0, &tmp_s); - auto s = tmp_s.as<uint8_t>(); - // cp the first result and tail - memcpy(Vd, s, vlenb); - for (uint8_t i = 1; i < this->_numSrcRegs; i++) { - xc->getRegOperand(this, i, &tmp_s); - s = tmp_s.as<uint8_t>(); - if (elems_per_vreg < 8) { - const uint32_t m = (1 << elems_per_vreg) - 1; - const uint32_t mask = m << (i * elems_per_vreg % 8); - // clr & ext bits - Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; - Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; - bit_cnt += elems_per_vreg; - } else { - const uint32_t byte_offset = elems_per_vreg / 8; - memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); - } - } - if (traceData) - traceData->setData(vecRegClass, &tmp_d0); - return NoFault; - } - - std::string - generateDisassembly(Addr pc, const loader::SymbolTable *symtab) - const override - { - std::stringstream ss; - ss << mnemonic << ' ' << registerName(destRegIdx(0)); - for (uint8_t i = 0; i < this->_numSrcRegs; i++) { - ss << ", " << registerName(srcRegIdx(i)); - } - unsigned vlenb = vlen >> 3; - ss << ", offset:" << vlenb / sizeof(ElemType); - return ss.str(); - } + uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen, size_t _elemSize); + Fault execute(ExecContext *, trace::InstRecord *) const override; + std::string
generateDisassembly(Addr, + const loader::SymbolTable *) const override; }; class VxsatMicroInst : public VectorArithMicroInst @@ -630,21 +566,9 @@ class VxsatMicroInst : public VectorArithMicroInst { vxsat = Vxsat; } - Fault - execute(ExecContext* xc, trace::InstRecord* traceData) const override - { - xc->setMiscReg(MISCREG_VXSAT,*vxsat); - auto vcsr = xc->readMiscReg(MISCREG_VCSR); - xc->setMiscReg(MISCREG_VCSR, ((vcsr&~1)|*vxsat)); - return NoFault; - } - std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) - const override - { - std::stringstream ss; - ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? "0x1" : "0x0"); - return ss.str(); - } + Fault execute(ExecContext *, trace::InstRecord *) const override; + std::string generateDisassembly(Addr, const loader::SymbolTable *) + const override; }; } // namespace RiscvISA diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 3a528f1198..364639a716 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -1107,8 +1107,8 @@ template<typename ElemType> this->microops.push_back(microop); micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } - microop = new VMaskMergeMicroInst<ElemType>(_machInst, _machInst.vd, - this->microops.size(), _vlen); + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size(), _vlen, sizeof(ElemType)); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1236,8 +1236,8 @@ template<typename ElemType> this->microops.push_back(microop); micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } - microop = new VMaskMergeMicroInst<ElemType>(_machInst, _machInst.vd, - this->microops.size(), _vlen); + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size(), _vlen, sizeof(ElemType)); this->microops.push_back(microop); this->microops.front()->setFirstMicroop();