arch-riscv: Add dynamic VLEN and ELEN configuration support to RVV path (#171)

At this moment, VLEN and ELEN RVV parameters are set as constants that
need to be modified at compile time if you want to experiment with
different values. With this patch, I want to set a first point to
discuss how to configure these parameters dynamically.

Also, I have modified some data types that were causing incorrect
behaviour in particular instructions when VLEN is set to a sufficiently
large value within the range allowed by the specification.
This commit is contained in:
Jason Lowe-Power
2023-10-19 07:41:39 -07:00
committed by GitHub
19 changed files with 802 additions and 301 deletions

View File

@@ -124,6 +124,24 @@ readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
return fault;
}
/// Read from memory in atomic mode.
/// Read from memory in atomic mode, for accesses whose size is only known
/// at run time (e.g. vector loads with a configurable VLEN). Zeroes the
/// destination first so bytes beyond `size` are well-defined, then
/// converts the result from guest to host byte order.
template <ByteOrder Order, class XC, class MemT>
Fault
readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
        size_t size, Request::Flags flags)
{
    memset(&mem, 0, size);
    // byte_enable must be rebuilt on every call: `size` is a run-time
    // value, so a function-local static would be sized once (on the first
    // call) and silently reused for all later calls with different sizes.
    const std::vector<bool> byte_enable(size, true);
    Fault fault = readMemAtomic(xc, addr, (uint8_t*)&mem,
        size, flags, byte_enable);
    if (fault == NoFault) {
        mem = gtoh(mem, Order);
        if (traceData)
            traceData->setData(mem);
    }
    return fault;
}
template <class XC, class MemT>
Fault
readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
@@ -133,6 +151,16 @@ readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
xc, traceData, addr, mem, flags);
}
/// Little-endian convenience wrapper around the sized readMemAtomic.
template <class XC, class MemT>
Fault
readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
        size_t size, Request::Flags flags)
{
    Fault fault = readMemAtomic<ByteOrder::little>(
        xc, traceData, addr, mem, size, flags);
    return fault;
}
template <class XC, class MemT>
Fault
readMemAtomicBE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
@@ -165,6 +193,20 @@ writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
sizeof(MemT), flags, res, byte_enable);
}
/// Initiate a timing-mode write for accesses whose size is only known at
/// run time. Records the (pre-conversion) value in the trace, converts to
/// guest byte order, and issues the request with all bytes enabled.
template <ByteOrder Order, class XC, class MemT>
Fault
writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
        size_t size, Request::Flags flags, uint64_t *res)
{
    if (traceData) {
        traceData->setData(mem);
    }
    mem = htog(mem, Order);
    // byte_enable must be rebuilt on every call: `size` is a run-time
    // value, so a function-local static would keep the first call's size
    // forever and mis-enable bytes on later calls with different sizes.
    const std::vector<bool> byte_enable(size, true);
    return writeMemTiming(xc, (uint8_t*)&mem, addr,
        size, flags, res, byte_enable);
}
template <class XC, class MemT>
Fault
writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
@@ -174,6 +216,15 @@ writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
xc, traceData, mem, addr, flags, res);
}
/// Little-endian convenience wrapper around the sized writeMemTiming.
template <class XC, class MemT>
Fault
writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
        size_t size, Request::Flags flags, uint64_t *res)
{
    Fault fault = writeMemTiming<ByteOrder::little>(
        xc, traceData, mem, addr, size, flags, res);
    return fault;
}
template <class XC, class MemT>
Fault
writeMemTimingBE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
@@ -214,6 +265,27 @@ writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem,
return fault;
}
/// Write to memory in atomic mode, for accesses whose size is only known
/// at run time. Converts the value to guest byte order before issuing the
/// request, and converts any returned value in `res` back to host order.
template <ByteOrder Order, class XC, class MemT>
Fault
writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem,
        Addr addr, size_t size, Request::Flags flags, uint64_t *res)
{
    if (traceData) {
        traceData->setData(mem);
    }
    MemT host_mem = htog(mem, Order);
    // byte_enable must be rebuilt on every call: `size` is a run-time
    // value, so a function-local static would be sized once (first call)
    // and silently reused for all later calls with different sizes.
    const std::vector<bool> byte_enable(size, true);
    Fault fault = writeMemAtomic(xc, (uint8_t*)&host_mem,
        addr, size, flags, res, byte_enable);
    if (fault == NoFault && res != nullptr) {
        // Swap requests return the old memory value through res with the
        // full MemT width; other requests treat res as a plain uint64_t.
        if (flags & Request::MEM_SWAP || flags & Request::MEM_SWAP_COND)
            *(MemT *)res = gtoh(*(MemT *)res, Order);
        else
            *res = gtoh(*res, Order);
    }
    return fault;
}
template <class XC, class MemT>
Fault
writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
@@ -223,6 +295,15 @@ writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
xc, traceData, mem, addr, flags, res);
}
/// Little-endian convenience wrapper around the sized writeMemAtomic.
/// NOTE(review): unlike every sibling *LE wrapper, this overload declares
/// `size` before `addr`. The forwarding below is correct for this
/// signature, but the inconsistent parameter order is easy for callers to
/// get wrong (both are integral types, so a swap compiles silently);
/// consider aligning it with the other wrappers in a follow-up.
template <class XC, class MemT>
Fault
writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
        size_t size, Addr addr, Request::Flags flags, uint64_t *res)
{
    Fault fault = writeMemAtomic<ByteOrder::little>(
        xc, traceData, mem, addr, size, flags, res);
    return fault;
}
template <class XC, class MemT>
Fault
writeMemAtomicBE(XC *xc, trace::InstRecord *traceData, const MemT &mem,

View File

@@ -38,11 +38,37 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import Enum
from m5.params import Enum, UInt32
from m5.params import Param
from m5.objects.BaseISA import BaseISA
class RiscvVectorLength(UInt32):
    """VLEN: length of each vector register in bits.

    See Ch. 2 of the RISC-V vector spec. Must be a power of two in
    [8, 65536].
    """
    min = 8
    max = 65536

    def _check(self):
        super()._check()
        # The base-class check already enforces value >= min > 0, so the
        # classic power-of-two test below cannot be fooled by zero.
        is_pow2 = (self.value & (self.value - 1)) == 0
        if not is_pow2:
            raise TypeError("VLEN is not a power of 2: %d" % self.value)
class RiscvVectorElementLength(UInt32):
    """ELEN: maximum vector element width in bits.

    See Ch. 2 of the RISC-V vector spec. Must be a power of two in
    [8, 64].
    """
    min = 8
    max = 64

    def _check(self):
        super()._check()
        # The base-class check already enforces value >= min > 0, so the
        # classic power-of-two test below cannot be fooled by zero.
        is_pow2 = (self.value & (self.value - 1)) == 0
        if not is_pow2:
            raise TypeError("ELEN is not a power of 2: %d" % self.value)
class RiscvType(Enum):
    """Supported RISC-V base ISA widths: 32-bit (RV32) or 64-bit (RV64)."""
    vals = ["RV32", "RV64"]
@@ -58,3 +84,13 @@ class RiscvISA(BaseISA):
riscv_type = Param.RiscvType("RV64", "RV32 or RV64")
enable_rvv = Param.Bool(True, "Enable vector extension")
vlen = Param.RiscvVectorLength(
256,
"Length of each vector register in bits. \
VLEN in Ch. 2 of RISC-V vector spec",
)
elen = Param.RiscvVectorElementLength(
64,
"Length of each vector element in bits. \
ELEN in Ch. 2 of RISC-V vector spec",
)

View File

@@ -41,6 +41,9 @@ namespace RiscvISA
// Construct the RISC-V decoder and cache the configured vector parameters
// (VLEN/ELEN in bits) from the owning ISA object, so per-instruction
// decoding does not have to query the ISA each time.
Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst)
{
    ISA *isa = dynamic_cast<ISA*>(p.isa);
    // NOTE(review): dynamic_cast yields nullptr when p.isa is not a
    // RiscvISA::ISA, and the calls below would then dereference a null
    // pointer. Presumably the params system always wires a RISC-V ISA
    // here — confirm, or add an explicit check before use.
    vlen = isa->getVecLenInBits();
    elen = isa->getVecElemLenInBits();
    reset();
}

View File

@@ -60,6 +60,9 @@ class Decoder : public InstDecoder
ExtMachInst emi;
uint32_t machInst;
uint32_t vlen;
uint32_t elen;
virtual StaticInstPtr decodeInst(ExtMachInst mach_inst);
/// Decode a machine instruction.

View File

@@ -215,8 +215,9 @@ std::string VleMicroInst::generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const
{
std::stringstream ss;
unsigned vlenb = vlen >> 3;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
<< VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", "
<< vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", "
<< registerName(srcRegIdx(1));
if (!machInst.vm) ss << ", v0.t";
return ss.str();
@@ -226,8 +227,9 @@ std::string VlWholeMicroInst::generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const
{
std::stringstream ss;
unsigned vlenb = vlen >> 3;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
<< VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')';
<< vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')';
return ss.str();
}
@@ -235,8 +237,9 @@ std::string VseMicroInst::generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const
{
std::stringstream ss;
unsigned vlenb = vlen >> 3;
ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", "
<< VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')';
<< vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')';
if (!machInst.vm) ss << ", v0.t";
return ss.str();
}
@@ -245,8 +248,9 @@ std::string VsWholeMicroInst::generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const
{
std::stringstream ss;
unsigned vlenb = vlen >> 3;
ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", "
<< VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')';
<< vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')';
return ss.str();
}

View File

@@ -32,6 +32,7 @@
#include <string>
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/isa.hh"
#include "arch/riscv/regs/misc.hh"
#include "arch/riscv/regs/vector.hh"
#include "arch/riscv/utility.hh"
@@ -68,12 +69,15 @@ class VConfOp : public RiscvStaticInst
uint64_t zimm10;
uint64_t zimm11;
uint64_t uimm;
VConfOp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass)
uint32_t elen;
VConfOp(const char *mnem, ExtMachInst _extMachInst,
uint32_t _elen, OpClass __opClass)
: RiscvStaticInst(mnem, _extMachInst, __opClass),
bit30(_extMachInst.bit30), bit31(_extMachInst.bit31),
zimm10(_extMachInst.zimm_vsetivli),
zimm11(_extMachInst.zimm_vsetvli),
uimm(_extMachInst.uimm_vsetivli)
uimm(_extMachInst.uimm_vsetivli),
elen(_elen)
{
this->flags[IsVector] = true;
}
@@ -116,11 +120,14 @@ class VectorMacroInst : public RiscvMacroInst
protected:
uint32_t vl;
uint8_t vtype;
uint32_t vlen;
VectorMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
OpClass __opClass, uint32_t _vlen = 256)
: RiscvMacroInst(mnem, _machInst, __opClass),
vl(_machInst.vl),
vtype(_machInst.vtype8)
vtype(_machInst.vtype8),
vlen(_vlen)
{
this->flags[IsVector] = true;
}
@@ -128,13 +135,15 @@ class VectorMacroInst : public RiscvMacroInst
class VectorMicroInst : public RiscvMicroInst
{
protected:
uint8_t microVl;
protected:
uint32_t vlen;
uint32_t microVl;
uint8_t microIdx;
uint8_t vtype;
VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen = 256)
: RiscvMicroInst(mnem, _machInst, __opClass),
vlen(_vlen),
microVl(_microVl),
microIdx(_microIdx),
vtype(_machInst.vtype8)
@@ -169,7 +178,7 @@ class VectorArithMicroInst : public VectorMicroInst
{
protected:
VectorArithMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl,
OpClass __opClass, uint32_t _microVl,
uint8_t _microIdx)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
{}
@@ -182,12 +191,11 @@ class VectorArithMacroInst : public VectorMacroInst
{
protected:
VectorArithMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen = 256)
: VectorMacroInst(mnem, _machInst, __opClass, _vlen)
{
this->flags[IsVector] = true;
}
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
@@ -196,7 +204,7 @@ class VectorVMUNARY0MicroInst : public VectorMicroInst
{
protected:
VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl,
OpClass __opClass, uint32_t _microVl,
uint8_t _microIdx)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
{}
@@ -209,8 +217,8 @@ class VectorVMUNARY0MacroInst : public VectorMacroInst
{
protected:
VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMacroInst(mnem, _machInst, __opClass, _vlen)
{
this->flags[IsVector] = true;
}
@@ -223,8 +231,8 @@ class VectorSlideMacroInst : public VectorMacroInst
{
protected:
VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen = 256)
: VectorMacroInst(mnem, _machInst, __opClass, _vlen)
{
this->flags[IsVector] = true;
}
@@ -239,7 +247,7 @@ class VectorSlideMicroInst : public VectorMicroInst
uint8_t vdIdx;
uint8_t vs2Idx;
VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl,
OpClass __opClass, uint32_t _microVl,
uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
, vdIdx(_vdIdx), vs2Idx(_vs2Idx)
@@ -256,7 +264,7 @@ class VectorMemMicroInst : public VectorMicroInst
Request::Flags memAccessFlags;
VectorMemMicroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl, uint8_t _microIdx,
OpClass __opClass, uint32_t _microVl, uint8_t _microIdx,
uint32_t _offset)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
, offset(_offset)
@@ -268,8 +276,8 @@ class VectorMemMacroInst : public VectorMacroInst
{
protected:
VectorMemMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen = 256)
: VectorMacroInst(mnem, _machInst, __opClass, _vlen)
{}
};
@@ -277,8 +285,8 @@ class VleMacroInst : public VectorMemMacroInst
{
protected:
VleMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -289,8 +297,8 @@ class VseMacroInst : public VectorMemMacroInst
{
protected:
VseMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -302,9 +310,10 @@ class VleMicroInst : public VectorMicroInst
protected:
Request::Flags memAccessFlags;
VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
uint8_t _microVl, uint8_t _microIdx)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
VleMicroInst(const char *mnem, ExtMachInst _machInst,OpClass __opClass,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, _vlen)
{
this->flags[IsLoad] = true;
}
@@ -319,8 +328,9 @@ class VseMicroInst : public VectorMicroInst
Request::Flags memAccessFlags;
VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
uint8_t _microVl, uint8_t _microIdx)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, _vlen)
{
this->flags[IsStore] = true;
}
@@ -333,8 +343,8 @@ class VlWholeMacroInst : public VectorMemMacroInst
{
protected:
VlWholeMacroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -347,8 +357,10 @@ class VlWholeMicroInst : public VectorMicroInst
Request::Flags memAccessFlags;
VlWholeMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl, uint8_t _microIdx)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
OpClass __opClass, uint32_t _microVl, uint8_t _microIdx,
uint32_t _vlen)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, _vlen)
{}
std::string generateDisassembly(
@@ -359,8 +371,8 @@ class VsWholeMacroInst : public VectorMemMacroInst
{
protected:
VsWholeMacroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -373,8 +385,10 @@ class VsWholeMicroInst : public VectorMicroInst
Request::Flags memAccessFlags;
VsWholeMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl, uint8_t _microIdx)
: VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx)
OpClass __opClass, uint32_t _microVl,
uint8_t _microIdx, uint32_t _vlen)
: VectorMicroInst(mnem, _machInst, __opClass , _microVl,
_microIdx, _vlen)
{}
std::string generateDisassembly(
@@ -385,8 +399,8 @@ class VlStrideMacroInst : public VectorMemMacroInst
{
protected:
VlStrideMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -399,7 +413,7 @@ class VlStrideMicroInst : public VectorMemMicroInst
uint8_t regIdx;
VlStrideMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _regIdx,
uint8_t _microIdx, uint8_t _microVl)
uint8_t _microIdx, uint32_t _microVl)
: VectorMemMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, 0)
, regIdx(_regIdx)
@@ -413,8 +427,8 @@ class VsStrideMacroInst : public VectorMemMacroInst
{
protected:
VsStrideMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -427,7 +441,7 @@ class VsStrideMicroInst : public VectorMemMicroInst
uint8_t regIdx;
VsStrideMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _regIdx,
uint8_t _microIdx, uint8_t _microVl)
uint8_t _microIdx, uint32_t _microVl)
: VectorMemMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, 0)
, regIdx(_regIdx)
@@ -441,8 +455,8 @@ class VlIndexMacroInst : public VectorMemMacroInst
{
protected:
VlIndexMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -473,8 +487,8 @@ class VsIndexMacroInst : public VectorMemMacroInst
{
protected:
VsIndexMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass)
: VectorMemMacroInst(mnem, _machInst, __opClass)
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
@@ -516,7 +530,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst
{
protected:
VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint8_t _microVl,
OpClass __opClass, uint32_t _microVl,
uint8_t _microIdx)
: VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx)
{}
@@ -533,10 +547,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst
RegId destRegIdxArr[1];
public:
VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg,
uint8_t _numSrcs)
uint32_t vlen;
VMaskMergeMicroInst(ExtMachInst extMachInst,
uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen)
: VectorArithMicroInst("vmask_mv_micro", extMachInst,
VectorIntegerArithOp, 0, 0)
VectorIntegerArithOp, 0, 0),
vlen(_vlen)
{
setRegIdxArrays(
reinterpret_cast<RegIdArrayPtr>(
@@ -558,26 +574,28 @@ class VMaskMergeMicroInst : public VectorArithMicroInst
execute(ExecContext* xc, trace::InstRecord* traceData) const override
{
vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
PCStateBase *pc_ptr = xc->tcBase()->pcState().clone();
auto Vd = tmp_d0.as<uint8_t>();
constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType);
uint32_t vlenb = pc_ptr->as<PCState>().vlenb();
const uint32_t elems_per_vreg = vlenb / sizeof(ElemType);
size_t bit_cnt = elems_per_vreg;
vreg_t tmp_s;
xc->getRegOperand(this, 0, &tmp_s);
auto s = tmp_s.as<uint8_t>();
// cp the first result and tail
memcpy(Vd, s, VLENB);
memcpy(Vd, s, vlenb);
for (uint8_t i = 1; i < this->_numSrcRegs; i++) {
xc->getRegOperand(this, i, &tmp_s);
s = tmp_s.as<uint8_t>();
if constexpr (elems_per_vreg < 8) {
constexpr uint8_t m = (1 << elems_per_vreg) - 1;
const uint8_t mask = m << (i * elems_per_vreg % 8);
if (elems_per_vreg < 8) {
const uint32_t m = (1 << elems_per_vreg) - 1;
const uint32_t mask = m << (i * elems_per_vreg % 8);
// clr & ext bits
Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask;
Vd[bit_cnt/8] |= s[bit_cnt/8] & mask;
bit_cnt += elems_per_vreg;
} else {
constexpr uint8_t byte_offset = elems_per_vreg / 8;
const uint32_t byte_offset = elems_per_vreg / 8;
memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset);
}
}
@@ -595,7 +613,8 @@ class VMaskMergeMicroInst : public VectorArithMicroInst
for (uint8_t i = 0; i < this->_numSrcRegs; i++) {
ss << ", " << registerName(srcRegIdx(i));
}
ss << ", offset:" << VLENB / sizeof(ElemType);
unsigned vlenb = vlen >> 3;
ss << ", offset:" << vlenb / sizeof(ElemType);
return ss.str();
}
};

View File

@@ -36,6 +36,7 @@
#include <sstream>
#include "arch/riscv/faults.hh"
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/interrupts.hh"
#include "arch/riscv/mmu.hh"
#include "arch/riscv/pagetable.hh"
@@ -253,10 +254,9 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
} // anonymous namespace
ISA::ISA(const Params &p) :
BaseISA(p), _rvType(p.riscv_type), checkAlignment(p.check_alignment),
enableRvv(p.enable_rvv)
ISA::ISA(const Params &p) :BaseISA(p),
_rvType(p.riscv_type), checkAlignment(p.check_alignment),
enableRvv(p.enable_rvv),vlen(p.vlen),elen(p.elen)
{
_regClasses.push_back(&intRegClass);
_regClasses.push_back(&floatRegClass);
@@ -267,6 +267,14 @@ ISA::ISA(const Params &p) :
_regClasses.push_back(&ccRegClass);
_regClasses.push_back(&miscRegClass);
fatal_if( p.vlen < p.elen,
"VLEN should be greater or equal",
"than ELEN. Ch. 2RISC-V vector spec.");
inform("RVV enabled, VLEN = %d bits, ELEN = %d bits",
p.vlen, p.elen);
miscRegFile.resize(NUM_MISCREGS);
clear();
}

View File

@@ -84,6 +84,16 @@ class ISA : public BaseISA
const Addr INVALID_RESERVATION_ADDR = (Addr)-1;
std::unordered_map<int, Addr> load_reservation_addrs;
/** Length of each vector register in bits.
* VLEN in Ch. 2 of RISC-V vector spec
*/
unsigned vlen;
/** Length of each vector element in bits.
* ELEN in Ch. 2 of RISC-V vector spec
*/
unsigned elen;
public:
using Params = RiscvISAParams;
@@ -92,7 +102,8 @@ class ISA : public BaseISA
PCStateBase*
newPCState(Addr new_inst_addr=0) const override
{
return new PCState(new_inst_addr, _rvType, VLENB);
unsigned vlenb = vlen >> 3;
return new PCState(new_inst_addr, _rvType, vlenb);
}
public:
@@ -147,6 +158,10 @@ class ISA : public BaseISA
Addr& load_reservation_addr = load_reservation_addrs[cid];
load_reservation_addr = INVALID_RESERVATION_ADDR;
}
/** Methods for getting VLEN, VLENB and ELEN values */
unsigned getVecLenInBits() { return vlen; }
unsigned getVecLenInBytes() { return vlen >> 3; }
unsigned getVecElemLenInBits() { return elen; }
};
} // namespace RiscvISA

View File

@@ -3110,21 +3110,33 @@ decode QUADRANT default Unknown::unknown() {
0x12: decode VS1 {
format VectorIntExtFormat {
0x02: vzext_vf8({{
auto offset = (vlen / SEW) * index;
Vd_vu[i] = Vs2_vextu[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x03: vsext_vf8({{
auto offset = (vlen / SEW) * index;
Vd_vi[i] = Vs2_vext[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x04: vzext_vf4({{
auto offset = (vlen / SEW) * index;
Vd_vu[i] = Vs2_vextu[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x05: vsext_vf4({{
auto offset = (vlen / SEW) * index;
Vd_vi[i] = Vs2_vext[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x06: vzext_vf2({{
auto offset = (vlen / SEW) * index;
Vd_vu[i] = Vs2_vextu[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x07: vsext_vf2({{
auto offset = (vlen / SEW) * index;
Vd_vi[i] = Vs2_vext[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
}
@@ -3185,7 +3197,8 @@ decode QUADRANT default Unknown::unknown() {
auto Vs2bit = tmp_s2.as<vu>();
for (uint32_t i = 0; i < this->microVl; i++) {
uint32_t ei = i +
vtype_VLMAX(vtype, true) * this->microIdx;
vtype_VLMAX(vtype, vlen, true) *
this->microIdx;
bool vs2_lsb = elem_mask(Vs2bit, ei);
bool do_mask = elem_mask(v0, ei);
bool has_one = false;
@@ -3406,7 +3419,8 @@ decode QUADRANT default Unknown::unknown() {
}}, OPIVI, VectorMiscOp);
0x0e: VectorSlideUpFormat::vslideup_vi({{
const int offset = (int)(uint64_t)(SIMM5);
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vdIdx - vs2Idx;
const int offsetInVreg = offset - vregOffset * microVlmax;
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
@@ -3431,7 +3445,8 @@ decode QUADRANT default Unknown::unknown() {
}}, OPIVI, VectorMiscOp);
0x0f: VectorSlideDownFormat::vslidedown_vi({{
const int offset = (int)(uint64_t)(SIMM5);
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vs2Idx - vdIdx;
const int offsetInVreg = offset - vregOffset * microVlmax;
const int numVs2s = vtype_regs_per_group(vtype);
@@ -3662,7 +3677,8 @@ decode QUADRANT default Unknown::unknown() {
}
0x0e: VectorSlideUpFormat::vslideup_vx({{
const int offset = (int)Rs1_vu;
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vdIdx - vs2Idx;
const int offsetInVreg = offset - vregOffset * microVlmax;
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
@@ -3687,7 +3703,8 @@ decode QUADRANT default Unknown::unknown() {
}}, OPIVX, VectorMiscOp);
0x0f: VectorSlideDownFormat::vslidedown_vx({{
const int offset = (int)Rs1_vu;
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vs2Idx - vdIdx;
const int offsetInVreg = offset - vregOffset * microVlmax;
const int numVs2s = vtype_regs_per_group(vtype);
@@ -3964,7 +3981,8 @@ decode QUADRANT default Unknown::unknown() {
}
0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{
const int offset = 1;
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vdIdx - vs2Idx;
const int offsetInVreg = offset - vregOffset * microVlmax;
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
@@ -3994,7 +4012,8 @@ decode QUADRANT default Unknown::unknown() {
}}, OPFVF, VectorMiscOp);
0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{
const int offset = 1;
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vs2Idx - vdIdx;
const int offsetInVreg = offset - vregOffset * microVlmax;
const int numVs2s = vtype_regs_per_group(vtype);
@@ -4239,7 +4258,8 @@ decode QUADRANT default Unknown::unknown() {
}
0x0e: VectorSlideUpFormat::vslide1up_vx({{
const int offset = 1;
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vdIdx - vs2Idx;
const int offsetInVreg = offset - vregOffset * microVlmax;
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
@@ -4269,7 +4289,8 @@ decode QUADRANT default Unknown::unknown() {
}}, OPIVX, VectorMiscOp);
0x0f: VectorSlideDownFormat::vslide1down_vx({{
const int offset = 1;
const int microVlmax = vtype_VLMAX(machInst.vtype8, true);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
const int vregOffset = vs2Idx - vdIdx;
const int offsetInVreg = offset - vregOffset * microVlmax;
const int numVs2s = vtype_regs_per_group(vtype);

View File

@@ -28,6 +28,10 @@
let {{
def setVlen():
    # Emit C++ that derives VLEN (in bits) from the VlenbBits operand,
    # which holds VLENB (the register length in bytes).
    return "uint32_t vlen = VlenbBits * 8;\n"
def setVlenb():
    # Emit C++ that exposes VLENB (register length in bytes) from the
    # VlenbBits operand.
    return "uint32_t vlenb = VlenbBits;\n"
def setDestWrapper(destRegId):
    # Emit C++ that registers `destRegId` as the next destination register
    # and bumps the vector-register destination count.
    parts = [
        "setDestRegIdx(_numDestRegs++, ", destRegId, ");\n",
        "_numTypedDestRegs[VecRegClass]++;\n",
    ]
    return "".join(parts)
@@ -67,7 +71,7 @@ let {{
''' + code
else:
return '''
uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx;
''' + code
def wideningOpRegisterConstraintChecks(code):
@@ -178,12 +182,15 @@ def format VectorIntFormat(code, category, *flags) {{
if v0_required:
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
microiop = InstObjParams(name + "_micro",
Name + "Micro",
microop_class_name,
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb' : set_vlenb,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -225,12 +232,17 @@ def format VectorIntExtFormat(code, category, *flags) {{
code = loopWrapper(code)
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx),
'ext_div': ext_div},
@@ -293,12 +305,17 @@ def format VectorIntWideningFormat(code, category, *flags) {{
if v0_required:
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -348,12 +365,17 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
code = narrowingOpRegisterConstraintChecks(code)
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx),
},
@@ -416,12 +438,15 @@ def format VectorIntMaskFormat(code, category, *flags) {{
if v0_required:
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -474,12 +499,17 @@ def format VectorGatherFormat(code, category, *flags) {{
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx),
'idx_type': idx_type},
@@ -537,12 +567,15 @@ def format VectorFloatFormat(code, category, *flags) {{
if v0_required:
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(2)},
flags)
@@ -581,12 +614,15 @@ def format VectorFloatCvtFormat(code, category, *flags) {{
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -649,12 +685,17 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
if v0_required:
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(2)},
flags)
@@ -693,12 +734,17 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -738,12 +784,17 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb();
set_vlen = setVlen();
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -783,6 +834,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{
set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
set_src_reg_idx += setSrcVm()
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
code = maskCondWrapper(code)
code = eiDeclarePrefix(code)
@@ -795,6 +847,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(2)},
flags)
@@ -816,7 +869,8 @@ def format VMvWholeFormat(code, category, *flags) {{
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VMvWholeMicroInst',
{'code': code},
{'code': code,
'set_vlen': setVlen()},
flags)
header_output = \
@@ -847,6 +901,7 @@ def format ViotaFormat(code, category, *flags){{
set_dest_reg_idx = setDestWrapper(dest_reg_id)
vm_decl_rd = vmDeclAndReadData()
set_vm_idx = setSrcVm()
set_vlenb = setVlenb()
microiop = InstObjParams(name+"_micro",
Name+"Micro",
@@ -854,6 +909,7 @@ def format ViotaFormat(code, category, *flags){{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'set_vm_idx': set_vm_idx,
'copy_old_vd': copyOldVd(1)},
@@ -885,12 +941,14 @@ def format Vector1Vs1VdMaskFormat(code, category, *flags){{
set_dest_reg_idx = setDestWrapper(dest_reg_id)
vm_decl_rd = vmDeclAndReadData()
set_vm_idx = setSrcVm()
set_vlenb = setVlenb()
iop = InstObjParams(name,
Name,
'VectorNonSplitInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'set_vm_idx': set_vm_idx,
'copy_old_vd': copyOldVd(1)},
@@ -946,10 +1004,10 @@ def format VectorNonSplitFormat(code, category, *flags) {{
if inst_name == "vfmv" :
execute_block = VectorFloatNonSplitExecute.subst(iop)
decode_block = VectorFloatDecodeBlock.subst(iop)
decode_block = VectorFloatNonSplitDecodeBlock.subst(iop)
elif inst_name == "vmv" :
execute_block = VectorIntNonSplitExecute.subst(iop)
decode_block = VectorIntDecodeBlock.subst(iop)
decode_block = VectorIntNonSplitDecodeBlock.subst(iop)
else :
error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name)
@@ -984,6 +1042,8 @@ def format VectorMaskFormat(code, category, *flags) {{
set_dest_reg_idx = setDestWrapper(dest_reg_id)
set_vlenb = setVlenb()
code = loopWrapper(code, micro_inst = False)
iop = InstObjParams(name,
@@ -992,6 +1052,7 @@ def format VectorMaskFormat(code, category, *flags) {{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
# Because of the use of templates, we had to put all parts in header to
@@ -1020,6 +1081,9 @@ def format VectorReduceIntFormat(code, category, *flags) {{
set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
set_src_reg_idx += setSrcVm()
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
set_vlen = setVlen()
type_def = '''
using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
@@ -1030,6 +1094,8 @@ def format VectorReduceIntFormat(code, category, *flags) {{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb' : set_vlenb,
'set_vlen' : set_vlen,
'vm_decl_rd': vm_decl_rd,
'type_def': type_def,
'copy_old_vd': copyOldVd(2)},
@@ -1062,6 +1128,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{
set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
set_src_reg_idx += setSrcVm()
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
set_vlen = setVlen()
type_def = '''
using et = ElemType;
using vu = decltype(et::v);
@@ -1075,6 +1144,8 @@ def format VectorReduceFloatFormat(code, category, *flags) {{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'type_def': type_def,
'copy_old_vd': copyOldVd(2)},
@@ -1107,6 +1178,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{
set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
set_src_reg_idx += setSrcVm()
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
set_vlen = setVlen()
type_def = '''
using et = ElemType;
using vu [[maybe_unused]] = decltype(et::v);
@@ -1119,6 +1192,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'type_def': type_def,
'copy_old_vd': copyOldVd(2)},
@@ -1162,6 +1237,8 @@ def format VectorIntVxsatFormat(code, category, *flags) {{
set_src_reg_idx += setSrcVm()
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
code = maskCondWrapper(code)
code = eiDeclarePrefix(code)
code = loopWrapper(code)
@@ -1172,6 +1249,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)
@@ -1204,12 +1282,16 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{
set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
set_src_reg_idx += setSrcVm()
vm_decl_rd = vmDeclAndReadData()
set_vlenb = setVlenb()
set_vlen = setVlen()
microiop = InstObjParams(name + "_micro",
Name + "Micro",
'VectorArithMicroInst',
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(2)},
flags)
@@ -1261,12 +1343,16 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor,
set_dest_reg_idx = setDestWrapper(dest_reg_id)
vm_decl_rd = vmDeclAndReadData()
set_src_reg_idx += setSrcVm()
set_vlenb = setVlenb()
set_vlen = setVlen()
microiop = InstObjParams(name + "_micro",
Name + "Micro",
microop_class_name,
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb': set_vlenb,
'set_vlen': set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx)},
flags)

View File

@@ -42,8 +42,8 @@ def format VConfOp(code, write_code, declare_class, branch_class, *flags) {{
branchTargetTemplate = eval(branch_class)
header_output = declareTemplate.subst(iop)
decoder_output = BasicConstructor.subst(iop)
decode_block = BasicDecode.subst(iop)
decoder_output = VConfConstructor.subst(iop)
decode_block = VConfDecodeBlock.subst(iop)
exec_output = VConfExecute.subst(iop) + branchTargetTemplate.subst(iop)
}};
@@ -61,7 +61,7 @@ def template VSetVlDeclare {{
public:
/// Constructor.
%(class_name)s(ExtMachInst machInst);
%(class_name)s(ExtMachInst machInst, uint32_t elen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::unique_ptr<PCStateBase> branchTarget(
ThreadContext *tc) const override;
@@ -86,7 +86,7 @@ def template VSetiVliDeclare {{
public:
/// Constructor.
%(class_name)s(ExtMachInst machInst);
%(class_name)s(ExtMachInst machInst, uint32_t elen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::unique_ptr<PCStateBase> branchTarget(
const PCStateBase &branch_pc) const override;
@@ -97,6 +97,19 @@ def template VSetiVliDeclare {{
};
}};
def template VConfConstructor {{
// Constructor template for vector-configuration (vset{i}vl{i}) instructions.
// Forwards the dynamically configured ELEN (_elen) to the base class instead
// of relying on a compile-time constant, per the dynamic VLEN/ELEN support.
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen)
: %(base_class)s("%(mnemonic)s", _machInst, _elen, %(op_class)s)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
}};
def template VConfDecodeBlock {{
// Instantiate the config instruction with the decoder's dynamically
// configured ELEN (`elen` is in scope at the decode splice point, like
// `vlen` in the other decode blocks).
return new %(class_name)s(machInst, elen);
}};
def template VConfExecute {{
VTYPE
%(class_name)s::getNewVtype(
@@ -112,7 +125,7 @@ def template VConfExecute {{
uint32_t newVill =
!(vflmul >= 0.125 && vflmul <= 8) ||
sew > std::min(vflmul, 1.0f) * ELEN ||
sew > std::min(vflmul, 1.0f) * elen ||
bits(reqVtype, 62, 8) != 0;
if (newVill) {
newVtype = 0;
@@ -157,7 +170,8 @@ def template VConfExecute {{
tc->setMiscReg(MISCREG_VSTART, 0);
VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, vlen);
VTYPE new_vtype = getNewVtype(Vtype, requested_vtype,
vlen);
vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen);
uint32_t new_vl = getNewVL(
current_vl, requested_vl, vlmax, rd_bits, rs1_bits);

View File

@@ -29,10 +29,15 @@
let {{
def setVlen():
    # Emit the C++ statement that derives VLEN (bits) from VLENB (bytes).
    decl = "uint32_t vlen = VlenbBits * 8;\n"
    return decl
def setVlenb():
    # Emit the C++ statement exposing VLENB (vector register length in bytes).
    stmt = "uint32_t vlenb = VlenbBits;\n"
    return stmt
def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
inst_flags, base_class, postacc_code='',
declare_template_base=VMemMacroDeclare,
decode_template=BasicDecode, exec_template_base='',
decode_template=VMemBaseDecodeBlock, exec_template_base='',
# If it's a macroop, the corresponding microops will be
# generated.
is_macroop=True):
@@ -63,7 +68,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
exec_template_base + 'MicroInst',
{'ea_code': ea_code,
'memacc_code': memacc_code,
'postacc_code': postacc_code},
'postacc_code': postacc_code,
'set_vlenb': setVlenb(),
'set_vlen': setVlen()},
inst_flags)
if mem_flags:
@@ -90,7 +97,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
def format VleOp(
memacc_code,
ea_code={{ EA = Rs1 + VLENB * microIdx; }},
ea_code={{
EA = Rs1 + vlenb * microIdx;
}},
mem_flags=[],
inst_flags=[]
) {{
@@ -101,7 +110,9 @@ def format VleOp(
def format VseOp(
memacc_code,
ea_code={{ EA = Rs1 + VLENB * microIdx; }},
ea_code={{
EA = Rs1 + vlenb * microIdx;
}},
mem_flags=[],
inst_flags=[]
) {{
@@ -134,7 +145,9 @@ def format VsmOp(
def format VlWholeOp(
memacc_code,
ea_code={{ EA = Rs1 + VLENB * microIdx; }},
ea_code={{
EA = Rs1 + vlenb * microIdx;
}},
mem_flags=[],
inst_flags=[]
) {{
@@ -145,7 +158,9 @@ def format VlWholeOp(
def format VsWholeOp(
memacc_code,
ea_code={{ EA = Rs1 + VLENB * microIdx; }},
ea_code={{
EA = Rs1 + vlenb * microIdx;
}},
mem_flags=[],
inst_flags=[]
) {{
@@ -156,7 +171,9 @@ def format VsWholeOp(
def format VlStrideOp(
memacc_code,
ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }},
ea_code={{
EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx);
}},
mem_flags=[],
inst_flags=[]
) {{
@@ -167,7 +184,9 @@ def format VlStrideOp(
def format VsStrideOp(
memacc_code,
ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }},
ea_code={{
EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx);
}},
mem_flags=[],
inst_flags=[]
) {{
@@ -186,7 +205,7 @@ def format VlIndexOp(
VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
'VlIndexMacroInst', exec_template_base='VlIndex',
declare_template_base=VMemTemplateMacroDeclare,
decode_template=VMemTemplateDecodeBlock
decode_template=VMemSplitTemplateDecodeBlock
)
}};
@@ -200,6 +219,6 @@ def format VsIndexOp(
VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
'VsIndexMacroInst', exec_template_base='VsIndex',
declare_template_base=VMemTemplateMacroDeclare,
decode_template=VMemTemplateDecodeBlock
decode_template=VMemSplitTemplateDecodeBlock
)
}};

View File

@@ -35,8 +35,8 @@ output header {{
[[maybe_unused]] RiscvISA::vreg_t old_vd; \
[[maybe_unused]] decltype(Vd) old_Vd = nullptr; \
xc->getRegOperand(this, (idx), &old_vd); \
old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >(); \
memcpy(Vd, old_Vd, VLENB);
old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >(); \
memcpy(Vd, old_Vd, vlenb);
#define VRM_REQUIRED \
uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \
@@ -73,7 +73,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -82,14 +82,14 @@ public:
def template VectorIntMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -121,7 +121,7 @@ private:
RegId destRegIdxArr[1];
bool vm;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
@@ -133,7 +133,7 @@ def template VectorIntMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -173,6 +173,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -190,7 +191,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
std::string generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const override
{
@@ -214,7 +215,7 @@ private:
RegId destRegIdxArr[1];
bool vm;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
std::string generateDisassembly(Addr pc,
@@ -254,13 +255,16 @@ Fault
xc->setMiscReg(MISCREG_STATUS, status);
auto SEW = vtype_SEW(vtype);
auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d);
auto index = (microIdx % %(ext_div)d);
switch (SEW / %(ext_div)d) {
case 8: {
using vext [[maybe_unused]] = int8_t;
using vextu [[maybe_unused]] = uint8_t;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -272,6 +276,8 @@ Fault
using vextu [[maybe_unused]] = uint16_t;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -283,6 +289,8 @@ Fault
using vextu [[maybe_unused]] = uint32_t;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -300,10 +308,10 @@ Fault
def template VectorIntDecodeBlock {{
switch(machInst.vtype8.vsew) {
case 0b000: return new %(class_name)s<uint8_t>(machInst);
case 0b001: return new %(class_name)s<uint16_t>(machInst);
case 0b010: return new %(class_name)s<uint32_t>(machInst);
case 0b011: return new %(class_name)s<uint64_t>(machInst);
case 0b000: return new %(class_name)s<uint8_t>(machInst, vlen);
case 0b001: return new %(class_name)s<uint16_t>(machInst, vlen);
case 0b010: return new %(class_name)s<uint32_t>(machInst, vlen);
case 0b011: return new %(class_name)s<uint64_t>(machInst, vlen);
default: GEM5_UNREACHABLE;
}
@@ -316,7 +324,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -325,8 +333,8 @@ public:
def template VectorIntWideningMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
@@ -337,7 +345,7 @@ template<typename ElemType>
const uint32_t num_microops = 1 << std::max<int64_t>(0, vlmul + 1);
int32_t tmp_vl = this->vl;
const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -370,7 +378,7 @@ private:
RegId destRegIdxArr[1];
bool vm;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
@@ -382,7 +390,7 @@ def template VectorIntWideningMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -415,6 +423,10 @@ Fault
return std::make_shared<IllegalInstFault>(
"RVV is disabled or VPU is off", machInst);
}
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
if (machInst.vill)
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
@@ -423,13 +435,11 @@ Fault
xc->setMiscReg(MISCREG_STATUS, status);
const int64_t vlmul = vtype_vlmul(machInst.vtype8);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
[[maybe_unused]] const size_t offset =
(this->microIdx % 2 == 0) ? 0 : micro_vlmax;
%(op_decl)s;
%(op_rd)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -459,6 +469,11 @@ Fault
"RVV is disabled or VPU is off", machInst);
}
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
if (machInst.vill)
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
@@ -466,13 +481,11 @@ Fault
xc->setMiscReg(MISCREG_STATUS, status);
const int64_t vlmul = vtype_vlmul(machInst.vtype8);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
[[maybe_unused]] const size_t offset =
(this->microIdx % 2 == 0) ? 0 : micro_vlmax;
%(op_decl)s;
%(op_rd)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -485,9 +498,9 @@ Fault
def template VectorIntWideningDecodeBlock {{
switch(machInst.vtype8.vsew) {
case 0b000: return new %(class_name)s<uint8_t>(machInst);
case 0b001: return new %(class_name)s<uint16_t>(machInst);
case 0b010: return new %(class_name)s<uint32_t>(machInst);
case 0b000: return new %(class_name)s<uint8_t>(machInst, vlen);
case 0b001: return new %(class_name)s<uint16_t>(machInst, vlen);
case 0b010: return new %(class_name)s<uint32_t>(machInst, vlen);
default: GEM5_UNREACHABLE;
}
@@ -500,7 +513,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -508,14 +521,14 @@ public:
def template VectorFloatMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -547,7 +560,7 @@ private:
bool vm;
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx);
uint32_t _microVl, uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
};
@@ -557,7 +570,7 @@ public:
def template VectorFloatMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -598,6 +611,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -611,8 +625,8 @@ Fault
def template VectorFloatDecodeBlock {{
switch(machInst.vtype8.vsew) {
case 0b010: return new %(class_name)s<float32_t>(machInst);
case 0b011: return new %(class_name)s<float64_t>(machInst);
case 0b010: return new %(class_name)s<float32_t>(machInst, vlen);
case 0b011: return new %(class_name)s<float64_t>(machInst, vlen);
default: GEM5_UNREACHABLE;
}
@@ -625,7 +639,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
std::string generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const override
{
@@ -650,7 +664,7 @@ private:
bool vm;
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx);
uint32_t _microVl, uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
std::string generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const override
@@ -693,14 +707,17 @@ Fault
VRM_REQUIRED;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
const int64_t vlmul = vtype_vlmul(machInst.vtype8);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
[[maybe_unused]] const size_t offset =
(this->microIdx % 2 == 0) ? 0 : micro_vlmax;
%(op_decl)s;
%(op_rd)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -737,14 +754,17 @@ Fault
VRM_REQUIRED;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
const int64_t vlmul = vtype_vlmul(machInst.vtype8);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
[[maybe_unused]] const size_t offset =
(this->microIdx % 2 == 0) ? 0 : micro_vlmax;
%(op_decl)s;
%(op_rd)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -757,7 +777,7 @@ Fault
def template VectorFloatWideningDecodeBlock {{
switch(machInst.vtype8.vsew) {
case 0b010: return new %(class_name)s<float32_t>(machInst);
case 0b010: return new %(class_name)s<float32_t>(machInst, vlen);
default: GEM5_UNREACHABLE;
}
@@ -771,7 +791,7 @@ private:
int cnt = 0;
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -781,14 +801,14 @@ public:
def template ViotaMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -819,7 +839,7 @@ private:
bool vm;
int* cnt;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx, int* cnt);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
@@ -831,7 +851,7 @@ def template ViotaMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx, int* cnt)
uint32_t _microVl, uint8_t _microIdx, int* cnt)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -871,6 +891,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -919,6 +940,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -997,7 +1019,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -1006,14 +1028,14 @@ public:
def template VectorIntMaskMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1028,7 +1050,7 @@ template<typename ElemType>
micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
}
microop = new VMaskMergeMicroInst<ElemType>(_machInst, _machInst.vd,
this->microops.size());
this->microops.size(), _vlen);
this->microops.push_back(microop);
this->microops.front()->setFirstMicroop();
@@ -1050,7 +1072,7 @@ private:
bool vm;
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx);
uint32_t _microVl, uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
};
@@ -1061,7 +1083,7 @@ def template VectorIntMaskMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -1099,10 +1121,11 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
constexpr uint16_t bit_offset = VLENB / sizeof(ElemType);
const uint16_t bit_offset = vlenb / sizeof(ElemType);
const uint16_t offset = bit_offset * microIdx;
%(code)s;
@@ -1119,7 +1142,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -1128,14 +1151,14 @@ public:
def template VectorFloatMaskMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1150,7 +1173,7 @@ template<typename ElemType>
micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
}
microop = new VMaskMergeMicroInst<ElemType>(_machInst, _machInst.vd,
this->microops.size());
this->microops.size(), _vlen);
this->microops.push_back(microop);
this->microops.front()->setFirstMicroop();
@@ -1171,7 +1194,7 @@ private:
bool vm;
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx);
uint32_t _microVl, uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
};
@@ -1182,7 +1205,7 @@ def template VectorFloatMaskMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -1220,10 +1243,11 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
constexpr uint16_t bit_offset = VLENB / sizeof(ElemType);
const uint16_t bit_offset = vlenb / sizeof(ElemType);
const uint16_t offset = bit_offset * microIdx;
%(code)s;
@@ -1276,7 +1300,7 @@ private:
RegId destRegIdxArr[1];
bool vm;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
@@ -1287,7 +1311,7 @@ public:
def template VMvWholeMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -1321,7 +1345,8 @@ Fault
%(op_decl)s;
%(op_rd)s;
for (size_t i = 0; i < (VLEN / 64); i++) {
%(set_vlen)s;
for (size_t i = 0; i < (vlen / 64); i++) {
%(code)s;
}
%(op_wb)s;
@@ -1382,6 +1407,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
// TODO: remove it
%(set_vlenb)s;
%(copy_old_vd)s;
%(code)s;
%(op_wb)s;
@@ -1489,6 +1515,28 @@ Fault
}};
def template VectorFloatNonSplitDecodeBlock {{
// Pick the float element width from vtype.vsew (0b010 = 32-bit,
// 0b011 = 64-bit); other encodings are invalid for float non-split insts.
// NOTE(review): unlike VectorFloatDecodeBlock, `vlen` is not forwarded to
// the constructor here — confirm the NonSplit constructor signature.
switch(machInst.vtype8.vsew) {
case 0b010: return new %(class_name)s<float32_t>(machInst);
case 0b011: return new %(class_name)s<float64_t>(machInst);
default: GEM5_UNREACHABLE;
}
}};
def template VectorIntNonSplitDecodeBlock {{
// Pick the integer element width from vtype.vsew (0b000..0b011 map to
// 8/16/32/64-bit elements).
// NOTE(review): unlike VectorIntDecodeBlock, `vlen` is not forwarded to
// the constructor here — confirm the NonSplit constructor signature.
switch(machInst.vtype8.vsew) {
case 0b000: return new %(class_name)s<uint8_t>(machInst);
case 0b001: return new %(class_name)s<uint16_t>(machInst);
case 0b010: return new %(class_name)s<uint32_t>(machInst);
case 0b011: return new %(class_name)s<uint64_t>(machInst);
default: GEM5_UNREACHABLE;
}
}};
def template VectorReduceMacroDeclare {{
template<typename ElemType>
@@ -1496,7 +1544,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -1505,14 +1553,14 @@ public:
def template VectorReduceMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1544,7 +1592,7 @@ private:
bool vm;
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx);
uint32_t _microVl, uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
};
@@ -1555,7 +1603,7 @@ def template VectorReduceMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -1593,6 +1641,8 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
@@ -1600,7 +1650,8 @@ Fault
[&, this](const auto& f, const auto* _, const auto* vs2) {
ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0];
for (uint32_t i = 0; i < this->microVl; i++) {
uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) *
this->microIdx;
if (this->vm || elem_mask(v0, ei)) {
microop_result = f(microop_result, Vs2[i]);
}
@@ -1625,6 +1676,7 @@ Fault
%(type_def)s;
MISA misa = xc->readMiscReg(MISCREG_ISA);
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (!misa.rvv || status.vs == VPUStatus::OFF) {
return std::make_shared<IllegalInstFault>(
"RVV is disabled or VPU is off", machInst);
@@ -1638,6 +1690,8 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
@@ -1647,7 +1701,8 @@ Fault
[&, this](const auto& f, const auto* _, const auto* vs2) {
vu tmp_val = Vd[0];
for (uint32_t i = 0; i < this->microVl; i++) {
uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) *
this->microIdx;
if (this->vm || elem_mask(v0, ei)) {
tmp_val = f(tmp_val, Vs2[i]).v;
}
@@ -1685,6 +1740,8 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
@@ -1694,7 +1751,8 @@ Fault
[&, this](const auto& f, const auto* _, const auto* vs2) {
vwu tmp_val = Vd[0];
for (uint32_t i = 0; i < this->microVl; i++) {
uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) *
this->microIdx;
if (this->vm || elem_mask(v0, ei)) {
tmp_val = f(tmp_val, Vs2[i]).v;
}
@@ -1716,7 +1774,7 @@ class %(class_name)s : public %(base_class)s{
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -1725,8 +1783,9 @@ public:
def template VectorGatherMacroConstructor {{
template<typename ElemType, typename IndexType>
%(class_name)s<ElemType, IndexType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType, IndexType>::%(class_name)s(ExtMachInst _machInst,
uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
@@ -1740,7 +1799,8 @@ template<typename ElemType, typename IndexType>
const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul;
const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul;
const uint8_t vd_vregs = vs2_vregs;
const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb);
uint32_t vlenb = vlen >> 3;
const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs1_eewb);
int32_t remaining_vl = this->vl;
int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1778,7 +1838,7 @@ private:
bool vm;
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx);
uint32_t _microVl, uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
};
@@ -1789,7 +1849,7 @@ def template VectorGatherMicroConstructor {{
template<typename ElemType, typename IndexType>
%(class_name)s<ElemType, IndexType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx)
uint32_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -1839,17 +1899,19 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
const uint32_t vlmax = vtype_VLMAX(vtype);
const uint32_t vlmax = vtype_VLMAX(vtype,vlen);
constexpr uint8_t vd_eewb = sizeof(ElemType);
constexpr uint8_t vs1_eewb = sizeof(IndexType);
constexpr uint8_t vs2_eewb = sizeof(ElemType);
constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb;
constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb;
[[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb;
[[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb;
[[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb;
[[maybe_unused]] const uint16_t vd_elems = vlenb / vd_eewb;
[[maybe_unused]] const uint16_t vs1_elems = vlenb / vs1_eewb;
[[maybe_unused]] const uint16_t vs2_elems = vlenb / vs2_eewb;
[[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype);
[[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul;
[[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs;
@@ -1875,19 +1937,19 @@ def template VectorGatherDecodeBlock {{
switch(machInst.vtype8.vsew) {
case 0b000: {
using elem_type [[maybe_unused]] = uint8_t;
return new %(class_name)s<uint8_t, %(idx_type)s>(machInst);
return new %(class_name)s<uint8_t, %(idx_type)s>(machInst, vlen);
}
case 0b001: {
using elem_type [[maybe_unused]] = uint16_t;
return new %(class_name)s<uint16_t, %(idx_type)s>(machInst);
return new %(class_name)s<uint16_t, %(idx_type)s>(machInst, vlen);
}
case 0b010: {
using elem_type [[maybe_unused]] = uint32_t;
return new %(class_name)s<uint32_t, %(idx_type)s>(machInst);
return new %(class_name)s<uint32_t, %(idx_type)s>(machInst, vlen);
}
case 0b011: {
using elem_type [[maybe_unused]] = uint64_t;
return new %(class_name)s<uint64_t, %(idx_type)s>(machInst);
return new %(class_name)s<uint64_t, %(idx_type)s>(machInst, vlen);
}
default: GEM5_UNREACHABLE;
}
@@ -1902,7 +1964,7 @@ private:
%(reg_idx_arr_decl)s;
bool vxsat = false;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -1911,14 +1973,14 @@ public:
def template VectorIntVxsatMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1954,7 +2016,7 @@ private:
bool vm;
bool* vxsatptr;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx, bool* vxsatptr);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
@@ -1966,7 +2028,7 @@ def template VectorIntVxsatMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr)
uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx)
{
@@ -2007,6 +2069,8 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(vm_decl_rd)s;
%(copy_old_vd)s;
@@ -2016,7 +2080,8 @@ Fault
[&, this](const auto& f, const auto* _, const auto* vs2) {
vwu tmp_val = Vd[0];
for (uint32_t i = 0; i < this->microVl; i++) {
uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) *
this->microIdx;
if (this->vm || elem_mask(v0, ei)) {
tmp_val = f(tmp_val, Vs2[i]);
}
@@ -2038,7 +2103,7 @@ class %(class_name)s : public %(base_class)s {
private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -2047,14 +2112,14 @@ public:
def template VectorSlideUpMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -2082,14 +2147,14 @@ template<typename ElemType>
def template VectorSlideDownMacroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t num_microops = vtype_regs_per_group(vtype);
int32_t tmp_vl = this->vl;
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
StaticInstPtr microop;
@@ -2126,7 +2191,7 @@ private:
RegId destRegIdxArr[1];
bool vm;
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
using %(base_class)s::generateDisassembly;
@@ -2138,7 +2203,7 @@ def template VectorSlideMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx)
uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
_microIdx, _vdIdx, _vs2Idx)
{
@@ -2174,10 +2239,13 @@ Fault
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
[[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype);
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
[[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen);
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;
@@ -2210,10 +2278,13 @@ Fault
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
[[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype);
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
[[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen);
%(vm_decl_rd)s;
%(copy_old_vd)s;
%(code)s;

View File

@@ -34,6 +34,7 @@ private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -48,6 +49,7 @@ private:
%(reg_idx_arr_decl)s;
public:
%(class_name)s(ExtMachInst _machInst);
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
using %(base_class)s::generateDisassembly;
};
@@ -55,16 +57,17 @@ public:
def template VleConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width);
const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax));
int32_t remaining_vl = this->vl;
int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
StaticInstPtr microop;
if (micro_vl == 0) {
@@ -72,7 +75,7 @@ def template VleConstructor {{
this->microops.push_back(microop);
}
for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
microop = new %(class_name)sMicro(_machInst, micro_vl, i);
microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen);
microop->setDelayedCommit();
microop->setFlag(IsLoad);
this->microops.push_back(microop);
@@ -93,9 +96,10 @@ private:
RegId srcRegIdxArr[3];
RegId destRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
_microIdx)
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
@@ -127,12 +131,15 @@ Fault
Addr EA;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(ea_code)s;
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
MISA misa = xc->readMiscReg(MISCREG_ISA);
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (!misa.rvv || status.vs == VPUStatus::OFF) {
return std::make_shared<IllegalInstFault>(
"RVV is disabled or VPU is off", machInst);
@@ -150,15 +157,18 @@ Fault
}
uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl;
const std::vector<bool> byte_enable(mem_size, true);
Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
byte_enable);
if (fault != NoFault)
return fault;
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const size_t micro_elems = VLEN / width_EEW(machInst.width);
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t micro_elems = vlen / width_EEW(machInst.width);
size_t ei;
for (size_t i = 0; i < micro_elems; i++) {
ei = i + micro_vlmax * microIdx;
%(memacc_code)s;
@@ -176,10 +186,12 @@ Fault
%(class_name)s::initiateAcc(ExecContext* xc,
trace::InstRecord* traceData) const
{
Addr EA;
%(op_src_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
MISA misa = xc->readMiscReg(MISCREG_ISA);
@@ -192,6 +204,7 @@ Fault
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl;
const std::vector<bool> byte_enable(mem_size, true);
Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
byte_enable);
@@ -208,6 +221,7 @@ Fault
{
%(op_decl)s;
%(op_rd)s;
%(set_vlen)s;
STATUS status = xc->readMiscReg(MISCREG_STATUS);
status.vs = VPUStatus::DIRTY;
@@ -222,8 +236,9 @@ Fault
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const size_t micro_elems = VLEN / width_EEW(machInst.width);
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t micro_elems = vlen / width_EEW(machInst.width);
size_t ei;
for (size_t i = 0; i < micro_elems; i++) {
ei = i + micro_vlmax * microIdx;
@@ -238,13 +253,13 @@ Fault
def template VseConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width);
const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax));
int32_t remaining_vl = this->vl;
int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
@@ -256,7 +271,7 @@ def template VseConstructor {{
this->microops.push_back(microop);
}
for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
microop = new %(class_name)sMicro(_machInst, micro_vl, i);
microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen);
microop->setDelayedCommit();
microop->setFlag(IsStore);
this->microops.push_back(microop);
@@ -277,9 +292,10 @@ private:
RegId srcRegIdxArr[3];
RegId destRegIdxArr[0];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_microVl, _microIdx)
%(class_name)s(ExtMachInst _machInst,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
@@ -326,9 +342,11 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(ea_code)s;
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t eewb = width_EEW(machInst.width) / 8;
const size_t mem_size = eewb * microVl;
std::vector<bool> byte_enable(mem_size, false);
@@ -375,9 +393,11 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(ea_code)s;
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t eewb = width_EEW(machInst.width) / 8;
const size_t mem_size = eewb * microVl;
std::vector<bool> byte_enable(mem_size, false);
@@ -412,20 +432,20 @@ Fault
def template VlmConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width);
const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width);
int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8;
StaticInstPtr microop;
if (micro_vl == 0) {
microop = new VectorNopMicroInst(_machInst);
} else {
microop = new Vle8_vMicro(_machInst, micro_vl, 0);
microop = new Vle8_vMicro(_machInst, micro_vl, 0, vlen);
microop->setDelayedCommit();
microop->setFlag(IsLoad);
}
@@ -439,20 +459,20 @@ def template VlmConstructor {{
def template VsmConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width);
const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width);
int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8;
StaticInstPtr microop;
if (micro_vl == 0) {
microop = new VectorNopMicroInst(_machInst);
} else {
microop = new Vse8_vMicro(_machInst, micro_vl, 0);
microop = new Vse8_vMicro(_machInst, micro_vl, 0, vlen);
microop->setDelayedCommit();
microop->setFlag(IsStore);
}
@@ -466,18 +486,18 @@ def template VsmConstructor {{
def template VsWholeConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
size_t NFIELDS = machInst.nf + 1;
const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width);
const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
StaticInstPtr microop;
for (int i = 0; i < NFIELDS; ++i) {
microop = new %(class_name)sMicro(_machInst, micro_vlmax, i);
microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen);
microop->setDelayedCommit();
microop->setFlag(IsStore);
this->microops.push_back(microop);
@@ -497,9 +517,10 @@ private:
RegId destRegIdxArr[0];
RegId srcRegIdxArr[2];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_microVl, _microIdx)
%(class_name)s(ExtMachInst _machInst,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
@@ -532,14 +553,16 @@ Fault
}
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
for (size_t i = 0; i < VLENB; i++) {
for (size_t i = 0; i < vlenb; i++) {
%(memacc_code)s;
}
Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem),
EA, memAccessFlags, nullptr);
vlenb, EA, memAccessFlags, nullptr);
return fault;
}
@@ -560,14 +583,16 @@ Fault
}
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
for (size_t i = 0; i < VLENB; i++) {
for (size_t i = 0; i < vlenb; i++) {
%(memacc_code)s;
}
Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem),
EA, memAccessFlags, nullptr);
EA, vlenb, memAccessFlags, nullptr);
return fault;
}
@@ -586,18 +611,19 @@ Fault
def template VlWholeConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
size_t NFIELDS = machInst.nf + 1;
const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width);
const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
StaticInstPtr microop;
for (int i = 0; i < NFIELDS; ++i) {
microop = new %(class_name)sMicro(_machInst, micro_vlmax, i);
microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen);
microop->setDelayedCommit();
microop->setFlag(IsLoad);
this->microops.push_back(microop);
@@ -617,9 +643,10 @@ private:
RegId destRegIdxArr[1];
RegId srcRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx)
: %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s,
_microVl, _microIdx)
%(class_name)s(ExtMachInst _machInst,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s_micro", _machInst,
%(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
@@ -657,14 +684,17 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(set_vlen)s;
%(ea_code)s;
Fault fault = readMemAtomicLE(xc, traceData, EA,
*(vreg_t::Container*)(&Mem), memAccessFlags);
*(vreg_t::Container*)(&Mem), vlenb,
memAccessFlags);
if (fault != NoFault)
return fault;
size_t elem_per_reg = VLEN / width_EEW(machInst.width);
size_t elem_per_reg = vlen / width_EEW(machInst.width);
for (size_t i = 0; i < elem_per_reg; i++) {
%(memacc_code)s;
}
@@ -690,9 +720,11 @@ Fault
}
%(op_src_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
const std::vector<bool> byte_enable(vlenb, true);
Fault fault = initiateMemRead(xc, EA, vlenb, memAccessFlags, byte_enable);
return fault;
}
@@ -706,6 +738,7 @@ Fault
{
%(op_decl)s;
%(op_rd)s;
%(set_vlen)s;
STATUS status = xc->readMiscReg(MISCREG_STATUS);
status.vs = VPUStatus::DIRTY;
@@ -713,7 +746,7 @@ Fault
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
size_t elem_per_reg = VLEN / width_EEW(machInst.width);
size_t elem_per_reg = vlen / width_EEW(machInst.width);
for (size_t i = 0; i < elem_per_reg; ++i) {
%(memacc_code)s;
}
@@ -726,13 +759,13 @@ Fault
def template VlStrideConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width);
const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width);
int32_t remaining_vl = this->vl;
// Num of elems in one vreg
int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg);
@@ -770,7 +803,7 @@ private:
RegId destRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
uint8_t _microVl)
uint32_t _microVl)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_regIdx, _microIdx, _microVl)
{
@@ -820,6 +853,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
constexpr uint8_t elem_size = sizeof(Vd[0]);
%(ea_code)s; // ea_code depends on elem_size
@@ -833,7 +867,7 @@ Fault
uint32_t mem_size = elem_size;
const std::vector<bool> byte_enable(mem_size, true);
size_t ei = this->regIdx * VLENB / elem_size + this->microIdx;
size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
if (machInst.vm || elem_mask(v0, ei)) {
fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size,
memAccessFlags, byte_enable);
@@ -866,6 +900,7 @@ Fault
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
%(op_src_decl)s;
%(op_rd)s;
%(set_vlenb)s;
constexpr uint8_t elem_size = sizeof(Vd[0]);
%(ea_code)s; // ea_code depends on elem_size
@@ -877,7 +912,7 @@ Fault
}
uint32_t mem_size = elem_size;
size_t ei = this->regIdx * VLENB / elem_size + this->microIdx;
size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
bool need_load = machInst.vm || elem_mask(v0, ei);
const std::vector<bool> byte_enable(mem_size, need_load);
fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable);
@@ -894,6 +929,7 @@ Fault
{
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
STATUS status = xc->readMiscReg(MISCREG_STATUS);
status.vs = VPUStatus::DIRTY;
@@ -920,12 +956,12 @@ Fault
memcpy(Vd, old_Vd, microVl * elem_size);
// treat vta as vtu
// if (machInst.vtype8.vta == 0)
memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size);
memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size);
} else {
memcpy(Vd, old_Vd, VLENB);
memcpy(Vd, old_Vd, vlenb);
}
size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx;
size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx;
if (machInst.vm || elem_mask(v0, ei)) {
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
%(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
@@ -939,13 +975,13 @@ Fault
def template VsStrideConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width);
const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width);
int32_t remaining_vl = this->vl;
// Num of elems in one vreg
int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg);
@@ -983,7 +1019,7 @@ private:
RegId destRegIdxArr[0];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
uint8_t _microVl)
uint32_t _microVl)
: %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s,
_regIdx, _microIdx, _microVl)
{
@@ -1025,6 +1061,7 @@ Fault
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
constexpr uint8_t elem_size = sizeof(Vs3[0]);
%(ea_code)s;
@@ -1038,7 +1075,7 @@ Fault
uint32_t mem_size = elem_size;
const std::vector<bool> byte_enable(mem_size, true);
size_t ei = this->regIdx * VLENB / elem_size + this->microIdx;
size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
if (machInst.vm || elem_mask(v0, ei)) {
%(memacc_code)s;
fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
@@ -1074,11 +1111,13 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
constexpr uint8_t elem_size = sizeof(Vs3[0]);
%(ea_code)s;
uint32_t mem_size = elem_size;
size_t ei = this->regIdx * VLENB / elem_size + this->microIdx;
size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
bool need_store = machInst.vm || elem_mask(v0, ei);
if (need_store) {
const std::vector<bool> byte_enable(mem_size, need_store);
@@ -1105,8 +1144,8 @@ Fault
def template VlIndexConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
@@ -1115,7 +1154,8 @@ template<typename ElemType>
const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8;
const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb;
const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb;
const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb);
uint32_t vlenb = vlen >> 3;
const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs2_eewb);
int32_t remaining_vl = this->vl;
int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1212,6 +1252,7 @@ Fault
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
constexpr uint8_t elem_size = sizeof(Vd[0]);
RiscvISA::vreg_t tmp_v0;
@@ -1223,8 +1264,7 @@ Fault
uint32_t mem_size = elem_size;
const std::vector<bool> byte_enable(mem_size, true);
size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx;
size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
if (machInst.vm || elem_mask(v0, ei)) {
fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size,
memAccessFlags, byte_enable);
@@ -1259,6 +1299,7 @@ Fault
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
%(op_src_decl)s;
%(op_rd)s;
%(set_vlenb)s;
constexpr uint8_t elem_size = sizeof(Vd[0]);
%(ea_code)s; // ea_code depends on elem_size
@@ -1270,7 +1311,8 @@ Fault
}
uint32_t mem_size = elem_size;
size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx;
size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
bool need_load = machInst.vm || elem_mask(v0, ei);
const std::vector<bool> byte_enable(mem_size, need_load);
fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable);
@@ -1293,10 +1335,11 @@ Fault
using vu = std::make_unsigned_t<ElemType>;
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
constexpr uint8_t elem_size = sizeof(Vd[0]);
RiscvISA::vreg_t old_vd;
RiscvISA::vreg_t old_vd;;
decltype(Vd) old_Vd = nullptr;
// We treat agnostic as undistrubed
xc->getRegOperand(this, 2, &old_vd);
@@ -1309,9 +1352,9 @@ Fault
v0 = tmp_v0.as<uint8_t>();
}
memcpy(Vd, old_Vd, VLENB);
memcpy(Vd, old_Vd, vlenb);
size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx;
size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
if (machInst.vm || elem_mask(v0, ei)) {
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
%(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
@@ -1326,8 +1369,8 @@ Fault
def template VsIndexConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
@@ -1336,7 +1379,8 @@ template<typename ElemType>
const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8;
const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb;
const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb;
const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb);
uint32_t vlenb = vlen >> 3;
const int32_t micro_vlmax = vlenb / std::max(vs3_eewb, vs2_eewb);
int32_t remaining_vl = this->vl;
int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
StaticInstPtr microop;
@@ -1426,6 +1470,7 @@ Fault
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
%(op_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
constexpr uint8_t elem_size = sizeof(Vs3[0]);
RiscvISA::vreg_t tmp_v0;
@@ -1438,7 +1483,7 @@ Fault
uint32_t mem_size = elem_size;
const std::vector<bool> byte_enable(mem_size, true);
size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx;
size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx;
if (machInst.vm || elem_mask(v0, ei)) {
%(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */
fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
@@ -1469,6 +1514,7 @@ Fault
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
%(op_src_decl)s;
%(op_rd)s;
%(set_vlenb)s;
%(ea_code)s;
constexpr uint8_t elem_size = sizeof(Vs3[0]);
RiscvISA::vreg_t tmp_v0;
@@ -1481,7 +1527,7 @@ Fault
constexpr uint8_t mem_size = elem_size;
const std::vector<bool> byte_enable(mem_size, true);
size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx;
size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx;
if (machInst.vm || elem_mask(v0, ei)) {
%(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */
fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
@@ -1504,6 +1550,10 @@ Fault
}};
def template VMemBaseDecodeBlock {{
return new %(class_name)s(machInst, vlen);
}};
def template VMemTemplateDecodeBlock {{
switch(machInst.vtype8.vsew) {
@@ -1523,3 +1573,23 @@ switch(machInst.vtype8.vsew) {
}
}};
def template VMemSplitTemplateDecodeBlock {{
switch(machInst.vtype8.vsew) {
case 0b000: {
return new %(class_name)s<uint8_t>(machInst, vlen);
}
case 0b001: {
return new %(class_name)s<uint16_t>(machInst, vlen);
}
case 0b010: {
return new %(class_name)s<uint32_t>(machInst, vlen);
}
case 0b011: {
return new %(class_name)s<uint64_t>(machInst, vlen);
}
default: GEM5_UNREACHABLE;
}
}};

View File

@@ -62,7 +62,7 @@ class PCState : public GenericISA::UPCState<4>
bool _compressed = false;
RiscvType _rvType = RV64;
uint64_t _vlenb = VLENB;
uint64_t _vlenb = 32;
VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial;
uint32_t _vl = 0;
@@ -74,7 +74,7 @@ class PCState : public GenericISA::UPCState<4>
PCState &operator=(const PCState &other) = default;
PCState() = default;
explicit PCState(Addr addr) { set(addr); }
explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb = VLENB)
explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb)
{
set(addr);
_rvType = rvType;

View File

@@ -36,6 +36,7 @@
#include "arch/generic/vec_pred_reg.hh"
#include "arch/generic/vec_reg.hh"
#include "arch/riscv/types.hh"
#include "base/bitunion.hh"
#include "cpu/reg_class.hh"
#include "debug/VecRegs.hh"
@@ -46,13 +47,10 @@ namespace gem5
namespace RiscvISA
{
constexpr unsigned ELEN = 64;
constexpr unsigned VLEN = 256;
constexpr unsigned VLENB = VLEN / 8;
using VecRegContainer = gem5::VecRegContainer<VLENB>;
using VecRegContainer = gem5::VecRegContainer<MaxVecLenInBytes>;
using vreg_t = VecRegContainer;
const int NumVecStandardRegs = 32;
const int NumVecInternalRegs = 8; // Used by vector uop
const int NumVecRegs = NumVecStandardRegs + NumVecInternalRegs;

View File

@@ -42,7 +42,6 @@
#ifndef __ARCH_RISCV_TYPES_HH__
#define __ARCH_RISCV_TYPES_HH__
#include "arch/riscv/pcstate.hh"
#include "base/bitunion.hh"
namespace gem5
@@ -178,6 +177,10 @@ BitUnion64(ExtMachInst)
EndBitUnion(ExtMachInst)
constexpr unsigned MaxVecLenInBits = 65536;
constexpr unsigned MaxVecLenInBytes = MaxVecLenInBits >> 3;
} // namespace RiscvISA
} // namespace gem5

View File

@@ -268,12 +268,13 @@ vtype_SEW(const uint64_t vtype)
* Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h
*/
inline uint64_t
vtype_VLMAX(const uint64_t vtype, const bool per_reg = false)
vtype_VLMAX(const uint64_t vtype, const uint64_t vlen,
const bool per_reg = false)
{
int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0));
lmul = per_reg ? std::min<int64_t>(0, lmul) : lmul;
int64_t vsew = bits(vtype, 5, 3);
return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul);
return vlen >> (vsew + 3 - lmul);
}
inline int64_t

View File

@@ -0,0 +1,49 @@
# Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
def upgrader(cpt):
"""
    Update the checkpoint to support the initial RVV implementation.
    The updater takes the following steps.
Set vector registers to occupy 327680 bytes (40regs * 8192bytes).
    Vector registers now occupy this space regardless of VLEN as the
VecRegContainer is always MaxVecLenInBytes.
"""
for sec in cpt.sections():
import re
# Search for all XC sections
if re.search(r".*processor.*\.core.*\.xc.*", sec):
# Updating RVV vector registers (dummy values)
mr = cpt.get(sec, "regs.vector").split()
if len(mr) != 327680:
cpt.set(
sec, "regs.vector", " ".join("0" for i in range(327680))
)