From 0dade68dae6a7bf188585384a17a7aca4cc8c784 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 26 Feb 2021 00:07:14 -0800 Subject: [PATCH] arch,cpu,gpu-compute: Further simplify VecRegContainer. Get rid of VecRegT, and a few redundant or unused methods. Change-Id: I6c88c40653e1939fe74b8ffb847ef50ab8064670 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41995 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/arch/amdgpu/gcn3/gpu_registers.hh | 29 +---- src/arch/amdgpu/gcn3/operand.hh | 8 +- src/arch/arm/isa.cc | 9 -- src/arch/arm/isa.hh | 12 +- src/arch/arm/isa/templates/sve_mem.isa | 16 +-- src/arch/arm/nativetrace.cc | 3 +- src/arch/arm/regs/vec.hh | 5 +- src/arch/gcn3/registers.hh | 29 +---- src/arch/generic/vec_reg.hh | 147 +++++-------------------- src/arch/mips/registers.hh | 2 - src/arch/null/registers.hh | 2 - src/arch/power/registers.hh | 2 - src/arch/riscv/registers.hh | 2 - src/arch/sparc/registers.hh | 2 - src/arch/x86/registers.hh | 2 - src/cpu/o3/rename_map.cc | 2 +- src/gpu-compute/wavefront.cc | 6 +- 17 files changed, 60 insertions(+), 218 deletions(-) diff --git a/src/arch/amdgpu/gcn3/gpu_registers.hh b/src/arch/amdgpu/gcn3/gpu_registers.hh index c56ada2f95..783b16f648 100644 --- a/src/arch/amdgpu/gcn3/gpu_registers.hh +++ b/src/arch/amdgpu/gcn3/gpu_registers.hh @@ -174,33 +174,8 @@ namespace Gcn3ISA */ const int RegSizeDWords = sizeof(VecElemU32) / DWordSize; - // typedefs for the various sizes/types of vector regs - using VecRegU8 = ::VecRegT; - using VecRegI8 = ::VecRegT; - using VecRegU16 = ::VecRegT; - using VecRegI16 = ::VecRegT; - using VecRegU32 = ::VecRegT; - using VecRegI32 = ::VecRegT; - using VecRegF32 = ::VecRegT; - using VecRegU64 = ::VecRegT; - using VecRegI64 = ::VecRegT; - using VecRegF64 = ::VecRegT; - // non-writeable versions of vector regs - using ConstVecRegU8 = ::VecRegT; - using ConstVecRegI8 = ::VecRegT; - using ConstVecRegU16 = ::VecRegT; - using ConstVecRegI16 = ::VecRegT; - using ConstVecRegU32 = ::VecRegT; - using ConstVecRegI32 = ::VecRegT; - using ConstVecRegF32 = ::VecRegT; - using ConstVecRegU64 = ::VecRegT; - using ConstVecRegI64 = ::VecRegT; - using ConstVecRegF64 = ::VecRegT; - - using VecRegContainerU8 = VecRegU8::Container; - using VecRegContainerU16 = VecRegU16::Container; - using VecRegContainerU32 = VecRegU32::Container; - using VecRegContainerU64 = VecRegU64::Container; + using VecRegContainerU32 = + VecRegContainer; struct StatusReg { diff --git a/src/arch/amdgpu/gcn3/operand.hh b/src/arch/amdgpu/gcn3/operand.hh index 5397b8eff8..9ff4c8cc46 100644 --- a/src/arch/amdgpu/gcn3/operand.hh +++ b/src/arch/amdgpu/gcn3/operand.hh @@ -326,12 +326,8 @@ namespace Gcn3ISA scRegData.read(); } - using VecRegCont = typename std::conditional::type>::type>::type; + using VecRegCont = + VecRegContainer; /** * whether this operand a scalar or not. diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index b1d8d3f4a7..596a861210 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -2396,15 +2396,6 @@ ISA::getCurSveVecLenInBits() const return (len + 1) * 128; } -void -ISA::zeroSveVecRegUpperPart(VecRegContainer &vc, unsigned eCount) -{ - auto vv = vc.as(); - for (int i = 2; i < eCount; ++i) { - vv[i] = 0; - } -} - void ISA::serialize(CheckpointOut &cp) const { diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index a0db3ee96d..2d2b33a61c 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -852,8 +852,16 @@ namespace ArmISA unsigned getCurSveVecLenInBitsAtReset() const { return sveVL * 128; } - static void zeroSveVecRegUpperPart(VecRegContainer &vc, - unsigned eCount); + template + static void + zeroSveVecRegUpperPart(Elem *v, unsigned eCount) + { + static_assert(sizeof(Elem) <= sizeof(uint64_t)); + eCount *= (sizeof(uint64_t) / sizeof(Elem)); + for (int i = 16 / sizeof(Elem); i < eCount; ++i) { + v[i] = 0; + } + } void serialize(CheckpointOut &cp) const override; void unserialize(CheckpointIn &cp) override; diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa index f635870b06..9b1ab84022 100644 --- a/src/arch/arm/isa/templates/sve_mem.isa +++ b/src/arch/arm/isa/templates/sve_mem.isa @@ -170,7 +170,7 @@ def template SveContigLoadExecute {{ %(rden_code)s; - fault = readMemAtomic(xc, EA, memData.raw_ptr(), + fault = readMemAtomic(xc, EA, memData.as(), memAccessSize, this->memAccessFlags, rdEn); %(fault_code)s; @@ -228,7 +228,7 @@ def template SveContigLoadCompleteAcc {{ auto memDataView = memData.as(); if (xc->readMemAccPredicate()) { - memcpy(memData.raw_ptr(), pkt->getPtr(), + memcpy(memData.as(), pkt->getPtr(), pkt->getSize()); } @@ -265,7 +265,7 @@ def template SveContigStoreExecute {{ } if (fault == NoFault) { - fault = writeMemAtomic(xc, memData.raw_ptr(), + fault = writeMemAtomic(xc, memData.as(), EA, memAccessSize, this->memAccessFlags, nullptr, wrEn); } @@ -303,7 +303,7 @@ def template SveContigStoreInitiateAcc {{ } if (fault == NoFault) { - fault = writeMemTiming(xc, memData.raw_ptr(), + fault = writeMemTiming(xc, memData.as(), EA, memAccessSize, this->memAccessFlags, nullptr, wrEn); } @@ -1001,7 +1001,7 @@ def template SveStructLoadExecute {{ auto memDataView = memData.as(); if (fault == NoFault) { - fault = readMemAtomic(xc, EA, memData.raw_ptr(), + fault = readMemAtomic(xc, EA, memData.as(), memAccessSize, this->memAccessFlags, std::vector(memAccessSize, true)); %(memacc_code)s; @@ -1059,7 +1059,7 @@ def template SveStructLoadCompleteAcc {{ ArmISA::VecRegContainer memData; auto memDataView = memData.as(); - memcpy(memData.raw_ptr(), pkt->getPtr(), + memcpy(memData.as(), pkt->getPtr(), pkt->getSize()); if (fault == NoFault) { @@ -1100,7 +1100,7 @@ def template SveStructStoreExecute {{ } if (fault == NoFault) { - fault = writeMemAtomic(xc, memData.raw_ptr(), + fault = writeMemAtomic(xc, memData.as(), EA, memAccessSize, this->memAccessFlags, nullptr, wrEn); } @@ -1138,7 +1138,7 @@ def template SveStructStoreInitiateAcc {{ } if (fault == NoFault) { - fault = writeMemTiming(xc, memData.raw_ptr(), + fault = writeMemTiming(xc, memData.as(), EA, memAccessSize, this->memAccessFlags, nullptr, wrEn); } diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc index be464485f7..805c139c2b 100644 --- a/src/arch/arm/nativetrace.cc +++ b/src/arch/arm/nativetrace.cc @@ -126,8 +126,7 @@ Trace::ArmNativeTrace::ThreadState::update(ThreadContext *tc) changed[STATE_CPSR] = (newState[STATE_CPSR] != oldState[STATE_CPSR]); for (int i = 0; i < NumVecV7ArchRegs; i++) { - auto vec(tc->readVecReg(RegId(VecRegClass,i)) - .as()); + auto *vec = tc->readVecReg(RegId(VecRegClass,i)).as(); newState[STATE_F0 + 2*i] = vec[0]; newState[STATE_F0 + 2*i + 1] = vec[1]; } diff --git a/src/arch/arm/regs/vec.hh b/src/arch/arm/regs/vec.hh index 5b32159c63..a209e52ccf 100644 --- a/src/arch/arm/regs/vec.hh +++ b/src/arch/arm/regs/vec.hh @@ -55,9 +55,8 @@ constexpr unsigned NumVecElemPerNeonVecReg = 4; constexpr unsigned NumVecElemPerVecReg = MaxSveVecLenInWords; using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; +using VecRegContainer = + ::VecRegContainer; using VecPredReg = ::VecPredRegT; diff --git a/src/arch/gcn3/registers.hh b/src/arch/gcn3/registers.hh index df1ef4ebce..21ff322f68 100644 --- a/src/arch/gcn3/registers.hh +++ b/src/arch/gcn3/registers.hh @@ -174,33 +174,8 @@ namespace Gcn3ISA */ const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize; - // typedefs for the various sizes/types of vector regs - using VecRegU8 = ::VecRegT; - using VecRegI8 = ::VecRegT; - using VecRegU16 = ::VecRegT; - using VecRegI16 = ::VecRegT; - using VecRegU32 = ::VecRegT; - using VecRegI32 = ::VecRegT; - using VecRegF32 = ::VecRegT; - using VecRegU64 = ::VecRegT; - using VecRegI64 = ::VecRegT; - using VecRegF64 = ::VecRegT; - // non-writeable versions of vector regs - using ConstVecRegU8 = ::VecRegT; - using ConstVecRegI8 = ::VecRegT; - using ConstVecRegU16 = ::VecRegT; - using ConstVecRegI16 = ::VecRegT; - using ConstVecRegU32 = ::VecRegT; - using ConstVecRegI32 = ::VecRegT; - using ConstVecRegF32 = ::VecRegT; - using ConstVecRegU64 = ::VecRegT; - using ConstVecRegI64 = ::VecRegT; - using ConstVecRegF64 = ::VecRegT; - - using VecRegContainerU8 = VecRegU8::Container; - using VecRegContainerU16 = VecRegU16::Container; - using VecRegContainerU32 = VecRegU32::Container; - using VecRegContainerU64 = VecRegU64::Container; + using VecRegContainerU32 = + VecRegContainer; struct StatusReg { diff --git a/src/arch/generic/vec_reg.hh b/src/arch/generic/vec_reg.hh index 64c131f0e3..070fcfb06a 100644 --- a/src/arch/generic/vec_reg.hh +++ b/src/arch/generic/vec_reg.hh @@ -97,9 +97,9 @@ #define __ARCH_GENERIC_VEC_REG_HH__ #include +#include #include #include -#include #include "base/cprintf.hh" #include "base/logging.hh" @@ -107,81 +107,6 @@ constexpr unsigned MaxVecRegLenInBytes = 4096; -template -class VecRegContainer; - -/** Vector Register Abstraction - * This generic class is a view in a particularization of MVC, to vector - * registers. There is a VecRegContainer that implements the model, and - * contains the data. To that model we can interpose different instantiations - * of VecRegT to view the container as a vector of NumElems elems of type - * VecElem. - * @tparam VecElem Type of each element of the vector. - * @tparam NumElems Amount of components of the vector. - * @tparam Const Indicate if the underlying container can be modified through - * the view. - */ -template -class VecRegT -{ - private: - /** Size of the register in bytes. */ - static constexpr inline size_t - size() - { - return sizeof(VecElem) * NumElems; - } - - public: - /** Container type alias. */ - using Container = typename std::conditional, - VecRegContainer>::type; - private: - /** My type alias. */ - using MyClass = VecRegT; - /** Reference to container. */ - Container& container; - - public: - /** Constructor. */ - VecRegT(Container& cnt) : container(cnt) {}; - - /** Index operator. */ - const VecElem & - operator[](size_t idx) const - { - return container.template raw_ptr()[idx]; - } - - /** Index operator. */ - template - typename std::enable_if_t - operator[](size_t idx) - { - return container.template raw_ptr()[idx]; - } - - /** Output stream operator. */ - friend std::ostream& - operator<<(std::ostream& os, const MyClass& vr) - { - /* 0-sized is not allowed */ - os << "[" << std::hex << (uint32_t)vr[0]; - for (uint32_t e = 1; e < vr.size(); e++) - os << " " << std::hex << (uint32_t)vr[e]; - os << ']'; - return os; - } - - /** - * Cast to VecRegContainer& - * It is useful to get the reference to the container for ISA tricks, - * because casting to reference prevents unnecessary copies. - */ - operator Container&() { return container; } -}; - /** * Vector Register Abstraction * This generic class is the model in a particularization of MVC, to vector @@ -203,7 +128,6 @@ class VecRegContainer private: // 16-byte aligned to support 128bit element view alignas(16) Container container; - using MyClass = VecRegContainer; public: VecRegContainer() {} @@ -215,19 +139,11 @@ class VecRegContainer /** Assignment operators. */ /** @{ */ /** From VecRegContainer */ - MyClass& - operator=(const MyClass& that) + VecRegContainer& + operator=(const VecRegContainer& that) { - if (&that == this) - return *this; - return *this = that.container; - } - - /** From appropriately sized uint8_t[]. */ - MyClass& - operator=(const Container& that) - { - std::memcpy(container.data(), that.data(), SIZE); + if (&that != this) + std::memcpy(container.data(), that.container.data(), SIZE); return *this; } /** @} */ @@ -252,13 +168,6 @@ class VecRegContainer return !operator==(that); } - /** Get pointer to bytes. */ - template - const Ret* raw_ptr() const { return (const Ret*)container.data(); } - - template - Ret* raw_ptr() { return (Ret*)container.data(); } - /** * View interposers. * Create a view of this container as a vector of VecElems with an @@ -270,34 +179,37 @@ class VecRegContainer * @tparam NumElem Amount of elements in the view. */ /** @{ */ - template - VecRegT - as() const - { - static_assert(SIZE % sizeof(VecElem) == 0, - "VecElem does not evenly divide the register size"); - static_assert(sizeof(VecElem) * NumElems <= SIZE, - "Viewing VecReg as something bigger than it is"); - return VecRegT(*this); - } - - template - VecRegT + template + VecElem * as() { static_assert(SIZE % sizeof(VecElem) == 0, "VecElem does not evenly divide the register size"); - static_assert(sizeof(VecElem) * NumElems <= SIZE, - "Viewing VecReg as something bigger than it is"); - return VecRegT(*this); + return (VecElem *)container.data(); + } + + template + const VecElem * + as() const + { + static_assert(SIZE % sizeof(VecElem) == 0, + "VecElem does not evenly divide the register size"); + return (VecElem *)container.data(); } friend std::ostream& - operator<<(std::ostream& os, const MyClass& v) + operator<<(std::ostream& os, const VecRegContainer& v) { + // When printing for human consumption, break into 4 byte chunks. + ccprintf(os, "["); + size_t count = 0; for (auto& b: v.container) { + if (count && (count % 4) == 0) + os << "_"; ccprintf(os, "%02x", b); + count++; } + ccprintf(os, "]"); return os; } @@ -305,7 +217,7 @@ class VecRegContainer /** * Used for serialization. */ - friend ShowParam; + friend ShowParam>; }; /** @@ -325,7 +237,7 @@ struct ParseParam> uint8_t b = 0; if (2 * i < value.size()) b = stoul(str.substr(i * 2, 2), nullptr, 16); - value.template raw_ptr()[i] = b; + value.template as()[i] = b; } return true; } @@ -350,9 +262,8 @@ struct ShowParam> /** @{ */ using DummyVecElem = uint32_t; constexpr unsigned DummyNumVecElemPerVecReg = 2; -using DummyVecReg = VecRegT; -using DummyConstVecReg = VecRegT; -using DummyVecRegContainer = DummyVecReg::Container; +using DummyVecRegContainer = + VecRegContainer; constexpr size_t DummyVecRegSizeBytes = DummyNumVecElemPerVecReg * sizeof(DummyVecElem); /** @} */ diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index 1f49262e57..dfe0271426 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -40,8 +40,6 @@ const int ZeroReg = 0; // Not applicable to MIPS using VecElem = ::DummyVecElem; -using VecReg = ::DummyVecReg; -using ConstVecReg = ::DummyConstVecReg; using VecRegContainer = ::DummyVecRegContainer; constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh index 3e96472dff..d56a6a7a2c 100644 --- a/src/arch/null/registers.hh +++ b/src/arch/null/registers.hh @@ -49,8 +49,6 @@ const RegIndex ZeroReg = 0; // Not applicable to null using VecElem = ::DummyVecElem; -using VecReg = ::DummyVecReg; -using ConstVecReg = ::DummyConstVecReg; using VecRegContainer = ::DummyVecRegContainer; constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh index 5bdc0588bd..75e9125ac7 100644 --- a/src/arch/power/registers.hh +++ b/src/arch/power/registers.hh @@ -40,8 +40,6 @@ namespace PowerISA // Not applicable to Power using VecElem = ::DummyVecElem; -using VecReg = ::DummyVecReg; -using ConstVecReg = ::DummyConstVecReg; using VecRegContainer = ::DummyVecRegContainer; constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; diff --git a/src/arch/riscv/registers.hh b/src/arch/riscv/registers.hh index df79a91256..71aa4aa6a0 100644 --- a/src/arch/riscv/registers.hh +++ b/src/arch/riscv/registers.hh @@ -93,8 +93,6 @@ static constexpr freg_t freg(uint_fast16_t f) { return {f}; } // Not applicable to RISC-V using VecElem = ::DummyVecElem; -using VecReg = ::DummyVecReg; -using ConstVecReg = ::DummyConstVecReg; using VecRegContainer = ::DummyVecRegContainer; constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh index 62fd3f557c..025fa28ecc 100644 --- a/src/arch/sparc/registers.hh +++ b/src/arch/sparc/registers.hh @@ -37,8 +37,6 @@ namespace SparcISA // Not applicable to SPARC using VecElem = ::DummyVecElem; -using VecReg = ::DummyVecReg; -using ConstVecReg = ::DummyConstVecReg; using VecRegContainer = ::DummyVecRegContainer; constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh index f52d4eab58..f94469bfd6 100644 --- a/src/arch/x86/registers.hh +++ b/src/arch/x86/registers.hh @@ -66,8 +66,6 @@ const int ZeroReg = NUM_INTREGS; // Not applicable to x86 using VecElem = ::DummyVecElem; -using VecReg = ::DummyVecReg; -using ConstVecReg = ::DummyConstVecReg; using VecRegContainer = ::DummyVecRegContainer; constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 55799294b7..8046be72c7 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -207,7 +207,7 @@ UnifiedRenameMap::switchMode(VecMode newVecMode) const size_t numElems = vecElemMap.numArchRegs(); const size_t elemsPerVec = numElems / numVecs; for (uint32_t i = 0; i < numVecs; i++) { - TheISA::VecReg dst = new_RF[i].as(); + TheISA::VecElem *dst = new_RF[i].as(); for (uint32_t l = 0; l < elemsPerVec; l++) { RegId s_rid(VecElemClass, i, l); PhysRegIdPtr s_prid = vecElemMap.lookup(s_rid); diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 5fa0d899ba..e8bcc14f9c 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -453,7 +453,7 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) { physVgprIdx = computeUnit->registerManager ->mapVgpr(this, regInitIdx); - TheGpuISA::VecRegU32 vgpr_x + TheGpuISA::VecElemU32 *vgpr_x = raw_vgpr.as(); for (int lane = 0; lane < workItemId[0].size(); ++lane) { @@ -469,7 +469,7 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) { physVgprIdx = computeUnit->registerManager ->mapVgpr(this, regInitIdx); - TheGpuISA::VecRegU32 vgpr_y + TheGpuISA::VecElemU32 *vgpr_y = raw_vgpr.as(); for (int lane = 0; lane < workItemId[1].size(); ++lane) { @@ -485,7 +485,7 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) { physVgprIdx = computeUnit->registerManager-> mapVgpr(this, regInitIdx); - TheGpuISA::VecRegU32 vgpr_z + TheGpuISA::VecElemU32 *vgpr_z = raw_vgpr.as(); for (int lane = 0; lane < workItemId[2].size(); ++lane) {