From 44c78d843c6cb865097492106cba452d7d1782bb Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 24 Jan 2024 11:19:43 -0800 Subject: [PATCH] arch-vega: Implement memory aperture operands (#803) Vega (gfx900) introduced new memory aperture registers to get the base address and limit for LDS and private (scratch) memory. These have not commonly been used by the compiler until ROCm 6. Now that the compiler is generating reads from these special registers, implement the support for them. Tested with LULESH which is using the SHARED_BASE register (LDS) with ROCm 6.0. This assembly seems to replace S_GETREG_B32 emitted by the ROCm 5 compiler. Change-Id: Id2bd26ce8ef687c84a647fa2ac2da54d657913e5 --- src/arch/amdgpu/vega/gpu_registers.cc | 12 +++++++++ src/arch/amdgpu/vega/gpu_registers.hh | 8 +++--- src/arch/amdgpu/vega/operand.hh | 38 +++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/arch/amdgpu/vega/gpu_registers.cc b/src/arch/amdgpu/vega/gpu_registers.cc index b7404379cc..302a89e2b8 100644 --- a/src/arch/amdgpu/vega/gpu_registers.cc +++ b/src/arch/amdgpu/vega/gpu_registers.cc @@ -89,6 +89,18 @@ namespace VegaISA case REG_ZERO: reg_sym = "0"; break; + case REG_SHARED_BASE: + reg_sym = "src_shared_base"; + break; + case REG_SHARED_LIMIT: + reg_sym = "src_shared_limit"; + break; + case REG_PRIVATE_BASE: + reg_sym = "src_private_base"; + break; + case REG_PRIVATE_LIMIT: + reg_sym = "src_private_limit"; + break; case REG_POS_HALF: reg_sym = "0.5"; break; diff --git a/src/arch/amdgpu/vega/gpu_registers.hh b/src/arch/amdgpu/vega/gpu_registers.hh index 63929d5917..90fd601213 100644 --- a/src/arch/amdgpu/vega/gpu_registers.hh +++ b/src/arch/amdgpu/vega/gpu_registers.hh @@ -106,10 +106,10 @@ namespace VegaISA REG_RESERVED_25 = 232, REG_RESERVED_26 = 233, REG_RESERVED_27 = 234, - REG_RESERVED_28 = 235, - REG_RESERVED_29 = 236, - REG_RESERVED_30 = 237, - REG_RESERVED_31 = 238, + REG_SHARED_BASE = 235, + REG_SHARED_LIMIT = 236, + REG_PRIVATE_BASE = 237, + REG_PRIVATE_LIMIT = 238, REG_RESERVED_32 = 239, REG_POS_HALF = 240, REG_NEG_HALF = 241, diff --git a/src/arch/amdgpu/vega/operand.hh b/src/arch/amdgpu/vega/operand.hh index 1760bd7213..43ff3400e6 100644 --- a/src/arch/amdgpu/vega/operand.hh +++ b/src/arch/amdgpu/vega/operand.hh @@ -37,6 +37,7 @@ #include "arch/amdgpu/vega/gpu_registers.hh" #include "arch/generic/vec_reg.hh" #include "gpu-compute/scalar_register_file.hh" +#include "gpu-compute/shader.hh" #include "gpu-compute/vector_register_file.hh" #include "gpu-compute/wavefront.hh" @@ -547,6 +548,43 @@ namespace VegaISA assert(NumDwords == 1); srfData[0] = _gpuDynInst->srcLiteral(); break; + case REG_SHARED_BASE: + { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 shared_base = cu->shader->ldsApe().base; + std::memcpy((void*)srfData.data(), (void*)&shared_base, + sizeof(shared_base)); + DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n", shared_base); + } + break; + case REG_SHARED_LIMIT: + { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 shared_limit = cu->shader->ldsApe().limit; + std::memcpy((void*)srfData.data(), (void*)&shared_limit, + sizeof(shared_limit)); + DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n", shared_limit); + } + break; + case REG_PRIVATE_BASE: + { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 priv_base = cu->shader->scratchApe().base; + std::memcpy((void*)srfData.data(), (void*)&priv_base, + sizeof(priv_base)); + DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n", priv_base); + } + break; + case REG_PRIVATE_LIMIT: + { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 priv_limit = cu->shader->scratchApe().limit; + std::memcpy((void*)srfData.data(), (void*)&priv_limit, + sizeof(priv_limit)); + DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n", + priv_limit); + } + break; case REG_POS_HALF: { typename OpTraits::FloatT pos_half = 0.5;