arch-vega: Implement memory aperture operands (#803)

Vega (gfx900) introduced new memory aperture registers that expose the
base address and limit for LDS and private (scratch) memory. These were
not commonly used by the compiler until ROCm 6. Now that the compiler
is generating reads from these special registers, implement support
for them.

Tested with LULESH, which uses the SHARED_BASE register (LDS) with
ROCm 6.0. This assembly appears to replace the S_GETREG_B32 emitted by
the ROCm 5 compiler.

Change-Id: Id2bd26ce8ef687c84a647fa2ac2da54d657913e5
This commit is contained in:
Matthew Poremba
2024-01-24 11:19:43 -08:00
committed by GitHub
parent 0ac110ac95
commit 44c78d843c
3 changed files with 54 additions and 4 deletions

View File

@@ -89,6 +89,18 @@ namespace VegaISA
case REG_ZERO:
reg_sym = "0";
break;
case REG_SHARED_BASE:
reg_sym = "src_shared_base";
break;
case REG_SHARED_LIMIT:
reg_sym = "src_shared_limit";
break;
case REG_PRIVATE_BASE:
reg_sym = "src_private_base";
break;
case REG_PRIVATE_LIMIT:
reg_sym = "src_private_limit";
break;
case REG_POS_HALF:
reg_sym = "0.5";
break;

View File

@@ -106,10 +106,10 @@ namespace VegaISA
REG_RESERVED_25 = 232,
REG_RESERVED_26 = 233,
REG_RESERVED_27 = 234,
REG_RESERVED_28 = 235,
REG_RESERVED_29 = 236,
REG_RESERVED_30 = 237,
REG_RESERVED_31 = 238,
REG_SHARED_BASE = 235,
REG_SHARED_LIMIT = 236,
REG_PRIVATE_BASE = 237,
REG_PRIVATE_LIMIT = 238,
REG_RESERVED_32 = 239,
REG_POS_HALF = 240,
REG_NEG_HALF = 241,

View File

@@ -37,6 +37,7 @@
#include "arch/amdgpu/vega/gpu_registers.hh"
#include "arch/generic/vec_reg.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
@@ -547,6 +548,43 @@ namespace VegaISA
assert(NumDwords == 1);
srfData[0] = _gpuDynInst->srcLiteral();
break;
case REG_SHARED_BASE:
{
ComputeUnit *cu = _gpuDynInst->computeUnit();
ScalarRegU64 shared_base = cu->shader->ldsApe().base;
std::memcpy((void*)srfData.data(), (void*)&shared_base,
sizeof(shared_base));
DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n", shared_base);
}
break;
case REG_SHARED_LIMIT:
{
ComputeUnit *cu = _gpuDynInst->computeUnit();
ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
std::memcpy((void*)srfData.data(), (void*)&shared_limit,
sizeof(shared_limit));
DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n", shared_limit);
}
break;
case REG_PRIVATE_BASE:
{
ComputeUnit *cu = _gpuDynInst->computeUnit();
ScalarRegU64 priv_base = cu->shader->scratchApe().base;
std::memcpy((void*)srfData.data(), (void*)&priv_base,
sizeof(priv_base));
DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n", priv_base);
}
break;
case REG_PRIVATE_LIMIT:
{
ComputeUnit *cu = _gpuDynInst->computeUnit();
ScalarRegU64 priv_limit = cu->shader->scratchApe().limit;
std::memcpy((void*)srfData.data(), (void*)&priv_limit,
sizeof(priv_limit));
DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
priv_limit);
}
break;
case REG_POS_HALF:
{
typename OpTraits<DataType>::FloatT pos_half = 0.5;