arch-vega: Implement memory aperture operands (#803)
Vega (gfx900) introduced new memory aperture registers that provide the base address and limit for LDS and private (scratch) memory. The compiler did not commonly use these registers until ROCm 6. Now that the compiler generates reads from these special registers, implement support for them. Tested with LULESH, which uses the SHARED_BASE register (LDS) with ROCm 6.0. This assembly seems to replace the S_GETREG_B32 instruction emitted by the ROCm 5 compiler. Change-Id: Id2bd26ce8ef687c84a647fa2ac2da54d657913e5
This commit is contained in:
@@ -89,6 +89,18 @@ namespace VegaISA
|
||||
case REG_ZERO:
|
||||
reg_sym = "0";
|
||||
break;
|
||||
case REG_SHARED_BASE:
|
||||
reg_sym = "src_shared_base";
|
||||
break;
|
||||
case REG_SHARED_LIMIT:
|
||||
reg_sym = "src_shared_limit";
|
||||
break;
|
||||
case REG_PRIVATE_BASE:
|
||||
reg_sym = "src_private_base";
|
||||
break;
|
||||
case REG_PRIVATE_LIMIT:
|
||||
reg_sym = "src_private_limit";
|
||||
break;
|
||||
case REG_POS_HALF:
|
||||
reg_sym = "0.5";
|
||||
break;
|
||||
|
||||
@@ -106,10 +106,10 @@ namespace VegaISA
|
||||
REG_RESERVED_25 = 232,
|
||||
REG_RESERVED_26 = 233,
|
||||
REG_RESERVED_27 = 234,
|
||||
REG_RESERVED_28 = 235,
|
||||
REG_RESERVED_29 = 236,
|
||||
REG_RESERVED_30 = 237,
|
||||
REG_RESERVED_31 = 238,
|
||||
REG_SHARED_BASE = 235,
|
||||
REG_SHARED_LIMIT = 236,
|
||||
REG_PRIVATE_BASE = 237,
|
||||
REG_PRIVATE_LIMIT = 238,
|
||||
REG_RESERVED_32 = 239,
|
||||
REG_POS_HALF = 240,
|
||||
REG_NEG_HALF = 241,
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "arch/amdgpu/vega/gpu_registers.hh"
|
||||
#include "arch/generic/vec_reg.hh"
|
||||
#include "gpu-compute/scalar_register_file.hh"
|
||||
#include "gpu-compute/shader.hh"
|
||||
#include "gpu-compute/vector_register_file.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
@@ -547,6 +548,43 @@ namespace VegaISA
|
||||
assert(NumDwords == 1);
|
||||
srfData[0] = _gpuDynInst->srcLiteral();
|
||||
break;
|
||||
case REG_SHARED_BASE:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 shared_base = cu->shader->ldsApe().base;
|
||||
std::memcpy((void*)srfData.data(), (void*)&shared_base,
|
||||
sizeof(shared_base));
|
||||
DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n", shared_base);
|
||||
}
|
||||
break;
|
||||
case REG_SHARED_LIMIT:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
|
||||
std::memcpy((void*)srfData.data(), (void*)&shared_limit,
|
||||
sizeof(shared_limit));
|
||||
DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n", shared_limit);
|
||||
}
|
||||
break;
|
||||
case REG_PRIVATE_BASE:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 priv_base = cu->shader->scratchApe().base;
|
||||
std::memcpy((void*)srfData.data(), (void*)&priv_base,
|
||||
sizeof(priv_base));
|
||||
DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n", priv_base);
|
||||
}
|
||||
break;
|
||||
case REG_PRIVATE_LIMIT:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 priv_limit = cu->shader->scratchApe().limit;
|
||||
std::memcpy((void*)srfData.data(), (void*)&priv_limit,
|
||||
sizeof(priv_limit));
|
||||
DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
|
||||
priv_limit);
|
||||
}
|
||||
break;
|
||||
case REG_POS_HALF:
|
||||
{
|
||||
typename OpTraits<DataType>::FloatT pos_half = 0.5;
|
||||
|
||||
Reference in New Issue
Block a user