arch-vega: Implement memory aperture operands (#803)

Vega (gfx900) introduced new memory aperture registers that provide the base address and limit of the LDS and private (scratch) memory apertures. The compiler did not commonly use these registers until ROCm 6. Now that the compiler generates reads from these special registers, implement support for them. Tested with LULESH, which uses the SHARED_BASE register (LDS) with ROCm 6.0. This assembly appears to replace the S_GETREG_B32 emitted by the ROCm 5 compiler. Change-Id: Id2bd26ce8ef687c84a647fa2ac2da54d657913e5
2024-01-24 11:19:43 -08:00
parent 0ac110ac95
commit 44c78d843c
3 changed files with 54 additions and 4 deletions
--- a/src/arch/amdgpu/vega/gpu_registers.cc
+++ b/src/arch/amdgpu/vega/gpu_registers.cc
@@ -89,6 +89,18 @@ namespace VegaISA
          case REG_ZERO:
            reg_sym = "0";
            break;
+          case REG_SHARED_BASE:
+            reg_sym = "src_shared_base";
+            break;
+          case REG_SHARED_LIMIT:
+            reg_sym = "src_shared_limit";
+            break;
+          case REG_PRIVATE_BASE:
+            reg_sym = "src_private_base";
+            break;
+          case REG_PRIVATE_LIMIT:
+            reg_sym = "src_private_limit";
+            break;
          case REG_POS_HALF:
            reg_sym = "0.5";
            break;
--- a/src/arch/amdgpu/vega/gpu_registers.hh
+++ b/src/arch/amdgpu/vega/gpu_registers.hh
@@ -106,10 +106,10 @@ namespace VegaISA
        REG_RESERVED_25 = 232,
        REG_RESERVED_26 = 233,
        REG_RESERVED_27 = 234,
-        REG_RESERVED_28 = 235,
-        REG_RESERVED_29 = 236,
-        REG_RESERVED_30 = 237,
-        REG_RESERVED_31 = 238,
+        REG_SHARED_BASE = 235,
+        REG_SHARED_LIMIT = 236,
+        REG_PRIVATE_BASE = 237,
+        REG_PRIVATE_LIMIT = 238,
        REG_RESERVED_32 = 239,
        REG_POS_HALF = 240,
        REG_NEG_HALF = 241,
--- a/src/arch/amdgpu/vega/operand.hh
+++ b/src/arch/amdgpu/vega/operand.hh
@@ -37,6 +37,7 @@
 #include "arch/amdgpu/vega/gpu_registers.hh"
 #include "arch/generic/vec_reg.hh"
 #include "gpu-compute/scalar_register_file.hh"
+#include "gpu-compute/shader.hh"
 #include "gpu-compute/vector_register_file.hh"
 #include "gpu-compute/wavefront.hh"

@@ -547,6 +548,43 @@ namespace VegaISA
                assert(NumDwords == 1);
                srfData[0] = _gpuDynInst->srcLiteral();
                break;
+              case REG_SHARED_BASE:
+                {
+                    ComputeUnit *cu = _gpuDynInst->computeUnit();
+                    ScalarRegU64 shared_base = cu->shader->ldsApe().base;
+                    std::memcpy((void*)srfData.data(), (void*)&shared_base,
+                            sizeof(shared_base));
+                    DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n", shared_base);
+                }
+                break;
+              case REG_SHARED_LIMIT:
+                {
+                    ComputeUnit *cu = _gpuDynInst->computeUnit();
+                    ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
+                    std::memcpy((void*)srfData.data(), (void*)&shared_limit,
+                            sizeof(shared_limit));
+                    DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n", shared_limit);
+                }
+                break;
+              case REG_PRIVATE_BASE:
+                {
+                    ComputeUnit *cu = _gpuDynInst->computeUnit();
+                    ScalarRegU64 priv_base = cu->shader->scratchApe().base;
+                    std::memcpy((void*)srfData.data(), (void*)&priv_base,
+                            sizeof(priv_base));
+                    DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n", priv_base);
+                }
+                break;
+              case REG_PRIVATE_LIMIT:
+                {
+                    ComputeUnit *cu = _gpuDynInst->computeUnit();
+                    ScalarRegU64 priv_limit = cu->shader->scratchApe().limit;
+                    std::memcpy((void*)srfData.data(), (void*)&priv_limit,
+                            sizeof(priv_limit));
+                    DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
+                            priv_limit);
+                }
+                break;
              case REG_POS_HALF:
                {
                    typename OpTraits<DataType>::FloatT pos_half = 0.5;