gpu-compute: Set LDS/scratch aperture base register (#247)

Starting with gfx900 (Vega), the LDS and scratch apertures can be queried
using a new s_getreg_b32 instruction. If the instruction is called with
the SH_MEM_BASES argument, it returns the upper 16 bits of a 64-bit
address for the LDS and scratch apertures. The current addresses cannot
be encoded in this register, so the addresses are changed to have their
lower 48 bits be all zeros, in addition to writing the bases register.
This commit is contained in:
Bobby R. Bruce
2023-08-31 17:38:08 -07:00
committed by GitHub

View File

@@ -41,6 +41,7 @@
#include "debug/GPUMem.hh"
#include "debug/GPUShader.hh"
#include "debug/GPUWgLatency.hh"
#include "dev/amdgpu/hwreg_defines.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/gpu_static_inst.hh"
@@ -72,15 +73,25 @@ Shader::Shader(const Params &p) : ClockedObject(p),
gpuCmdProc.setShader(this);
_dispatcher.setShader(this);
// These apertures are set by the driver. In full system mode that is done
// using a PM4 packet but the emulated SE mode driver does not set them
// explicitly, so we need to define some reasonable defaults here.
_gpuVmApe.base = ((Addr)1 << 61) + 0x1000000000000L;
_gpuVmApe.limit = (_gpuVmApe.base & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
_ldsApe.base = ((Addr)1 << 61) + 0x0;
_ldsApe.base = 0x1000000000000;
_ldsApe.limit = (_ldsApe.base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
_scratchApe.base = ((Addr)1 << 61) + 0x100000000L;
_scratchApe.base = 0x2000000000000;
_scratchApe.limit = (_scratchApe.base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
// The scratch and LDS addresses can be queried starting in gfx900. The
// base addresses are in the SH_MEM_BASES 32-bit register. The upper 16
// bits are for the LDS address and the lower 16 bits are for the scratch
// address. In both cases the 16 bits represent bits 63:48 of the address.
// This means bits 47:0 of each base address are always zero.
setHwReg(HW_REG_SH_MEM_BASES, 0x00010002);
shHiddenPrivateBaseVmid = 0;
cuList.resize(n_cu);