From 1812041dc002334864996d92afd8c0f405704527 Mon Sep 17 00:00:00 2001 From: Kyle Roarty Date: Wed, 30 Jun 2021 16:41:13 -0500 Subject: [PATCH] gpu-compute: Update GET_PROCESS_APERTURES IOCTLs The apertures for non-gfx801 GPUs are set differently. If the apertures aren't set properly, ROCm will error out. This change sets the apertures appropriately based on the gfx version of the simulated GPU. It also adds in new functions to set the scratch and lds apertures in GFX9 to mimic the linux kernel. Change-Id: I1fa6f60bc20c7b6eb3896057841d96846460a9f8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47529 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- src/gpu-compute/gpu_compute_driver.cc | 108 ++++++++++++++++++++------ src/gpu-compute/gpu_compute_driver.hh | 2 + 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 472ced41f8..2fe5275d5e 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -316,18 +316,50 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) * ensure that the base/limit addresses are * calculated correctly. */ - args->process_apertures[i].scratch_base - = scratchApeBase(i + 1); + + switch (gfxVersion) { + case GfxVersion::gfx801: + case GfxVersion::gfx803: + args->process_apertures[i].scratch_base = + scratchApeBase(i + 1); + args->process_apertures[i].lds_base = + ldsApeBase(i + 1); + break; + case GfxVersion::gfx900: + args->process_apertures[i].scratch_base = + scratchApeBaseV9(); + args->process_apertures[i].lds_base = + ldsApeBaseV9(); + break; + default: + fatal("Invalid gfx version\n"); + } + + // GFX8 and GFX9 set lds and scratch limits the same way args->process_apertures[i].scratch_limit = scratchApeLimit(args->process_apertures[i].scratch_base); - args->process_apertures[i].lds_base = ldsApeBase(i + 1); args->process_apertures[i].lds_limit = ldsApeLimit(args->process_apertures[i].lds_base); - args->process_apertures[i].gpuvm_base = gpuVmApeBase(i + 1); - args->process_apertures[i].gpuvm_limit = - gpuVmApeLimit(args->process_apertures[i].gpuvm_base); + switch (gfxVersion) { + case GfxVersion::gfx801: + args->process_apertures[i].gpuvm_base = + gpuVmApeBase(i + 1); + args->process_apertures[i].gpuvm_limit = + gpuVmApeLimit(args->process_apertures[i].gpuvm_base); + break; + case GfxVersion::gfx803: + case GfxVersion::gfx900: + // Taken from SVM_USE_BASE in Linux kernel + args->process_apertures[i].gpuvm_base = 0x1000000ull; + // Taken from AMDGPU_GMC_HOLE_START in Linux kernel + args->process_apertures[i].gpuvm_limit = + 0x0000800000000000ULL - 1; + break; + default: + fatal("Invalid gfx version"); + } // NOTE: Must match ID populated by hsaTopology.py // @@ -396,14 +428,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) 47) != 0x1ffff); assert(bits(args->process_apertures[i].lds_limit, 63, 47) != 0); - assert(bits(args->process_apertures[i].gpuvm_base, 63, - 47) != 0x1ffff); - assert(bits(args->process_apertures[i].gpuvm_base, 63, - 47) != 0); - assert(bits(args->process_apertures[i].gpuvm_limit, 63, - 47) != 0x1ffff); - assert(bits(args->process_apertures[i].gpuvm_limit, 63, - 47) != 0); } args.copyOut(virt_proxy); @@ -593,13 +617,41 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) TypedBufferArg ape_args (ioc_args->kfd_process_device_apertures_ptr); - ape_args->scratch_base = scratchApeBase(i + 1); + switch (gfxVersion) { + case GfxVersion::gfx801: + case GfxVersion::gfx803: + ape_args->scratch_base = scratchApeBase(i + 1); + ape_args->lds_base = ldsApeBase(i + 1); + break; + case GfxVersion::gfx900: + ape_args->scratch_base = scratchApeBaseV9(); + ape_args->lds_base = ldsApeBaseV9(); + break; + default: + fatal("Invalid gfx version\n"); + } + + // GFX8 and GFX9 set lds and scratch limits the same way ape_args->scratch_limit = scratchApeLimit(ape_args->scratch_base); - ape_args->lds_base = ldsApeBase(i + 1); ape_args->lds_limit = ldsApeLimit(ape_args->lds_base); - ape_args->gpuvm_base = gpuVmApeBase(i + 1); - ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base); + + switch (gfxVersion) { + case GfxVersion::gfx801: + ape_args->gpuvm_base = gpuVmApeBase(i + 1); + ape_args->gpuvm_limit = + gpuVmApeLimit(ape_args->gpuvm_base); + break; + case GfxVersion::gfx803: + case GfxVersion::gfx900: + // Taken from SVM_USE_BASE in Linux kernel + ape_args->gpuvm_base = 0x1000000ull; + // Taken from AMDGPU_GMC_HOLE_START in Linux kernel + ape_args->gpuvm_limit = 0x0000800000000000ULL - 1; + break; + default: + fatal("Invalid gfx version\n"); + } // NOTE: Must match ID populated by hsaTopology.py if (isdGPU) { @@ -631,10 +683,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) assert(bits(ape_args->lds_base, 63, 47) != 0); assert(bits(ape_args->lds_limit, 63, 47) != 0x1ffff); assert(bits(ape_args->lds_limit, 63, 47) != 0); - assert(bits(ape_args->gpuvm_base, 63, 47) != 0x1ffff); - assert(bits(ape_args->gpuvm_base, 63, 47) != 0); - assert(bits(ape_args->gpuvm_limit, 63, 47) != 0x1ffff); - assert(bits(ape_args->gpuvm_limit, 63, 47) != 0); ape_args.copyOut(virt_proxy); } @@ -895,6 +943,14 @@ GPUComputeDriver::scratchApeBase(int gpuNum) const return ((Addr)gpuNum << 61) + 0x100000000L; } +// Used for GFX9 devices +// From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel +Addr +GPUComputeDriver::scratchApeBaseV9() const +{ + return ((Addr)0x1 << 48); +} + Addr GPUComputeDriver::scratchApeLimit(Addr apeBase) const { @@ -907,6 +963,14 @@ GPUComputeDriver::ldsApeBase(int gpuNum) const return ((Addr)gpuNum << 61) + 0x0; } +//Used for GFX9 devices +// From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel +Addr +GPUComputeDriver::ldsApeBaseV9() const +{ + return ((Addr)0x2 << 48); +} + Addr GPUComputeDriver::ldsApeLimit(Addr apeBase) const { diff --git a/src/gpu-compute/gpu_compute_driver.hh b/src/gpu-compute/gpu_compute_driver.hh index 440805295d..9441baaf51 100644 --- a/src/gpu-compute/gpu_compute_driver.hh +++ b/src/gpu-compute/gpu_compute_driver.hh @@ -228,8 +228,10 @@ class GPUComputeDriver final : public EmulatedDriver Addr gpuVmApeBase(int gpuNum) const; Addr gpuVmApeLimit(Addr apeBase) const; Addr scratchApeBase(int gpuNum) const; + Addr scratchApeBaseV9() const; Addr scratchApeLimit(Addr apeBase) const; Addr ldsApeBase(int gpuNum) const; + Addr ldsApeBaseV9() const; Addr ldsApeLimit(Addr apeBase) const; /**