From d019912efaa4df3f09235906c5390e6614935e92 Mon Sep 17 00:00:00 2001 From: Michael LeBeane Date: Fri, 26 Oct 2018 11:32:01 -0400 Subject: [PATCH] dev-hsa: Fix doorbell mmap for APU Commit id ef44dc9a removed mmap-based doorbell allocation since dGPUs use ioctls instead. However, APUs still need this to work correctly. Add that logic back in as well as some new logic to distinguish doorbell mmaps from other types. Also add some additional commentary regarding Event page mmaps. Change-Id: I8507ac85c8f07886d0fb4f95bde5e18a7790eab8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42218 Tested-by: kokoro Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair --- src/dev/hsa/hsa_driver.cc | 72 ++++++++++++++++----------------- src/dev/hsa/kfd_event_defines.h | 8 ++++ 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/dev/hsa/hsa_driver.cc b/src/dev/hsa/hsa_driver.cc index f2db43635c..40f00f3a77 100644 --- a/src/dev/hsa/hsa_driver.cc +++ b/src/dev/hsa/hsa_driver.cc @@ -70,48 +70,41 @@ Addr HSADriver::mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) { - // Is this a signal event mmap - bool is_event_mmap = false; - // If addr == 0, then we may need to do mmap. 
- bool should_mmap = (start == 0); auto process = tc->getProcessPtr(); auto mem_state = process->memState; - // Check if mmap is for signal events first - if (((offset >> PAGE_SHIFT) & KFD_MMAP_TYPE_MASK) == - KFD_MMAP_TYPE_EVENTS) { - is_event_mmap = true; - DPRINTF(HSADriver, "amdkfd mmap for events(start: %p, length: 0x%x," - "offset: 0x%x, )\n", start, length, offset); - panic_if(start != 0, - "Start address should be provided by KFD\n"); - panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT, - "Requested length %d, expected length %d; length mismatch\n", - length, 8 * KFD_SIGNAL_EVENT_LIMIT); - // For signal event, do mmap only is eventPage is uninitialized - should_mmap = (!eventPage); - } else { - DPRINTF(HSADriver, "amdkfd doorbell mmap (start: %p, length: 0x%x," - "offset: 0x%x)\n", start, length, offset); - } - // Extend global mmap region if necessary. - if (should_mmap) { - // Assume mmap grows down, as in x86 Linux - start = mem_state->getMmapEnd() - length; - mem_state->setMmapEnd(start); - } + Addr pg_off = offset >> PAGE_SHIFT; + Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK; + DPRINTF(HSADriver, "amdkfd mmap (start: %p, length: 0x%x," + "offset: 0x%x)\n", start, length, offset); - if (is_event_mmap) { - if (should_mmap) { - eventPage = start; - } - } else { - // Now map this virtual address to our PIO doorbell interface - // in the page tables (non-cacheable) - process->pTable->map(start, device->hsaPacketProc().pioAddr, - length, false); - - DPRINTF(HSADriver, "amdkfd doorbell mapped to %xp\n", start); + switch (mmap_type) { + case KFD_MMAP_TYPE_DOORBELL: + DPRINTF(HSADriver, "amdkfd mmap type DOORBELL offset\n"); + start = mem_state->extendMmap(length); + process->pTable->map(start, device->hsaPacketProc().pioAddr, + length, false); + break; + case KFD_MMAP_TYPE_EVENTS: + DPRINTF(HSADriver, "amdkfd mmap type EVENTS offset\n"); + panic_if(start != 0, + "Start address should be provided by KFD\n"); + panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT, + 
"Requested length %d, expected length %d; length " + "mismatch\n", length, 8 * KFD_SIGNAL_EVENT_LIMIT); + /** + * We don't actually access these pages. We just need to reserve + * some VA space. See commit id 5ce8abce for details on how + * events are currently implemented. + */ + if (!eventPage) { + eventPage = mem_state->extendMmap(length); + start = eventPage; + } + break; + default: + warn_once("Unrecognized kfd mmap type %llx\n", mmap_type); + break; } return start; @@ -133,6 +126,9 @@ HSADriver::allocateQueue(ThreadContext *tc, Addr ioc_buf) fatal("%s: Exceeded maximum number of HSA queues allowed\n", name()); } + args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL | + KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT; + args->queue_id = queueId++; auto &hsa_pp = device->hsaPacketProc(); hsa_pp.setDeviceQueueDesc(args->read_pointer_address, diff --git a/src/dev/hsa/kfd_event_defines.h b/src/dev/hsa/kfd_event_defines.h index 0202b3b2e0..f52bb59cb2 100644 --- a/src/dev/hsa/kfd_event_defines.h +++ b/src/dev/hsa/kfd_event_defines.h @@ -35,6 +35,8 @@ #include "dev/hsa/kfd_ioctl.h" +#define KFD_GPU_ID_HASH_WIDTH 16 + #define PAGE_SHIFT 12 #define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) @@ -42,4 +44,10 @@ #define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) #define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ + << KFD_MMAP_GPU_ID_SHIFT) +#define KFD_MMAP_GPU_ID(gpu_id) \ + ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT) & KFD_MMAP_GPU_ID_MASK) + #endif