dev-hsa: Fix doorbell mmap for APU

Commit id ef44dc9a removed mmap-based doorbell allocation since dGPUs
use ioctl's instead.  However, APUs still need this to work correctly.
Add that logic back in as well as some new logic to distinguish doorbells
mmaps from other types. Also add some additional commentary regarding
Event page mmaps.

Change-Id: I8507ac85c8f07886d0fb4f95bde5e18a7790eab8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42218
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Michael LeBeane
2018-10-26 11:32:01 -04:00
committed by Matthew Poremba
parent eb09361eef
commit d019912efa
2 changed files with 42 additions and 38 deletions

View File

@@ -70,48 +70,41 @@ Addr
HSADriver::mmap(ThreadContext *tc, Addr start, uint64_t length, int prot,
int tgt_flags, int tgt_fd, off_t offset)
{
// Is this a signal event mmap
bool is_event_mmap = false;
// If addr == 0, then we may need to do mmap.
bool should_mmap = (start == 0);
auto process = tc->getProcessPtr();
auto mem_state = process->memState;
// Check if mmap is for signal events first
if (((offset >> PAGE_SHIFT) & KFD_MMAP_TYPE_MASK) ==
KFD_MMAP_TYPE_EVENTS) {
is_event_mmap = true;
DPRINTF(HSADriver, "amdkfd mmap for events(start: %p, length: 0x%x,"
"offset: 0x%x, )\n", start, length, offset);
panic_if(start != 0,
"Start address should be provided by KFD\n");
panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
"Requested length %d, expected length %d; length mismatch\n",
length, 8 * KFD_SIGNAL_EVENT_LIMIT);
// For signal event, do mmap only is eventPage is uninitialized
should_mmap = (!eventPage);
} else {
DPRINTF(HSADriver, "amdkfd doorbell mmap (start: %p, length: 0x%x,"
"offset: 0x%x)\n", start, length, offset);
}
// Extend global mmap region if necessary.
if (should_mmap) {
// Assume mmap grows down, as in x86 Linux
start = mem_state->getMmapEnd() - length;
mem_state->setMmapEnd(start);
}
Addr pg_off = offset >> PAGE_SHIFT;
Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
DPRINTF(HSADriver, "amdkfd mmap (start: %p, length: 0x%x,"
"offset: 0x%x)\n", start, length, offset);
if (is_event_mmap) {
if (should_mmap) {
eventPage = start;
}
} else {
// Now map this virtual address to our PIO doorbell interface
// in the page tables (non-cacheable)
process->pTable->map(start, device->hsaPacketProc().pioAddr,
length, false);
DPRINTF(HSADriver, "amdkfd doorbell mapped to %xp\n", start);
switch (mmap_type) {
case KFD_MMAP_TYPE_DOORBELL:
DPRINTF(HSADriver, "amdkfd mmap type DOORBELL offset\n");
start = mem_state->extendMmap(length);
process->pTable->map(start, device->hsaPacketProc().pioAddr,
length, false);
break;
case KFD_MMAP_TYPE_EVENTS:
DPRINTF(HSADriver, "amdkfd mmap type EVENTS offset\n");
panic_if(start != 0,
"Start address should be provided by KFD\n");
panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
"Requested length %d, expected length %d; length "
"mismatch\n", length, 8 * KFD_SIGNAL_EVENT_LIMIT);
/**
* We don't actually access these pages. We just need to reserve
* some VA space. See commit id 5ce8abce for details on how
* events are currently implemented.
*/
if (!eventPage) {
eventPage = mem_state->extendMmap(length);
start = eventPage;
}
break;
default:
warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
break;
}
return start;
@@ -133,6 +126,9 @@ HSADriver::allocateQueue(ThreadContext *tc, Addr ioc_buf)
fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
}
args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
args->queue_id = queueId++;
auto &hsa_pp = device->hsaPacketProc();
hsa_pp.setDeviceQueueDesc(args->read_pointer_address,

View File

@@ -35,6 +35,8 @@
#include "dev/hsa/kfd_ioctl.h"
#define KFD_GPU_ID_HASH_WIDTH 16
#define PAGE_SHIFT 12
#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
@@ -42,4 +44,10 @@
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) \
((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT) & KFD_MMAP_GPU_ID_MASK)
#endif