gpu-compute, dev-hsa: Fix doorbell for gfx900

gfx9 changed the size of the doorbell, and what the write index
is when the doorbell is rang. --gfx-version flag is used to set
the doorbell size

Change-Id: I48e4e57dc1c80a08133b17cdf3f92533b541f7c3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42220
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Kyle Roarty
2019-06-18 14:03:16 -04:00
committed by Matthew Poremba
parent ec6b325382
commit 529736a7ce
6 changed files with 47 additions and 31 deletions

View File

@@ -91,22 +91,22 @@ HSAPacketProcessor::~HSAPacketProcessor()
}
void
HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
{
hwSchdlr->unregisterQueue(queue_id);
hwSchdlr->unregisterQueue(queue_id, doorbellSize);
}
void
HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
uint32_t size)
uint32_t size, int doorbellSize)
{
DPRINTF(HSAPacketProcessor,
"%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
(void *)basePointer, queue_id, size);
hwSchdlr->registerNewQueue(hostReadIndexPointer,
basePointer, queue_id, size);
basePointer, queue_id, size, doorbellSize);
}
AddrRangeList
@@ -133,7 +133,16 @@ HSAPacketProcessor::write(Packet *pkt)
"%s: write of size %d to reg-offset %d (0x%x)\n",
__FUNCTION__, pkt->getSize(), daddr, daddr);
uint32_t doorbell_reg = pkt->getLE<uint32_t>();
int doorbellSize = gpu_device->driver()->doorbellSize();
assert(doorbellSize == pkt->getSize());
uint64_t doorbell_reg(0);
if (pkt->getSize() == 8)
doorbell_reg = pkt->getLE<uint64_t>() + 1;
else if (pkt->getSize() == 4)
doorbell_reg = pkt->getLE<uint32_t>();
else
fatal("invalid db size");
DPRINTF(HSAPacketProcessor,
"%s: write data 0x%x to offset %d (0x%x)\n",

View File

@@ -331,8 +331,8 @@ class HSAPacketProcessor: public DmaDevice
void setDeviceQueueDesc(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
uint32_t size);
void unsetDeviceQueueDesc(uint64_t queue_id);
uint32_t size, int doorbellSize);
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize);
void setDevice(GPUCommandProcessor * dev);
void updateReadIndex(int, uint32_t);
void getCommandsFromHost(int pid, uint32_t rl_idx);

View File

@@ -84,18 +84,13 @@ void
HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
uint32_t size)
uint32_t size, int doorbellSize)
{
assert(queue_id < MAX_ACTIVE_QUEUES);
// Map queue ID to doorbell.
// We are only using offset to pio base address as doorbell
// We use the same mapping function used by hsa runtime to do this mapping
//
// Originally
// #define VOID_PTR_ADD32(ptr,n)
// (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
// (Addr)VOID_PTR_ADD32(0, queue_id)
Addr db_offset = sizeof(uint32_t)*queue_id;
Addr db_offset = queue_id * doorbellSize;
if (dbMap.find(db_offset) != dbMap.end()) {
panic("Creating an already existing queue (queueID %d)", queue_id);
}
@@ -318,7 +313,7 @@ HWScheduler::isRLQIdle(uint32_t rl_idx)
}
void
HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
{
auto dbmap_iter = dbMap.find(db_addr);
if (dbmap_iter == dbMap.end()) {
@@ -335,17 +330,9 @@ HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
}
void
HWScheduler::unregisterQueue(uint64_t queue_id)
HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
{
// Pointer arithmetic on a null pointer is undefined behavior. Clang
// compilers therefore complain if the following reads:
// `(Addr)(VOID_PRT_ADD32(0, queue_id))`
//
// Originally
// #define VOID_PTR_ADD32(ptr,n)
// (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
// (Addr)VOID_PTR_ADD32(0, queue_id)
Addr db_offset = sizeof(uint32_t)*queue_id;
Addr db_offset = queue_id * doorbellSize;
auto dbmap_iter = dbMap.find(db_offset);
if (dbmap_iter == dbMap.end()) {
panic("Destroying a non-existing queue (db_offset %x)",

View File

@@ -52,12 +52,12 @@ class HWScheduler
: hsaPP(hsa_pp), nextALId(0), nextRLId(0),
wakeupDelay(wakeup_delay), schedWakeupEvent(this)
{}
void write(Addr db_addr, uint32_t doorbell_reg);
void write(Addr db_addr, uint64_t doorbell_reg);
void registerNewQueue(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
uint32_t size);
void unregisterQueue(uint64_t queue_id);
uint32_t size, int doorbellSize);
void unregisterQueue(uint64_t queue_id, int doorbellSize);
void wakeup();
void schedWakeup();
class SchedulerWakeupEvent : public Event

View File

@@ -149,18 +149,22 @@ GPUComputeDriver::allocateQueue(PortProxy &mem_proxy, Addr ioc_buf)
TypedBufferArg<kfd_ioctl_create_queue_args> args(ioc_buf);
args.copyIn(mem_proxy);
if ((sizeof(uint32_t) * queueId) > 4096) {
if ((doorbellSize() * queueId) > 4096) {
fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
}
args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
// for vega offset needs to include exact value of doorbell
if (doorbellSize())
args->doorbell_offset += queueId * doorbellSize();
args->queue_id = queueId++;
auto &hsa_pp = device->hsaPacketProc();
hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
args->ring_base_address, args->queue_id,
args->ring_size);
args->ring_size, doorbellSize());
args.copyOut(mem_proxy);
}
@@ -243,7 +247,8 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
args.copyIn(virt_proxy);
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
"queue offset %d\n", args->queue_id);
device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id);
device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
doorbellSize());
}
break;
case AMDKFD_IOC_SET_MEMORY_POLICY:

View File

@@ -81,6 +81,21 @@ class GPUComputeDriver final : public EmulatedDriver
*/
void setMtype(RequestPtr req);
int
doorbellSize()
{
switch (gfxVersion) {
case GfxVersion::gfx801:
case GfxVersion::gfx803:
return 4;
case GfxVersion::gfx900:
return 8;
default:
fatal("Invalid GPU type\n");
}
return 4;
}
class DriverWakeupEvent : public Event
{
public: