gpu-compute, dev-hsa: Fix doorbell for gfx900
gfx9 changed the size of the doorbell, and what the write index is when the doorbell is rang. --gfx-version flag is used to set the doorbell size Change-Id: I48e4e57dc1c80a08133b17cdf3f92533b541f7c3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42220 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Matthew Poremba
parent
ec6b325382
commit
529736a7ce
@@ -91,22 +91,22 @@ HSAPacketProcessor::~HSAPacketProcessor()
|
||||
}
|
||||
|
||||
void
|
||||
HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
|
||||
HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
|
||||
{
|
||||
hwSchdlr->unregisterQueue(queue_id);
|
||||
hwSchdlr->unregisterQueue(queue_id, doorbellSize);
|
||||
}
|
||||
|
||||
void
|
||||
HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
|
||||
uint64_t basePointer,
|
||||
uint64_t queue_id,
|
||||
uint32_t size)
|
||||
uint32_t size, int doorbellSize)
|
||||
{
|
||||
DPRINTF(HSAPacketProcessor,
|
||||
"%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
|
||||
(void *)basePointer, queue_id, size);
|
||||
hwSchdlr->registerNewQueue(hostReadIndexPointer,
|
||||
basePointer, queue_id, size);
|
||||
basePointer, queue_id, size, doorbellSize);
|
||||
}
|
||||
|
||||
AddrRangeList
|
||||
@@ -133,7 +133,16 @@ HSAPacketProcessor::write(Packet *pkt)
|
||||
"%s: write of size %d to reg-offset %d (0x%x)\n",
|
||||
__FUNCTION__, pkt->getSize(), daddr, daddr);
|
||||
|
||||
uint32_t doorbell_reg = pkt->getLE<uint32_t>();
|
||||
int doorbellSize = gpu_device->driver()->doorbellSize();
|
||||
assert(doorbellSize == pkt->getSize());
|
||||
|
||||
uint64_t doorbell_reg(0);
|
||||
if (pkt->getSize() == 8)
|
||||
doorbell_reg = pkt->getLE<uint64_t>() + 1;
|
||||
else if (pkt->getSize() == 4)
|
||||
doorbell_reg = pkt->getLE<uint32_t>();
|
||||
else
|
||||
fatal("invalid db size");
|
||||
|
||||
DPRINTF(HSAPacketProcessor,
|
||||
"%s: write data 0x%x to offset %d (0x%x)\n",
|
||||
|
||||
@@ -331,8 +331,8 @@ class HSAPacketProcessor: public DmaDevice
|
||||
void setDeviceQueueDesc(uint64_t hostReadIndexPointer,
|
||||
uint64_t basePointer,
|
||||
uint64_t queue_id,
|
||||
uint32_t size);
|
||||
void unsetDeviceQueueDesc(uint64_t queue_id);
|
||||
uint32_t size, int doorbellSize);
|
||||
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize);
|
||||
void setDevice(GPUCommandProcessor * dev);
|
||||
void updateReadIndex(int, uint32_t);
|
||||
void getCommandsFromHost(int pid, uint32_t rl_idx);
|
||||
|
||||
@@ -84,18 +84,13 @@ void
|
||||
HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
|
||||
uint64_t basePointer,
|
||||
uint64_t queue_id,
|
||||
uint32_t size)
|
||||
uint32_t size, int doorbellSize)
|
||||
{
|
||||
assert(queue_id < MAX_ACTIVE_QUEUES);
|
||||
// Map queue ID to doorbell.
|
||||
// We are only using offset to pio base address as doorbell
|
||||
// We use the same mapping function used by hsa runtime to do this mapping
|
||||
//
|
||||
// Originally
|
||||
// #define VOID_PTR_ADD32(ptr,n)
|
||||
// (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
|
||||
// (Addr)VOID_PTR_ADD32(0, queue_id)
|
||||
Addr db_offset = sizeof(uint32_t)*queue_id;
|
||||
Addr db_offset = queue_id * doorbellSize;
|
||||
if (dbMap.find(db_offset) != dbMap.end()) {
|
||||
panic("Creating an already existing queue (queueID %d)", queue_id);
|
||||
}
|
||||
@@ -318,7 +313,7 @@ HWScheduler::isRLQIdle(uint32_t rl_idx)
|
||||
}
|
||||
|
||||
void
|
||||
HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
|
||||
HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
|
||||
{
|
||||
auto dbmap_iter = dbMap.find(db_addr);
|
||||
if (dbmap_iter == dbMap.end()) {
|
||||
@@ -335,17 +330,9 @@ HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
|
||||
}
|
||||
|
||||
void
|
||||
HWScheduler::unregisterQueue(uint64_t queue_id)
|
||||
HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
|
||||
{
|
||||
// Pointer arithmetic on a null pointer is undefined behavior. Clang
|
||||
// compilers therefore complain if the following reads:
|
||||
// `(Addr)(VOID_PRT_ADD32(0, queue_id))`
|
||||
//
|
||||
// Originally
|
||||
// #define VOID_PTR_ADD32(ptr,n)
|
||||
// (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
|
||||
// (Addr)VOID_PTR_ADD32(0, queue_id)
|
||||
Addr db_offset = sizeof(uint32_t)*queue_id;
|
||||
Addr db_offset = queue_id * doorbellSize;
|
||||
auto dbmap_iter = dbMap.find(db_offset);
|
||||
if (dbmap_iter == dbMap.end()) {
|
||||
panic("Destroying a non-existing queue (db_offset %x)",
|
||||
|
||||
@@ -52,12 +52,12 @@ class HWScheduler
|
||||
: hsaPP(hsa_pp), nextALId(0), nextRLId(0),
|
||||
wakeupDelay(wakeup_delay), schedWakeupEvent(this)
|
||||
{}
|
||||
void write(Addr db_addr, uint32_t doorbell_reg);
|
||||
void write(Addr db_addr, uint64_t doorbell_reg);
|
||||
void registerNewQueue(uint64_t hostReadIndexPointer,
|
||||
uint64_t basePointer,
|
||||
uint64_t queue_id,
|
||||
uint32_t size);
|
||||
void unregisterQueue(uint64_t queue_id);
|
||||
uint32_t size, int doorbellSize);
|
||||
void unregisterQueue(uint64_t queue_id, int doorbellSize);
|
||||
void wakeup();
|
||||
void schedWakeup();
|
||||
class SchedulerWakeupEvent : public Event
|
||||
|
||||
@@ -149,18 +149,22 @@ GPUComputeDriver::allocateQueue(PortProxy &mem_proxy, Addr ioc_buf)
|
||||
TypedBufferArg<kfd_ioctl_create_queue_args> args(ioc_buf);
|
||||
args.copyIn(mem_proxy);
|
||||
|
||||
if ((sizeof(uint32_t) * queueId) > 4096) {
|
||||
if ((doorbellSize() * queueId) > 4096) {
|
||||
fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
|
||||
}
|
||||
|
||||
args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
|
||||
KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
|
||||
|
||||
// for vega offset needs to include exact value of doorbell
|
||||
if (doorbellSize())
|
||||
args->doorbell_offset += queueId * doorbellSize();
|
||||
|
||||
args->queue_id = queueId++;
|
||||
auto &hsa_pp = device->hsaPacketProc();
|
||||
hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
|
||||
args->ring_base_address, args->queue_id,
|
||||
args->ring_size);
|
||||
args->ring_size, doorbellSize());
|
||||
args.copyOut(mem_proxy);
|
||||
}
|
||||
|
||||
@@ -243,7 +247,8 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
args.copyIn(virt_proxy);
|
||||
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
|
||||
"queue offset %d\n", args->queue_id);
|
||||
device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id);
|
||||
device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
|
||||
doorbellSize());
|
||||
}
|
||||
break;
|
||||
case AMDKFD_IOC_SET_MEMORY_POLICY:
|
||||
|
||||
@@ -81,6 +81,21 @@ class GPUComputeDriver final : public EmulatedDriver
|
||||
*/
|
||||
void setMtype(RequestPtr req);
|
||||
|
||||
int
|
||||
doorbellSize()
|
||||
{
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
case GfxVersion::gfx803:
|
||||
return 4;
|
||||
case GfxVersion::gfx900:
|
||||
return 8;
|
||||
default:
|
||||
fatal("Invalid GPU type\n");
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
class DriverWakeupEvent : public Event
|
||||
{
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user