gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model

Change-Id: Ibe46970f3ba25d62ca2ade5cbc2054ad746b2254
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29912
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tony Gutierrez
2018-05-01 16:59:35 -04:00
committed by Anthony Gutierrez
parent b0eac7857a
commit b8da9abba7
86 changed files with 10299 additions and 3734 deletions

View File

@@ -101,7 +101,7 @@ HSADevice::translateOrDie(Addr vaddr, Addr &paddr)
* with new extensions, it will likely be wrong to just arbitrarily
* grab context zero.
*/
auto process = sys->getThreadContext(0)->getProcessPtr();
auto process = sys->threads[0]->getProcessPtr();
if (!process->pTable->translate(vaddr, paddr)) {
fatal("failed translation: vaddr 0x%x\n", vaddr);

View File

@@ -92,3 +92,28 @@ HSADriver::mmap(ThreadContext *tc, Addr start, uint64_t length, int prot,
DPRINTF(HSADriver, "amdkfd doorbell mapped to %xp\n", start);
return start;
}
/**
 * Service a create-queue request whose ioctl arguments live at ioc_buf:
 * assign the next queue ID, pass the queue's descriptor fields on to the
 * HSA packet processor, and write the updated arguments (now carrying
 * the assigned queue ID) back out through the same proxy.
 *
 * The queue ID is what links a queue to its doorbell. IDs are never
 * re-used in the current implementation and only a single page
 * (4096 bytes) is allocated for doorbells, so each ID is checked
 * against that page before it is handed out.
 *
 * @param mem_proxy proxy used to copy the ioctl arguments in and out
 * @param ioc_buf address of the kfd_ioctl_create_queue_args buffer
 */
void
HSADriver::allocateQueue(PortProxy &mem_proxy, Addr ioc_buf)
{
    TypedBufferArg<kfd_ioctl_create_queue_args> ioc_args(ioc_buf);
    ioc_args.copyIn(mem_proxy);

    // Refuse any ID that can no longer be mapped into the doorbell page.
    const bool id_fits_doorbell_page = queueId < 0x1000;
    if (!id_fits_doorbell_page) {
        fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
    }

    // Hand out the current ID, then advance the counter for the next queue.
    ioc_args->queue_id = queueId;
    ++queueId;

    device->hsaPacketProc().setDeviceQueueDesc(
        ioc_args->read_pointer_address,
        ioc_args->ring_base_address,
        ioc_args->queue_id,
        ioc_args->ring_size);

    ioc_args.copyOut(mem_proxy);
}

View File

@@ -56,7 +56,7 @@
struct HSADriverParams;
class HSADevice;
class SETranslatingPortProxy;
class PortProxy;
class ThreadContext;
class HSADriver : public EmulatedDriver
@@ -74,8 +74,7 @@ class HSADriver : public EmulatedDriver
HSADevice *device;
uint32_t queueId;
void allocateQueue(const SETranslatingPortProxy &mem_proxy,
Addr ioc_buf_addr);
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf);
};
#endif // __DEV_HSA_HSA_DRIVER_HH__

View File

@@ -151,7 +151,7 @@ HSAPacketProcessor::translateOrDie(Addr vaddr, Addr &paddr)
// Grab the process and try to translate the virtual address with it; with
// new extensions, it will likely be wrong to just arbitrarily grab context
// zero.
auto process = sys->getThreadContext(0)->getProcessPtr();
auto process = sys->threads[0]->getProcessPtr();
if (!process->pTable->translate(vaddr, paddr))
fatal("failed translation: vaddr 0x%x\n", vaddr);
@@ -393,7 +393,7 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
* The reason for this is that the DMASequencer does
* not support atomic operations.
*/
auto tc = sys->getThreadContext(0);
auto tc = sys->threads[0];
auto &virt_proxy = tc->getVirtProxy();
TypedBufferArg<uint64_t> prev_signal(signal_addr);
prev_signal.copyIn(virt_proxy);

View File

@@ -92,7 +92,7 @@ HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
// We use the same mapping function used by hsa runtime to do this mapping
//
// Originally
// #define VOID_PTR_ADD32(ptr,n) \
// #define VOID_PTR_ADD32(ptr,n)
// (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
// (Addr)VOID_PTR_ADD32(0, queue_id)
Addr db_offset = queue_id;
@@ -343,7 +343,7 @@ HWScheduler::unregisterQueue(uint64_t queue_id)
// `(Addr)(VOID_PTR_ADD32(0, queue_id))`
//
// Originally
// #define VOID_PTR_ADD32(ptr,n) \
// #define VOID_PTR_ADD32(ptr,n)
// (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
// (Addr)VOID_PTR_ADD32(0, queue_id)
Addr db_offset = queue_id;