gpu-compute, dev-hsa: Remove HSADriver, HSADevice

HSADriver/HSADevice were primarily used with GPUCommandProcessor/
GPUComputeDriver. This change merges the classes together to
simplify the inheritance hierarchy, as well as removing any casting.

Change-Id: I670eb9b49a16c8aba17e13fd1d1287d0621c9f48
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42219
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Kyle Roarty
2019-06-19 16:04:01 -04:00
committed by Matthew Poremba
parent d019912efa
commit ec6b325382
13 changed files with 386 additions and 636 deletions

View File

@@ -34,12 +34,6 @@ from m5.params import *
from m5.proxy import *
from m5.objects.Device import DmaDevice
# Abstract base SimObject for an HSA agent: a DMA-capable device with an
# attached HSA packet processor (supplied via the 'hsapp' parameter).
class HSADevice(DmaDevice):
type = 'HSADevice'
abstract = True
cxx_header = "dev/hsa/hsa_device.hh"
hsapp = Param.HSAPacketProcessor("PP attached to this device")
class HSAPacketProcessor(DmaDevice):
type = 'HSAPacketProcessor'
cxx_header = 'dev/hsa/hsa_packet_processor.hh'

View File

@@ -37,12 +37,8 @@ if not env['BUILD_GPU']:
Return()
SimObject('HSADevice.py')
SimObject('HSADriver.py')
Source('hsa_device.cc')
Source('hsa_driver.cc')
Source('hsa_packet_processor.cc')
Source('hw_scheduler.cc')
DebugFlag('HSADriver')
DebugFlag('HSAPacketProcessor')

View File

@@ -1,104 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/hsa/hsa_device.hh"
#include "base/chunk_generator.hh"
#include "sim/process.hh"
// Accessor for the HSA packet processor attached to this device.
HSAPacketProcessor&
HSADevice::hsaPacketProc()
{
return *hsaPP;
}
// DMA-read from a host *virtual* address: forwards to dmaVirt(), which
// translates each page and issues DmaDevice::dmaRead on the physical frames.
void
HSADevice::dmaReadVirt(Addr host_addr, unsigned size,
DmaCallback *cb, void *data, Tick delay)
{
dmaVirt(&DmaDevice::dmaRead, host_addr, size, cb, data, delay);
}
// DMA-write to a host *virtual* address: forwards to dmaVirt(), which
// translates each page and issues DmaDevice::dmaWrite on the physical frames.
void
HSADevice::dmaWriteVirt(Addr host_addr, unsigned size,
DmaCallback *cb, void *data, Tick delay)
{
dmaVirt(&DmaDevice::dmaWrite, host_addr, size, cb, data, delay);
}
// Common driver for virtual-address DMA: splits [addr, addr+size) into
// page-sized chunks, translates each chunk's VA to a PA, and issues the
// given member DMA function (dmaRead or dmaWrite) per chunk.
void
HSADevice::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
DmaCallback *cb, void *data, Tick delay)
{
if (size == 0) {
// Zero-length transfer: still fire the callback so callers see completion.
if (cb)
schedule(cb->getChunkEvent(), curTick() + delay);
return;
}
// move the buffer data pointer with the chunks
uint8_t *loc_data = (uint8_t*)data;
for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
Addr phys;
// translate pages into their corresponding frames
translateOrDie(gen.addr(), phys);
Event *event = cb ? cb->getChunkEvent() : nullptr;
(this->*dmaFn)(phys, gen.size(), event, loc_data, delay);
loc_data += gen.size();
}
}
/**
* HSADevices will perform DMA operations on VAs, and because
* page faults are not currently supported for HSADevices, we
* must be able to find the pages mapped for the process.
* Translates vaddr into paddr via the process page table, or
* terminates the simulation with fatal() if no mapping exists.
*/
void
HSADevice::translateOrDie(Addr vaddr, Addr &paddr)
{
/**
* Grab the process and try to translate the virtual address with it;
* with new extensions, it will likely be wrong to just arbitrarily
* grab context zero.
*/
auto process = sys->threads[0]->getProcessPtr();
if (!process->pTable->translate(vaddr, paddr)) {
fatal("failed translation: vaddr 0x%x\n", vaddr);
}
}

View File

@@ -1,128 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DEV_HSA_HSA_DEVICE_HH__
#define __DEV_HSA_HSA_DEVICE_HH__
#include <cassert>
#include <cstdint>
#include "base/logging.hh"
#include "base/types.hh"
#include "dev/dma_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "params/HSADevice.hh"
class HSADriver;
/**
* Base class for an HSA agent: a DMA device with an attached HSA packet
* processor (hsaPP). Subclasses override the submit*Pkt hooks for the
* packet types they support; the defaults fatal().
*/
class HSADevice : public DmaDevice
{
public:
typedef HSADeviceParams Params;
typedef std::function<void(const uint64_t &)> HsaSignalCallbackFunction;
// Registers this device with its packet processor at construction.
HSADevice(const Params &p) : DmaDevice(p), hsaPP(p.hsapp)
{
assert(hsaPP);
hsaPP->setDevice(this);
};
HSAPacketProcessor& hsaPacketProc();
/**
* submitAgentDispatchPkt() accepts AQL dispatch packets from the HSA
* packet processor. Not all devices will accept AQL dispatch packets,
* so the default implementation will fatal.
* Implementation added to steal kernel signals.
*/
virtual void
submitAgentDispatchPkt(void *raw_pkt, uint32_t qID, Addr host_pkt_addr)
{
fatal("%s does not accept dispatch packets\n", name());
}
/**
* submitDispatchPkt() accepts AQL dispatch packets from the HSA packet
* processor. Not all devices will accept AQL dispatch packets, so the
* default implementation will fatal.
*/
virtual void
submitDispatchPkt(void *raw_pkt, uint32_t qID, Addr host_pkt_addr)
{
fatal("%s does not accept dispatch packets\n", name());
}
/**
* submitVendorPkt() accepts vendor specific packets from the HSA
* packet processor. This method should be overridden in any HSADevice
* that accepts vendor specific packets, and should interpret the
* packet according to the vendor's specifications. Not all HSA
* devices will accept vendor specific packets, so the default
* implementation will fatal.
*/
virtual void
submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
{
fatal("%s does not accept vendor specific packets\n", name());
}
// Hook for devices that are controlled by an emulated driver; default fatals.
virtual void
attachDriver(HSADriver *driver)
{
fatal("%s does not need HSA driver\n", name());
}
// Hook for devices that implement HSA signal updates; default fatals.
virtual void
updateHsaSignal(Addr signal_handle, uint64_t signal_value,
HsaSignalCallbackFunction function = [] (const uint64_t &) { })
{
fatal("%s does not have HSA signal update functionality.\n", name());
}
// Hook for devices that implement functional HSA signal reads; default fatals.
virtual uint64_t
functionalReadHsaSignal(Addr signal_handle)
{
fatal("%s does not have HSA signal read functionality.\n", name());
}
// Virtual-address DMA helpers (page-by-page translate + DMA).
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
protected:
// Typedefing dmaRead and dmaWrite function pointer
typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
// Packet processor attached to this device (non-owning; set from params).
HSAPacketProcessor *hsaPP;
void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void translateOrDie(Addr vaddr, Addr &paddr);
};
#endif // __DEV_HSA_HSA_DEVICE_HH__

View File

@@ -1,188 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/hsa/hsa_driver.hh"
#include "base/trace.hh"
#include "debug/HSADriver.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "dev/hsa/kfd_event_defines.h"
#include "dev/hsa/kfd_ioctl.h"
#include "params/HSADriver.hh"
#include "sim/process.hh"
#include "sim/proxy_ptr.hh"
#include "sim/syscall_emul_buf.hh"
// Constructs the driver bound to its HSA device; queue IDs start at 0.
HSADriver::HSADriver(const HSADriverParams &p)
: EmulatedDriver(p), device(p.device), queueId(0)
{
}
/**
* Create an FD entry for the KFD inside of the owning process.
* Returns the target-visible file descriptor allocated for it.
*/
int
HSADriver::open(ThreadContext *tc, int mode, int flags)
{
DPRINTF(HSADriver, "Opened %s\n", filename);
auto process = tc->getProcessPtr();
auto device_fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
int tgt_fd = process->fds->allocFD(device_fd_entry);
return tgt_fd;
}
/**
* Currently, mmap() simply sets up a mapping for the associated
* device's packet processor's doorbells and creates the event page.
* The mmap type is encoded in the high bits of the page offset
* (KFD_MMAP_TYPE_MASK); unrecognized types warn and return start.
*/
Addr
HSADriver::mmap(ThreadContext *tc, Addr start, uint64_t length, int prot,
int tgt_flags, int tgt_fd, off_t offset)
{
auto process = tc->getProcessPtr();
auto mem_state = process->memState;
// Decode the request type from the page offset.
Addr pg_off = offset >> PAGE_SHIFT;
Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
DPRINTF(HSADriver, "amdkfd mmap (start: %p, length: 0x%x,"
"offset: 0x%x)\n", start, length, offset);
switch (mmap_type) {
case KFD_MMAP_TYPE_DOORBELL:
DPRINTF(HSADriver, "amdkfd mmap type DOORBELL offset\n");
// Map fresh VA space onto the packet processor's PIO (doorbell) region.
start = mem_state->extendMmap(length);
process->pTable->map(start, device->hsaPacketProc().pioAddr,
length, false);
break;
case KFD_MMAP_TYPE_EVENTS:
DPRINTF(HSADriver, "amdkfd mmap type EVENTS offset\n");
panic_if(start != 0,
"Start address should be provided by KFD\n");
panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
"Requested length %d, expected length %d; length "
"mismatch\n", length, 8 * KFD_SIGNAL_EVENT_LIMIT);
/**
* We don't actually access these pages. We just need to reserve
* some VA space. See commit id 5ce8abce for details on how
* events are currently implemented.
*/
if (!eventPage) {
eventPage = mem_state->extendMmap(length);
start = eventPage;
}
break;
default:
warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
break;
}
return start;
}
/**
* Forward relevant parameters to packet processor; queueID
* is used to link doorbell. The queueIDs are not re-used
* in current implementation, and we allocate only one page
* (4096 bytes) for doorbells, so check if this queue ID can
* be mapped into that page.
*/
void
HSADriver::allocateQueue(ThreadContext *tc, Addr ioc_buf)
{
// ioctl argument struct, read from/written back to guest memory.
VPtr<kfd_ioctl_create_queue_args> args(ioc_buf, tc);
// 0x1000 = one 4KB doorbell page worth of queue IDs.
if (queueId >= 0x1000) {
fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
}
args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
args->queue_id = queueId++;
auto &hsa_pp = device->hsaPacketProc();
hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
args->ring_base_address, args->queue_id,
args->ring_size);
}
// Human-readable name for this event type (used in event tracing).
const char*
HSADriver::DriverWakeupEvent::description() const
{
return "DriverWakeupEvent";
}
// Schedules this wakeup event wakeup_delay ticks from now on the driver.
void
HSADriver::DriverWakeupEvent::scheduleWakeup(Tick wakeup_delay)
{
assert(driver);
driver->schedule(this, curTick() + wakeup_delay);
}
// Signals event event_id: wakes the waiting thread context if one is
// sleeping on it, otherwise records the event as already set.
void
HSADriver::signalWakeupEvent(uint32_t event_id)
{
panic_if(event_id >= eventSlotIndex,
"Trying wakeup on an event that is not yet created\n");
if (ETable[event_id].threadWaiting) {
panic_if(!ETable[event_id].tc,
"No thread context to wake up\n");
ThreadContext *tc = ETable[event_id].tc;
DPRINTF(HSADriver,
"Signal event: Waking up CPU %d\n", tc->cpuId());
// Wake up this thread
tc->activate();
// Remove events that can wake up this thread
TCEvents[tc].clearEvents();
} else {
// This may be a race condition between an ioctl call asking to wait on
// this event and this signalWakeupEvent. Taking care of this race
// condition here by setting the event here. The ioctl call should take
// the necessary action when waiting on an already set event. However,
// this may be a genuine instance in which the runtime has decided not
// to wait on this event. But since we cannot distinguish this case with
// the race condition, we are any way setting the event.
ETable[event_id].setEvent = true;
}
}
// Timer expiry handler: wakes the thread context that armed this timer
// and clears all events that could have woken it.
void
HSADriver::DriverWakeupEvent::process()
{
DPRINTF(HSADriver,
"Timer event: Waking up CPU %d\n", tc->cpuId());
// Wake up this thread
tc->activate();
// Remove events that can wake up this thread
driver->TCEvents[tc].clearEvents();
}

View File

@@ -1,163 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* An HSADriver is an emulated driver that controls an HSA agent,
* or more simply put, an HSA device. An HSA device is a device
* that has an associated HSA packet processor.
*
* In the base HSADriver class the open() method is implemented, as
* well as the mmap() call, which maps the HSA packet processor's
* doorbells. Drivers for other HSA devices should derive from this
* class and implement the necessary methods; typically this is an
* ioctl() method that satisfies the ioctl requests needed to manage
* and control the device.
*/
#ifndef __DEV_HSA_HSA_DRIVER_HH__
#define __DEV_HSA_HSA_DRIVER_HH__
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include "base/logging.hh"
#include "base/types.hh"
#include "cpu/thread_context.hh"
#include "sim/emul_driver.hh"
struct HSADriverParams;
class HSADevice;
/**
* Emulated KFD-style driver for an HSA device: implements open()/mmap()
* (doorbell and event-page mapping), queue allocation, and event-based
* thread sleep/wakeup bookkeeping.
*/
class HSADriver : public EmulatedDriver
{
public:
HSADriver(const HSADriverParams &p);
int open(ThreadContext *tc, int mode, int flags);
Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
int prot, int tgt_flags, int tgt_fd, off_t offset);
virtual void signalWakeupEvent(uint32_t event_id);
// Timer event used to wake a sleeping thread context after a delay.
class DriverWakeupEvent : public Event
{
public:
DriverWakeupEvent(HSADriver *hsa_driver, ThreadContext *thrd_cntxt)
: driver(hsa_driver), tc(thrd_cntxt) {}
void process() override;
const char *description() const override;
void scheduleWakeup(Tick wakeup_delay);
private:
HSADriver *driver;
ThreadContext *tc;
};
// Per-event bookkeeping: who (if anyone) is waiting, and whether the
// event fired with no waiter.
class EventTableEntry
{
public:
EventTableEntry() :
mailBoxPtr(0), tc(nullptr), threadWaiting(false), setEvent(false)
{}
// Mail box pointer for this address. Current implementation does not
// use this mailBoxPtr to notify events but directly calls
// signalWakeupEvent from dispatcher (GPU) to notify event. So,
// currently this mailBoxPtr is not used. But a future implementation
// may communicate to the driver using mailBoxPtr.
Addr mailBoxPtr;
// Thread context waiting on this event. We do not support multiple
// threads waiting on an event currently.
ThreadContext *tc;
// threadWaiting = true, if some thread context is waiting on this
// event. A thread context waiting on this event is put to sleep.
bool threadWaiting;
// setEvent = true, if this event is triggered but when this event
// triggered, no thread context was waiting on it. In the future, some
// thread context will try to wait on this event but since event has
// already happened, we will not allow that thread context to go to
// sleep. The above mentioned scenario can happen when the waiting
// thread and wakeup thread race on this event and the wakeup thread
// beat the waiting thread at the driver.
bool setEvent;
};
typedef class EventTableEntry ETEntry;
protected:
// VA of the reserved event page (0 until first EVENTS mmap).
Addr eventPage;
// Next free event slot; event IDs must be below this index.
uint32_t eventSlotIndex;
// Event table that keeps track of events. It is indexed with event ID.
std::unordered_map<uint32_t, ETEntry> ETable;
// TCEvents map keeps track of the events that can wakeup this thread. When
// multiple events can wake up this thread, this data structure helps to
// reset all events when one of those events wake up this thread. The
// signal events that can wake up this thread are stored in signalEvents
// whereas the timer wakeup event is stored in timerEvent.
class EventList
{
public:
EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}
EventList(HSADriver *hsa_driver, ThreadContext *thrd_cntxt)
: driver(hsa_driver), timerEvent(hsa_driver, thrd_cntxt)
{ }
// Resets all signal events for this thread and cancels any pending
// timer wakeup.
void clearEvents() {
assert(driver);
for (auto event : signalEvents) {
assert(event < driver->eventSlotIndex);
panic_if(driver->ETable[event].tc->status() == \
ThreadContext::Suspended,
"Thread should not be suspended\n");
driver->ETable[event].tc = nullptr;
driver->ETable[event].threadWaiting = false;
}
signalEvents.clear();
if (timerEvent.scheduled()) {
driver->deschedule(timerEvent);
}
}
HSADriver *driver;
DriverWakeupEvent timerEvent;
// The set of events that can wake up the same thread.
std::set<uint32_t> signalEvents;
};
std::unordered_map<ThreadContext *, EventList> TCEvents;
/**
* HSA agent (device) that is controlled by this driver.
*/
HSADevice *device;
// Next queue ID to hand out; IDs are never re-used.
uint32_t queueId;
void allocateQueue(ThreadContext *tc, Addr ioc_buf);
};
#endif // __DEV_HSA_HSA_DRIVER_HH__

View File

@@ -42,9 +42,9 @@
#include "base/trace.hh"
#include "debug/HSAPacketProcessor.hh"
#include "dev/dma_device.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_packet.hh"
#include "dev/hsa/hw_scheduler.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "sim/process.hh"
@@ -330,14 +330,24 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
DPRINTF(HSAPacketProcessor, "%s: submitting vendor specific pkt" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
// Submit packet to HSA device (dispatcher)
hsa_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
gpu_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
is_submitted = UNBLOCKED;
} else if (pkt_type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
DPRINTF(HSAPacketProcessor, "%s: submitting kernel dispatch pkt" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
// Submit packet to HSA device (dispatcher)
hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
gpu_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
is_submitted = UNBLOCKED;
/*
If this packet is using the "barrier bit" to enforce ordering with
subsequent kernels, set the bit for this queue now, after
dispatching.
*/
if (IS_BARRIER(disp_pkt)) {
DPRINTF(HSAPacketProcessor, "%s: setting barrier bit for active" \
" list ID = %d\n", __FUNCTION__, rl_idx);
regdQList[rl_idx]->setBarrierBit(true);
}
} else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
@@ -404,14 +414,14 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
// I'm going to cheat here and read out
// the value from main memory using functional
// access, and then just DMA the decremented value.
uint64_t signal_value = hsa_device->functionalReadHsaSignal(\
uint64_t signal_value = gpu_device->functionalReadHsaSignal(\
bar_and_pkt->completion_signal);
DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
" completion signal! Addr: %x\n",
bar_and_pkt->completion_signal);
hsa_device->updateHsaSignal(bar_and_pkt->completion_signal,
gpu_device->updateHsaSignal(bar_and_pkt->completion_signal,
signal_value - 1);
}
}
@@ -428,7 +438,7 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
DPRINTF(HSAPacketProcessor, "%s: submitting agent dispatch pkt" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
// Submit packet to HSA device (dispatcher)
hsa_device->submitAgentDispatchPkt(
gpu_device->submitAgentDispatchPkt(
(void *)disp_pkt, rl_idx, host_pkt_addr);
is_submitted = UNBLOCKED;
sendAgentDispatchCompletionSignal((void *)disp_pkt,0);
@@ -633,9 +643,9 @@ AQLRingBuffer::freeEntry(void *pkt)
}
void
HSAPacketProcessor::setDevice(HSADevice *dev)
HSAPacketProcessor::setDevice(GPUCommandProcessor *dev)
{
this->hsa_device = dev;
this->gpu_device = dev;
}
int
@@ -670,15 +680,13 @@ HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
DPRINTF(HSAPacketProcessor,
"Unset barrier bit for active list ID %d\n", rl_idx);
regdQList[rl_idx]->setBarrierBit(false);
panic_if(!regdQList[rl_idx]->dispPending(),
"There should be pending kernels in this queue\n");
DPRINTF(HSAPacketProcessor,
"Rescheduling active list ID %d after unsetting barrier "
"bit\n", rl_idx);
// Try to schedule wakeup in the next cycle. There is a minimum
// pktProcessDelay for queue wake up. If that processing delay is
// elapsed, schedAQLProcessing will wakeup next tick.
schedAQLProcessing(rl_idx, 1);
// if pending kernels in the queue after this kernel, reschedule
if (regdQList[rl_idx]->dispPending()) {
DPRINTF(HSAPacketProcessor,
"Rescheduling active list ID %d after unsetting barrier "
"bit\n", rl_idx);
schedAQLProcessing(rl_idx);
}
}
// If set, then blocked schedule, so need to reschedule

View File

@@ -66,7 +66,7 @@ typedef enum
// barrier packet completes.
} Q_STATE;
class HSADevice;
class GPUCommandProcessor;
class HWScheduler;
// Our internal representation of an HSA queue
@@ -120,7 +120,7 @@ class HSAQueueDescriptor
* FREE: Entry is empty
* ALLOCATED: Entry has been allocated for a packet, but the DMA has not
* yet completed
* SUBMITTED: Packet has been submitted to the HSADevice, but has not
* SUBMITTED: Packet has been submitted to the GPUCommandProcessor, but has not
* yet completed
*/
class AQLRingBuffer
@@ -224,7 +224,7 @@ class HSAPacketProcessor: public DmaDevice
friend class HWScheduler;
protected:
typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
HSADevice *hsa_device;
GPUCommandProcessor *gpu_device;
HWScheduler *hwSchdlr;
// Structure to store the read values of dependency signals
@@ -333,7 +333,7 @@ class HSAPacketProcessor: public DmaDevice
uint64_t queue_id,
uint32_t size);
void unsetDeviceQueueDesc(uint64_t queue_id);
void setDevice(HSADevice * dev);
void setDevice(GPUCommandProcessor * dev);
void updateReadIndex(int, uint32_t);
void getCommandsFromHost(int pid, uint32_t rl_idx);

View File

@@ -37,8 +37,6 @@ from m5.SimObject import SimObject
from m5.objects.Bridge import Bridge
from m5.objects.ClockedObject import ClockedObject
from m5.objects.Device import DmaDevice
from m5.objects.HSADevice import HSADevice
from m5.objects.HSADriver import HSADriver
from m5.objects.LdsState import LdsState
from m5.objects.Process import EmulatedDriver
@@ -239,9 +237,10 @@ class Shader(ClockedObject):
idlecu_timeout = Param.Tick(0, "Idle CU watchdog timeout threshold")
max_valu_insts = Param.Int(0, "Maximum vALU insts before exiting")
class GPUComputeDriver(HSADriver):
class GPUComputeDriver(EmulatedDriver):
type = 'GPUComputeDriver'
cxx_header = 'gpu-compute/gpu_compute_driver.hh'
device = Param.GPUCommandProcessor('GPU controlled by this driver')
isdGPU = Param.Bool(False, 'Driver is for a dGPU')
gfxVersion = Param.GfxVersion('gfx801', 'ISA of gpu to model')
dGPUPoolID = Param.Int(False, 'Pool ID for dGPU.')
@@ -259,11 +258,13 @@ class GPUDispatcher(SimObject):
type = 'GPUDispatcher'
cxx_header = 'gpu-compute/dispatcher.hh'
class GPUCommandProcessor(HSADevice):
class GPUCommandProcessor(DmaDevice):
type = 'GPUCommandProcessor'
cxx_header = 'gpu-compute/gpu_command_processor.hh'
dispatcher = Param.GPUDispatcher('workgroup dispatcher for the GPU')
hsapp = Param.HSAPacketProcessor('PP attached to this device')
class StorageClassType(Enum): vals = [
'SC_SPILL',
'SC_GLOBAL',

View File

@@ -33,6 +33,9 @@
#include "gpu-compute/gpu_command_processor.hh"
#include <cassert>
#include "base/chunk_generator.hh"
#include "debug/GPUCommandProc.hh"
#include "debug/GPUKernelInfo.hh"
#include "gpu-compute/dispatcher.hh"
@@ -42,11 +45,75 @@
#include "sim/syscall_emul_buf.hh"
GPUCommandProcessor::GPUCommandProcessor(const Params &p)
: HSADevice(p), dispatcher(*p.dispatcher), _driver(nullptr)
: DmaDevice(p), dispatcher(*p.dispatcher), _driver(nullptr), hsaPP(p.hsapp)
{
assert(hsaPP);
hsaPP->setDevice(this);
dispatcher.setCommandProcessor(this);
}
// Accessor for the HSA packet processor attached to this command processor.
HSAPacketProcessor&
GPUCommandProcessor::hsaPacketProc()
{
return *hsaPP;
}
// DMA-read from a host *virtual* address: forwards to dmaVirt(), which
// translates each page and issues DmaDevice::dmaRead on the physical frames.
void
GPUCommandProcessor::dmaReadVirt(Addr host_addr, unsigned size,
DmaCallback *cb, void *data, Tick delay)
{
dmaVirt(&DmaDevice::dmaRead, host_addr, size, cb, data, delay);
}
// DMA-write to a host *virtual* address: forwards to dmaVirt(), which
// translates each page and issues DmaDevice::dmaWrite on the physical frames.
void
GPUCommandProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
DmaCallback *cb, void *data, Tick delay)
{
dmaVirt(&DmaDevice::dmaWrite, host_addr, size, cb, data, delay);
}
// Common driver for virtual-address DMA: splits [addr, addr+size) into
// page-sized chunks, translates each chunk's VA to a PA, and issues the
// given member DMA function (dmaRead or dmaWrite) per chunk.
void
GPUCommandProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
DmaCallback *cb, void *data, Tick delay)
{
if (size == 0) {
// Zero-length transfer: still fire the callback so callers see completion.
if (cb)
schedule(cb->getChunkEvent(), curTick() + delay);
return;
}
// move the buffer data pointer with the chunks
uint8_t *loc_data = (uint8_t*)data;
for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
Addr phys;
// translate pages into their corresponding frames
translateOrDie(gen.addr(), phys);
Event *event = cb ? cb->getChunkEvent() : nullptr;
(this->*dmaFn)(phys, gen.size(), event, loc_data, delay);
loc_data += gen.size();
}
}
// Translates vaddr into paddr via the process page table, or terminates
// the simulation with fatal() if no mapping exists.
void
GPUCommandProcessor::translateOrDie(Addr vaddr, Addr &paddr)
{
/**
* Grab the process and try to translate the virtual address with it;
* with new extensions, it will likely be wrong to just arbitrarily
* grab context zero.
*/
auto process = sys->threads[0]->getProcessPtr();
if (!process->pTable->translate(vaddr, paddr)) {
fatal("failed translation: vaddr 0x%x\n", vaddr);
}
}
/**
* submitDispatchPkt() is the entry point into the CP from the HSAPP
* and is only meant to be used with AQL kernel dispatch packets.
@@ -192,12 +259,12 @@ GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value,
}
void
GPUCommandProcessor::attachDriver(HSADriver *hsa_driver)
GPUCommandProcessor::attachDriver(GPUComputeDriver *gpu_driver)
{
fatal_if(_driver, "Should not overwrite driver.");
// TODO: GPU Driver inheritance hierarchy doesn't really make sense.
// Should get rid of the base class.
_driver = dynamic_cast<GPUComputeDriver *>(hsa_driver);
_driver = gpu_driver;
assert(_driver);
}

View File

@@ -45,17 +45,27 @@
#ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#include <cstdint>
#include <functional>
#include "base/logging.hh"
#include "base/trace.hh"
#include "base/types.hh"
#include "debug/GPUCommandProc.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/dma_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "dev/hsa/hsa_signal.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/gpu_compute_driver.hh"
#include "gpu-compute/hsa_queue_entry.hh"
#include "params/GPUCommandProcessor.hh"
struct GPUCommandProcessorParams;
class GPUComputeDriver;
class GPUDispatcher;
class Shader;
class GPUCommandProcessor : public HSADevice
class GPUCommandProcessor : public DmaDevice
{
public:
typedef GPUCommandProcessorParams Params;
@@ -64,6 +74,13 @@ class GPUCommandProcessor : public HSADevice
GPUCommandProcessor() = delete;
GPUCommandProcessor(const Params &p);
HSAPacketProcessor& hsaPacketProc();
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b,
void *data, Tick delay = 0);
void setShader(Shader *shader);
Shader* shader();
GPUComputeDriver* driver();
@@ -75,12 +92,13 @@ class GPUCommandProcessor : public HSADevice
};
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
Addr host_pkt_addr) override;
Addr host_pkt_addr);
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
Addr host_pkt_addr) override;
Addr host_pkt_addr);
void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
Addr host_pkt_addr) override;
void attachDriver(HSADriver *driver) override;
Addr host_pkt_addr);
void attachDriver(GPUComputeDriver *driver);
void dispatchPkt(HSAQueueEntry *task);
void signalWakeupEvent(uint32_t event_id);
@@ -91,9 +109,9 @@ class GPUCommandProcessor : public HSADevice
void updateHsaSignal(Addr signal_handle, uint64_t signal_value,
HsaSignalCallbackFunction function =
[] (const uint64_t &) { }) override;
[] (const uint64_t &) { });
uint64_t functionalReadHsaSignal(Addr signal_handle) override;
uint64_t functionalReadHsaSignal(Addr signal_handle);
Addr getHsaSignalValueAddr(Addr signal_handle)
{
@@ -115,8 +133,13 @@ class GPUCommandProcessor : public HSADevice
GPUDispatcher &dispatcher;
GPUComputeDriver *_driver;
// Typedefing dmaRead and dmaWrite function pointer
typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
void initABI(HSAQueueEntry *task);
HSAPacketProcessor *hsaPP;
void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void translateOrDie(Addr vaddr, Addr &paddr);
/**
* Wraps a std::function object in a DmaCallback. Much cleaner than

View File

@@ -33,22 +33,26 @@
#include "gpu-compute/gpu_compute_driver.hh"
#include <memory>
#include "base/logging.hh"
#include "base/trace.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUDriver.hh"
#include "debug/GPUShader.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "dev/hsa/kfd_event_defines.h"
#include "dev/hsa/kfd_ioctl.h"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/shader.hh"
#include "mem/port_proxy.hh"
#include "params/GPUComputeDriver.hh"
#include "sim/process.hh"
#include "sim/syscall_emul_buf.hh"
GPUComputeDriver::GPUComputeDriver(const Params &p)
: HSADriver(p), isdGPU(p.isdGPU), gfxVersion(p.gfxVersion),
dGPUPoolID(p.dGPUPoolID)
: EmulatedDriver(p), device(p.device), queueId(0),
isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID)
{
device->attachDriver(this);
DPRINTF(GPUDriver, "Constructing KFD: device\n");
@@ -65,6 +69,146 @@ GPUComputeDriver::GPUComputeDriver(const Params &p)
defaultMtype.set(Request::CACHED);
}
// Name reported for this event in gem5 event-queue trace/debug output.
const char*
GPUComputeDriver::DriverWakeupEvent::description() const
{
return "DriverWakeupEvent";
}
/**
 * Create an FD entry for the KFD inside of the owning process.
 *
 * @param tc Thread context of the simulated process opening the device.
 * @param mode Unused here; present to satisfy the EmulatedDriver interface.
 * @param flags Unused here; present to satisfy the EmulatedDriver interface.
 * @return Target-visible file descriptor allocated for this device file.
 */
int
GPUComputeDriver::open(ThreadContext *tc, int mode, int flags)
{
DPRINTF(GPUDriver, "Opened %s\n", filename);
auto process = tc->getProcessPtr();
// Register this driver as the handler for the new descriptor so later
// ioctl()/mmap() calls on it are routed back here.
auto device_fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
int tgt_fd = process->fds->allocFD(device_fd_entry);
return tgt_fd;
}
/**
 * Currently, mmap() will simply setup a mapping for the associated
 * device's packet processor's doorbells and creates the event page.
 *
 * The KFD mmap type is encoded in the upper bits of the page offset
 * (see kfd_ioctl.h); only DOORBELL and EVENTS offsets are handled.
 *
 * @param start Requested start VA (must be 0 for EVENTS mappings).
 * @param length Length of the requested mapping in bytes.
 * @param offset Encodes the KFD mmap type in its page-offset bits.
 * @return The VA at which the region was mapped (or reserved).
 */
Addr
GPUComputeDriver::mmap(ThreadContext *tc, Addr start, uint64_t length,
int prot, int tgt_flags, int tgt_fd, off_t offset)
{
auto process = tc->getProcessPtr();
auto mem_state = process->memState;
Addr pg_off = offset >> PAGE_SHIFT;
Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
DPRINTF(GPUDriver, "amdkfd mmap (start: %p, length: 0x%x,"
"offset: 0x%x)\n", start, length, offset);
switch(mmap_type) {
case KFD_MMAP_TYPE_DOORBELL:
DPRINTF(GPUDriver, "amdkfd mmap type DOORBELL offset\n");
// Map fresh VA space directly onto the packet processor's PIO
// (doorbell) physical region so user-mode doorbell writes reach it.
start = mem_state->extendMmap(length);
process->pTable->map(start, device->hsaPacketProc().pioAddr,
length, false);
break;
case KFD_MMAP_TYPE_EVENTS:
DPRINTF(GPUDriver, "amdkfd mmap type EVENTS offset\n");
panic_if(start != 0,
"Start address should be provided by KFD\n");
panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
"Requested length %d, expected length %d; length "
"mismatch\n", length, 8* KFD_SIGNAL_EVENT_LIMIT);
/**
 * We don't actually access these pages. We just need to reserve
 * some VA space. See commit id 5ce8abce for details on how
 * events are currently implemented.
 */
// The event page is allocated once and reused across calls; a
// repeat request returns the previously reserved address.
if (!eventPage) {
eventPage = mem_state->extendMmap(length);
start = eventPage;
}
break;
default:
warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
break;
}
return start;
}
/**
 * Forward relevant parameters to packet processor; queueId
 * is used to link doorbell. The queueIDs are not re-used
 * in current implementation, and we allocate only one page
 * (4096 bytes) for doorbells, so check if this queueID can
 * be mapped into that page.
 *
 * @param mem_proxy Proxy used to copy the ioctl argument struct in/out of
 *                  guest memory.
 * @param ioc_buf Guest VA of the kfd_ioctl_create_queue_args buffer.
 */
void
GPUComputeDriver::allocateQueue(PortProxy &mem_proxy, Addr ioc_buf)
{
    TypedBufferArg<kfd_ioctl_create_queue_args> args(ioc_buf);
    args.copyIn(mem_proxy);

    // The doorbell for queue N occupies bytes
    // [sizeof(uint32_t) * N, sizeof(uint32_t) * (N + 1)) of the single
    // 4096-byte doorbell page. Reject any id whose doorbell would start at
    // or beyond the end of that page; the previous '>' comparison was
    // off-by-one and let queue id 1024 (bytes [4096, 4100)) slip through.
    if ((sizeof(uint32_t) * queueId) >= 4096) {
        fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
    }

    // Doorbell offset encodes the KFD mmap type and GPU id; user space
    // passes this back through mmap() to map the doorbell page.
    args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
        KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;

    // Queue ids are allocated monotonically and never recycled.
    args->queue_id = queueId++;
    auto &hsa_pp = device->hsaPacketProc();
    hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
                              args->ring_base_address, args->queue_id,
                              args->ring_size);
    args.copyOut(mem_proxy);
}
// Schedule this timer event to fire wakeup_delay ticks from now; process()
// will then wake the associated thread context.
void
GPUComputeDriver::DriverWakeupEvent::scheduleWakeup(Tick wakeup_delay)
{
assert(driver);
driver->schedule(this, curTick() + wakeup_delay);
}
/**
 * Signal the given event: if a thread context is sleeping on it, clear its
 * pending events and wake it; otherwise latch the event as set so a future
 * (or racing) waiter does not go to sleep on an already-signaled event.
 */
void
GPUComputeDriver::signalWakeupEvent(uint32_t event_id)
{
panic_if(event_id >= eventSlotIndex,
"Trying wakeup on an event that is not yet created\n");
if (ETable[event_id].threadWaiting) {
panic_if(!ETable[event_id].tc,
"No thread context to wake up\n");
ThreadContext *tc = ETable[event_id].tc;
DPRINTF(GPUDriver,
"Signal event: Waking up CPU %d\n", tc->cpuId());
// Remove events that can wakeup this thread
TCEvents[tc].clearEvents();
// Now wakeup this thread
tc->activate();
} else {
// This may be a race condition between an ioctl call asking to wait on
// this event and this signalWakeupEvent. Taking care of this race
// condition here by setting the event here. The ioctl call should take
// the necessary action when waiting on an already set event. However,
// this may be a genuine instance in which the runtime has decided not
// to wait on this event. But since we cannot distinguish this case with
// the race condition, we are any way setting the event.
ETable[event_id].setEvent = true;
}
}
// Timer expiry handler: unconditionally wake the thread that armed this
// timeout, clearing any signal events that could also have woken it.
void
GPUComputeDriver::DriverWakeupEvent::process()
{
DPRINTF(GPUDriver,
"Timer event: Waking up CPU %d\n", tc->cpuId());
// Remove events that can wakeup this thread
driver->TCEvents[tc].clearEvents();
// Now wakeup this thread
tc->activate();
}
int
GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
{
@@ -88,7 +232,7 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
{
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
allocateQueue(tc, ioc_buf);
allocateQueue(virt_proxy, ioc_buf);
DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
}

View File

@@ -42,19 +42,33 @@
#ifndef __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
#define __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include "base/addr_range_map.hh"
#include "dev/hsa/hsa_driver.hh"
#include "base/types.hh"
#include "enums/GfxVersion.hh"
#include "mem/request.hh"
#include "sim/emul_driver.hh"
struct GPUComputeDriverParams;
class GPUCommandProcessor;
class PortProxy;
class ThreadContext;
class GPUComputeDriver final : public HSADriver
class GPUComputeDriver final : public EmulatedDriver
{
public:
typedef GPUComputeDriverParams Params;
GPUComputeDriver(const Params &p);
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override;
int open(ThreadContext *tc, int mode, int flags);
Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
int prot, int tgt_flags, int tgt_fd, off_t offset);
virtual void signalWakeupEvent(uint32_t event_id);
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout);
/**
* Called by the compute units right before a request is issued to ruby.
@@ -67,10 +81,62 @@ class GPUComputeDriver final : public HSADriver
*/
void setMtype(RequestPtr req);
// One-shot timer event used to wake a thread context that went to sleep
// waiting on KFD events with a timeout (see sleepCPU / TCEvents).
class DriverWakeupEvent : public Event
{
public:
DriverWakeupEvent(GPUComputeDriver *gpu_driver,
ThreadContext *thrd_cntxt)
: driver(gpu_driver), tc(thrd_cntxt) {}
void process() override;
const char *description() const override;
// Schedule this event wakeup_delay ticks in the future.
void scheduleWakeup(Tick wakeup_delay);
private:
GPUComputeDriver *driver;
ThreadContext *tc;
};
// Per-event bookkeeping record; the event table (ETable) maps event IDs to
// these entries.
class EventTableEntry
{
public:
EventTableEntry() :
mailBoxPtr(0), tc(nullptr), threadWaiting(false), setEvent(false)
{}
// Mail box pointer for this address. Current implementation does not
// use this mailBoxPtr to notify events but directly calls
// signalWakeupEvent from dispatcher (GPU) to notify events. So,
// currently this mailBoxPtr is not used. But a future implementation
// may communicate to the driver using mailBoxPtr.
Addr mailBoxPtr;
// Thread context waiting on this event. We do not support multiple
// threads waiting on an event currently.
ThreadContext *tc;
// threadWaiting = true, if some thread context is waiting on this
// event. A thread context waiting on this event is put to sleep.
bool threadWaiting;
// setEvent = true, if this event is triggered but when this event
// triggered, no thread context was waiting on it. In the future, some
// thread context will try to wait on this event but since event has
// already happened, we will not allow that thread context to go to
// sleep. The above mentioned scenario can happen when the waiting
// thread and wakeup thread race on this event and the wakeup thread
// beat the waiting thread at the driver.
bool setEvent;
};
typedef class EventTableEntry ETEntry;
private:
/**
* GPU that is controlled by this driver.
*/
GPUCommandProcessor *device;
uint32_t queueId;
bool isdGPU;
GfxVersion gfxVersion;
int dGPUPoolID;
Addr eventPage;
uint32_t eventSlotIndex;
//Event table that keeps track of events. It is indexed with event ID.
std::unordered_map<uint32_t, ETEntry> ETable;
/**
* VMA structures for GPUVM memory.
@@ -89,6 +155,37 @@ class GPUComputeDriver final : public HSADriver
Request::CacheCoherenceFlags defaultMtype;
// TCEvents map keeps track of the events that can wakeup this thread. When
// multiple events can wake up this thread, this data structure helps to
// reset all events when one of those events wakes up this thread. The
// signal events that can wake up this thread are stored in signalEvents
// whereas the timer wakeup event is stored in timerEvent.
class EventList
{
public:
EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}
EventList(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
: driver(gpu_driver), timerEvent(gpu_driver, thrd_cntxt)
{ }
// Detach this thread from every event it was waiting on and cancel
// any pending timeout, so a single wakeup clears all wait sources.
void clearEvents() {
assert(driver);
for (auto event : signalEvents) {
assert(event < driver->eventSlotIndex);
driver->ETable[event].tc = nullptr;
driver->ETable[event].threadWaiting = false;
}
signalEvents.clear();
if (timerEvent.scheduled()) {
driver->deschedule(timerEvent);
}
}
GPUComputeDriver *driver;
DriverWakeupEvent timerEvent;
// The set of events that can wake up the same thread.
std::set<uint32_t> signalEvents;
};
std::unordered_map<ThreadContext *, EventList> TCEvents;
/**
* Register a region of host memory as uncacheable from the perspective
* of the dGPU.
@@ -126,6 +223,9 @@ class GPUComputeDriver final : public HSADriver
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start,
Addr length);
Addr deallocateGpuVma(Addr start);
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr);
};
#endif // __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__