gpu-compute, dev-hsa: Remove HSADriver, HSADevice

HSADriver/HSADevice were primarily used with GPUCommandProcessor/
GPUComputeDriver. This change merges the classes together to
simplify the inheritance hierarchy, as well as removing any casting.

Change-Id: I670eb9b49a16c8aba17e13fd1d1287d0621c9f48
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42219
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Kyle Roarty
2019-06-19 16:04:01 -04:00
committed by Matthew Poremba
parent d019912efa
commit ec6b325382
13 changed files with 386 additions and 636 deletions

View File

@@ -34,12 +34,6 @@ from m5.params import *
from m5.proxy import *
from m5.objects.Device import DmaDevice
# SimObject description of an HSA device; it derives from DmaDevice.
class HSADevice(DmaDevice):
type = 'HSADevice'
# Declared abstract in the SimObject description.
abstract = True
# C++ header associated with this SimObject.
cxx_header = "dev/hsa/hsa_device.hh"
# Parameter of type HSAPacketProcessor: the packet processor ("PP")
# attached to this device.
hsapp = Param.HSAPacketProcessor("PP attached to this device")
class HSAPacketProcessor(DmaDevice):
type = 'HSAPacketProcessor'
cxx_header = 'dev/hsa/hsa_packet_processor.hh'

View File

@@ -37,12 +37,8 @@ if not env['BUILD_GPU']:
Return()
SimObject('HSADevice.py')
SimObject('HSADriver.py')
Source('hsa_device.cc')
Source('hsa_driver.cc')
Source('hsa_packet_processor.cc')
Source('hw_scheduler.cc')
DebugFlag('HSADriver')
DebugFlag('HSAPacketProcessor')

View File

@@ -1,104 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/hsa/hsa_device.hh"
#include "base/chunk_generator.hh"
#include "sim/process.hh"
/**
 * Accessor for the HSA packet processor attached to this device.
 *
 * @return a reference to the object that hsaPP points to.
 */
HSAPacketProcessor &
HSADevice::hsaPacketProc()
{
    HSAPacketProcessor &pkt_proc = *hsaPP;
    return pkt_proc;
}
/**
 * DMA read on a virtual address range. All arguments are forwarded
 * unchanged to dmaVirt(), with DmaDevice::dmaRead selected as the
 * per-chunk DMA routine.
 *
 * @param host_addr virtual start address of the transfer
 * @param size      number of bytes to transfer
 * @param cb        optional callback (may be null)
 * @param data      buffer the transfer operates on
 * @param delay     delay handed on to the DMA routine
 */
void
HSADevice::dmaReadVirt(Addr host_addr, unsigned size,
                       DmaCallback *cb, void *data, Tick delay)
{
    const DmaFnPtr read_fn = &DmaDevice::dmaRead;
    dmaVirt(read_fn, host_addr, size, cb, data, delay);
}
/**
 * DMA write on a virtual address range. All arguments are forwarded
 * unchanged to dmaVirt(), with DmaDevice::dmaWrite selected as the
 * per-chunk DMA routine.
 *
 * @param host_addr virtual start address of the transfer
 * @param size      number of bytes to transfer
 * @param cb        optional callback (may be null)
 * @param data      buffer the transfer operates on
 * @param delay     delay handed on to the DMA routine
 */
void
HSADevice::dmaWriteVirt(Addr host_addr, unsigned size,
                        DmaCallback *cb, void *data, Tick delay)
{
    const DmaFnPtr write_fn = &DmaDevice::dmaWrite;
    dmaVirt(write_fn, host_addr, size, cb, data, delay);
}
/**
 * Common implementation behind dmaReadVirt()/dmaWriteVirt().
 *
 * The virtual range [addr, addr + size) is walked in PAGE_SIZE chunks
 * with a ChunkGenerator. Each chunk's start address is translated with
 * translateOrDie() and the chunk is then handed to the DMA member
 * function selected by dmaFn.
 *
 * @param dmaFn pointer to the DmaDevice member function to invoke
 * @param addr  virtual start address
 * @param size  number of bytes; for 0 only the callback's chunk event
 *              (if a callback was given) is scheduled
 * @param cb    optional callback; its chunk event is passed to every
 *              per-chunk DMA call
 * @param data  buffer; the pointer advances by each chunk's size
 * @param delay delay passed to each DMA call (and used for the
 *              zero-size callback scheduling)
 */
void
HSADevice::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
                   DmaCallback *cb, void *data, Tick delay)
{
    // Nothing to transfer: just schedule the callback's event, if any.
    if (size == 0) {
        if (cb)
            schedule(cb->getChunkEvent(), curTick() + delay);
        return;
    }

    // The buffer cursor moves in lock-step with the generated chunks.
    uint8_t *chunk_buf = static_cast<uint8_t *>(data);

    ChunkGenerator chunks(addr, size, PAGE_SIZE);
    while (!chunks.done()) {
        // Translate this chunk's virtual address into its frame address.
        Addr frame_addr;
        translateOrDie(chunks.addr(), frame_addr);

        Event *chunk_event = cb ? cb->getChunkEvent() : nullptr;
        (this->*dmaFn)(frame_addr, chunks.size(), chunk_event, chunk_buf,
                       delay);

        chunk_buf += chunks.size();
        chunks.next();
    }
}
/**
 * Translate a virtual address through the process page table, or
 * terminate the simulation with fatal() if no mapping exists.
 *
 * HSADevices perform DMA operations on virtual addresses, and page
 * faults are not currently supported for them, so the page must already
 * be mapped for the process.
 *
 * @param vaddr virtual address to translate
 * @param paddr output: receives the translated address
 */
void
HSADevice::translateOrDie(Addr vaddr, Addr &paddr)
{
    /**
     * The process is taken from thread context zero. With new
     * extensions, arbitrarily grabbing context zero will likely be
     * wrong.
     */
    auto *proc = sys->threads[0]->getProcessPtr();

    const bool mapped = proc->pTable->translate(vaddr, paddr);
    if (!mapped) {
        fatal("failed translation: vaddr 0x%x\n", vaddr);
    }
}

View File

@@ -1,128 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DEV_HSA_HSA_DEVICE_HH__
#define __DEV_HSA_HSA_DEVICE_HH__
#include <cassert>
#include <cstdint>
#include "base/logging.hh"
#include "base/types.hh"
#include "dev/dma_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "params/HSADevice.hh"
class HSADriver;
class HSADevice : public DmaDevice
{
public:
typedef HSADeviceParams Params;
typedef std::function<void(const uint64_t &)> HsaSignalCallbackFunction;
HSADevice(const Params &p) : DmaDevice(p), hsaPP(p.hsapp)
{
assert(hsaPP);
hsaPP->setDevice(this);
};
HSAPacketProcessor& hsaPacketProc();
/**
* submitAgentDispatchPkt() accepts AQL dispatch packets from the HSA
* packet processor. Not all devices will accept AQL dispatch packets,
* so the default implementation will fatal.
* Implementation added to steal kernel signals.
*/
virtual void
submitAgentDispatchPkt(void *raw_pkt, uint32_t qID, Addr host_pkt_addr)
{
fatal("%s does not accept dispatch packets\n", name());
}
/**
* submitDispatchPkt() accepts AQL dispatch packets from the HSA packet
* processor. Not all devices will accept AQL dispatch packets, so the
* default implementation will fatal.
*/
virtual void
submitDispatchPkt(void *raw_pkt, uint32_t qID, Addr host_pkt_addr)
{
fatal("%s does not accept dispatch packets\n", name());
}
/**
* submitVendorPkt() accepts vendor specific packets from the HSA
* packet processor. This method should be overriden in any HSADevice
* that acceptes vendor specific packets, and should interpret the
* packet according to the vendor's specifications. Not all HSA
* devices will accept vendor specific packets, so the default
* implementation will fatal.
*/
virtual void
submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
{
fatal("%s does not accept vendor specific packets\n", name());
}
virtual void
attachDriver(HSADriver *driver)
{
fatal("%s does not need HSA driver\n", name());
}
virtual void
updateHsaSignal(Addr signal_handle, uint64_t signal_value,
HsaSignalCallbackFunction function = [] (const uint64_t &) { })
{
fatal("%s does not have HSA signal update functionality.\n", name());
}
virtual uint64_t
functionalReadHsaSignal(Addr signal_handle)
{
fatal("%s does not have HSA signal read functionality.\n", name());
}
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
protected:
// Typedefing dmaRead and dmaWrite function pointer
typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
HSAPacketProcessor *hsaPP;
void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, DmaCallback *cb,
void *data, Tick delay = 0);
void translateOrDie(Addr vaddr, Addr &paddr);
};
#endif // __DEV_HSA_HSA_DEVICE_HH__

View File

@@ -1,188 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/hsa/hsa_driver.hh"
#include "base/trace.hh"
#include "debug/HSADriver.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "dev/hsa/kfd_event_defines.h"
#include "dev/hsa/kfd_ioctl.h"
#include "params/HSADriver.hh"
#include "sim/process.hh"
#include "sim/proxy_ptr.hh"
#include "sim/syscall_emul_buf.hh"
/**
 * Construct the driver for the device named in the params.
 *
 * eventPage and eventSlotIndex are explicitly zeroed: mmap() tests
 * "!eventPage" to decide whether the event page still has to be
 * reserved, and signalWakeupEvent() compares event ids against
 * eventSlotIndex, so neither may start out with an indeterminate value.
 * The initialisers are listed in member declaration order.
 */
HSADriver::HSADriver(const HSADriverParams &p)
    : EmulatedDriver(p), eventPage(0), eventSlotIndex(0),
      device(p.device), queueId(0)
{
}
/**
 * Create an FD entry for the KFD inside of the owning process.
 *
 * @param tc    thread context whose process receives the descriptor
 * @param mode  not used by this implementation
 * @param flags not used by this implementation
 * @return the target file descriptor allocated for the new entry
 */
int
HSADriver::open(ThreadContext *tc, int mode, int flags)
{
    DPRINTF(HSADriver, "Opened %s\n", filename);

    auto fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
    return tc->getProcessPtr()->fds->allocFD(fd_entry);
}
/**
 * Currently, mmap() simply sets up a mapping for the doorbells of the
 * associated device's packet processor, and creates the event page.
 *
 * The kind of mapping is taken from the page-number part of the offset,
 * masked with KFD_MMAP_TYPE_MASK. Unrecognized kinds only trigger a
 * one-time warning.
 *
 * @return the (possibly updated) start address
 */
Addr
HSADriver::mmap(ThreadContext *tc, Addr start, uint64_t length, int prot,
                int tgt_flags, int tgt_fd, off_t offset)
{
    auto proc = tc->getProcessPtr();
    auto vm_state = proc->memState;

    const Addr page_num = offset >> PAGE_SHIFT;
    const Addr map_kind = page_num & KFD_MMAP_TYPE_MASK;

    DPRINTF(HSADriver, "amdkfd mmap (start: %p, length: 0x%x,"
            "offset: 0x%x)\n", start, length, offset);

    switch (map_kind) {
      case KFD_MMAP_TYPE_DOORBELL:
        DPRINTF(HSADriver, "amdkfd mmap type DOORBELL offset\n");
        // Grow the mmap region and back it with the packet processor's
        // PIO address.
        start = vm_state->extendMmap(length);
        proc->pTable->map(start, device->hsaPacketProc().pioAddr,
                          length, false);
        break;

      case KFD_MMAP_TYPE_EVENTS:
        DPRINTF(HSADriver, "amdkfd mmap type EVENTS offset\n");
        panic_if(start != 0,
                 "Start address should be provided by KFD\n");
        panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
                 "Requested length %d, expected length %d; length "
                 "mismatch\n", length, 8 * KFD_SIGNAL_EVENT_LIMIT);
        /**
         * We don't actually access these pages. We just need to reserve
         * some VA space. See commit id 5ce8abce for details on how
         * events are currently implemented.
         */
        if (!eventPage) {
            eventPage = vm_state->extendMmap(length);
            start = eventPage;
        }
        break;

      default:
        warn_once("Unrecognized kfd mmap type %llx\n", map_kind);
        break;
    }

    return start;
}
/**
 * Forward relevant parameters to the packet processor; the queue ID is
 * used to link the doorbell. Queue IDs are not re-used in the current
 * implementation, and only one page (4096 bytes) is allocated for
 * doorbells, so check whether this queue ID can be mapped into that
 * page.
 *
 * @param tc      thread context used to access the ioctl argument buffer
 * @param ioc_buf address of the kfd_ioctl_create_queue_args buffer
 */
void
HSADriver::allocateQueue(ThreadContext *tc, Addr ioc_buf)
{
    VPtr<kfd_ioctl_create_queue_args> args(ioc_buf, tc);

    if (queueId >= 0x1000) {
        fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
    }

    // Fill in the fields that are returned through the ioctl buffer.
    args->doorbell_offset =
        (KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(args->gpu_id))
        << PAGE_SHIFT;
    args->queue_id = queueId;
    ++queueId;

    // Register the new queue with the device's packet processor.
    device->hsaPacketProc().setDeviceQueueDesc(
        args->read_pointer_address, args->ring_base_address,
        args->queue_id, args->ring_size);
}
/**
 * @return the fixed, human-readable name of this event type.
 */
const char *
HSADriver::DriverWakeupEvent::description() const
{
    return "DriverWakeupEvent";
}
/**
 * Schedule this event on the owning driver, wakeup_delay ticks after
 * the current tick. The driver pointer must be set.
 */
void
HSADriver::DriverWakeupEvent::scheduleWakeup(Tick wakeup_delay)
{
    assert(driver);

    const Tick wakeup_tick = curTick() + wakeup_delay;
    driver->schedule(this, wakeup_tick);
}
void
HSADriver::signalWakeupEvent(uint32_t event_id)
{
panic_if(event_id >= eventSlotIndex,
"Trying wakeup on an event that is not yet created\n");
if (ETable[event_id].threadWaiting) {
panic_if(!ETable[event_id].tc,
"No thread context to wake up\n");
ThreadContext *tc = ETable[event_id].tc;
DPRINTF(HSADriver,
"Signal event: Waking up CPU %d\n", tc->cpuId());
// Wake up this thread
tc->activate();
// Remove events that can wake up this thread
TCEvents[tc].clearEvents();
} else {
// This may be a race condition between an ioctl call asking to wait on
// this event and this signalWakeupEvent. Taking care of this race
// condition here by setting the event here. The ioctl call should take
// the necessary action when waiting on an already set event. However,
// this may be a genuine instance in which the runtime has decided not
// to wait on this event. But since we cannot distinguish this case with
// the race condition, we are any way setting the event.
ETable[event_id].setEvent = true;
}
}
/**
 * Timer wakeup handler: activate the thread context this event was
 * created for, then clear every event registered as able to wake it.
 */
void
HSADriver::DriverWakeupEvent::process()
{
    DPRINTF(HSADriver,
            "Timer event: Waking up CPU %d\n", tc->cpuId());

    // Wake up this thread
    tc->activate();

    // Remove events that can wake up this thread
    auto &wake_sources = driver->TCEvents[tc];
    wake_sources.clearEvents();
}

View File

@@ -1,163 +0,0 @@
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* An HSADriver is an emulated driver that controls an HSA agent,
* or more simply put, an HSA device. An HSA device is a device
* that has an associated HSA packet processor.
*
* In the base HSADriver class the open() method is implemented, as
* well as the mmap() call, which maps the HSA packet processor's
* doorbells. Drivers for other HSA devices should derive from this
* class and implement the necessary methods; typically this is an
* ioctl() method that satisfies the ioctl requests needed to manage
* and control the device.
*/
#ifndef __DEV_HSA_HSA_DRIVER_HH__
#define __DEV_HSA_HSA_DRIVER_HH__
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include "base/logging.hh"
#include "base/types.hh"
#include "cpu/thread_context.hh"
#include "sim/emul_driver.hh"
struct HSADriverParams;
class HSADevice;
/**
 * Emulated driver that controls an HSA device. It implements open() and
 * mmap(), and keeps the bookkeeping used to put thread contexts to
 * sleep on events and wake them up again.
 */
class HSADriver : public EmulatedDriver
{
  public:
    HSADriver(const HSADriverParams &p);

    int open(ThreadContext *tc, int mode, int flags);
    Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
              int prot, int tgt_flags, int tgt_fd, off_t offset);
    virtual void signalWakeupEvent(uint32_t event_id);

    /**
     * Event that wakes up a thread context; it is used as the timer
     * event stored in an EventList.
     */
    class DriverWakeupEvent : public Event
    {
      public:
        DriverWakeupEvent(HSADriver *hsa_driver, ThreadContext *thrd_cntxt)
            : driver(hsa_driver), tc(thrd_cntxt)
        {}

        void process() override;
        const char *description() const override;
        void scheduleWakeup(Tick wakeup_delay);

      private:
        HSADriver *driver;
        ThreadContext *tc;
    };

    /** One entry of the event table. */
    class EventTableEntry
    {
      public:
        EventTableEntry()
            : mailBoxPtr(0), tc(nullptr), threadWaiting(false),
              setEvent(false)
        {}

        // Mail box pointer for this address. Current implementation does
        // not use this mailBoxPtr to notify events but directly calls
        // signalWakeupEvent from dispatcher (GPU) to notify event. So,
        // currently this mailBoxPtr is not used. But a future
        // implementation may communicate to the driver using mailBoxPtr.
        Addr mailBoxPtr;

        // Thread context waiting on this event. We do not support
        // multiple threads waiting on an event currently.
        ThreadContext *tc;

        // threadWaiting = true, if some thread context is waiting on this
        // event. A thread context waiting on this event is put to sleep.
        bool threadWaiting;

        // setEvent = true, if this event is triggered but when this event
        // triggered, no thread context was waiting on it. In the future,
        // some thread context will try to wait on this event but since
        // event has already happened, we will not allow that thread
        // context to go to sleep. The above mentioned scenario can happen
        // when the waiting thread and wakeup thread race on this event
        // and the wakeup thread beat the waiting thread at the driver.
        bool setEvent;
    };
    using ETEntry = EventTableEntry;

  protected:
    // NOTE(review): these two members have no in-class initializer.
    Addr eventPage;
    uint32_t eventSlotIndex;

    // Event table that keeps track of events. It is indexed with event ID.
    std::unordered_map<uint32_t, ETEntry> ETable;

    // TCEvents map keeps track of the events that can wakeup this thread.
    // When multiple events can wake up this thread, this data structure
    // helps to reset all events when one of those events wake up this
    // thread. The signal events that can wake up this thread are stored in
    // signalEvents whereas the timer wakeup event is stored in timerEvent.
    class EventList
    {
      public:
        EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}

        EventList(HSADriver *hsa_driver, ThreadContext *thrd_cntxt)
            : driver(hsa_driver), timerEvent(hsa_driver, thrd_cntxt)
        {}

        /**
         * Detach the waiting thread from every signal event in this
         * list, empty the list, and deschedule the timer event if it is
         * scheduled.
         */
        void
        clearEvents()
        {
            assert(driver);
            for (auto event : signalEvents) {
                assert(event < driver->eventSlotIndex);
                panic_if(driver->ETable[event].tc->status() ==
                         ThreadContext::Suspended,
                         "Thread should not be suspended\n");
                driver->ETable[event].tc = nullptr;
                driver->ETable[event].threadWaiting = false;
            }
            signalEvents.clear();

            if (timerEvent.scheduled()) {
                driver->deschedule(timerEvent);
            }
        }

        HSADriver *driver;
        DriverWakeupEvent timerEvent;
        // The set of events that can wake up the same thread.
        std::set<uint32_t> signalEvents;
    };
    std::unordered_map<ThreadContext *, EventList> TCEvents;

    /**
     * HSA agent (device) that is controlled by this driver.
     */
    HSADevice *device;

    // Id handed to the next queue created through allocateQueue().
    uint32_t queueId;

    void allocateQueue(ThreadContext *tc, Addr ioc_buf);
};
#endif // __DEV_HSA_HSA_DRIVER_HH__

View File

@@ -42,9 +42,9 @@
#include "base/trace.hh"
#include "debug/HSAPacketProcessor.hh"
#include "dev/dma_device.hh"
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_packet.hh"
#include "dev/hsa/hw_scheduler.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "sim/process.hh"
@@ -330,14 +330,24 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
DPRINTF(HSAPacketProcessor, "%s: submitting vendor specific pkt" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
// Submit packet to HSA device (dispatcher)
hsa_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
gpu_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
is_submitted = UNBLOCKED;
} else if (pkt_type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
DPRINTF(HSAPacketProcessor, "%s: submitting kernel dispatch pkt" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
// Submit packet to HSA device (dispatcher)
hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
gpu_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
is_submitted = UNBLOCKED;
/*
If this packet is using the "barrier bit" to enforce ordering with
subsequent kernels, set the bit for this queue now, after
dispatching.
*/
if (IS_BARRIER(disp_pkt)) {
DPRINTF(HSAPacketProcessor, "%s: setting barrier bit for active" \
" list ID = %d\n", __FUNCTION__, rl_idx);
regdQList[rl_idx]->setBarrierBit(true);
}
} else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
@@ -404,14 +414,14 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
// I'm going to cheat here and read out
// the value from main memory using functional
// access, and then just DMA the decremented value.
uint64_t signal_value = hsa_device->functionalReadHsaSignal(\
uint64_t signal_value = gpu_device->functionalReadHsaSignal(\
bar_and_pkt->completion_signal);
DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
" completion signal! Addr: %x\n",
bar_and_pkt->completion_signal);
hsa_device->updateHsaSignal(bar_and_pkt->completion_signal,
gpu_device->updateHsaSignal(bar_and_pkt->completion_signal,
signal_value - 1);
}
}
@@ -428,7 +438,7 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
DPRINTF(HSAPacketProcessor, "%s: submitting agent dispatch pkt" \
" active list ID = %d\n", __FUNCTION__, rl_idx);
// Submit packet to HSA device (dispatcher)
hsa_device->submitAgentDispatchPkt(
gpu_device->submitAgentDispatchPkt(
(void *)disp_pkt, rl_idx, host_pkt_addr);
is_submitted = UNBLOCKED;
sendAgentDispatchCompletionSignal((void *)disp_pkt,0);
@@ -633,9 +643,9 @@ AQLRingBuffer::freeEntry(void *pkt)
}
void
HSAPacketProcessor::setDevice(HSADevice *dev)
HSAPacketProcessor::setDevice(GPUCommandProcessor *dev)
{
this->hsa_device = dev;
this->gpu_device = dev;
}
int
@@ -670,15 +680,13 @@ HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
DPRINTF(HSAPacketProcessor,
"Unset barrier bit for active list ID %d\n", rl_idx);
regdQList[rl_idx]->setBarrierBit(false);
panic_if(!regdQList[rl_idx]->dispPending(),
"There should be pending kernels in this queue\n");
DPRINTF(HSAPacketProcessor,
"Rescheduling active list ID %d after unsetting barrier "
"bit\n", rl_idx);
// Try to schedule wakeup in the next cycle. There is a minimum
// pktProcessDelay for queue wake up. If that processing delay is
// elapsed, schedAQLProcessing will wakeup next tick.
schedAQLProcessing(rl_idx, 1);
// if pending kernels in the queue after this kernel, reschedule
if (regdQList[rl_idx]->dispPending()) {
DPRINTF(HSAPacketProcessor,
"Rescheduling active list ID %d after unsetting barrier "
"bit\n", rl_idx);
schedAQLProcessing(rl_idx);
}
}
// If set, then blocked schedule, so need to reschedule

View File

@@ -66,7 +66,7 @@ typedef enum
// barrier packet completes.
} Q_STATE;
class HSADevice;
class GPUCommandProcessor;
class HWScheduler;
// Our internal representation of an HSA queue
@@ -120,7 +120,7 @@ class HSAQueueDescriptor
* FREE: Entry is empty
* ALLOCATED: Entry has been allocated for a packet, but the DMA has not
* yet completed
* SUBMITTED: Packet has been submitted to the HSADevice, but has not
* SUBMITTED: Packet has been submitted to the GPUCommandProcessor, but has not
* yet completed
*/
class AQLRingBuffer
@@ -224,7 +224,7 @@ class HSAPacketProcessor: public DmaDevice
friend class HWScheduler;
protected:
typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
HSADevice *hsa_device;
GPUCommandProcessor *gpu_device;
HWScheduler *hwSchdlr;
// Structure to store the read values of dependency signals
@@ -333,7 +333,7 @@ class HSAPacketProcessor: public DmaDevice
uint64_t queue_id,
uint32_t size);
void unsetDeviceQueueDesc(uint64_t queue_id);
void setDevice(HSADevice * dev);
void setDevice(GPUCommandProcessor * dev);
void updateReadIndex(int, uint32_t);
void getCommandsFromHost(int pid, uint32_t rl_idx);