Event creation and management support in the emulated driver is required to support interruptible signals in HSA, and this support was not previously available. This changeset adds event creation and management support to the emulated driver. With this patch, each interruptible signal created by the HSA runtime is associated with a signal event. The HSA runtime can then put a thread that is waiting on a signal condition to sleep by asking the driver to monitor the event associated with that signal. If the signal is modified by the GPU, the dispatcher notifies the driver about the signal value change. If the modifier is a CPU thread, the thread has to make HSA API calls to modify the signal, and these API calls notify the driver about the signal value change. Once the driver is notified about a change in the signal value, it checks whether any thread is sleeping on that signal and wakes up the sleeping thread associated with that event. The driver also implements a timeout wakeup that wakes up the thread after a certain time period has expired. This is also true for barrier packets. Each signal has an event address in a kernel-managed and kernel-allocated event page that can be used as a mailbox pointer to notify an event. However, this feature, which non-CPU agents use to communicate with the driver, is not implemented by this changeset because the non-CPU HSA agents in our model can communicate directly with the driver. Having said that, adding the feature should be trivial because the event address and event pages are correctly set up by this changeset; adding the event page's virtual address to our PIO doorbell interface in the page tables and registering that PIO address with the driver should be sufficient. The mailbox pointer for an event is managed based on the event ID: using this event ID as an index into the event page, this changeset already provides a unique mailbox pointer for each event. 
Change-Id: Ic62794076ddd47526b1f952fdb4c1bad632bdd2e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38335 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
531 lines
21 KiB
C++
531 lines
21 KiB
C++
/*
|
|
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* For use for simulation and test purposes only
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* 3. Neither the name of the copyright holder nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* Authors: Sooraj Puthoor
|
|
* Anthony Gutierrez
|
|
*/
|
|
|
|
#include "gpu-compute/gpu_compute_driver.hh"
|
|
|
|
#include "cpu/thread_context.hh"
|
|
#include "debug/GPUDriver.hh"
|
|
#include "dev/hsa/hsa_device.hh"
|
|
#include "dev/hsa/hsa_packet_processor.hh"
|
|
#include "dev/hsa/kfd_event_defines.h"
|
|
#include "dev/hsa/kfd_ioctl.h"
|
|
#include "params/GPUComputeDriver.hh"
|
|
#include "sim/syscall_emul_buf.hh"
|
|
|
|
/**
 * Build the emulated compute driver on top of HSADriver and attach
 * this driver instance to its device.
 */
GPUComputeDriver::GPUComputeDriver(const Params &p) : HSADriver(p)
{
    // Hand the device a pointer back to the driver that services it.
    device->attachDriver(this);

    DPRINTF(GPUDriver, "Constructing KFD: device\n");
}
|
|
|
|
int
|
|
GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
|
{
|
|
auto &virt_proxy = tc->getVirtProxy();
|
|
|
|
switch (req) {
|
|
case AMDKFD_IOC_GET_VERSION:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");
|
|
|
|
TypedBufferArg<kfd_ioctl_get_version_args> args(ioc_buf);
|
|
args->major_version = KFD_IOCTL_MAJOR_VERSION;
|
|
args->minor_version = KFD_IOCTL_MINOR_VERSION;
|
|
|
|
args.copyOut(virt_proxy);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_CREATE_QUEUE:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
|
|
|
|
allocateQueue(tc, ioc_buf);
|
|
|
|
DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_DESTROY_QUEUE:
|
|
{
|
|
TypedBufferArg<kfd_ioctl_destroy_queue_args> args(ioc_buf);
|
|
args.copyIn(virt_proxy);
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
|
|
"queue offset %d\n", args->queue_id);
|
|
device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_SET_MEMORY_POLICY:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_GET_CLOCK_COUNTERS:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
|
|
|
|
TypedBufferArg<kfd_ioctl_get_clock_counters_args> args(ioc_buf);
|
|
args.copyIn(virt_proxy);
|
|
|
|
// Set nanosecond resolution
|
|
args->system_clock_freq = 1000000000;
|
|
|
|
/**
|
|
* Derive all clock counters based on the tick. All
|
|
* device clocks are identical and perfectly in sync.
|
|
*/
|
|
uint64_t elapsed_nsec = curTick() / SimClock::Int::ns;
|
|
args->gpu_clock_counter = elapsed_nsec;
|
|
args->cpu_clock_counter = elapsed_nsec;
|
|
args->system_clock_counter = elapsed_nsec;
|
|
|
|
args.copyOut(virt_proxy);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_GET_PROCESS_APERTURES:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
|
|
|
|
TypedBufferArg<kfd_ioctl_get_process_apertures_args> args(ioc_buf);
|
|
args->num_of_nodes = 1;
|
|
|
|
/**
|
|
* Set the GPUVM/LDS/Scratch APEs exactly as they
|
|
* are in the real driver, see the KFD driver
|
|
* in the ROCm Linux kernel source:
|
|
* drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
|
|
*/
|
|
for (int i = 0; i < args->num_of_nodes; ++i) {
|
|
/**
|
|
* While the GPU node numbers start at 0, we add 1
|
|
* to force the count to start at 1. This is to
|
|
* ensure that the base/limit addresses are
|
|
* calculated correctly.
|
|
*/
|
|
args->process_apertures[i].scratch_base
|
|
= scratchApeBase(i + 1);
|
|
args->process_apertures[i].scratch_limit =
|
|
scratchApeLimit(args->process_apertures[i].scratch_base);
|
|
|
|
args->process_apertures[i].lds_base = ldsApeBase(i + 1);
|
|
args->process_apertures[i].lds_limit =
|
|
ldsApeLimit(args->process_apertures[i].lds_base);
|
|
|
|
args->process_apertures[i].gpuvm_base = gpuVmApeBase(i + 1);
|
|
args->process_apertures[i].gpuvm_limit =
|
|
gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
|
|
|
|
// NOTE: Must match ID populated by hsaTopology.py
|
|
args->process_apertures[i].gpu_id = 2765;
|
|
|
|
DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
|
|
args->process_apertures[i].gpuvm_base);
|
|
DPRINTF(GPUDriver, "GPUVM limit for node[%i] = %#x\n", i,
|
|
args->process_apertures[i].gpuvm_limit);
|
|
|
|
DPRINTF(GPUDriver, "LDS base for node[%i] = %#x\n", i,
|
|
args->process_apertures[i].lds_base);
|
|
DPRINTF(GPUDriver, "LDS limit for node[%i] = %#x\n", i,
|
|
args->process_apertures[i].lds_limit);
|
|
|
|
DPRINTF(GPUDriver, "Scratch base for node[%i] = %#x\n", i,
|
|
args->process_apertures[i].scratch_base);
|
|
DPRINTF(GPUDriver, "Scratch limit for node[%i] = %#x\n", i,
|
|
args->process_apertures[i].scratch_limit);
|
|
|
|
/**
|
|
* The CPU's 64b address space can only use the
|
|
* areas with VA[63:47] == 0x1ffff or VA[63:47] == 0,
|
|
* therefore we must ensure that the apertures do not
|
|
* fall in the CPU's address space.
|
|
*/
|
|
assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
|
|
47) != 0x1ffff);
|
|
assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
|
|
47) != 0);
|
|
assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
|
|
47) != 0x1ffff);
|
|
assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
|
|
47) != 0);
|
|
assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
|
|
47) != 0x1ffff);
|
|
assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
|
|
47) != 0);
|
|
assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
|
|
47) != 0x1ffff);
|
|
assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
|
|
47) != 0);
|
|
assert(bits<Addr>(args->process_apertures[i].gpuvm_base, 63,
|
|
47) != 0x1ffff);
|
|
assert(bits<Addr>(args->process_apertures[i].gpuvm_base, 63,
|
|
47) != 0);
|
|
assert(bits<Addr>(args->process_apertures[i].gpuvm_limit, 63,
|
|
47) != 0x1ffff);
|
|
assert(bits<Addr>(args->process_apertures[i].gpuvm_limit, 63,
|
|
47) != 0);
|
|
}
|
|
|
|
args.copyOut(virt_proxy);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_UPDATE_QUEUE:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_CREATE_EVENT:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_EVENT\n");
|
|
|
|
TypedBufferArg<kfd_ioctl_create_event_args> args(ioc_buf);
|
|
args.copyIn(virt_proxy);
|
|
if (args->event_type != KFD_IOC_EVENT_SIGNAL) {
|
|
fatal("Signal events are only supported currently\n");
|
|
} else if (eventSlotIndex == SLOTS_PER_PAGE) {
|
|
fatal("Signal event wasn't created; signal limit reached\n");
|
|
}
|
|
// Currently, we allocate only one signal_page for events.
|
|
// Note that this signal page is of size 8 * KFD_SIGNAL_EVENT_LIMIT
|
|
uint64_t page_index = 0;
|
|
args->event_page_offset = (page_index | KFD_MMAP_TYPE_EVENTS);
|
|
args->event_page_offset <<= PAGE_SHIFT;
|
|
// TODO: Currently we support only signal events, hence using
|
|
// the same ID for both signal slot and event slot
|
|
args->event_slot_index = eventSlotIndex;
|
|
args->event_id = eventSlotIndex++;
|
|
args->event_trigger_data = args->event_id;
|
|
DPRINTF(GPUDriver, "amdkfd create events"
|
|
"(event_id: 0x%x, offset: 0x%x)\n",
|
|
args->event_id, args->event_page_offset);
|
|
// Since eventSlotIndex is increased everytime a new event is
|
|
// created ETable at eventSlotIndex(event_id) is guaranteed to be
|
|
// empty. In a future implementation that reuses deleted event_ids,
|
|
// we should check if event table at this
|
|
// eventSlotIndex(event_id) is empty before inserting a new event
|
|
// table entry
|
|
ETable.emplace(std::pair<uint32_t, ETEntry>(args->event_id, {}));
|
|
args.copyOut(virt_proxy);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_DESTROY_EVENT:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
|
|
TypedBufferArg<kfd_ioctl_destroy_event_args> args(ioc_buf);
|
|
args.copyIn(virt_proxy);
|
|
DPRINTF(GPUDriver, "amdkfd destroying event %d\n", args->event_id);
|
|
fatal_if(ETable.count(args->event_id) == 0,
|
|
"Event ID invalid, cannot destroy this event\n");
|
|
ETable.erase(args->event_id);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_SET_EVENT:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_SET_EVENTS\n");
|
|
TypedBufferArg<kfd_ioctl_set_event_args> args(ioc_buf);
|
|
args.copyIn(virt_proxy);
|
|
DPRINTF(GPUDriver, "amdkfd set event %d\n", args->event_id);
|
|
fatal_if(ETable.count(args->event_id) == 0,
|
|
"Event ID invlaid, cannot set this event\n");
|
|
ETable[args->event_id].setEvent = true;
|
|
signalWakeupEvent(args->event_id);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_RESET_EVENT:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_WAIT_EVENTS:
|
|
{
|
|
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
|
|
TypedBufferArg<kfd_ioctl_wait_events_args> args(ioc_buf);
|
|
args.copyIn(virt_proxy);
|
|
kfd_event_data *events =
|
|
(kfd_event_data *)args->events_ptr;
|
|
DPRINTF(GPUDriver, "amdkfd wait for events"
|
|
"(wait on all: %d, timeout : %d, num_events: %s)\n",
|
|
args->wait_for_all, args->timeout, args->num_events);
|
|
panic_if(args->wait_for_all != 0 && args->num_events > 1,
|
|
"Wait for all events not supported\n");
|
|
bool should_sleep = true;
|
|
if (TCEvents.count(tc) == 0) {
|
|
// This thread context trying to wait on an event for the first
|
|
// time, initialize it.
|
|
TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
|
|
std::make_tuple(this, tc));
|
|
DPRINTF(GPUDriver, "\tamdkfd creating event list"
|
|
" for thread %d\n", tc->cpuId());
|
|
}
|
|
panic_if(TCEvents[tc].signalEvents.size() != 0,
|
|
"There are %d events that put this thread to sleep,"
|
|
" this thread should not be running\n",
|
|
TCEvents[tc].signalEvents.size());
|
|
for (int i = 0; i < args->num_events; i++) {
|
|
panic_if(!events,
|
|
"Event pointer invalid\n");
|
|
Addr eventDataAddr = (Addr)(events + i);
|
|
TypedBufferArg<kfd_event_data> EventData(
|
|
eventDataAddr, sizeof(kfd_event_data));
|
|
EventData.copyIn(virt_proxy);
|
|
DPRINTF(GPUDriver,
|
|
"\tamdkfd wait for event %d\n", EventData->event_id);
|
|
panic_if(ETable.count(EventData->event_id) == 0,
|
|
"Event ID invalid, cannot set this event\n");
|
|
panic_if(ETable[EventData->event_id].threadWaiting,
|
|
"Multiple threads waiting on the same event\n");
|
|
if (ETable[EventData->event_id].setEvent) {
|
|
// If event is already set, the event has already happened.
|
|
// Just unset the event and dont put this thread to sleep.
|
|
ETable[EventData->event_id].setEvent = false;
|
|
should_sleep = false;
|
|
}
|
|
if (should_sleep) {
|
|
// Put this thread to sleep
|
|
ETable[EventData->event_id].threadWaiting = true;
|
|
ETable[EventData->event_id].tc = tc;
|
|
TCEvents[tc].signalEvents.insert(EventData->event_id);
|
|
}
|
|
}
|
|
|
|
// TODO: Return the correct wait_result back. Currently, returning
|
|
// success for both KFD_WAIT_TIMEOUT and KFD_WAIT_COMPLETE.
|
|
// Ideally, this needs to be done after the event is triggered and
|
|
// after the thread is woken up.
|
|
args->wait_result = 0;
|
|
args.copyOut(virt_proxy);
|
|
if (should_sleep) {
|
|
// Put this thread to sleep
|
|
sleepCPU(tc, args->timeout);
|
|
} else {
|
|
// Remove events that tried to put this thread to sleep
|
|
TCEvents[tc].clearEvents();
|
|
}
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_DBG_REGISTER:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_DBG_UNREGISTER:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_DBG_ADDRESS_WATCH:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_DBG_WAVE_CONTROL:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_ALLOC_MEMORY_OF_GPU:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_FREE_MEMORY_OF_GPU:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_MAP_MEMORY_TO_GPU:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_SET_CU_MASK:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE"
|
|
"\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_SET_TRAP_HANDLER:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW:
|
|
{
|
|
DPRINTF(GPUDriver,
|
|
"ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
|
|
|
|
TypedBufferArg<kfd_ioctl_get_process_apertures_new_args>
|
|
ioc_args(ioc_buf);
|
|
|
|
ioc_args.copyIn(virt_proxy);
|
|
ioc_args->num_of_nodes = 1;
|
|
|
|
for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
|
|
TypedBufferArg<kfd_process_device_apertures> ape_args
|
|
(ioc_args->kfd_process_device_apertures_ptr);
|
|
|
|
ape_args->scratch_base = scratchApeBase(i + 1);
|
|
ape_args->scratch_limit =
|
|
scratchApeLimit(ape_args->scratch_base);
|
|
ape_args->lds_base = ldsApeBase(i + 1);
|
|
ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
|
|
ape_args->gpuvm_base = gpuVmApeBase(i + 1);
|
|
ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base);
|
|
|
|
ape_args->gpu_id = 2765;
|
|
|
|
assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
|
|
assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
|
|
assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
|
|
assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
|
|
assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
|
|
assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
|
|
assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
|
|
assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
|
|
assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0x1ffff);
|
|
assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0);
|
|
assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0x1ffff);
|
|
assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0);
|
|
|
|
ape_args.copyOut(virt_proxy);
|
|
}
|
|
|
|
ioc_args.copyOut(virt_proxy);
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_GET_DMABUF_INFO:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_IMPORT_DMABUF:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_GET_TILE_CONFIG:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_IPC_IMPORT_HANDLE:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_IPC_IMPORT_HANDLE\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_IPC_EXPORT_HANDLE:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_IPC_EXPORT_HANDLE\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_CROSS_MEMORY_COPY:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_CROSS_MEMORY_COPY\n");
|
|
}
|
|
break;
|
|
case AMDKFD_IOC_OPEN_GRAPHIC_HANDLE:
|
|
{
|
|
warn("unimplemented ioctl: AMDKFD_IOC_OPEN_GRAPHIC_HANDLE\n");
|
|
}
|
|
break;
|
|
default:
|
|
fatal("%s: bad ioctl %d\n", req);
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
GPUComputeDriver::sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
|
|
{
|
|
// Convert millisecs to ticks
|
|
Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
|
|
assert(TCEvents.count(tc) == 1);
|
|
TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
|
|
tc->suspend();
|
|
DPRINTF(GPUDriver,
|
|
"CPU %d is put to sleep\n", tc->cpuId());
|
|
}
|
|
|
|
/**
 * Compute the base address of the GPUVM aperture for a GPU number.
 *
 * @param gpuNum GPU number; it is shifted into bits 61 and above.
 * @return The shifted GPU number plus the fixed GPUVM offset.
 */
Addr
GPUComputeDriver::gpuVmApeBase(int gpuNum) const
{
    const Addr node_bits = static_cast<Addr>(gpuNum) << 61;
    const Addr gpuvm_offset = 0x1000000000000L;

    return node_bits + gpuvm_offset;
}
|
|
|
|
/**
 * Compute the limit (last address) of the GPUVM aperture.
 *
 * @param apeBase Base address of the GPUVM aperture.
 * @return apeBase with its low 40 bits all set.
 */
Addr
GPUComputeDriver::gpuVmApeLimit(Addr apeBase) const
{
    const Addr keep_high = 0xFFFFFF0000000000UL;
    const Addr fill_low = 0xFFFFFFFFFFL;

    const Addr aperture_start = apeBase & keep_high;
    return aperture_start | fill_low;
}
|
|
|
|
/**
 * Compute the base address of the scratch aperture for a GPU number.
 *
 * @param gpuNum GPU number; it is shifted into bits 61 and above.
 * @return The shifted GPU number plus the fixed scratch offset.
 */
Addr
GPUComputeDriver::scratchApeBase(int gpuNum) const
{
    const Addr node_bits = static_cast<Addr>(gpuNum) << 61;
    const Addr scratch_offset = 0x100000000L;

    return node_bits + scratch_offset;
}
|
|
|
|
/**
 * Compute the limit (last address) of the scratch aperture.
 *
 * @param apeBase Base address of the scratch aperture.
 * @return apeBase with its low 32 bits all set.
 */
Addr
GPUComputeDriver::scratchApeLimit(Addr apeBase) const
{
    const Addr keep_high = 0xFFFFFFFF00000000UL;
    const Addr fill_low = 0xFFFFFFFF;

    const Addr aperture_start = apeBase & keep_high;
    return aperture_start | fill_low;
}
|
|
|
|
/**
 * Compute the base address of the LDS aperture for a GPU number.
 *
 * @param gpuNum GPU number; it is shifted into bits 61 and above.
 * @return The shifted GPU number; the LDS offset within it is zero.
 */
Addr
GPUComputeDriver::ldsApeBase(int gpuNum) const
{
    const Addr node_bits = static_cast<Addr>(gpuNum) << 61;
    const Addr lds_offset = 0x0;

    return node_bits + lds_offset;
}
|
|
|
|
/**
 * Compute the limit (last address) of the LDS aperture.
 *
 * @param apeBase Base address of the LDS aperture.
 * @return apeBase with its low 32 bits all set.
 */
Addr
GPUComputeDriver::ldsApeLimit(Addr apeBase) const
{
    const Addr keep_high = 0xFFFFFFFF00000000UL;
    const Addr fill_low = 0xFFFFFFFF;

    const Addr aperture_start = apeBase & keep_high;
    return aperture_start | fill_low;
}
|