Files
gem5/src/gpu-compute/gpu_compute_driver.hh
Matthew Poremba ebd5b3e4ae gpu-compute: Gfx version check for FS and SE mode
There is no GPU device in SE mode to get version from and no GPU driver
in FS mode to get version from, so a conditional needs to be added
depending on the mode to get the gfx version.

Change-Id: I33fdafb60d351ebc5148e2248244537fb5bebd31
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71078
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
2023-06-01 00:15:02 +00:00

257 lines
9.3 KiB
C++

/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* The GPUComputeDriver implements an HSADriver for an HSA AMD GPU
* agent. Other GPU devices, or other HSA agents, should not derive
* from this class. Instead device-specific implementations of an
* HSADriver should be provided for each unique device.
*/
#ifndef __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
#define __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include "base/addr_range_map.hh"
#include "base/types.hh"
#include "enums/GfxVersion.hh"
#include "mem/request.hh"
#include "sim/emul_driver.hh"
namespace gem5
{
struct GPUComputeDriverParams;
class GPUCommandProcessor;
class PortProxy;
class ThreadContext;
/**
 * Emulated driver for an HSA AMD GPU agent.
 *
 * The driver overrides the EmulatedDriver ioctl/open/mmap entry points,
 * keeps a table of signal events that thread contexts can wait on, and
 * tracks GPUVM VMAs so that an MTYPE can be attached to each memory
 * request. All non-inline members are defined out of line.
 */
class GPUComputeDriver final : public EmulatedDriver
{
  public:
    using Params = GPUComputeDriverParams;

    GPUComputeDriver(const Params &p);

    // EmulatedDriver interface.
    int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override;
    int open(ThreadContext *tc, int mode, int flags) override;
    Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
              int prot, int tgt_flags, int tgt_fd, off_t offset) override;

    /**
     * Signal the event identified by event_id. According to the notes on
     * EventTableEntry::mailBoxPtr, the dispatcher (GPU) calls this directly
     * to notify events.
     */
    virtual void signalWakeupEvent(uint32_t event_id);

    /**
     * Put the given thread context to sleep.
     * NOTE(review): milliSecTimeout is presumably an upper bound on the
     * sleep in milliseconds; confirm against the out-of-line definition.
     */
    void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout);

    /**
     * Called by the compute units right before a request is issued to ruby.
     * This uses our VMAs to correctly set the MTYPE on a per-request basis.
     * In real hardware, this is actually done through PTE bits in GPUVM.
     * Since we are running a single VM (x86 PT) system, the MTYPE bits aren't
     * available. Adding GPUVM specific bits to x86 page tables probably
     * isn't the best way to proceed. For now we just have the driver set
     * these until we implement a proper dual PT system.
     */
    void setMtype(RequestPtr req);

    /**
     * Doorbell size for the configured gfx version (presumably in bytes).
     *
     * @return 4 for gfx801, gfx803 and gfx902; 8 for gfx900. Any other
     *         version is reported through fatal().
     */
    int
    doorbellSize() const
    {
        switch (gfxVersion) {
          case GfxVersion::gfx801:
          case GfxVersion::gfx803:
          case GfxVersion::gfx902:
            return 4;
          case GfxVersion::gfx900:
            // gfx900 supports large BAR, so it has a larger doorbell
            return 8;
          default:
            fatal("Invalid GPU type\n");
        }
        // Only reachable if fatal() were to return; presumably kept so
        // that every control path visibly yields a value.
        return 4;
    }

    /**
     * Event bound to a driver and a thread context. EventList holds one
     * instance of this class as its timer wakeup event.
     */
    class DriverWakeupEvent : public Event
    {
      public:
        DriverWakeupEvent(GPUComputeDriver *gpu_driver,
                          ThreadContext *thrd_cntxt)
            : driver(gpu_driver), tc(thrd_cntxt) {}

        void process() override;
        const char *description() const override;
        void scheduleWakeup(Tick wakeup_delay);

      private:
        GPUComputeDriver *driver;
        ThreadContext *tc;
    };

    /**
     * Per-event bookkeeping stored in the driver's event table.
     */
    class EventTableEntry
    {
      public:
        EventTableEntry() :
            mailBoxPtr(0), tc(nullptr), threadWaiting(false), setEvent(false)
        {}

        // Mail box pointer for this address. Current implementation does not
        // use this mailBoxPtr to notify events but directly calls
        // signalWakeupEvent from dispatcher (GPU) to notify events. So,
        // currently this mailBoxPtr is not used. But a future implementation
        // may communicate to the driver using mailBoxPtr.
        Addr mailBoxPtr;

        // Thread context waiting on this event. We do not support multiple
        // threads waiting on an event currently.
        ThreadContext *tc;

        // threadWaiting = true, if some thread context is waiting on this
        // event. A thread context waiting on this event is put to sleep.
        bool threadWaiting;

        // setEvent = true, if this event is triggered but when this event
        // triggered, no thread context was waiting on it. In the future, some
        // thread context will try to wait on this event but since event has
        // already happened, we will not allow that thread context to go to
        // sleep. The above mentioned scenario can happen when the waiting
        // thread and wakeup thread race on this event and the wakeup thread
        // beat the waiting thread at the driver.
        bool setEvent;
    };

    using ETEntry = EventTableEntry;

    /** @return the gfx version this driver is configured with. */
    GfxVersion getGfxVersion() const { return gfxVersion; }

  private:
    /**
     * GPU that is controlled by this driver.
     */
    GPUCommandProcessor *device;

    uint32_t queueId;
    bool isdGPU;
    GfxVersion gfxVersion;
    int dGPUPoolID;
    Addr eventPage;

    // EventList::clearEvents() asserts that every tracked event ID is
    // below this index.
    uint32_t eventSlotIndex;

    // Event table that keeps track of events. It is indexed with event ID.
    std::unordered_map<uint32_t, ETEntry> ETable;

    /**
     * VMA structures for GPUVM memory.
     */
    AddrRangeMap<Request::CacheCoherenceFlags, 1> gpuVmas;

    /**
     * Mtype bits {Cached, Read Write, Shared} for caches
     */
    enum MtypeFlags
    {
        SHARED                  = 0,
        READ_WRITE              = 1,
        CACHED                  = 2,
        NUM_MTYPE_BITS
    };

    Request::CacheCoherenceFlags defaultMtype;

    // TCEvents map keeps track of the events that can wake up this thread.
    // When multiple events can wake up this thread, this data structure
    // helps to reset all events when one of those events wakes up this
    // thread. The signal events that can wake up this thread are stored in
    // signalEvents whereas the timer wakeup event is stored in timerEvent.
    class EventList
    {
      public:
        EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}

        EventList(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
            : driver(gpu_driver), timerEvent(gpu_driver, thrd_cntxt)
        { }

        /**
         * Detach the waiting thread from every tracked signal event,
         * forget those events, and deschedule the timer event if it is
         * still scheduled. Requires a non-null driver.
         */
        void
        clearEvents()
        {
            assert(driver);
            for (const uint32_t event_id : signalEvents) {
                assert(event_id < driver->eventSlotIndex);
                // Single table lookup; operator[] keeps the original
                // behavior of creating a default entry if none exists.
                ETEntry &entry = driver->ETable[event_id];
                entry.tc = nullptr;
                entry.threadWaiting = false;
            }
            signalEvents.clear();

            if (timerEvent.scheduled()) {
                driver->deschedule(timerEvent);
            }
        }

        GPUComputeDriver *driver;
        DriverWakeupEvent timerEvent;
        // The set of events that can wake up the same thread.
        std::set<uint32_t> signalEvents;
    };

    std::unordered_map<ThreadContext *, EventList> TCEvents;

    /**
     * Register a region of host memory as uncacheable from the perspective
     * of the dGPU.
     */
    void registerUncacheableMemory(Addr start, Addr length);

    /**
     * The aperture (APE) base/limit pairs are set
     * statically at startup by the real KFD. AMD
     * x86_64 CPUs only use the areas in the 64b
     * address space where VA[63:47] == 0x1ffff or
     * VA[63:47] = 0. These methods generate the APE
     * base/limit pairs in exactly the same way as
     * the real KFD does, which ensures these APEs do
     * not fall into the CPU's address space
     *
     * see the macros in the KFD driver in the ROCm
     * Linux kernel source:
     *
     * drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
     */
    Addr gpuVmApeBase(int gpuNum) const;
    Addr gpuVmApeLimit(Addr apeBase) const;
    Addr scratchApeBase(int gpuNum) const;
    Addr scratchApeBaseV9() const;
    Addr scratchApeLimit(Addr apeBase) const;
    Addr ldsApeBase(int gpuNum) const;
    Addr ldsApeBaseV9() const;
    Addr ldsApeLimit(Addr apeBase) const;

    /**
     * Allocate/deallocate GPUVM VMAs for tracking virtual address allocations
     * and properties on DGPUs. For now, we use these to track MTYPE and to
     * be able to select which pages to unmap when the user provides us with
     * a handle during the free ioctl.
     */
    void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start,
                        Addr length);
    Addr deallocateGpuVma(Addr start);

    void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr);
};
} // namespace gem5
#endif // __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__