Files
gem5/src/dev/amdgpu/sdma_engine.hh
Matthew Poremba eee42275ee dev-amdgpu: Writeback RLC queue MQD when unmapped
Currently when RLC queues (user mode queues) are mapped, the read/write
pointers of the ring buffer are set to zero. However, these queues could
be unmapped and then remapped later. In that situation the read/write
pointers should be the previous value before unmapping occurred. Since
the read pointer gets reset to zero, the queue begins reading from the
start of the ring, which usually contains older packets. There is a 99%
chance those packets contain addresses which are no longer in the page
tables which will cause a page fault.

To fix this we update the MQD with the current read/write pointer values
and then writeback the MQD to memory when the queue is unmapped. This
requires adding a pointer to the MQD and the host address of the MQD
where it should be written back to. The interface for registering RLC
queue is also simplified. Since we need to pass the MQD anyway, we can
get values from it as well.

Fixes b+tree and streamcluster from rodinia (when using RLC queues).

Change-Id: Ie5dad4d7d90ea240c3e9f0cddf3e844a3cd34c4f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65791
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
2022-12-01 21:04:05 +00:00

300 lines
10 KiB
C++

/*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DEV_AMDGPU_SDMA_ENGINE_HH__
#define __DEV_AMDGPU_SDMA_ENGINE_HH__
#include <array>
#include <cassert>
#include <cstdint>

#include "base/bitunion.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "dev/amdgpu/pm4_queues.hh"
#include "dev/amdgpu/sdma_packets.hh"
#include "dev/dma_virt_device.hh"
#include "params/SDMAEngine.hh"
namespace gem5
{
/**
* System DMA Engine class for AMD dGPU.
*/
class SDMAEngine : public DmaVirtDevice
{
/**
 * Queue type tag stored in each SDMAQueue and exposed through
 * queueType(). A default-constructed SDMAQueue is tagged SDMAGfx.
 */
enum SDMAType
{
    SDMAGfx = 0,
    SDMAPage = 1
};
/**
 * State tracked for a single SDMA queue: the ring base and size, the
 * read/write offsets into it, links to a parent queue and to an indirect
 * buffer queue, the queue type and the MQD that describes the queue.
 *
 * The read and write pointers are kept as offsets relative to the base.
 * rptr()/wptr() return base + offset, while getRptr()/getWptr() return
 * the raw offset.
 */
class SDMAQueue
{
    // Every member carries a default so that a queue that has not been
    // configured yet still reads back well-defined values. In
    // particular the base and size start at zero instead of being left
    // indeterminate.
    Addr _base = 0;
    Addr _rptr = 0;
    Addr _wptr = 0;
    Addr _size = 0;
    Addr _rptr_wb_addr = 0;
    // Read pointer that incRptr() advances without wrapping.
    Addr _global_rptr = 0;
    bool _valid = false;
    bool _processing = false;
    SDMAQueue *_parent = nullptr;
    SDMAQueue *_ib = nullptr;
    SDMAType _type = SDMAGfx;
    // MQD for this queue and the address stored alongside it.
    SDMAQueueDesc *_mqd = nullptr;
    Addr _mqd_addr = 0;

  public:
    SDMAQueue() = default;

    /*
     * Read accessors. None of them modify the queue, so all of them
     * are const-qualified.
     */
    Addr base() const { return _base; }
    /** @return The base address plus the current read offset. */
    Addr rptr() const { return _base + _rptr; }
    /** @return The read offset relative to the base. */
    Addr getRptr() const { return _rptr; }
    /** @return The base address plus the current write offset. */
    Addr wptr() const { return _base + _wptr; }
    /** @return The write offset relative to the base. */
    Addr getWptr() const { return _wptr; }
    Addr size() const { return _size; }
    Addr rptrWbAddr() const { return _rptr_wb_addr; }
    Addr globalRptr() const { return _global_rptr; }
    bool valid() const { return _valid; }
    bool processing() const { return _processing; }
    SDMAQueue* parent() const { return _parent; }
    SDMAQueue* ib() const { return _ib; }
    SDMAType queueType() const { return _type; }
    SDMAQueueDesc* getMQD() const { return _mqd; }
    Addr getMQDAddr() const { return _mqd_addr; }

    void base(Addr value) { _base = value; }

    /**
     * Advance the read offset by value, wrapping it at the queue size.
     * The global read pointer is advanced by the same amount but is
     * not wrapped.
     *
     * @param value Amount to add to the read offset.
     */
    void
    incRptr(uint32_t value)
    {
        // The modulo below is undefined for a zero-sized queue.
        assert(_size != 0);
        _rptr = (_rptr + value) % _size;
        _global_rptr += value;
    }

    /**
     * Set the read offset directly (no wrapping) and set the global
     * read pointer to the same value.
     */
    void
    rptr(Addr value)
    {
        _rptr = value;
        _global_rptr = value;
    }

    /**
     * Set the write offset to value wrapped at the queue size.
     *
     * @param value New write offset before wrapping.
     */
    void
    setWptr(Addr value)
    {
        // The modulo below is undefined for a zero-sized queue.
        assert(_size != 0);
        _wptr = value % _size;
    }

    /** Set the write offset directly, without wrapping. */
    void wptr(Addr value) { _wptr = value; }
    void size(Addr value) { _size = value; }
    void rptrWbAddr(Addr value) { _rptr_wb_addr = value; }
    void valid(bool v) { _valid = v; }
    void processing(bool value) { _processing = value; }
    void parent(SDMAQueue* q) { _parent = q; }
    void ib(SDMAQueue* ib) { _ib = ib; }
    void queueType(SDMAType type) { _type = type; }
    void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
    void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
};
/* SDMA Engine ID. Assigned through setId(). */
int id;
/**
* Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1,
* where RLC stands for Run List Controller. Each one of these
* can have one indirect buffer associated at any particular time.
* The switching order between queues is supposed to be page -> gfx ->
* rlc0 -> page -> gfx -> rlc1, skipping empty queues.
*
* NOTE(review): the *Ib members are presumably the indirect buffer
* queues paired with the queue of the same prefix -- confirm in the
* implementation.
*/
SDMAQueue gfx, page, gfxIb, pageIb;
SDMAQueue rlc0, rlc0Ib, rlc1, rlc1Ib;
/* Gfx ring buffer registers. Read back through the getGfx*() methods. */
uint64_t gfxBase;
uint64_t gfxRptr;
uint64_t gfxDoorbell;
uint64_t gfxDoorbellOffset;
uint64_t gfxWptr;
/* Page ring buffer registers. Read back through the getPage*() methods. */
uint64_t pageBase;
uint64_t pageRptr;
uint64_t pageDoorbell;
uint64_t pageDoorbellOffset;
uint64_t pageWptr;
/*
* NOTE(review): presumably the GPU device this engine belongs to, assigned
* by setGPUDevice() -- confirm in the implementation.
*/
AMDGPUDevice *gpuDevice;
/*
* NOTE(review): presumably the page table walker used for address
* translation -- confirm in the implementation.
*/
VegaISA::Walker *walker;
/*
* processRLC will select the correct queue for the doorbell.
* rlcInfo holds two value-initialized entries. NOTE(review): presumably
* one entry per RLC queue (rlc0, rlc1) -- confirm in the implementation.
*/
std::array<Addr, 2> rlcInfo{};
/* Per-queue RLC handlers; each takes a write pointer offset. */
void processRLC0(Addr wptrOffset);
void processRLC1(Addr wptrOffset);
public:
SDMAEngine(const SDMAEngineParams &p);
void setGPUDevice(AMDGPUDevice *gpu_device);
/**
 * Store the SDMA engine ID.
 *
 * @param _id Value copied into the id member.
 */
void
setId(int _id)
{
    id = _id;
}
/**
* Returns the client id for the Interrupt Handler.
*/
int getIHClientId();
/**
* Methods for translation.
*/
Addr getGARTAddr(Addr addr) const;
TranslationGenPtr translate(Addr vaddr, Addr size) override;
/**
* Translate an address in an SDMA packet. Returns the device address if
* the address in the packet is on the device, and 0 if the address in
* the packet is in host/system memory.
*/
Addr getDeviceAddress(Addr raw_addr);
/**
* Inherited methods.
*/
/**
 * Inherited packet-level write hook. The packet is not inspected and 0
 * is returned. This is distinct from the write(SDMAQueue *, sdmaWrite *)
 * overload, which handles an SDMA write packet.
 */
Tick
write(PacketPtr pkt) override
{
    return 0;
}
/**
 * Inherited packet-level read hook. The packet is not inspected and 0
 * is returned.
 */
Tick
read(PacketPtr pkt) override
{
    return 0;
}
AddrRangeList getAddrRanges() const override;
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
/**
* Given a new write ptr offset, communicated to the GPU through a doorbell
* write, the SDMA engine processes the page, gfx, rlc0, or rlc1 queue.
*/
void processGfx(Addr wptrOffset);
void processPage(Addr wptrOffset);
void processRLC(Addr doorbellOffset, Addr wptrOffset);
/**
* This method checks the read and write pointers and starts decoding
* packets if the read pointer is less than the write pointer.
* It also marks the queue as currently processing so that, if the
* doorbell is rung again, the newly enqueued packets are decoded once
* the ones currently being processed are finished. This is achieved by
* calling decodeNext once an entire SDMA packet has been processed.
*/
void decodeNext(SDMAQueue *q);
/**
* Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which
* encodes the opcode and sub-opcode of the packet. It also creates an
* SDMA packet object and calls the associated processing function.
*/
void decodeHeader(SDMAQueue *q, uint32_t data);
/**
* Methods that implement processing of SDMA packets
*/
void write(SDMAQueue *q, sdmaWrite *pkt);
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer);
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer);
void copy(SDMAQueue *q, sdmaCopy *pkt);
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer);
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer);
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt);
void fence(SDMAQueue *q, sdmaFence *pkt);
void fenceDone(SDMAQueue *q, sdmaFence *pkt);
void trap(SDMAQueue *q, sdmaTrap *pkt);
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header,
sdmaSRBMWrite *pkt);
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header,
sdmaPollRegMem *pkt);
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header,
sdmaPollRegMem *pkt, uint32_t dma_buffer, int count);
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func);
void ptePde(SDMAQueue *q, sdmaPtePde *pkt);
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer);
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt);
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
/**
* Methods for getting the values of SDMA MMIO registers.
*/
uint64_t getGfxBase() { return gfxBase; }
uint64_t getGfxRptr() { return gfxRptr; }
uint64_t getGfxDoorbell() { return gfxDoorbell; }
uint64_t getGfxDoorbellOffset() { return gfxDoorbellOffset; }
uint64_t getGfxWptr() { return gfxWptr; }
uint64_t getPageBase() { return pageBase; }
uint64_t getPageRptr() { return pageRptr; }
uint64_t getPageDoorbell() { return pageDoorbell; }
uint64_t getPageDoorbellOffset() { return pageDoorbellOffset; }
uint64_t getPageWptr() { return pageWptr; }
/**
* Methods for setting the values of SDMA MMIO registers.
*/
void writeMMIO(PacketPtr pkt, Addr mmio_offset);
void setGfxBaseLo(uint32_t data);
void setGfxBaseHi(uint32_t data);
void setGfxRptrLo(uint32_t data);
void setGfxRptrHi(uint32_t data);
void setGfxDoorbellLo(uint32_t data);
void setGfxDoorbellHi(uint32_t data);
void setGfxDoorbellOffsetLo(uint32_t data);
void setGfxDoorbellOffsetHi(uint32_t data);
void setGfxSize(uint64_t data);
void setGfxWptrLo(uint32_t data);
void setGfxWptrHi(uint32_t data);
void setPageBaseLo(uint32_t data);
void setPageBaseHi(uint32_t data);
void setPageRptrLo(uint32_t data);
void setPageRptrHi(uint32_t data);
void setPageDoorbellLo(uint32_t data);
void setPageDoorbellHi(uint32_t data);
void setPageDoorbellOffsetLo(uint32_t data);
void setPageDoorbellOffsetHi(uint32_t data);
void setPageSize(uint64_t data);
void setPageWptrLo(uint32_t data);
void setPageWptrHi(uint32_t data);
/**
* Methods for RLC queues
*/
/**
* Register (map) an RLC queue.
*
* @param doorbell Doorbell associated with the queue.
* @param mqdAddr Host address of the MQD; intended as the location the
*                MQD is written back to when the queue is unmapped.
* @param mqd Pointer to the queue's MQD.
*/
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd);
/**
* Unregister (unmap) an RLC queue.
*
* @param doorbell Doorbell passed when the queue was registered
*                 (presumably -- confirm in the implementation).
*/
void unregisterRLCQueue(Addr doorbell);
/*
* NOTE(review): presumably releases every registered RLC queue -- confirm
* in the implementation.
*/
void deallocateRLCQueues();
/*
* Public data member, initialized to 0. NOTE(review): presumably the VMID
* currently in use by this engine -- confirm against the callers.
*/
int cur_vmid = 0;
};
} // namespace gem5
#endif // __DEV_AMDGPU_SDMA_ENGINE_HH__