dev-amdgpu: Add memory manager for GPU VRAM

The memory manager is responsible for reading and writes to VRAM memory for direct requests that bypass GPU caches. Change-Id: I4aa1e77737ce52f2f2c01929b58984126bdcb925 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/51850 Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
2021-10-20 15:22:57 -05:00
parent b7826f1329
commit 42b56ceb7b
6 changed files with 255 additions and 3 deletions
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -32,6 +32,7 @@ from m5.proxy import *
 from m5.objects.PciDevice import PciDevice
 from m5.objects.PciDevice import PciMemBar, PciMemUpperBar, PciLegacyIoBar
 from m5.objects.Device import DmaDevice
+from m5.objects.ClockedObject import ClockedObject

 # PCI device model for an AMD Vega 10 based GPU. The PCI codes and BARs
 # correspond to a Vega Frontier Edition hardware device. None of the PCI
@@ -78,8 +79,18 @@ class AMDGPUDevice(PciDevice):
    # The config script should not create a new cp here but rather assign the
    # same cp that is assigned to the Shader SimObject.
    cp = Param.GPUCommandProcessor(NULL, "Command Processor")
+    memory_manager = Param.AMDGPUMemoryManager("GPU Memory Manager")
+    memories = VectorParam.AbstractMemory([], "All memories in the device")
    device_ih = Param.AMDGPUInterruptHandler("GPU Interrupt handler")

+class AMDGPUMemoryManager(ClockedObject):
+    type = 'AMDGPUMemoryManager'
+    cxx_header = 'dev/amdgpu/memory_manager.hh'
+    cxx_class = 'gem5::AMDGPUMemoryManager'
+
+    port = RequestPort('Memory Port to access VRAM (device memory)')
+    system = Param.System(Parent.any, 'System the dGPU belongs to')
+
 class AMDGPUInterruptHandler(DmaDevice):
    type = 'AMDGPUInterruptHandler'
    cxx_header = "dev/amdgpu/interrupt_handler.hh"
--- a/src/dev/amdgpu/SConscript
+++ b/src/dev/amdgpu/SConscript
@@ -33,11 +33,13 @@ if not env['BUILD_GPU']:
    Return()

 # Controllers
-SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler']
-                     , tags='x86 isa')
+SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler',
+                                    'AMDGPUMemoryManager'], tags='x86 isa')

 Source('amdgpu_device.cc', tags='x86 isa')
 Source('interrupt_handler.cc', tags='x86 isa')
+Source('memory_manager.cc', tags='x86 isa')
 Source('mmio_reader.cc', tags='x86 isa')

 DebugFlag('AMDGPUDevice', tags='x86 isa')
+DebugFlag('AMDGPUMem', tags='x86 isa')
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -250,6 +250,8 @@ AMDGPUDevice::write(PacketPtr pkt)

    switch (barnum) {
      case FRAMEBUFFER_BAR:
+          gpuMemMgr->writeRequest(offset, pkt->getPtr<uint8_t>(),
+                                  pkt->getSize());
          writeFrame(pkt, offset);
          break;
      case DOORBELL_BAR:
@@ -266,6 +268,9 @@ AMDGPUDevice::write(PacketPtr pkt)
    // Reads return 0 by default.
    uint64_t data = pkt->getUintX(ByteOrder::little);

+    DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
+                            pkt->getAddr(), data);
+
    if (data || regs.find(pkt->getAddr()) != regs.end())
        regs[pkt->getAddr()] = data;

--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -36,6 +36,7 @@

 #include "base/bitunion.hh"
 #include "dev/amdgpu/amdgpu_defines.hh"
+#include "dev/amdgpu/memory_manager.hh"
 #include "dev/amdgpu/mmio_reader.hh"
 #include "dev/io_device.hh"
 #include "dev/pci/device.hh"
@@ -100,6 +101,8 @@ class AMDGPUDevice : public PciDevice
     */
    AMDMMIOReader mmioReader;

+    AMDGPUMemoryManager *gpuMemMgr;
+
    /**
     * Blocks of the GPU
     */
@@ -183,7 +186,7 @@ class AMDGPUDevice : public PciDevice
    /**
     * Methods related to translations and system/device memory.
     */
-    RequestorID vramRequestorId() { return 0; }
+    RequestorID vramRequestorId() { return gpuMemMgr->getRequestorID(); }

    Addr
    getPageTableBase(uint16_t vmid)
--- a/src/dev/amdgpu/memory_manager.cc
+++ b/src/dev/amdgpu/memory_manager.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2021 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "dev/amdgpu/memory_manager.hh"
+
+#include <memory>
+
+#include "base/chunk_generator.hh"
+#include "debug/AMDGPUMem.hh"
+#include "params/AMDGPUMemoryManager.hh"
+#include "sim/system.hh"
+
+namespace gem5
+{
+
+AMDGPUMemoryManager::AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p)
+    : ClockedObject(p), _gpuMemPort(csprintf("%s-port", name()), this),
+      cacheLineSize(p.system->cacheLineSize()),
+      _requestorId(p.system->getRequestorId(this))
+{
+}
+
+void
+AMDGPUMemoryManager::writeRequest(Addr addr, uint8_t *data, int size,
+                               Request::Flags flag, Event *callback)
+{
+    assert(data);
+
+    ChunkGenerator gen(addr, size, cacheLineSize);
+    for (; !gen.done(); gen.next()) {
+        RequestPtr req = std::make_shared<Request>(gen.addr(), gen.size(),
+                                                   flag, _requestorId);
+
+        PacketPtr pkt = Packet::createWrite(req);
+        uint8_t *dataPtr = new uint8_t[gen.size()];
+        std::memcpy(dataPtr, data + (gen.complete()/sizeof(uint8_t)),
+                    gen.size());
+        pkt->dataDynamic<uint8_t>(dataPtr);
+
+        // We only want to issue the callback on the last request completing.
+        if (gen.last()) {
+            pkt->pushSenderState(new GPUMemPort::SenderState(callback, addr));
+        } else {
+            pkt->pushSenderState(new GPUMemPort::SenderState(nullptr, addr));
+        }
+
+        if (!_gpuMemPort.sendTimingReq(pkt)) {
+            DPRINTF(AMDGPUMem, "Request to %#lx needs retry\n", gen.addr());
+            _gpuMemPort.retries.push_back(pkt);
+        } else {
+            DPRINTF(AMDGPUMem, "Write request to %#lx sent\n", gen.addr());
+        }
+    }
+}
+
+bool
+AMDGPUMemoryManager::GPUMemPort::recvTimingResp(PacketPtr pkt)
+{
+    // Retrieve sender state
+    [[maybe_unused]] SenderState *sender_state =
+        safe_cast<SenderState*>(pkt->senderState);
+
+    DPRINTF(AMDGPUMem, "Recveived Response for %#x\n", sender_state->_addr);
+
+    // Check if there is a callback event and if so call it
+    if (sender_state->_callback) {
+        sender_state->_callback->process();
+        delete sender_state->_callback;
+    }
+
+    delete pkt->senderState;
+    delete pkt;
+    return true;
+}
+
+void
+AMDGPUMemoryManager::GPUMemPort::recvReqRetry()
+{
+    for (const auto &pkt : retries) {
+        if (!sendTimingReq(pkt)) {
+            break;
+        } else {
+            DPRINTF(AMDGPUMem, "Retry for %#lx sent\n", pkt->getAddr());
+            retries.pop_front();
+        }
+    }
+}
+
+} // namespace gem5
--- a/src/dev/amdgpu/memory_manager.hh
+++ b/src/dev/amdgpu/memory_manager.hh
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2021 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DEV_AMDGPU_MEMORY_MANAGER_HH__
+#define __DEV_AMDGPU_MEMORY_MANAGER_HH__
+
+#include <deque>
+
+#include "base/callback.hh"
+#include "mem/port.hh"
+#include "params/AMDGPUMemoryManager.hh"
+#include "sim/clocked_object.hh"
+
+namespace gem5
+{
+
+class AMDGPUMemoryManager : public ClockedObject
+{
+    class GPUMemPort : public MasterPort
+    {
+        public:
+        GPUMemPort(const std::string &_name, AMDGPUMemoryManager *_gpuMemMgr)
+            : MasterPort(_name, _gpuMemMgr)
+        {
+        }
+
+        bool recvTimingResp(PacketPtr pkt) override;
+        void recvReqRetry() override;
+
+        struct SenderState : public Packet::SenderState
+        {
+            SenderState(Event *callback, Addr addr)
+                : _callback(callback), _addr(addr)
+            {}
+
+            Event *_callback;
+            Addr _addr;
+        };
+
+        std::deque<PacketPtr> retries;
+    };
+
+    GPUMemPort _gpuMemPort;
+    const int cacheLineSize;
+    const RequestorID _requestorId;
+
+  public:
+    AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p);
+    ~AMDGPUMemoryManager() {};
+
+    /**
+     * Write size amount of data to device memory at addr using flags and
+     * callback.
+     *
+     * @param addr Device address to write.
+     * @param data Pointer to data to write.
+     * @param size Number of bytes to write.
+     * @param flag Additional request flags for write packets.
+     * @param callback Event callback to call after all bytes are written.
+     */
+    void writeRequest(Addr addr, uint8_t *data, int size,
+                      Request::Flags flag = 0, Event *callback = nullptr);
+
+    /**
+     * Get the requestorID for the memory manager. This ID is used for all
+     * packets which should be routed through the device network.
+     *
+     * @return requestorID of this object.
+     */
+    RequestorID getRequestorID() const { return _requestorId; }
+
+    Port &
+    getPort(const std::string &if_name, PortID idx) override
+    {
+        if (if_name == "port") {
+            return _gpuMemPort;
+        } else {
+            return ClockedObject::getPort(if_name, idx);
+        }
+    }
+};
+
+} // namespace gem5
+
+#endif // __DEV_AMDGPU_MEMORY_MANAGER_HH__