gpu-compute: Add methods to read GPU memory requestor ID

These methods are called from various places to override the requestor ID of a request in order to determine which Ruby network the request should be routed on.

Change-Id: Ic0270ddd7123f0457a13144e69ef9132204d4334
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/57651
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
2022-03-14 22:58:06 -05:00
parent 9df61a8aea
commit 51648570ea
6 changed files with 31 additions and 0 deletions
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -2009,6 +2009,15 @@ ComputeUnit::sendToLds(GPUDynInstPtr gpuDynInst)
    return ldsPort.sendTimingReq(newPacket);
 }

+/**
+ * Device-memory (VRAM) requestor ID: from shader in FullSystem, else our own.
+ */
+RequestorID
+ComputeUnit::vramRequestorId()
+{
+    return FullSystem ? shader->vramRequestorId() : requestorId();
+}
+
 /**
 * get the result of packets sent to the LDS when they return
 */
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -458,6 +458,7 @@ class ComputeUnit : public ClockedObject
    void updatePageDivergenceDist(Addr addr);

    RequestorID requestorId() { return _requestorId; }
+    RequestorID vramRequestorId();

    bool isDone() const;
    bool isVectorAluIdle(uint32_t simdId) const;
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -65,6 +65,15 @@ GPUCommandProcessor::hsaPacketProc()
    return *hsaPP;
 }

+/**
+ * Forward the VRAM requestor ID needed for device memory from the GPU device.
+ */
+RequestorID
+GPUCommandProcessor::vramRequestorId()
+{
+    return gpuDevice->vramRequestorId();
+}
+
 TranslationGenPtr
 GPUCommandProcessor::translate(Addr vaddr, Addr size)
 {
--- a/src/gpu-compute/gpu_command_processor.hh
+++ b/src/gpu-compute/gpu_command_processor.hh
@@ -76,6 +76,7 @@ class GPUCommandProcessor : public DmaVirtDevice
    GPUCommandProcessor(const Params &p);

    HSAPacketProcessor& hsaPacketProc();
+    RequestorID vramRequestorId();

    void setGPUDevice(AMDGPUDevice *gpu_device);
    void setShader(Shader *shader);
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -523,6 +523,15 @@ Shader::notifyCuSleep() {
        stats.shaderActiveTicks += curTick() - _lastInactiveTick;
 }

+/**
+ * Forward the device-memory (VRAM) requestor ID from the command processor.
+ */
+RequestorID
+Shader::vramRequestorId()
+{
+    return gpuCmdProc.vramRequestorId();
+}
+
 Shader::ShaderStats::ShaderStats(statistics::Group *parent, int wf_size)
    : statistics::Group(parent),
      ADD_STAT(allLatencyDist, "delay distribution for all"),
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -183,6 +183,8 @@ class Shader : public ClockedObject
        shHiddenPrivateBaseVmid = sh_hidden_base_new;
    }

+    RequestorID vramRequestorId();
+
    EventFunctionWrapper tickEvent;

    // is this simulation going to be timing mode in the memory?