gpu-compute: Refactor some Event subclasses to lambdas

Change-Id: Ic1332b8e8ba0afacbe591c80f4d06afbf5f04bd9
Signed-off-by: Sean Wilson <spwilson2@wisc.edu>
Reviewed-on: https://gem5-review.googlesource.com/3922
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
2017-06-27 14:18:10 -05:00
parent 55f70760de
commit 741261f10b
8 changed files with 95 additions and 199 deletions
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -669,9 +669,8 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
        return true;
    }

-    ComputeUnit::DataPort::MemRespEvent *mem_resp_event =
-        new ComputeUnit::DataPort::MemRespEvent(computeUnit->memPort[index],
-                                                pkt);
+    EventFunctionWrapper *mem_resp_event =
+        computeUnit->memPort[index]->createMemRespEvent(pkt);

    DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x received!\n",
            computeUnit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
@@ -845,8 +844,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)

            // translation is done. Schedule the mem_req_event at the
            // appropriate cycle to send the timing memory request to ruby
-            ComputeUnit::DataPort::MemReqEvent *mem_req_event =
-                new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
+            EventFunctionWrapper *mem_req_event =
+                memPort[index]->createMemReqEvent(pkt);

            DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
                    "scheduled\n", cu_id, gpuDynInst->simdId,
@@ -923,8 +922,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
 void
 ComputeUnit::sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
 {
-    ComputeUnit::DataPort::MemReqEvent *mem_req_event =
-        new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
+    EventFunctionWrapper *mem_req_event =
+        memPort[index]->createMemReqEvent(pkt);


    // New SenderState for the memory access
@@ -972,26 +971,20 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
    sendSyncRequest(gpuDynInst, 0, pkt);
 }

-const char*
-ComputeUnit::DataPort::MemRespEvent::description() const
-{
-    return "ComputeUnit memory response event";
-}
-
 void
-ComputeUnit::DataPort::MemRespEvent::process()
+ComputeUnit::DataPort::processMemRespEvent(PacketPtr pkt)
 {
    DataPort::SenderState *sender_state =
        safe_cast<DataPort::SenderState*>(pkt->senderState);

    GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
-    ComputeUnit *compute_unit = dataPort->computeUnit;
+    ComputeUnit *compute_unit = computeUnit;

    assert(gpuDynInst);

    DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
            compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
-            pkt->req->getPaddr(), dataPort->index);
+            pkt->req->getPaddr(), index);

    Addr paddr = pkt->req->getPaddr();

@@ -1045,8 +1038,9 @@ ComputeUnit::DataPort::MemRespEvent::process()
                // this memory request
                if (gpuDynInst->useContinuation) {
                    assert(!gpuDynInst->isNoScope());
-                    gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
-                                                 gpuDynInst);
+                    gpuDynInst->execContinuation(
+                        gpuDynInst->staticInstruction(),
+                        gpuDynInst);
                }
            }
        }
@@ -1230,9 +1224,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)

    // translation is done. Schedule the mem_req_event at the appropriate
    // cycle to send the timing memory request to ruby
-    ComputeUnit::DataPort::MemReqEvent *mem_req_event =
-        new ComputeUnit::DataPort::MemReqEvent(computeUnit->memPort[mp_index],
-                                               new_pkt);
+    EventFunctionWrapper *mem_req_event =
+        computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);

    DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data scheduled\n",
            computeUnit->cu_id, gpuDynInst->simdId,
@@ -1244,32 +1237,42 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
    return true;
 }

-const char*
-ComputeUnit::DataPort::MemReqEvent::description() const
+EventFunctionWrapper*
+ComputeUnit::DataPort::createMemReqEvent(PacketPtr pkt)
 {
-    return "ComputeUnit memory request event";
+    return new EventFunctionWrapper(
+        [this, pkt]{ processMemReqEvent(pkt); },
+        "ComputeUnit memory request event", true);
+}
+
+EventFunctionWrapper*
+ComputeUnit::DataPort::createMemRespEvent(PacketPtr pkt)
+{
+    return new EventFunctionWrapper(
+        [this, pkt]{ processMemRespEvent(pkt); },
+        "ComputeUnit memory response event", true);
 }

 void
-ComputeUnit::DataPort::MemReqEvent::process()
+ComputeUnit::DataPort::processMemReqEvent(PacketPtr pkt)
 {
    SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
    GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
-    ComputeUnit *compute_unit M5_VAR_USED = dataPort->computeUnit;
+    ComputeUnit *compute_unit M5_VAR_USED = computeUnit;

-    if (!(dataPort->sendTimingReq(pkt))) {
-        dataPort->retries.push_back(std::make_pair(pkt, gpuDynInst));
+    if (!(sendTimingReq(pkt))) {
+        retries.push_back(std::make_pair(pkt, gpuDynInst));

        DPRINTF(GPUPort,
                "CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
                compute_unit->cu_id, gpuDynInst->simdId,
-                gpuDynInst->wfSlotId, dataPort->index,
+                gpuDynInst->wfSlotId, index,
                pkt->req->getPaddr());
    } else {
        DPRINTF(GPUPort,
                "CU%d: WF[%d][%d]: index %d, addr %#x data req sent!\n",
                compute_unit->cu_id, gpuDynInst->simdId,
-                gpuDynInst->wfSlotId, dataPort->index,
+                gpuDynInst->wfSlotId, index,
                pkt->req->getPaddr());
    }
 }
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -440,39 +440,11 @@ class ComputeUnit : public MemObject
                  saved(sender_state) { }
        };

-        class MemReqEvent : public Event
-        {
-          private:
-            DataPort *dataPort;
-            PacketPtr pkt;
+        void processMemReqEvent(PacketPtr pkt);
+        EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);

-          public:
-            MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
-                : Event(), dataPort(_data_port), pkt(_pkt)
-            {
-              setFlags(Event::AutoDelete);
-            }
-
-            void process();
-            const char *description() const;
-        };
-
-        class MemRespEvent : public Event
-        {
-          private:
-            DataPort *dataPort;
-            PacketPtr pkt;
-
-          public:
-            MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
-                : Event(), dataPort(_data_port), pkt(_pkt)
-            {
-              setFlags(Event::AutoDelete);
-            }
-
-            void process();
-            const char *description() const;
-        };
+        void processMemRespEvent(PacketPtr pkt);
+        EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);

        std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;

--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -50,7 +50,9 @@ GpuDispatcher::GpuDispatcher(const Params *p)
    : DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")),
      pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
      dispatchCount(0), dispatchActive(false), cpu(p->cpu),
-      shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this)
+      shader(p->shader_pointer), driver(p->cl_driver),
+      tickEvent([this]{ exec(); }, "GPU Dispatcher tick",
+                false, Event::CPU_Tick_Pri)
 {
    shader->handshake(this);
    driver->handshake(this);
@@ -363,23 +365,6 @@ GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
    }
 }

-GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher)
-    : Event(CPU_Tick_Pri), dispatcher(_dispatcher)
-{
-}
-
-void
-GpuDispatcher::TickEvent::process()
-{
-    dispatcher->exec();
-}
-
-const char*
-GpuDispatcher::TickEvent::description() const
-{
-    return "GPU Dispatcher tick";
-}
-
 // helper functions for driver to retrieve GPU attributes
 int
 GpuDispatcher::getNumCUs()
--- a/src/gpu-compute/dispatcher.hh
+++ b/src/gpu-compute/dispatcher.hh
@@ -55,17 +55,6 @@ class GpuDispatcher : public DmaDevice
    public:
        typedef GpuDispatcherParams Params;

-        class TickEvent : public Event
-        {
-            private:
-                GpuDispatcher *dispatcher;
-
-            public:
-                TickEvent(GpuDispatcher *);
-                void process();
-                const char *description() const;
-        };
-
        MasterID masterId() { return _masterId; }

    protected:
@@ -93,7 +82,8 @@ class GpuDispatcher : public DmaDevice
        BaseCPU *cpu;
        Shader *shader;
        ClDriver *driver;
-        TickEvent tickEvent;
+        EventFunctionWrapper tickEvent;
+

        static GpuDispatcher *instance;

--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -50,14 +50,17 @@
 #include "mem/ruby/system/RubySystem.hh"
 #include "sim/sim_exit.hh"

-Shader::Shader(const Params *p) : ClockedObject(p),
-    clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
-    cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
-    hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
-    separate_acquire_release(p->separate_acquire_release), coissue_return(1),
-    trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
-    globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
-    box_tick_cnt(0), start_tick_cnt(0)
+Shader::Shader(const Params *p)
+    : ClockedObject(p), clock(p->clk_domain->clockPeriod()),
+      cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
+      tickEvent([this]{ processTick(); }, "Shader tick",
+                false, Event::CPU_Tick_Pri),
+      timingSim(p->timing), hsail_mode(SIMT),
+      impl_kern_boundary_sync(p->impl_kern_boundary_sync),
+      separate_acquire_release(p->separate_acquire_release), coissue_return(1),
+      trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
+      globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
+      box_tick_cnt(0), start_tick_cnt(0)
 {

    cuList.resize(n_cu);
@@ -317,27 +320,16 @@ Shader::ScheduleAdd(uint32_t *val,Tick when,int x)
    ++sa_n;
 }

-Shader::TickEvent::TickEvent(Shader *_shader)
-    : Event(CPU_Tick_Pri), shader(_shader)
-{
-}
-

 void
-Shader::TickEvent::process()
+Shader::processTick()
 {
-    if (shader->busy()) {
-        shader->exec();
-        shader->schedule(this, curTick() + shader->ticks(1));
+    if (busy()) {
+        exec();
+        schedule(tickEvent, curTick() + ticks(1));
    }
 }

-const char*
-Shader::TickEvent::description() const
-{
-    return "Shader tick";
-}
-
 void
 Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                  MemCmd cmd, bool suppress_func_errors)
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -99,18 +99,8 @@ class Shader : public ClockedObject
    ThreadContext *gpuTc;
    BaseCPU *cpuPointer;

-    class TickEvent : public Event
-    {
-      private:
-        Shader *shader;
-
-      public:
-        TickEvent(Shader*);
-        void process();
-        const char* description() const;
-    };
-
-    TickEvent tickEvent;
+    void processTick();
+    EventFunctionWrapper tickEvent;

    // is this simulation going to be timing mode in the memory?
    bool timingSim;
--- a/src/gpu-compute/tlb_coalescer.cc
+++ b/src/gpu-compute/tlb_coalescer.cc
@@ -39,11 +39,18 @@

 #include "debug/GPUTLB.hh"

-TLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
-    clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
-    coalescingWindow(p->coalescingWindow),
-    disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
-    cleanupEvent(this)
+TLBCoalescer::TLBCoalescer(const Params *p)
+    : MemObject(p),
+      clock(p->clk_domain->clockPeriod()),
+      TLBProbesPerCycle(p->probesPerCycle),
+      coalescingWindow(p->coalescingWindow),
+      disableCoalescing(p->disableCoalescing),
+      probeTLBEvent([this]{ processProbeTLBEvent(); },
+                    "Probe the TLB below",
+                    false, Event::CPU_Tick_Pri),
+      cleanupEvent([this]{ processCleanupEvent(); },
+                   "Cleanup issuedTranslationsTable hashmap",
+                   false, Event::Maximum_Pri)
 {
    // create the slave ports based on the number of connected ports
    for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
@@ -390,17 +397,6 @@ TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
 }

-TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
-    : Event(CPU_Tick_Pri), coalescer(_coalescer)
-{
-}
-
-const char*
-TLBCoalescer::IssueProbeEvent::description() const
-{
-    return "Probe the TLB below";
-}
-
 /*
 * Here we scan the coalescer FIFO and issue the max
 * number of permitted probes to the TLB below. We
@@ -414,7 +410,7 @@ TLBCoalescer::IssueProbeEvent::description() const
 * track of the outstanding reqs)
 */
 void
-TLBCoalescer::IssueProbeEvent::process()
+TLBCoalescer::processProbeTLBEvent()
 {
    // number of TLB probes sent so far
    int sent_probes = 0;
@@ -425,10 +421,10 @@ TLBCoalescer::IssueProbeEvent::process()
    // returns false or when there is another outstanding request for the
    // same virt. page.

-    DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
+    DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);

-    for (auto iter = coalescer->coalescerFIFO.begin();
-         iter != coalescer->coalescerFIFO.end() && !rejected; ) {
+    for (auto iter = coalescerFIFO.begin();
+         iter != coalescerFIFO.end() && !rejected; ) {
        int coalescedReq_cnt = iter->second.size();
        int i = 0;
        int vector_index = 0;
@@ -446,7 +442,7 @@ TLBCoalescer::IssueProbeEvent::process()

            // is there another outstanding request for the same page addr?
            int pending_reqs =
-                coalescer->issuedTranslationsTable.count(virt_page_addr);
+                issuedTranslationsTable.count(virt_page_addr);

            if (pending_reqs) {
                DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
@@ -459,7 +455,7 @@ TLBCoalescer::IssueProbeEvent::process()
            }

            // send the coalesced request for virt_page_addr
-            if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
+            if (!memSidePort[0]->sendTimingReq(first_packet)) {
                DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
                       virt_page_addr);

@@ -479,22 +475,22 @@ TLBCoalescer::IssueProbeEvent::process()
                    // by the one we just sent counting all the way from
                    // the top of TLB hiearchy (i.e., from the CU)
                    int req_cnt = tmp_sender_state->reqCnt.back();
-                    coalescer->queuingCycles += (curTick() * req_cnt);
+                    queuingCycles += (curTick() * req_cnt);

                    DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
-                            coalescer->name(), req_cnt);
+                            name(), req_cnt);

                    // pkt_cnt is number of packets we coalesced into the one
                    // we just sent but only at this coalescer level
                    int pkt_cnt = iter->second[vector_index].size();
-                    coalescer->localqueuingCycles += (curTick() * pkt_cnt);
+                    localqueuingCycles += (curTick() * pkt_cnt);
                }

                DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
                       virt_page_addr);

                //copy coalescedReq to issuedTranslationsTable
-                coalescer->issuedTranslationsTable[virt_page_addr]
+                issuedTranslationsTable[virt_page_addr]
                    = iter->second[vector_index];

                //erase the entry of this coalesced req
@@ -504,7 +500,7 @@ TLBCoalescer::IssueProbeEvent::process()
                    assert(i == coalescedReq_cnt);

                sent_probes++;
-                if (sent_probes == coalescer->TLBProbesPerCycle)
+                if (sent_probes == TLBProbesPerCycle)
                   return;
            }
        }
@@ -512,31 +508,20 @@ TLBCoalescer::IssueProbeEvent::process()
        //if there are no more coalesced reqs for this tick_index
        //erase the hash_map with the first iterator
        if (iter->second.empty()) {
-            coalescer->coalescerFIFO.erase(iter++);
+            coalescerFIFO.erase(iter++);
        } else {
            ++iter;
        }
    }
 }

-TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
-    : Event(Maximum_Pri), coalescer(_coalescer)
-{
-}
-
-const char*
-TLBCoalescer::CleanupEvent::description() const
-{
-    return "Cleanup issuedTranslationsTable hashmap";
-}
-
 void
-TLBCoalescer::CleanupEvent::process()
+TLBCoalescer::processCleanupEvent()
 {
-    while (!coalescer->cleanupQueue.empty()) {
-        Addr cleanup_addr = coalescer->cleanupQueue.front();
-        coalescer->cleanupQueue.pop();
-        coalescer->issuedTranslationsTable.erase(cleanup_addr);
+    while (!cleanupQueue.empty()) {
+        Addr cleanup_addr = cleanupQueue.front();
+        cleanupQueue.pop();
+        issuedTranslationsTable.erase(cleanup_addr);

        DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
                cleanup_addr);
--- a/src/gpu-compute/tlb_coalescer.hh
+++ b/src/gpu-compute/tlb_coalescer.hh
@@ -214,35 +214,14 @@ class TLBCoalescer : public MemObject
    BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
    BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);

-    class IssueProbeEvent : public Event
-    {
-      private:
-        TLBCoalescer *coalescer;
+    void processProbeTLBEvent();
+    /// This event issues the TLB probes
+    EventFunctionWrapper probeTLBEvent;

-      public:
-        IssueProbeEvent(TLBCoalescer *_coalescer);
-        void process();
-        const char *description() const;
-    };
-
-    // this event issues the TLB probes
-    IssueProbeEvent probeTLBEvent;
-
-    // the cleanupEvent is scheduled after a TLBEvent triggers
-    // in order to free memory and do the required clean-up
-    class CleanupEvent : public Event
-    {
-      private:
-        TLBCoalescer *coalescer;
-
-      public:
-        CleanupEvent(TLBCoalescer *_coalescer);
-        void process();
-        const char* description() const;
-     };
-
-    // schedule cleanup
-    CleanupEvent cleanupEvent;
+    void processCleanupEvent();
+    /// The cleanupEvent is scheduled after a TLBEvent triggers
+    /// in order to free memory and do the required clean-up
+    EventFunctionWrapper cleanupEvent;

    // this FIFO queue keeps track of the virt. page
    // addresses that are pending cleanup