gpu-compute: Refactor some Event subclasses to lambdas

Change-Id: Ic1332b8e8ba0afacbe591c80f4d06afbf5f04bd9
Signed-off-by: Sean Wilson <spwilson2@wisc.edu>
Reviewed-on: https://gem5-review.googlesource.com/3922
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
This commit is contained in:
Sean Wilson
2017-06-27 14:18:10 -05:00
parent 55f70760de
commit 741261f10b
8 changed files with 95 additions and 199 deletions

View File

@@ -669,9 +669,8 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true;
}
ComputeUnit::DataPort::MemRespEvent *mem_resp_event =
new ComputeUnit::DataPort::MemRespEvent(computeUnit->memPort[index],
pkt);
EventFunctionWrapper *mem_resp_event =
computeUnit->memPort[index]->createMemRespEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x received!\n",
computeUnit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
@@ -845,8 +844,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
// translation is done. Schedule the mem_req_event at the
// appropriate cycle to send the timing memory request to ruby
ComputeUnit::DataPort::MemReqEvent *mem_req_event =
new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
EventFunctionWrapper *mem_req_event =
memPort[index]->createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
"scheduled\n", cu_id, gpuDynInst->simdId,
@@ -923,8 +922,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
void
ComputeUnit::sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
{
ComputeUnit::DataPort::MemReqEvent *mem_req_event =
new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
EventFunctionWrapper *mem_req_event =
memPort[index]->createMemReqEvent(pkt);
// New SenderState for the memory access
@@ -972,26 +971,20 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
sendSyncRequest(gpuDynInst, 0, pkt);
}
const char*
ComputeUnit::DataPort::MemRespEvent::description() const
{
return "ComputeUnit memory response event";
}
void
ComputeUnit::DataPort::MemRespEvent::process()
ComputeUnit::DataPort::processMemRespEvent(PacketPtr pkt)
{
DataPort::SenderState *sender_state =
safe_cast<DataPort::SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
ComputeUnit *compute_unit = dataPort->computeUnit;
ComputeUnit *compute_unit = computeUnit;
assert(gpuDynInst);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
pkt->req->getPaddr(), dataPort->index);
pkt->req->getPaddr(), index);
Addr paddr = pkt->req->getPaddr();
@@ -1045,8 +1038,9 @@ ComputeUnit::DataPort::MemRespEvent::process()
// this memory request
if (gpuDynInst->useContinuation) {
assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
gpuDynInst->execContinuation(
gpuDynInst->staticInstruction(),
gpuDynInst);
}
}
}
@@ -1230,9 +1224,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
// translation is done. Schedule the mem_req_event at the appropriate
// cycle to send the timing memory request to ruby
ComputeUnit::DataPort::MemReqEvent *mem_req_event =
new ComputeUnit::DataPort::MemReqEvent(computeUnit->memPort[mp_index],
new_pkt);
EventFunctionWrapper *mem_req_event =
computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data scheduled\n",
computeUnit->cu_id, gpuDynInst->simdId,
@@ -1244,32 +1237,42 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
return true;
}
const char*
ComputeUnit::DataPort::MemReqEvent::description() const
EventFunctionWrapper*
ComputeUnit::DataPort::createMemReqEvent(PacketPtr pkt)
{
return "ComputeUnit memory request event";
return new EventFunctionWrapper(
[this, pkt]{ processMemReqEvent(pkt); },
"ComputeUnit memory request event", true);
}
/**
 * Allocate an event that, when processed, hands @p pkt to
 * processMemRespEvent() on this DataPort.
 *
 * The lambda captures `this` and the packet pointer by value, so both
 * must remain valid until the event has run.
 *
 * @param pkt Packet forwarded to processMemRespEvent().
 * @return Heap-allocated EventFunctionWrapper described as
 *         "ComputeUnit memory response event".
 */
EventFunctionWrapper*
ComputeUnit::DataPort::createMemRespEvent(PacketPtr pkt)
{
return new EventFunctionWrapper(
[this, pkt]{ processMemRespEvent(pkt); },
// NOTE(review): the trailing `true` is presumably the auto-delete flag,
// mirroring the Event::AutoDelete that the MemRespEvent class set in its
// constructor -- confirm against EventFunctionWrapper's signature.
"ComputeUnit memory response event", true);
}
void
ComputeUnit::DataPort::MemReqEvent::process()
ComputeUnit::DataPort::processMemReqEvent(PacketPtr pkt)
{
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
ComputeUnit *compute_unit M5_VAR_USED = dataPort->computeUnit;
ComputeUnit *compute_unit M5_VAR_USED = computeUnit;
if (!(dataPort->sendTimingReq(pkt))) {
dataPort->retries.push_back(std::make_pair(pkt, gpuDynInst));
if (!(sendTimingReq(pkt))) {
retries.push_back(std::make_pair(pkt, gpuDynInst));
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
compute_unit->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, dataPort->index,
gpuDynInst->wfSlotId, index,
pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req sent!\n",
compute_unit->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, dataPort->index,
gpuDynInst->wfSlotId, index,
pkt->req->getPaddr());
}
}

View File

@@ -440,39 +440,11 @@ class ComputeUnit : public MemObject
saved(sender_state) { }
};
class MemReqEvent : public Event
{
private:
DataPort *dataPort;
PacketPtr pkt;
void processMemReqEvent(PacketPtr pkt);
EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);
public:
MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
: Event(), dataPort(_data_port), pkt(_pkt)
{
setFlags(Event::AutoDelete);
}
void process();
const char *description() const;
};
class MemRespEvent : public Event
{
private:
DataPort *dataPort;
PacketPtr pkt;
public:
MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
: Event(), dataPort(_data_port), pkt(_pkt)
{
setFlags(Event::AutoDelete);
}
void process();
const char *description() const;
};
void processMemRespEvent(PacketPtr pkt);
EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);
std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;

View File

@@ -50,7 +50,9 @@ GpuDispatcher::GpuDispatcher(const Params *p)
: DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")),
pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
dispatchCount(0), dispatchActive(false), cpu(p->cpu),
shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this)
shader(p->shader_pointer), driver(p->cl_driver),
tickEvent([this]{ exec(); }, "GPU Dispatcher tick",
false, Event::CPU_Tick_Pri)
{
shader->handshake(this);
driver->handshake(this);
@@ -363,23 +365,6 @@ GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
}
}
GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher)
: Event(CPU_Tick_Pri), dispatcher(_dispatcher)
{
}
void
GpuDispatcher::TickEvent::process()
{
dispatcher->exec();
}
const char*
GpuDispatcher::TickEvent::description() const
{
return "GPU Dispatcher tick";
}
// helper functions for driver to retrieve GPU attributes
int
GpuDispatcher::getNumCUs()

View File

@@ -55,17 +55,6 @@ class GpuDispatcher : public DmaDevice
public:
typedef GpuDispatcherParams Params;
class TickEvent : public Event
{
private:
GpuDispatcher *dispatcher;
public:
TickEvent(GpuDispatcher *);
void process();
const char *description() const;
};
MasterID masterId() { return _masterId; }
protected:
@@ -93,7 +82,8 @@ class GpuDispatcher : public DmaDevice
BaseCPU *cpu;
Shader *shader;
ClDriver *driver;
TickEvent tickEvent;
EventFunctionWrapper tickEvent;
static GpuDispatcher *instance;

View File

@@ -50,14 +50,17 @@
#include "mem/ruby/system/RubySystem.hh"
#include "sim/sim_exit.hh"
Shader::Shader(const Params *p) : ClockedObject(p),
clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
separate_acquire_release(p->separate_acquire_release), coissue_return(1),
trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
box_tick_cnt(0), start_tick_cnt(0)
Shader::Shader(const Params *p)
: ClockedObject(p), clock(p->clk_domain->clockPeriod()),
cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
tickEvent([this]{ processTick(); }, "Shader tick",
false, Event::CPU_Tick_Pri),
timingSim(p->timing), hsail_mode(SIMT),
impl_kern_boundary_sync(p->impl_kern_boundary_sync),
separate_acquire_release(p->separate_acquire_release), coissue_return(1),
trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
box_tick_cnt(0), start_tick_cnt(0)
{
cuList.resize(n_cu);
@@ -317,27 +320,16 @@ Shader::ScheduleAdd(uint32_t *val,Tick when,int x)
++sa_n;
}
Shader::TickEvent::TickEvent(Shader *_shader)
: Event(CPU_Tick_Pri), shader(_shader)
{
}
void
Shader::TickEvent::process()
Shader::processTick()
{
if (shader->busy()) {
shader->exec();
shader->schedule(this, curTick() + shader->ticks(1));
if (busy()) {
exec();
schedule(tickEvent, curTick() + ticks(1));
}
}
const char*
Shader::TickEvent::description() const
{
return "Shader tick";
}
void
Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
MemCmd cmd, bool suppress_func_errors)

View File

@@ -99,18 +99,8 @@ class Shader : public ClockedObject
ThreadContext *gpuTc;
BaseCPU *cpuPointer;
class TickEvent : public Event
{
private:
Shader *shader;
public:
TickEvent(Shader*);
void process();
const char* description() const;
};
TickEvent tickEvent;
void processTick();
EventFunctionWrapper tickEvent;
// is this simulation going to be timing mode in the memory?
bool timingSim;

View File

@@ -39,11 +39,18 @@
#include "debug/GPUTLB.hh"
TLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
coalescingWindow(p->coalescingWindow),
disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
cleanupEvent(this)
TLBCoalescer::TLBCoalescer(const Params *p)
: MemObject(p),
clock(p->clk_domain->clockPeriod()),
TLBProbesPerCycle(p->probesPerCycle),
coalescingWindow(p->coalescingWindow),
disableCoalescing(p->disableCoalescing),
probeTLBEvent([this]{ processProbeTLBEvent(); },
"Probe the TLB below",
false, Event::CPU_Tick_Pri),
cleanupEvent([this]{ processCleanupEvent(); },
"Cleanup issuedTranslationsTable hashmap",
false, Event::Maximum_Pri)
{
// create the slave ports based on the number of connected ports
for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
@@ -390,17 +397,6 @@ TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}
TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
: Event(CPU_Tick_Pri), coalescer(_coalescer)
{
}
const char*
TLBCoalescer::IssueProbeEvent::description() const
{
return "Probe the TLB below";
}
/*
* Here we scan the coalescer FIFO and issue the max
* number of permitted probes to the TLB below. We
@@ -414,7 +410,7 @@ TLBCoalescer::IssueProbeEvent::description() const
* track of the outstanding reqs)
*/
void
TLBCoalescer::IssueProbeEvent::process()
TLBCoalescer::processProbeTLBEvent()
{
// number of TLB probes sent so far
int sent_probes = 0;
@@ -425,10 +421,10 @@ TLBCoalescer::IssueProbeEvent::process()
// returns false or when there is another outstanding request for the
// same virt. page.
DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
for (auto iter = coalescer->coalescerFIFO.begin();
iter != coalescer->coalescerFIFO.end() && !rejected; ) {
for (auto iter = coalescerFIFO.begin();
iter != coalescerFIFO.end() && !rejected; ) {
int coalescedReq_cnt = iter->second.size();
int i = 0;
int vector_index = 0;
@@ -446,7 +442,7 @@ TLBCoalescer::IssueProbeEvent::process()
// is there another outstanding request for the same page addr?
int pending_reqs =
coalescer->issuedTranslationsTable.count(virt_page_addr);
issuedTranslationsTable.count(virt_page_addr);
if (pending_reqs) {
DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
@@ -459,7 +455,7 @@ TLBCoalescer::IssueProbeEvent::process()
}
// send the coalesced request for virt_page_addr
if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
if (!memSidePort[0]->sendTimingReq(first_packet)) {
DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
virt_page_addr);
@@ -479,22 +475,22 @@ TLBCoalescer::IssueProbeEvent::process()
// by the one we just sent counting all the way from
// the top of TLB hierarchy (i.e., from the CU)
int req_cnt = tmp_sender_state->reqCnt.back();
coalescer->queuingCycles += (curTick() * req_cnt);
queuingCycles += (curTick() * req_cnt);
DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
coalescer->name(), req_cnt);
name(), req_cnt);
// pkt_cnt is number of packets we coalesced into the one
// we just sent but only at this coalescer level
int pkt_cnt = iter->second[vector_index].size();
coalescer->localqueuingCycles += (curTick() * pkt_cnt);
localqueuingCycles += (curTick() * pkt_cnt);
}
DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
virt_page_addr);
//copy coalescedReq to issuedTranslationsTable
coalescer->issuedTranslationsTable[virt_page_addr]
issuedTranslationsTable[virt_page_addr]
= iter->second[vector_index];
//erase the entry of this coalesced req
@@ -504,7 +500,7 @@ TLBCoalescer::IssueProbeEvent::process()
assert(i == coalescedReq_cnt);
sent_probes++;
if (sent_probes == coalescer->TLBProbesPerCycle)
if (sent_probes == TLBProbesPerCycle)
return;
}
}
@@ -512,31 +508,20 @@ TLBCoalescer::IssueProbeEvent::process()
//if there are no more coalesced reqs for this tick_index
//erase the hash_map with the first iterator
if (iter->second.empty()) {
coalescer->coalescerFIFO.erase(iter++);
coalescerFIFO.erase(iter++);
} else {
++iter;
}
}
}
TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
: Event(Maximum_Pri), coalescer(_coalescer)
{
}
const char*
TLBCoalescer::CleanupEvent::description() const
{
return "Cleanup issuedTranslationsTable hashmap";
}
void
TLBCoalescer::CleanupEvent::process()
TLBCoalescer::processCleanupEvent()
{
while (!coalescer->cleanupQueue.empty()) {
Addr cleanup_addr = coalescer->cleanupQueue.front();
coalescer->cleanupQueue.pop();
coalescer->issuedTranslationsTable.erase(cleanup_addr);
while (!cleanupQueue.empty()) {
Addr cleanup_addr = cleanupQueue.front();
cleanupQueue.pop();
issuedTranslationsTable.erase(cleanup_addr);
DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
cleanup_addr);

View File

@@ -214,35 +214,14 @@ class TLBCoalescer : public MemObject
BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);
class IssueProbeEvent : public Event
{
private:
TLBCoalescer *coalescer;
void processProbeTLBEvent();
/// This event issues the TLB probes
EventFunctionWrapper probeTLBEvent;
public:
IssueProbeEvent(TLBCoalescer *_coalescer);
void process();
const char *description() const;
};
// this event issues the TLB probes
IssueProbeEvent probeTLBEvent;
// the cleanupEvent is scheduled after a TLBEvent triggers
// in order to free memory and do the required clean-up
class CleanupEvent : public Event
{
private:
TLBCoalescer *coalescer;
public:
CleanupEvent(TLBCoalescer *_coalescer);
void process();
const char* description() const;
};
// schedule cleanup
CleanupEvent cleanupEvent;
void processCleanupEvent();
/// The cleanupEvent is scheduled after a TLBEvent triggers
/// in order to free memory and do the required clean-up
EventFunctionWrapper cleanupEvent;
// this FIFO queue keeps track of the virt. page
// addresses that are pending cleanup