arch-vega,gpu-compute,mem-ruby: SQC Invalidation Support (#852)
This PR adds support for SQC (GPU I-cache) invalidation to the GPU model. It does this by updating the GPU-VIPER-SQC protocol to support flushes, the sequencer model to send out invalidates and the gpu compute model to send invalidates and handle responses. It also adds support for S_ICACHE_INV, a VEGA ISA instruction that invalidates the entire GPU I-cache. Additionally, the PR modifies the kernel start behavior to invalidate the I-cache too. It previously invalidated only the L1 D-cache.
This commit is contained in:
@@ -434,6 +434,7 @@ print(
|
||||
# shader is the GPU
|
||||
shader = Shader(
|
||||
n_wf=args.wfs_per_simd,
|
||||
cu_per_sqc=args.cu_per_sqc,
|
||||
clk_domain=SrcClockDomain(
|
||||
clock=args.gpu_clock,
|
||||
voltage_domain=VoltageDomain(voltage=args.gpu_voltage),
|
||||
|
||||
@@ -33,7 +33,10 @@ from m5.objects import *
|
||||
|
||||
def createGPU(system, args):
|
||||
shader = Shader(
|
||||
n_wf=args.wfs_per_simd, timing=True, clk_domain=system.clk_domain
|
||||
n_wf=args.wfs_per_simd,
|
||||
cu_per_sqc=args.cu_per_sqc,
|
||||
timing=True,
|
||||
clk_domain=system.clk_domain,
|
||||
)
|
||||
|
||||
# VIPER GPU protocol implements release consistency at GPU side. So,
|
||||
|
||||
@@ -669,6 +669,9 @@ namespace VegaISA
|
||||
Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
|
||||
: Inst_SOPP(iFmt, "s_icache_inv")
|
||||
{
|
||||
setFlag(MemBarrier);
|
||||
setFlag(GPUStaticInst::MemSync);
|
||||
setFlag(MemSync);
|
||||
} // Inst_SOPP__S_ICACHE_INV
|
||||
|
||||
Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
|
||||
@@ -683,7 +686,26 @@ namespace VegaISA
|
||||
void
|
||||
Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
panicUnimplemented();
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
return;
|
||||
}
|
||||
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
|
||||
|
||||
gpuDynInst->resetEntireStatusVector();
|
||||
gpuDynInst->setStatusVector(0, 1);
|
||||
RequestPtr req = std::make_shared<Request>(0, 0, 0,
|
||||
gpuDynInst->computeUnit()->
|
||||
requestorId(), 0,
|
||||
gpuDynInst->wfDynId);
|
||||
gpuDynInst->setRequestFlags(req);
|
||||
gpuDynInst->computeUnit()->scalarMemoryPipe.
|
||||
injectScalarMemFence(gpuDynInst, false, req);
|
||||
} // execute
|
||||
// --- Inst_SOPP__S_INCPERFLEVEL class methods ---
|
||||
|
||||
|
||||
@@ -294,6 +294,7 @@ class Shader(ClockedObject):
|
||||
dispatcher = Param.GPUDispatcher("GPU workgroup dispatcher")
|
||||
system_hub = Param.AMDGPUSystemHub(NULL, "GPU System Hub (FS Mode only)")
|
||||
n_wf = Param.Int(10, "Number of wavefront slots per SIMD")
|
||||
cu_per_sqc = Param.Int(4, "Number of CUs that share an SQC")
|
||||
impl_kern_launch_acq = Param.Bool(
|
||||
True,
|
||||
"""Insert acq packet into
|
||||
|
||||
@@ -397,9 +397,9 @@ ComputeUnit::startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
|
||||
}
|
||||
|
||||
/**
|
||||
* trigger invalidate operation in the cu
|
||||
* trigger invalidate operation in the CU
|
||||
*
|
||||
* req: request initialized in shader, carrying the invlidate flags
|
||||
* req: request initialized in shader, carrying the invalidate flags
|
||||
*/
|
||||
void
|
||||
ComputeUnit::doInvalidate(RequestPtr req, int kernId){
|
||||
@@ -425,6 +425,26 @@ ComputeUnit::doFlush(GPUDynInstPtr gpuDynInst) {
|
||||
injectGlobalMemFence(gpuDynInst, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* trigger SQC invalidate operation in the CU
|
||||
*
|
||||
* req: request initialized in shader, carrying the invalidate flags
|
||||
*/
|
||||
void
|
||||
ComputeUnit::doSQCInvalidate(RequestPtr req, int kernId){
|
||||
GPUDynInstPtr gpuDynInst
|
||||
= std::make_shared<GPUDynInst>(this, nullptr,
|
||||
new KernelLaunchStaticInst(), getAndIncSeqNum());
|
||||
|
||||
// kern_id will be used in inv responses
|
||||
gpuDynInst->kern_id = kernId;
|
||||
// update contextId field
|
||||
req->setContext(gpuDynInst->wfDynId);
|
||||
|
||||
gpuDynInst->staticInstruction()->setFlag(GPUStaticInst::Scalar);
|
||||
scalarMemoryPipe.injectScalarMemFence(gpuDynInst, true, req);
|
||||
}
|
||||
|
||||
// resetting SIMD register pools
|
||||
// I couldn't think of any other place and
|
||||
// I think it is needed in my implementation
|
||||
@@ -1012,7 +1032,14 @@ ComputeUnit::DataPort::recvReqRetry()
|
||||
bool
|
||||
ComputeUnit::SQCPort::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
computeUnit->handleSQCReturn(pkt);
|
||||
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
|
||||
/** Process the response only if there is a wavefront associated with it.
|
||||
* Otherwise, it is from SQC invalidate that was issued at kernel start
|
||||
* and doesn't have a wavefront or instruction associated with it.
|
||||
*/
|
||||
if (sender_state->wavefront != nullptr) {
|
||||
computeUnit->handleSQCReturn(pkt);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1046,6 +1073,26 @@ ComputeUnit::SQCPort::recvReqRetry()
|
||||
}
|
||||
}
|
||||
|
||||
const char*
|
||||
ComputeUnit::SQCPort::MemReqEvent::description() const
|
||||
{
|
||||
return "ComputeUnit SQC memory request event";
|
||||
}
|
||||
|
||||
void
|
||||
ComputeUnit::SQCPort::MemReqEvent::process()
|
||||
{
|
||||
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
|
||||
[[maybe_unused]] ComputeUnit *compute_unit = sqcPort.computeUnit;
|
||||
|
||||
assert(!pkt->req->systemReq());
|
||||
|
||||
if (!(sqcPort.sendTimingReq(pkt))) {
|
||||
sqcPort.retries.push_back(std::pair<PacketPtr, Wavefront*>
|
||||
(pkt, sender_state->wavefront));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
{
|
||||
|
||||
@@ -412,6 +412,7 @@ class ComputeUnit : public ClockedObject
|
||||
|
||||
void doInvalidate(RequestPtr req, int kernId);
|
||||
void doFlush(GPUDynInstPtr gpuDynInst);
|
||||
void doSQCInvalidate(RequestPtr req, int kernId);
|
||||
|
||||
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
|
||||
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);
|
||||
@@ -680,6 +681,23 @@ class ComputeUnit : public ClockedObject
|
||||
kernId(_kernId){ }
|
||||
};
|
||||
|
||||
class MemReqEvent : public Event
|
||||
{
|
||||
private:
|
||||
SQCPort &sqcPort;
|
||||
PacketPtr pkt;
|
||||
|
||||
public:
|
||||
MemReqEvent(SQCPort &_sqc_port, PacketPtr _pkt)
|
||||
: Event(), sqcPort(_sqc_port), pkt(_pkt)
|
||||
{
|
||||
setFlags(Event::AutoDelete);
|
||||
}
|
||||
|
||||
void process();
|
||||
const char *description() const;
|
||||
};
|
||||
|
||||
std::deque<std::pair<PacketPtr, Wavefront*>> retries;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -320,7 +320,7 @@ FetchUnit::processFetchReturn(PacketPtr pkt)
|
||||
assert(!fetchBuf.at(wavefront->wfSlotId).hasFetchDataToProcess());
|
||||
wavefront->dropFetch = false;
|
||||
} else {
|
||||
fetchBuf.at(wavefront->wfSlotId).fetchDone(pkt->req->getVaddr());
|
||||
fetchBuf.at(wavefront->wfSlotId).fetchDone(pkt);
|
||||
}
|
||||
|
||||
wavefront->pendingFetch = false;
|
||||
@@ -469,8 +469,23 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
|
||||
FetchUnit::FetchBufDesc::fetchDone(PacketPtr pkt)
|
||||
{
|
||||
// If the return command is MemSyncResp, then it belongs to
|
||||
// an SQC invalidation request. This request calls
|
||||
// incLGKMInstsIssued() function in its execution path.
|
||||
// Since there is no valid memory return response associated with
|
||||
// this instruction, decLGKMInstsIssued() is not executed. Do this
|
||||
// here to decrement the counter and invalidate all buffers
|
||||
if (pkt->cmd == MemCmd::MemSyncResp) {
|
||||
wavefront->decLGKMInstsIssued();
|
||||
flushBuf();
|
||||
restartFromBranch = false;
|
||||
return;
|
||||
}
|
||||
|
||||
Addr vaddr = pkt->req->getVaddr();
|
||||
|
||||
assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
|
||||
wavefront->simdId, wavefront->wfSlotId,
|
||||
|
||||
@@ -138,7 +138,7 @@ class FetchUnit
|
||||
return is_reserved;
|
||||
}
|
||||
|
||||
void fetchDone(Addr vaddr);
|
||||
void fetchDone(PacketPtr ptr);
|
||||
|
||||
/**
|
||||
* checks if the buffer contains valid data. this essentially
|
||||
|
||||
@@ -160,4 +160,55 @@ ScalarMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
|
||||
issuedRequests.push(gpuDynInst);
|
||||
}
|
||||
|
||||
void
|
||||
ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
|
||||
bool kernelMemSync,
|
||||
RequestPtr req)
|
||||
{
|
||||
assert(gpuDynInst->isScalar());
|
||||
|
||||
if (!req) {
|
||||
req = std::make_shared<Request>(
|
||||
0, 0, 0, computeUnit.requestorId(), 0, gpuDynInst->wfDynId);
|
||||
} else {
|
||||
req->requestorId(computeUnit.requestorId());
|
||||
}
|
||||
|
||||
// When the SQC invalidate instruction is executed, it calls
|
||||
// injectScalarMemFence. The instruction does not contain an address
|
||||
// as one of its operands. Therefore, set the physical address of the
|
||||
// invalidation request to 0 and handle it in the sequencer
|
||||
req->setPaddr(0);
|
||||
|
||||
PacketPtr pkt = nullptr;
|
||||
|
||||
// If kernelMemSync is true, then the invalidation request is from
|
||||
// kernel launch and is an implicit invalidation. If false, then it is
|
||||
// due to an S_ICACHE_INV instruction
|
||||
if (kernelMemSync) {
|
||||
req->setCacheCoherenceFlags(Request::INV_L1);
|
||||
req->setReqInstSeqNum(gpuDynInst->seqNum());
|
||||
req->setFlags(Request::KERNEL);
|
||||
pkt = new Packet(req, MemCmd::MemSyncReq);
|
||||
pkt->pushSenderState(
|
||||
new ComputeUnit::SQCPort::SenderState(
|
||||
gpuDynInst->wavefront(), nullptr));
|
||||
} else {
|
||||
gpuDynInst->setRequestFlags(req);
|
||||
|
||||
req->setReqInstSeqNum(gpuDynInst->seqNum());
|
||||
|
||||
pkt = new Packet(req, MemCmd::MemSyncReq);
|
||||
pkt->pushSenderState(
|
||||
new ComputeUnit::SQCPort::SenderState(
|
||||
gpuDynInst->wavefront(), nullptr));
|
||||
}
|
||||
|
||||
ComputeUnit::SQCPort::MemReqEvent *sqc_event =
|
||||
new ComputeUnit::SQCPort::MemReqEvent
|
||||
(computeUnit.sqcPort, pkt);
|
||||
computeUnit.schedule(
|
||||
sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
|
||||
}
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <string>
|
||||
|
||||
#include "gpu-compute/misc.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "params/ComputeUnit.hh"
|
||||
#include "sim/stats.hh"
|
||||
|
||||
@@ -67,6 +68,9 @@ class ScalarMemPipeline
|
||||
|
||||
void issueRequest(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
void injectScalarMemFence(
|
||||
GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req);
|
||||
|
||||
bool
|
||||
isGMLdRespFIFOWrRdy() const
|
||||
{
|
||||
|
||||
@@ -64,6 +64,7 @@ Shader::Shader(const Params &p) : ClockedObject(p),
|
||||
impl_kern_end_rel(p.impl_kern_end_rel),
|
||||
coissue_return(1),
|
||||
trace_vgpr_all(1), n_cu((p.CUs).size()), n_wf(p.n_wf),
|
||||
n_cu_per_sqc(p.cu_per_sqc),
|
||||
globalMemSize(p.globalmem),
|
||||
nextSchedCu(0), sa_n(0), gpuCmdProc(*p.gpu_cmd_proc),
|
||||
_dispatcher(*p.dispatcher), systemHub(p.system_hub),
|
||||
@@ -221,6 +222,13 @@ Shader::prepareInvalidate(HSAQueueEntry *task) {
|
||||
// all necessary INV flags are all set now, call cu to execute
|
||||
cuList[i_cu]->doInvalidate(req, task->dispatchId());
|
||||
|
||||
|
||||
// A set of CUs share a single SQC cache. Send a single invalidate
|
||||
// request to each SQC
|
||||
if ((i_cu % n_cu_per_sqc) == 0) {
|
||||
cuList[i_cu]->doSQCInvalidate(req, task->dispatchId());
|
||||
}
|
||||
|
||||
// I don't like this. This is intrusive coding.
|
||||
cuList[i_cu]->resetRegisterPool();
|
||||
}
|
||||
|
||||
@@ -237,6 +237,8 @@ class Shader : public ClockedObject
|
||||
int n_cu;
|
||||
// Number of wavefront slots per SIMD per CU
|
||||
int n_wf;
|
||||
// Number of CUs per SQC in the shader
|
||||
int n_cu_per_sqc;
|
||||
|
||||
// The size of global memory
|
||||
int globalMemSize;
|
||||
|
||||
@@ -60,6 +60,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
|
||||
// Mem sys initiated
|
||||
Repl, desc="Replacing block from cache";
|
||||
Data, desc="Received Data";
|
||||
Evict, desc="Evict cache line";
|
||||
}
|
||||
|
||||
enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
|
||||
@@ -67,6 +68,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
|
||||
DataArrayWrite, desc="Write the data array";
|
||||
TagArrayRead, desc="Read the data array";
|
||||
TagArrayWrite, desc="Write the data array";
|
||||
TagArrayFlash, desc="Flash clear the data array";
|
||||
}
|
||||
|
||||
|
||||
@@ -242,7 +244,12 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
|
||||
peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
|
||||
Entry cache_entry := getCacheEntry(in_msg.LineAddress);
|
||||
TBE tbe := TBEs.lookup(in_msg.LineAddress);
|
||||
trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
|
||||
DPRINTF(RubySlicc, "%s\n", in_msg);
|
||||
if (in_msg.Type == RubyRequestType:REPLACEMENT) {
|
||||
trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe);
|
||||
} else {
|
||||
trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -313,6 +320,10 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
|
||||
APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
|
||||
}
|
||||
|
||||
action(inv_invDone, "inv", desc="local inv done") {
|
||||
sequencer.invL1Callback();
|
||||
}
|
||||
|
||||
action(w_writeCache, "w", desc="write data to cache") {
|
||||
peek(responseToSQC_in, ResponseMsg) {
|
||||
assert(is_valid(cache_entry));
|
||||
@@ -350,6 +361,13 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
|
||||
ic_invCache;
|
||||
}
|
||||
|
||||
transition({I, IV, V}, Evict, I) {TagArrayRead, TagArrayWrite} {
|
||||
// since we're evicting something, don't bother classifying as hit/miss
|
||||
ic_invCache;
|
||||
inv_invDone;
|
||||
p_popMandatoryQueue;
|
||||
}
|
||||
|
||||
// if we got a response for a load where the line is in I, then
|
||||
// another request must have come in that replaced the line in question in
|
||||
// the cache. Thus, complete this request without allocating the line, but
|
||||
|
||||
@@ -157,6 +157,9 @@ structure (Sequencer, external = "yes") {
|
||||
void llscClearLocalMonitor();
|
||||
|
||||
void evictionCallback(Addr);
|
||||
|
||||
void invL1Callback();
|
||||
|
||||
void recordRequestType(SequencerRequestType);
|
||||
bool checkResourceAvailable(CacheResourceType, Addr);
|
||||
}
|
||||
|
||||
@@ -85,6 +85,8 @@ Sequencer::Sequencer(const Params &p)
|
||||
|
||||
m_runningGarnetStandalone = p.garnet_standalone;
|
||||
|
||||
m_num_pending_invs = 0;
|
||||
m_cache_inv_pkt = nullptr;
|
||||
|
||||
// These statistical variables are not for display.
|
||||
// The profiler will collate these across different
|
||||
@@ -348,6 +350,15 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,
|
||||
return RequestStatus_Ready;
|
||||
}
|
||||
|
||||
// If command is MemSyncReq, it is used to invalidate the cache.
|
||||
// As the cache invalidation requests are already issued in invL1(),
|
||||
// there is no need to create a new request for the same here.
|
||||
// Instead, return RequestStatus_Aliased, and make the sequencer skip
|
||||
// an extra issueRequest
|
||||
if (pkt->cmd == MemCmd::MemSyncReq) {
|
||||
return RequestStatus_Aliased;
|
||||
}
|
||||
|
||||
Addr line_addr = makeLineAddress(pkt->getAddr());
|
||||
// Check if there is any outstanding request for the same cache line.
|
||||
auto &seq_req_list = m_RequestTable[line_addr];
|
||||
@@ -576,7 +587,8 @@ Sequencer::readCallback(Addr address, DataBlock& data,
|
||||
}
|
||||
if ((seq_req.m_type != RubyRequestType_LD) &&
|
||||
(seq_req.m_type != RubyRequestType_Load_Linked) &&
|
||||
(seq_req.m_type != RubyRequestType_IFETCH)) {
|
||||
(seq_req.m_type != RubyRequestType_IFETCH) &&
|
||||
(seq_req.m_type != RubyRequestType_REPLACEMENT)) {
|
||||
// Write request: reissue request to the cache hierarchy
|
||||
issueRequest(seq_req.pkt, seq_req.m_second_type);
|
||||
break;
|
||||
@@ -811,6 +823,86 @@ Sequencer::unaddressedCallback(Addr unaddressedReqId,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequencer::completeHitCallback(std::vector<PacketPtr> & mylist)
|
||||
{
|
||||
for (auto& pkt : mylist) {
|
||||
// When Ruby is in warmup or cooldown phase, the requests come
|
||||
// from the cache recorder. They do not track which port to use
|
||||
// and do not need to send the response back
|
||||
if (!RubySystem::getWarmupEnabled()
|
||||
&& !RubySystem::getCooldownEnabled()) {
|
||||
RubyPort::SenderState *ss =
|
||||
safe_cast<RubyPort::SenderState *>(pkt->senderState);
|
||||
MemResponsePort *port = ss->port;
|
||||
assert(port != NULL);
|
||||
|
||||
pkt->senderState = ss->predecessor;
|
||||
|
||||
if (pkt->cmd != MemCmd::WriteReq) {
|
||||
// for WriteReq, we keep the original senderState until
|
||||
// writeCompleteCallback
|
||||
delete ss;
|
||||
}
|
||||
|
||||
port->hitCallback(pkt);
|
||||
trySendRetries();
|
||||
}
|
||||
}
|
||||
|
||||
RubySystem *rs = m_ruby_system;
|
||||
if (RubySystem::getWarmupEnabled()) {
|
||||
rs->m_cache_recorder->enqueueNextFetchRequest();
|
||||
} else if (RubySystem::getCooldownEnabled()) {
|
||||
rs->m_cache_recorder->enqueueNextFlushRequest();
|
||||
} else {
|
||||
testDrainComplete();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequencer::invL1Callback()
|
||||
{
|
||||
// Since L1 invalidate is currently done with paddr = 0
|
||||
assert(m_cache_inv_pkt && m_num_pending_invs > 0);
|
||||
|
||||
m_num_pending_invs--;
|
||||
|
||||
if (m_num_pending_invs == 0) {
|
||||
std::vector<PacketPtr> pkt_list { m_cache_inv_pkt };
|
||||
m_cache_inv_pkt = nullptr;
|
||||
completeHitCallback(pkt_list);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequencer::invL1()
|
||||
{
|
||||
int size = m_dataCache_ptr->getNumBlocks();
|
||||
DPRINTF(RubySequencer,
|
||||
"There are %d Invalidations outstanding before Cache Walk\n",
|
||||
m_num_pending_invs);
|
||||
// Walk the cache
|
||||
for (int i = 0; i < size; i++) {
|
||||
Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
|
||||
// Evict Read-only data
|
||||
RubyRequestType request_type = RubyRequestType_REPLACEMENT;
|
||||
std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
|
||||
clockEdge(), addr, 0, 0,
|
||||
request_type, RubyAccessMode_Supervisor,
|
||||
nullptr);
|
||||
DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr);
|
||||
assert(m_mandatory_q_ptr != NULL);
|
||||
Tick latency = cyclesToTicks(
|
||||
m_controller->mandatoryQueueLatency(request_type));
|
||||
m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
|
||||
m_num_pending_invs++;
|
||||
}
|
||||
DPRINTF(RubySequencer,
|
||||
"There are %d Invalidations outstanding after Cache Walk\n",
|
||||
m_num_pending_invs);
|
||||
}
|
||||
|
||||
bool
|
||||
Sequencer::empty() const
|
||||
{
|
||||
@@ -915,6 +1007,11 @@ Sequencer::makeRequest(PacketPtr pkt)
|
||||
}
|
||||
} else if (pkt->isFlush()) {
|
||||
primary_type = secondary_type = RubyRequestType_FLUSH;
|
||||
} else if (pkt->cmd == MemCmd::MemSyncReq) {
|
||||
primary_type = secondary_type = RubyRequestType_REPLACEMENT;
|
||||
assert(!m_cache_inv_pkt);
|
||||
m_cache_inv_pkt = pkt;
|
||||
invL1();
|
||||
} else {
|
||||
panic("Unsupported ruby packet type\n");
|
||||
}
|
||||
|
||||
@@ -141,6 +141,10 @@ class Sequencer : public RubyPort
|
||||
const Cycles forwardRequestTime = Cycles(0),
|
||||
const Cycles firstResponseTime = Cycles(0));
|
||||
|
||||
void completeHitCallback(std::vector<PacketPtr>& list);
|
||||
void invL1Callback();
|
||||
void invL1();
|
||||
|
||||
RequestStatus makeRequest(PacketPtr pkt) override;
|
||||
virtual bool empty() const;
|
||||
int outstandingCount() const override { return m_outstanding_count; }
|
||||
@@ -243,6 +247,10 @@ class Sequencer : public RubyPort
|
||||
private:
|
||||
int m_max_outstanding_requests;
|
||||
|
||||
int m_num_pending_invs;
|
||||
|
||||
PacketPtr m_cache_inv_pkt;
|
||||
|
||||
CacheMemory* m_dataCache_ptr;
|
||||
|
||||
// The cache access latency for top-level caches (L0/L1). These are
|
||||
|
||||
Reference in New Issue
Block a user