mem-ruby: Hit latencies defined by the controllers
Removed the icache/dcache hit latency parameters from the Sequencer.
They were replaced by the mandatory queue enqueue latency, which is now
defined by the top-level cache controller. By default, the latency is
defined by the mandatory_queue_latency parameter. When the latency
depends on specific protocol states or on the request type, the
protocol may override the mandatoryQueueLatency function.

Change-Id: I72e57a7ea49501ef81dc7f591bef14134274647c
Signed-off-by: Tiago Muck <tiago.muck@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18413
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
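For illustration, the dispatch described above can be sketched outside of gem5 as a small, self-contained C++ program. The names below (ControllerBase, SplitLatencyController, RequestType, enqueueLatency) are stand-ins invented for this sketch, not gem5 types; they only model how AbstractController::mandatoryQueueLatency() defaults to the mandatory_queue_latency parameter and how a protocol-specific controller could override it per request type.

// Minimal, self-contained sketch (not gem5 code) of the latency dispatch.
#include <cstdint>
#include <iostream>

using Cycles = std::uint64_t;

enum class RequestType { IFETCH, LD, ST };

class ControllerBase
{
  public:
    explicit ControllerBase(Cycles mandatory_queue_latency)
        : m_mandatory_queue_latency(mandatory_queue_latency) {}
    virtual ~ControllerBase() = default;

    // Default: return the configured mandatory_queue_latency, mirroring
    // AbstractController::mandatoryQueueLatency() added in this change.
    virtual Cycles mandatoryQueueLatency(RequestType) const
    { return m_mandatory_queue_latency; }

  protected:
    Cycles m_mandatory_queue_latency;
};

// A protocol whose hit latency depends on the request type can override
// the hook instead of relying on the single configured default.
class SplitLatencyController : public ControllerBase
{
  public:
    using ControllerBase::ControllerBase;

    Cycles mandatoryQueueLatency(RequestType type) const override
    {
        // Hypothetical protocol: instruction fetches hit a faster cache.
        return type == RequestType::IFETCH ? Cycles(1) : Cycles(2);
    }
};

// Sequencer side: enqueue mandatory-queue messages with whatever latency
// the attached controller reports for this request type.
Cycles enqueueLatency(const ControllerBase &ctrl, RequestType type)
{
    return ctrl.mandatoryQueueLatency(type);
}

int main()
{
    SplitLatencyController l1(Cycles(2)); // mandatory_queue_latency = 2
    std::cout << enqueueLatency(l1, RequestType::IFETCH) << '\n'; // 1
    std::cout << enqueueLatency(l1, RequestType::ST) << '\n';     // 2
    return 0;
}

In gem5 itself the default comes from the new mandatory_queue_latency parameter added below, and the Sequencer/GPUCoalescer convert the returned Cycles value to ticks before enqueueing on the mandatory queue, as the diff shows.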
@@ -115,8 +115,6 @@ class CPCntrl(CorePair_Controller, CntrlBase):
         self.L2cache.create(options)

         self.sequencer = RubySequencer()
-        self.sequencer.icache_hit_latency = 2
-        self.sequencer.dcache_hit_latency = 2
         self.sequencer.version = self.seqCount()
         self.sequencer.icache = self.L1Icache
         self.sequencer.dcache = self.L1D0cache
@@ -128,12 +126,13 @@ class CPCntrl(CorePair_Controller, CntrlBase):
         self.sequencer1.version = self.seqCount()
         self.sequencer1.icache = self.L1Icache
         self.sequencer1.dcache = self.L1D1cache
-        self.sequencer1.icache_hit_latency = 2
-        self.sequencer1.dcache_hit_latency = 2
         self.sequencer1.ruby_system = ruby_system
         self.sequencer1.coreid = 1
         self.sequencer1.is_cpu_sequencer = True

+        # Defines icache/dcache hit latency
+        self.mandatory_queue_latency = 2
+
         self.issue_latency = options.cpu_to_dir_latency
         self.send_evictions = send_evicts(options)

@@ -102,8 +102,6 @@ class CPCntrl(CorePair_Controller, CntrlBase):
         self.L2cache.create(options)

         self.sequencer = RubySequencer()
-        self.sequencer.icache_hit_latency = 2
-        self.sequencer.dcache_hit_latency = 2
         self.sequencer.version = self.seqCount()
         self.sequencer.icache = self.L1Icache
         self.sequencer.dcache = self.L1D0cache
@@ -115,12 +113,13 @@ class CPCntrl(CorePair_Controller, CntrlBase):
         self.sequencer1.version = self.seqCount()
         self.sequencer1.icache = self.L1Icache
         self.sequencer1.dcache = self.L1D1cache
-        self.sequencer1.icache_hit_latency = 2
-        self.sequencer1.dcache_hit_latency = 2
         self.sequencer1.ruby_system = ruby_system
         self.sequencer1.coreid = 1
         self.sequencer1.is_cpu_sequencer = True

+        # Defines icache/dcache hit latency
+        self.mandatory_queue_latency = 2
+
         self.issue_latency = options.cpu_to_dir_latency
         self.send_evictions = send_evicts(options)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017,2019 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -55,6 +55,7 @@ AbstractController::AbstractController(const Params *p)
     m_number_of_TBEs(p->number_of_TBEs),
     m_transitions_per_cycle(p->transitions_per_cycle),
     m_buffer_size(p->buffer_size), m_recycle_latency(p->recycle_latency),
+    m_mandatory_queue_latency(p->mandatory_queue_latency),
     memoryPort(csprintf("%s.memory", name()), this, ""),
     addrRanges(p->addr_ranges.begin(), p->addr_ranges.end())
 {
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017,2019 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -102,6 +102,13 @@ class AbstractController : public ClockedObject, public Consumer
     virtual Sequencer* getCPUSequencer() const = 0;
     virtual GPUCoalescer* getGPUCoalescer() const = 0;

+    // This latency is used by the sequencer when enqueueing requests.
+    // Different latencies may be used depending on the request type.
+    // This is the hit latency unless the top-level cache controller
+    // introduces additional cycles in the response path.
+    virtual Cycles mandatoryQueueLatency(const RubyRequestType& param_type)
+    { return m_mandatory_queue_latency; }
+
     //! These functions are used by ruby system to read/write the data blocks
     //! that exist with in the controller.
     virtual void functionalRead(const Addr &addr, PacketPtr) = 0;
@@ -195,6 +202,7 @@ class AbstractController : public ClockedObject, public Consumer
     const int m_transitions_per_cycle;
     const unsigned int m_buffer_size;
     Cycles m_recycle_latency;
+    const Cycles m_mandatory_queue_latency;

     //! Counter for the number of cycles when the transitions carried out
     //! were equal to the maximum allowed
@@ -1,4 +1,4 @@
-# Copyright (c) 2017 ARM Limited
+# Copyright (c) 2017,2019 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -61,5 +61,13 @@ class RubyController(ClockedObject):
     number_of_TBEs = Param.Int(256, "")
     ruby_system = Param.RubySystem("")

+    # This is typically a proxy to the icache/dcache hit latency.
+    # If the latency depends on the request type or protocol-specific states,
+    # the protocol may ignore this parameter by overriding the
+    # mandatoryQueueLatency function
+    mandatory_queue_latency = \
+        Param.Cycles(1, "Default latency for requests added to the " \
+            "mandatory queue on top-level controllers")
+
     memory = MasterPort("Port for attaching a memory controller")
     system = Param.System(Parent.any, "system object parameter")
@@ -143,8 +143,6 @@ GPUCoalescer::GPUCoalescer(const Params *p)
     assert(m_instCache_ptr);
     assert(m_dataCache_ptr);

-    m_data_cache_hit_latency = p->dcache_hit_latency;
-
     m_runningGarnetStandalone = p->garnet_standalone;
     assumingRfOCoherence = p->assume_rfo;
 }
@@ -950,12 +948,12 @@ GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
     fatal_if(secondary_type == RubyRequestType_IFETCH,
             "there should not be any I-Fetch requests in the GPU Coalescer");

-    // Send the message to the cache controller
-    fatal_if(m_data_cache_hit_latency == 0,
-             "should not have a latency of zero");
+    Tick latency = cyclesToTicks(
+        m_controller->mandatoryQueueLatency(secondary_type));
+    assert(latency > 0);

     assert(m_mandatory_q_ptr);
-    m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
 }

 template <class KEY, class VALUE>
@@ -266,11 +266,6 @@ class GPUCoalescer : public RubyPort
     CacheMemory* m_dataCache_ptr;
     CacheMemory* m_instCache_ptr;

-    // The cache access latency for this GPU data cache. This is assessed at the
-    // beginning of each access. This should be very similar to the
-    // implementation in Sequencer() as this is very much like a Sequencer
-    Cycles m_data_cache_hit_latency;
-
     // We need to track both the primary and secondary request types.
     // The secondary request type comprises a subset of RubyRequestTypes that
     // are understood by the L1 Controller. A primary request type can be any
@@ -54,4 +54,3 @@ class RubyGPUCoalescer(RubyPort):
                           "max outstanding cycles for a request before " \
                           "deadlock/livelock declared")
     garnet_standalone = Param.Bool(False, "")
-    dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
@@ -60,8 +60,6 @@ Sequencer::Sequencer(const Params *p)

     m_instCache_ptr = p->icache;
     m_dataCache_ptr = p->dcache;
-    m_data_cache_hit_latency = p->dcache_hit_latency;
-    m_inst_cache_hit_latency = p->icache_hit_latency;
     m_max_outstanding_requests = p->max_outstanding_requests;
     m_deadlock_threshold = p->deadlock_threshold;

@@ -70,8 +68,6 @@ Sequencer::Sequencer(const Params *p)
     assert(m_deadlock_threshold > 0);
     assert(m_instCache_ptr != NULL);
     assert(m_dataCache_ptr != NULL);
-    assert(m_data_cache_hit_latency > 0);
-    assert(m_inst_cache_hit_latency > 0);

     m_runningGarnetStandalone = p->garnet_standalone;
 }
@@ -650,23 +646,12 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
             printAddress(msg->getPhysicalAddress()),
             RubyRequestType_to_string(secondary_type));

-    // The Sequencer currently assesses instruction and data cache hit latency
-    // for the top-level caches at the beginning of a memory access.
-    // TODO: Eventually, this latency should be moved to represent the actual
-    // cache access latency portion of the memory access. This will require
-    // changing cache controller protocol files to assess the latency on the
-    // access response path.
-    Cycles latency(0);  // Initialize to zero to catch misconfigured latency
-    if (secondary_type == RubyRequestType_IFETCH)
-        latency = m_inst_cache_hit_latency;
-    else
-        latency = m_data_cache_hit_latency;
-
-    // Send the message to the cache controller
+    Tick latency = cyclesToTicks(
+        m_controller->mandatoryQueueLatency(secondary_type));
     assert(latency > 0);

     assert(m_mandatory_q_ptr != NULL);
-    m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(latency));
+    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
 }

 template <class KEY, class VALUE>
@@ -63,12 +63,7 @@ class RubySequencer(RubyPort):

     icache = Param.RubyCache("")
     dcache = Param.RubyCache("")
-    # Cache latencies currently assessed at the beginning of each access
-    # NOTE: Setting these values to a value greater than one will result in
-    # O3 CPU pipeline bubbles and negatively impact performance
-    # TODO: Latencies should be migrated into each top-level cache controller
-    icache_hit_latency = Param.Cycles(1, "Inst cache hit latency")
-    dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
+
     max_outstanding_requests = Param.Int(16,
         "max requests (incl. prefetches) outstanding")
     deadlock_threshold = Param.Cycles(500000,