mem-ruby: MESI_Three_level prefetcher support

Add support for the Ruby stride prefetcher to MESI_Three_Level.

Change-Id: Id68935e2a7d3ccd0e22a59f43a15f167410632a2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27715
Reviewed-by: Bradford Beckmann <brad.beckmann@amd.com>
Maintainer: Bradford Beckmann <brad.beckmann@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Timothy Hayes
2019-10-21 17:08:22 +01:00
committed by Pouya Fotouhi
parent e61f10740b
commit 203efba46a
8 changed files with 386 additions and 3 deletions

View File

@@ -53,6 +53,8 @@ def define_options(parser):
parser.add_option("--l0_transitions_per_cycle", type="int", default=32)
parser.add_option("--l1_transitions_per_cycle", type="int", default=32)
parser.add_option("--l2_transitions_per_cycle", type="int", default=4)
parser.add_option("--enable-prefetch", action="store_true", default=False,\
help="Enable Ruby hardware prefetcher")
return
def create_system(options, full_system, system, dma_ports, bootmem,
@@ -118,10 +120,22 @@ def create_system(options, full_system, system, dma_ports, bootmem,
else:
clk_domain = system.cpu[i].clk_domain
# Ruby prefetcher
prefetcher = RubyPrefetcher.Prefetcher(
num_streams=16,
unit_filter = 256,
nonunit_filter = 256,
train_misses = 5,
num_startup_pfs = 4,
cross_page = False
)
l0_cntrl = L0Cache_Controller(
version = i * num_cpus_per_cluster + j,
Icache = l0i_cache, Dcache = l0d_cache,
transitions_per_cycle = options.l0_transitions_per_cycle,
prefetcher = prefetcher,
enable_prefetch = options.enable_prefetch,
send_evictions = send_evicts(options),
clk_domain = clk_domain,
ruby_system = ruby_system)
@@ -159,6 +173,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl_nodes.append(l1_cntrl)
# Connect the L0 and L1 controllers
l0_cntrl.prefetchQueue = MessageBuffer()
l0_cntrl.mandatoryQueue = MessageBuffer()
l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1

View File

@@ -46,6 +46,9 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
Cycles response_latency := 2;
bool send_evictions;
Prefetcher * prefetcher;
bool enable_prefetch := "False";
// From this node's L0 cache to the network
MessageBuffer * bufferToL1, network="To";
@@ -54,6 +57,9 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
// Message queue between this controller and the processor
MessageBuffer * mandatoryQueue;
// Request Buffer for prefetches
MessageBuffer * prefetchQueue;
{
// STATES
state_declaration(State, desc="Cache states", default="L0Cache_State_I") {
@@ -92,6 +98,11 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
// processor needs to write to it. So, the controller has requested for
// write permission.
SM, AccessPermission:Read_Only, desc="Issued GETX, have not seen response yet";
// Transient states in which block is being prefetched
PF_Inst_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
PF_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
PF_IE, AccessPermission:Busy, desc="Issued GETX, have not seen response yet";
}
// EVENTS
@@ -123,6 +134,12 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
WB_Ack, desc="Ack for replacement";
Failed_SC, desc="Store conditional request that will fail";
// Prefetch events (generated by prefetcher)
PF_L0_Replacement, desc="L0 Replacement caused by pretcher", format="!pr";
PF_Load, desc="Load request from prefetcher";
PF_Ifetch, desc="Instruction fetch request from prefetcher";
PF_Store, desc="Exclusive load request from prefetcher";
}
// TYPES
@@ -132,6 +149,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
State CacheState, desc="cache state";
DataBlock DataBlk, desc="data for the block";
bool Dirty, default="false", desc="data is dirty";
bool isPrefetched, default="false", desc="Set if this block was prefetched";
}
// TBE fields
@@ -148,6 +166,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
void allocate(Addr);
void deallocate(Addr);
bool isPresent(Addr);
TBE getNullEntry();
}
TBETable TBEs, template="<L0Cache_TBE>", constructor="m_number_of_TBEs";
@@ -161,6 +180,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
void wakeUpBuffers(Addr a);
void wakeUpAllBuffers(Addr a);
void profileMsgDelay(int virtualNetworkType, Cycles c);
MachineID mapAddressToMachine(Addr addr, MachineType mtype);
// inclusive cache returns L0 entries only
Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
@@ -267,11 +287,120 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
}
}
// Map a RubyRequestType taken from the prefetch queue onto the
// corresponding prefetch-generated event. Any type other than
// LD/IFETCH/ST is a protocol error.
Event prefetch_request_type_to_event(RubyRequestType type) {
if (type == RubyRequestType:LD) {
return Event:PF_Load;
} else if (type == RubyRequestType:IFETCH) {
return Event:PF_Ifetch;
} else if (type == RubyRequestType:ST) {
return Event:PF_Store;
} else {
error("Invalid RubyRequestType");
}
}
// Returns the number of invalidation acks still outstanding for the
// transaction recorded in this TBE.
int getPendingAcks(TBE tbe) {
return tbe.pendingAcks;
}
out_port(requestNetwork_out, CoherenceMsg, bufferToL1);
out_port(optionalQueue_out, RubyRequest, prefetchQueue);
// Queue a prefetch request for 'address' of the given type. The
// message is tagged with PrefetchBit:Yes so downstream logic can
// distinguish it from demand traffic.
void enqueuePrefetch(Addr address, RubyRequestType type) {
enqueue(optionalQueue_out, RubyRequest, 1) {
out_msg.LineAddress := address;
out_msg.Type := type;
out_msg.Prefetch := PrefetchBit:Yes;
// NOTE(review): AccessMode is hard-wired to Supervisor for all
// prefetches -- confirm this is intended for user-mode streams.
out_msg.AccessMode := RubyAccessMode:Supervisor;
}
}
// Prefetch queue between the controller and the prefetcher
// As per Spracklen et al. (HPCA 2005), the prefetch queue should be
// implemented as a LIFO structure. The structure would allow for fast
// searches of all entries in the queue, not just the head msg. All
// msgs in the structure can be invalidated if a demand miss matches.
// Rank 2: this port is serviced after the L1 response port (rank 1)
// and the mandatory (demand) queue, so prefetches never delay demand
// requests.
in_port(optionalQueue_in, RubyRequest, prefetchQueue, desc="...", rank = 2) {
if (optionalQueue_in.isReady(clockEdge())) {
peek(optionalQueue_in, RubyRequest) {
if (in_msg.Type == RubyRequestType:IFETCH) {
// Instruction Prefetch
Entry icache_entry := getICacheEntry(in_msg.LineAddress);
if (is_valid(icache_entry)) {
// The block to be prefetched is already present in the
// cache. This request will be made benign and cause the
// prefetch queue to be popped.
trigger(prefetch_request_type_to_event(in_msg.Type),
in_msg.LineAddress,
icache_entry, TBEs[in_msg.LineAddress]);
}
// Check to see if it is in the L0-D
Entry cache_entry := getDCacheEntry(in_msg.LineAddress);
if (is_valid(cache_entry)) {
// The block is in the wrong L0 cache. We should drop
// this request.
trigger(prefetch_request_type_to_event(in_msg.Type),
in_msg.LineAddress,
cache_entry, TBEs[in_msg.LineAddress]);
}
if (Icache.cacheAvail(in_msg.LineAddress)) {
// L0-I doesn't have the line, but we have space for it
// in the L0-I so let's see if the L1 has it.
// icache_entry is invalid here; the transition allocates
// the block (pp_allocateICacheBlock).
trigger(prefetch_request_type_to_event(in_msg.Type),
in_msg.LineAddress,
icache_entry, TBEs[in_msg.LineAddress]);
} else {
// No room in the L0-I, so we need to make room in the L0-I
Addr addr := Icache.cacheProbe(in_msg.LineAddress);
check_on_cache_probe(optionalQueue_in, addr);
trigger(Event:PF_L0_Replacement, addr,
getICacheEntry(addr),
TBEs[addr]);
}
} else {
// Data prefetch
Entry cache_entry := getDCacheEntry(in_msg.LineAddress);
if (is_valid(cache_entry)) {
// The block to be prefetched is already present in the
// cache. This request will be made benign and cause the
// prefetch queue to be popped.
trigger(prefetch_request_type_to_event(in_msg.Type),
in_msg.LineAddress,
cache_entry, TBEs[in_msg.LineAddress]);
}
// Check to see if it is in the L0-I
Entry icache_entry := getICacheEntry(in_msg.LineAddress);
if (is_valid(icache_entry)) {
// The block is in the wrong L0. Just drop the prefetch
// request.
trigger(prefetch_request_type_to_event(in_msg.Type),
in_msg.LineAddress,
icache_entry, TBEs[in_msg.LineAddress]);
}
if (Dcache.cacheAvail(in_msg.LineAddress)) {
// L0-D doesn't have the line, but we have space for it in
// the L0-D so let's see if the L1 has it.
// cache_entry is invalid here; the transition allocates
// the block (oo_allocateDCacheBlock).
trigger(prefetch_request_type_to_event(in_msg.Type),
in_msg.LineAddress,
cache_entry, TBEs[in_msg.LineAddress]);
} else {
// No room in the L0-D, so we need to make room in the L0-D
Addr addr := Dcache.cacheProbe(in_msg.LineAddress);
check_on_cache_probe(optionalQueue_in, addr);
trigger(Event:PF_L0_Replacement, addr,
getDCacheEntry(addr),
TBEs[addr]);
}
}
}
}
}
// Messages for this L0 cache from the L1 cache
in_port(messgeBuffer_in, CoherenceMsg, bufferFromL1, rank = 1) {
@@ -654,6 +783,80 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
sequencer.writeCallbackScFail(address, cache_entry.DataBlk);
}
// prefetching
// Issue a GETS to the L1 on behalf of a queued prefetch; the
// Prefetch and AccessMode bits from the queued request are forwarded
// so downstream caches can tell prefetches from demand requests.
action(pa_issuePfGETS, "pa", desc="Issue prefetch GETS") {
peek(optionalQueue_in, RubyRequest) {
enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
out_msg.addr := address;
out_msg.Class := CoherenceClass:GETS;
out_msg.Sender := machineID;
// Target the L1 with the same version number as this L0.
out_msg.Dest := createMachineID(MachineType:L1Cache, version);
DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
address, out_msg.Dest);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
}
}
}
// Issue a GETX (exclusive) to the L1 on behalf of a queued store
// prefetch; mirrors pa_issuePfGETS but requests write permission.
action(pb_issuePfGETX, "pb", desc="Issue prefetch GETX") {
peek(optionalQueue_in, RubyRequest) {
enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
out_msg.addr := address;
out_msg.Class := CoherenceClass:GETX;
out_msg.Sender := machineID;
DPRINTF(RubySlicc, "%s\n", machineID);
// Target the L1 with the same version number as this L0.
out_msg.Dest := createMachineID(MachineType:L1Cache, version);
DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
address, out_msg.Dest);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
}
}
}
action(pq_popPrefetchQueue, "\pq", desc="Pop the prefetch request queue") {
// Consume the head prefetch request (demand paths pop via
// k_popMandatoryQueue instead).
optionalQueue_in.dequeue(clockEdge());
}
// Mark the cache block as having been filled by a prefetch, so a
// later demand hit can be reported to the prefetcher
// (see pph_observePfHit, which also clears the flag).
// The previous desc ("Write data from response queue to cache") was
// copy-pasted from the data-write action and did not describe this
// action's behavior.
action(mp_markPrefetched, "mp", desc="Mark cache entry as prefetched") {
assert(is_valid(cache_entry));
cache_entry.isPrefetched := true;
}
// Report a demand miss to the prefetcher so it can train. Gated on
// enable_prefetch so the prefetcher is only driven when the user
// turned it on (--enable-prefetch).
action(po_observeMiss, "\po", desc="Inform the prefetcher about a cache miss") {
peek(mandatoryQueue_in, RubyRequest) {
if (enable_prefetch) {
prefetcher.observeMiss(in_msg.LineAddress, in_msg.Type);
}
}
}
// A demand access arrived while the prefetch for the same line is
// still in flight (PF_* transient states); tell the prefetcher.
// NOTE(review): unlike po_observeMiss this is not gated on
// enable_prefetch; it appears reachable only from PF_* states, which
// are entered only via prefetch events -- confirm.
action(ppm_observePfMiss, "\ppm",
desc="Inform the prefetcher about a cache miss with in-flight prefetch") {
peek(mandatoryQueue_in, RubyRequest) {
prefetcher.observePfMiss(in_msg.LineAddress);
}
}
action(pph_observePfHit, "\pph",
desc="Inform the prefetcher if a cache hit was the result of a prefetch") {
peek(mandatoryQueue_in, RubyRequest) {
// Only the first demand hit is reported; the flag is cleared so
// subsequent hits on the same block are not double-counted.
if (cache_entry.isPrefetched) {
prefetcher.observePfHit(in_msg.LineAddress);
cache_entry.isPrefetched := false;
}
}
}
action(z_stallAndWaitOptionalQueue, "\pz", desc="recycle prefetch request queue") {
// Park the head prefetch message on the blocking address; it is
// re-examined when kd_wakeUpDependents fires for that address.
stall_and_wait(optionalQueue_in, address);
}
//*****************************************************
// TRANSITIONS
//*****************************************************
@@ -669,6 +872,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
i_allocateTBE;
a_issueGETS;
uu_profileDataMiss;
po_observeMiss;
k_popMandatoryQueue;
}
@@ -677,6 +881,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
i_allocateTBE;
a_issueGETS;
uu_profileInstMiss;
po_observeMiss;
k_popMandatoryQueue;
}
@@ -685,6 +890,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
i_allocateTBE;
b_issueGETX;
uu_profileDataMiss;
po_observeMiss;
k_popMandatoryQueue;
}
@@ -704,12 +910,14 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
transition({S,E,M}, Load) {
h_load_hit;
uu_profileDataHit;
pph_observePfHit;
k_popMandatoryQueue;
}
transition({S,E,M}, Ifetch) {
h_ifetch_hit;
uu_profileInstHit;
pph_observePfHit;
k_popMandatoryQueue;
}
@@ -720,7 +928,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
k_popMandatoryQueue;
}
transition(S, L0_Replacement, I) {
transition(S, {L0_Replacement,PF_L0_Replacement}, I) {
forward_eviction_to_cpu;
ff_deallocateCacheBlock;
}
@@ -736,10 +944,11 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
transition({E,M}, Store, M) {
hh_store_hit;
uu_profileDataHit;
pph_observePfHit;
k_popMandatoryQueue;
}
transition(E, L0_Replacement, I) {
transition(E, {L0_Replacement,PF_L0_Replacement}, I) {
forward_eviction_to_cpu;
g_issuePUTX;
ff_deallocateCacheBlock;
@@ -759,7 +968,7 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
}
// Transitions from Modified
transition(M, L0_Replacement, I) {
transition(M, {L0_Replacement,PF_L0_Replacement}, I) {
forward_eviction_to_cpu;
g_issuePUTX;
ff_deallocateCacheBlock;
@@ -843,4 +1052,116 @@ machine(MachineType:L0Cache, "MESI Directory L0 Cache")
hhc_storec_fail;
k_popMandatoryQueue;
}
// prefetcher
// Prefetch transitions. PF_Inst_IS / PF_IS / PF_IE are transient
// states entered when a prefetch is issued; a demand access that
// arrives while the prefetch is in flight converts them into the
// matching demand transient state (Inst_IS / IS / IM).

// A prefetch-triggered replacement cannot proceed while the target
// set has an outstanding transaction; retry the prefetch later.
transition({Inst_IS, IS, IM, SM, PF_Inst_IS, PF_IS, PF_IE}, PF_L0_Replacement) {
z_stallAndWaitOptionalQueue;
}
// Demand requests incompatible with the in-flight prefetch must wait
// for the prefetch fill to complete.
transition({PF_Inst_IS, PF_IS}, {Store, L0_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
transition({PF_IE}, {Load, Ifetch, L0_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
// The line is already present or already being fetched: the queued
// prefetch is redundant, so just drop it.
transition({S,E,M,Inst_IS,IS,IM,SM,PF_Inst_IS,PF_IS,PF_IE},
{PF_Load, PF_Store, PF_Ifetch}) {
pq_popPrefetchQueue;
}
// Start a data-load prefetch: allocate block + TBE, send GETS to L1.
transition(I, PF_Load, PF_IS) {
oo_allocateDCacheBlock;
i_allocateTBE;
pa_issuePfGETS;
pq_popPrefetchQueue;
}
// Demand load catches an in-flight data prefetch: count the miss,
// notify the prefetcher, continue as an ordinary IS miss.
transition(PF_IS, Load, IS) {
uu_profileDataMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
}
// Start an instruction prefetch into the L0-I.
transition(I, PF_Ifetch, PF_Inst_IS) {
pp_allocateICacheBlock;
i_allocateTBE;
pa_issuePfGETS;
pq_popPrefetchQueue;
}
transition(PF_Inst_IS, Ifetch, Inst_IS) {
uu_profileInstMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
}
// Store prefetch requests exclusive ownership (GETX).
transition(I, PF_Store, PF_IE) {
oo_allocateDCacheBlock;
i_allocateTBE;
pb_issuePfGETX;
pq_popPrefetchQueue;
}
transition(PF_IE, Store, IM) {
uu_profileDataMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
}
// Invalidations racing with a prefetch are acked like any other.
transition({PF_Inst_IS, PF_IS, PF_IE}, {InvOwn, InvElse}) {
fi_sendInvAck;
l_popRequestQueue;
}
// Prefetched data arrives: fill, mark the block prefetched so a
// later demand hit credits the prefetcher, and wake any waiters.
transition(PF_IS, Data, S) {
u_writeDataToCache;
s_deallocateTBE;
mp_markPrefetched;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(PF_IS, Data_Exclusive, E) {
u_writeDataToCache;
s_deallocateTBE;
mp_markPrefetched;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
// Stale data: the filled block is not kept; deallocate it.
transition(PF_IS, Data_Stale, I) {
u_writeDataToCache;
s_deallocateTBE;
mp_markPrefetched;
ff_deallocateCacheBlock;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(PF_Inst_IS, Data, S) {
u_writeInstToCache;
s_deallocateTBE;
mp_markPrefetched;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(PF_Inst_IS, Data_Exclusive, E) {
u_writeInstToCache;
s_deallocateTBE;
mp_markPrefetched;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
transition(PF_IE, Data_Exclusive, E) {
u_writeDataToCache;
s_deallocateTBE;
mp_markPrefetched;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
}
}

View File

@@ -406,6 +406,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
address, out_msg.Destination);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
}
}
}
@@ -423,6 +424,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
address, out_msg.Destination);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
}
}
}
@@ -439,6 +441,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
address, out_msg.Destination);
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
}
}
}
@@ -708,6 +711,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
peek(responseNetwork_in, ResponseMsg) {
assert(is_valid(cache_entry));
cache_entry.DataBlk := in_msg.DataBlk;
cache_entry.Dirty := in_msg.Dirty;
}
}

View File

@@ -76,6 +76,7 @@ structure(CoherenceMsg, desc="...", interface="Message") {
MessageSizeType MessageSize, desc="size category of the message";
DataBlock DataBlk, desc="Data for the cache line (if PUTX)";
bool Dirty, default="false", desc="Dirty bit";
PrefetchBit Prefetch, desc="Is this a prefetch request";
bool functionalRead(Packet *pkt) {
// Only PUTX messages contains the data block

View File

@@ -1,4 +1,16 @@
/*
* Copyright (c) 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
* All rights reserved.
*
@@ -403,6 +415,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
out_msg.Requestor := machineID;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
}
}
}

View File

@@ -205,6 +205,7 @@ structure (DirectoryMemory, external = "yes") {
structure (CacheMemory, external = "yes") {
bool cacheAvail(Addr);
Addr cacheProbe(Addr);
AbstractCacheEntry getNullEntry();
AbstractCacheEntry allocate(Addr, AbstractCacheEntry);
AbstractCacheEntry allocate(Addr, AbstractCacheEntry, bool);
void allocateVoid(Addr, AbstractCacheEntry);

View File

@@ -75,6 +75,13 @@ class CacheMemory : public SimObject
// b) an unused line in the same cache "way"
bool cacheAvail(Addr address) const;
// Returns a NULL entry that acts as a placeholder for invalid lines.
// Exposed to SLICC (declared in RubySlicc_Types.sm) so protocol code
// can obtain a sentinel AbstractCacheEntry for a non-resident block.
AbstractCacheEntry*
getNullEntry() const
{
return nullptr;
}
// find an unused entry and sets the tag appropriate for the address
AbstractCacheEntry* allocate(Addr address, AbstractCacheEntry* new_entry);
void allocateVoid(Addr address, AbstractCacheEntry* new_entry)

View File

@@ -1,4 +1,16 @@
/*
* Copyright (c) 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
*
@@ -52,6 +64,7 @@ class TBETable
return (m_number_of_TBEs - m_map.size()) >= n;
}
ENTRY *getNullEntry();
ENTRY *lookup(Addr address);
// Print cache contents
@@ -105,6 +118,13 @@ TBETable<ENTRY>::deallocate(Addr address)
m_map.erase(address);
}
// Returns a null TBE pointer used as a placeholder when no TBE is
// allocated for an address (counterpart of CacheMemory::getNullEntry).
template<class ENTRY>
inline ENTRY*
TBETable<ENTRY>::getNullEntry()
{
return nullptr;
}
// looks an address up in the cache
template<class ENTRY>
inline ENTRY*