This patch adds support for stalling the requests queued up at different controllers for the MESI CMP directory protocol. Earlier the controllers would recycle the requests using some fixed latency. This results in younger requests getting serviced first at times, and can result in starvation. Instead all the requests that need a particular block to be in a stable state are moved to a separate queue, where they wait till that block returns to a stable state and then they are processed.
967 lines
33 KiB
Plaintext
|
|
/*
|
|
* Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met: redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer;
|
|
* redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution;
|
|
* neither the name of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
// L1 cache controller for the MESI CMP directory protocol.
machine(L1Cache, "MESI Directory L1 Cache CMP")
 : Sequencer * sequencer,          // CPU-side interface; receives read/write/eviction callbacks
   CacheMemory * L1IcacheMemory,   // L1 instruction cache array
   CacheMemory * L1DcacheMemory,   // L1 data cache array
   int l2_select_num_bits,         // number of address bits used to select the target L2 bank
   int l1_request_latency = 2,     // cycles to enqueue an outgoing request
   int l1_response_latency = 2,    // cycles to enqueue an outgoing response
   int to_l2_latency = 1,          // cycles to enqueue an (exclusive) unblock to L2
   bool send_evictions             // if true, notify the sequencer on every eviction/invalidation
{
  // NODE L1 CACHE
  // From this node's L1 cache TO the network
  // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
  MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false", vnet_type="request";
  // a local L1 -> this L2 bank
  MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false", vnet_type="response";
  MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false", vnet_type="unblock";

  // To this node's L1 cache FROM the network
  // a L2 bank -> this L1
  MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false", vnet_type="request";
  // a L2 bank -> this L1
  MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false", vnet_type="response";
// STATES
// Per-block controller state; blocks with no entry default to I.
state_declaration(State, desc="Cache states", default="L1Cache_State_I") {
  // Base (stable) states
  NP, AccessPermission:Invalid, desc="Not present in either cache";
  I, AccessPermission:Invalid, desc="a L1 cache entry Idle";
  S, AccessPermission:Read_Only, desc="a L1 cache entry Shared";
  E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive";
  M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b";

  // Transient States — requests that hit a block in one of these are
  // stalled (stall_and_wait) until the block returns to a stable state.
  IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet";
  IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet";
  SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet";
  IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";

  M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK";
  SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2";
}
|
|
|
// EVENTS
enumeration(Event, desc="Cache events") {
  // L1 events: demand accesses from the local processor
  Load, desc="Load request from the home processor";
  Ifetch, desc="I-fetch request from the home processor";
  Store, desc="Store request from the home processor";

  Inv, desc="Invalidate request from L2 bank";

  // internally generated request (capacity/conflict victim or wrong-cache hit)
  L1_Replacement, desc="L1 Replacement", format="!r";

  // requests forwarded from other processors via the L2
  Fwd_GETX, desc="GETX from other processor";
  Fwd_GETS, desc="GETS from other processor";
  Fwd_GET_INSTR, desc="GET_INSTR from other processor";

  // data responses; which event fires depends on sender and outstanding acks
  Data, desc="Data for processor";
  Data_Exclusive, desc="Data for processor";
  DataS_fromL1, desc="data for GETS request, need to unblock directory";
  Data_all_Acks, desc="Data for processor, all acks";

  Ack, desc="Ack for processor";
  Ack_all, desc="Last ack for processor";

  WB_Ack, desc="Ack for replacement";
}
|
|
|
|
// TYPES

// CacheEntry — per-block storage held in the L1 I/D cache arrays.
structure(Entry, desc="...", interface="AbstractCacheEntry" ) {
  State CacheState, desc="cache state";
  DataBlock DataBlk, desc="data for the block";
  bool Dirty, default="false", desc="data is dirty";
}
|
|
|
// TBE fields — transaction buffer entry tracking one in-flight miss/writeback.
structure(TBE, desc="...") {
  Address Address, desc="Physical address for this TBE";
  State TBEState, desc="Transient state";
  DataBlock DataBlk, desc="Buffer for the data block";
  bool Dirty, default="false", desc="data is dirty";
  bool isPrefetch, desc="Set if this was caused by a prefetch";
  // Decremented as ACK/Data messages arrive; transaction completes at zero.
  int pendingAcks, default="0", desc="number of pending acks";
}
|
|
|
// TBETable — externally implemented (C++) map from address to TBE.
structure(TBETable, external="yes") {
  TBE lookup(Address);
  void allocate(Address);
  void deallocate(Address);
  bool isPresent(Address);
}
|
|
|
// Table of outstanding transactions for this controller.
TBETable L1_TBEs, template_hack="<L1Cache_TBE>";

// Demand requests from the local CPU (via the Sequencer).
MessageBuffer mandatoryQueue, ordered="false";

// Lowest address bit used for L2 bank selection (just above block offset).
int l2_select_low_bit, default="RubySystem::getBlockSizeBits()";

// Externally implemented helpers to bind the current entry/TBE for actions,
// and to wake up requests stalled on an address (stall-and-wait support).
void set_cache_entry(AbstractCacheEntry a);
void unset_cache_entry();
void set_tbe(TBE a);
void unset_tbe();
void wakeUpBuffers(Address a);
|
// inclusive cache returns L1 entries only
// Look up addr in the D-cache first, then the I-cache; returns the entry
// pointer, or an invalid pointer if the block is in neither cache.
Entry getCacheEntry(Address addr), return_by_pointer="yes" {
  Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory[addr]);
  if(is_valid(L1Dcache_entry)) {
    return L1Dcache_entry;
  }

  Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory[addr]);
  return L1Icache_entry;
}
|
|
|
// Look up addr in the D-cache only (invalid pointer if absent).
Entry getL1DCacheEntry(Address addr), return_by_pointer="yes" {
  Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory[addr]);
  return L1Dcache_entry;
}
|
|
|
// Look up addr in the I-cache only (invalid pointer if absent).
Entry getL1ICacheEntry(Address addr), return_by_pointer="yes" {
  Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory[addr]);
  return L1Icache_entry;
}
|
|
|
// Current protocol state for addr: TBE state takes priority over the cache
// entry's state; a block with neither is NP (not present).
State getState(TBE tbe, Entry cache_entry, Address addr) {
  // A block must never be resident in both L1 arrays at once.
  assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false);

  if(is_valid(tbe)) {
    return tbe.TBEState;
  } else if (is_valid(cache_entry)) {
    return cache_entry.CacheState;
  }
  return State:NP;
}
|
|
|
// Record the new state in whichever structures exist for addr (TBE and/or
// cache entry), keeping the two views consistent.
void setState(TBE tbe, Entry cache_entry, Address addr, State state) {
  // A block must never be resident in both L1 arrays at once.
  assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false);

  // MUST CHANGE
  if(is_valid(tbe)) {
    tbe.TBEState := state;
  }

  if (is_valid(cache_entry)) {
    cache_entry.CacheState := state;
  }
}
|
|
|
// Map the block's current state to an access permission (used by functional
// accesses); TBE state wins over cache-entry state, absent blocks are
// NotPresent.
AccessPermission getAccessPermission(Address addr) {
  TBE tbe := L1_TBEs[addr];
  if(is_valid(tbe)) {
    DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState));
    return L1Cache_State_to_permission(tbe.TBEState);
  }

  Entry cache_entry := getCacheEntry(addr);
  if(is_valid(cache_entry)) {
    DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState));
    return L1Cache_State_to_permission(cache_entry.CacheState);
  }

  DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent);
  return AccessPermission:NotPresent;
}
|
|
|
// Direct reference to the block's data (NOTE(review): assumes the entry
// exists — callers must only use this for resident blocks).
DataBlock getDataBlock(Address addr), return_by_ref="yes" {
  return getCacheEntry(addr).DataBlk;
}
|
|
|
// Push the permission implied by the new state into the cache entry
// (no-op when the block has no entry, e.g. TBE-only states).
void setAccessPermission(Entry cache_entry, Address addr, State state) {
  if (is_valid(cache_entry)) {
    cache_entry.changePermission(L1Cache_State_to_permission(state));
  }
}
|
|
|
// Translate a CPU request type into the protocol event that drives the FSM;
// atomics are treated as stores (they need exclusive permission).
Event mandatory_request_type_to_event(RubyRequestType type) {
  if (type == RubyRequestType:LD) {
    return Event:Load;
  } else if (type == RubyRequestType:IFETCH) {
    return Event:Ifetch;
  } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
    return Event:Store;
  } else {
    error("Invalid RubyRequestType");
  }
}
|
|
|
// Number of acks this transaction is still waiting for (assumes a valid TBE).
int getPendingAcks(TBE tbe) {
  return tbe.pendingAcks;
}
|
|
|
// Outgoing ports: requests, responses, and unblocks toward the L2.
out_port(requestIntraChipL1Network_out, RequestMsg, requestFromL1Cache);
out_port(responseIntraChipL1Network_out, ResponseMsg, responseFromL1Cache);
out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache);
|
// Response IntraChip L1 Network - response msg to this L1 cache
// Highest-priority in_port (rank 2). Classifies each response into a
// protocol event; block_on="Address" works with stall_and_wait so stalled
// messages for the same block are held until woken.
in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) {
  if (responseIntraChipL1Network_in.isReady()) {
    peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
      assert(in_msg.Destination.isElement(machineID));

      Entry cache_entry := getCacheEntry(in_msg.Address);
      TBE tbe := L1_TBEs[in_msg.Address];

      if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
        trigger(Event:Data_Exclusive, in_msg.Address, cache_entry, tbe);
      } else if(in_msg.Type == CoherenceResponseType:DATA) {
        // Data from another L1 while in IS/IS_I means the directory did not
        // block — we must explicitly unblock it (DataS_fromL1).
        if ((getState(tbe, cache_entry, in_msg.Address) == State:IS ||
             getState(tbe, cache_entry, in_msg.Address) == State:IS_I) &&
            machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) {

          trigger(Event:DataS_fromL1, in_msg.Address, cache_entry, tbe);

        } else if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) {
          // This data message carries the final outstanding ack.
          trigger(Event:Data_all_Acks, in_msg.Address, cache_entry, tbe);
        } else {
          trigger(Event:Data, in_msg.Address, cache_entry, tbe);
        }
      } else if (in_msg.Type == CoherenceResponseType:ACK) {
        if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) {
          trigger(Event:Ack_all, in_msg.Address, cache_entry, tbe);
        } else {
          trigger(Event:Ack, in_msg.Address, cache_entry, tbe);
        }
      } else if (in_msg.Type == CoherenceResponseType:WB_ACK) {
        trigger(Event:WB_Ack, in_msg.Address, cache_entry, tbe);
      } else {
        error("Invalid L1 response type");
      }
    }
  }
}
|
|
|
// Request InterChip network - request from this L1 cache to the shared L2
// Middle-priority in_port (rank 1): invalidations and forwarded requests
// arriving from the L2 / other L1s.
in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank = 1) {
  if(requestIntraChipL1Network_in.isReady()) {
    peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
      assert(in_msg.Destination.isElement(machineID));

      Entry cache_entry := getCacheEntry(in_msg.Address);
      TBE tbe := L1_TBEs[in_msg.Address];

      if (in_msg.Type == CoherenceRequestType:INV) {
        trigger(Event:Inv, in_msg.Address, cache_entry, tbe);
      } else if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:UPGRADE) {
        // upgrade transforms to GETX due to race
        trigger(Event:Fwd_GETX, in_msg.Address, cache_entry, tbe);
      } else if (in_msg.Type == CoherenceRequestType:GETS) {
        trigger(Event:Fwd_GETS, in_msg.Address, cache_entry, tbe);
      } else if (in_msg.Type == CoherenceRequestType:GET_INSTR) {
        trigger(Event:Fwd_GET_INSTR, in_msg.Address, cache_entry, tbe);
      } else {
        error("Invalid forwarded request type");
      }
    }
  }
}
|
|
|
// Mandatory Queue betweens Node's CPU and it's L1 caches
// Lowest-priority in_port (rank 0): demand requests from the CPU. A block
// resident in the "wrong" L1 array, or a full set, first triggers
// L1_Replacement to make room; block_on="LineAddress" pairs with
// stall_and_wait so requests to busy blocks wait instead of recycling.
in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
  if (mandatoryQueue_in.isReady()) {
    peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {

      // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache

      if (in_msg.Type == RubyRequestType:IFETCH) {
        // ** INSTRUCTION ACCESS ***

        Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
        if (is_valid(L1Icache_entry)) {
          // The tag matches for the L1, so the L1 asks the L2 for it.
          trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                  L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
        } else {

          // Check to see if it is in the OTHER L1
          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
          if (is_valid(L1Dcache_entry)) {
            // The block is in the wrong L1, put the request on the queue to the shared L2
            trigger(Event:L1_Replacement, in_msg.LineAddress,
                    L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
          }

          if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
            // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it
            trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                    L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
          } else {
            // No room in the L1, so we need to make room in the L1
            // (victim chosen by cacheProbe; event is triggered on the
            // victim's address, not the requested one)
            trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress),
                    getL1ICacheEntry(L1IcacheMemory.cacheProbe(in_msg.LineAddress)),
                    L1_TBEs[L1IcacheMemory.cacheProbe(in_msg.LineAddress)]);
          }
        }
      } else {

        // *** DATA ACCESS ***
        Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
        if (is_valid(L1Dcache_entry)) {
          // The tag matches for the L1, so the L1 ask the L2 for it
          trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                  L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
        } else {

          // Check to see if it is in the OTHER L1
          Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
          if (is_valid(L1Icache_entry)) {
            // The block is in the wrong L1, put the request on the queue to the shared L2
            trigger(Event:L1_Replacement, in_msg.LineAddress,
                    L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
          }

          if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
            // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it
            trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
                    L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
          } else {
            // No room in the L1, so we need to make room in the L1
            trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress),
                    getL1DCacheEntry(L1DcacheMemory.cacheProbe(in_msg.LineAddress)),
                    L1_TBEs[L1DcacheMemory.cacheProbe(in_msg.LineAddress)]);
          }
        }
      }
    }
  }
}
|
|
|
// ACTIONS

// Issue a GETS (read-shared) request for the current address to the owning
// L2 bank; prefetch/access-mode flags are copied from the CPU request.
action(a_issueGETS, "a", desc="Issue GETS") {
  peek(mandatoryQueue_in, RubyRequest) {
    enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
      out_msg.Address := address;
      out_msg.Type := CoherenceRequestType:GETS;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                                l2_select_low_bit, l2_select_num_bits));
      DPRINTF(RubySlicc, "address: %s, destination: %s\n",
              address, out_msg.Destination);
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Prefetch := in_msg.Prefetch;
      out_msg.AccessMode := in_msg.AccessMode;
    }
  }
}

// Issue a GET_INSTR request for an instruction fetch to the owning L2 bank.
action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") {
  peek(mandatoryQueue_in, RubyRequest) {
    enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
      out_msg.Address := address;
      out_msg.Type := CoherenceRequestType:GET_INSTR;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                                l2_select_low_bit, l2_select_num_bits));
      DPRINTF(RubySlicc, "address: %s, destination: %s\n",
              address, out_msg.Destination);
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Prefetch := in_msg.Prefetch;
      out_msg.AccessMode := in_msg.AccessMode;
    }
  }
}

// Issue a GETX (read-exclusive) request to the owning L2 bank.
action(b_issueGETX, "b", desc="Issue GETX") {
  peek(mandatoryQueue_in, RubyRequest) {
    enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
      out_msg.Address := address;
      out_msg.Type := CoherenceRequestType:GETX;
      out_msg.Requestor := machineID;
      DPRINTF(RubySlicc, "%s\n", machineID);
      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                                l2_select_low_bit, l2_select_num_bits));
      DPRINTF(RubySlicc, "address: %s, destination: %s\n",
              address, out_msg.Destination);
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Prefetch := in_msg.Prefetch;
      out_msg.AccessMode := in_msg.AccessMode;
    }
  }
}
|
|
|
|
// Issue an UPGRADE request (S -> M transition) to the owning L2 bank.
// NOTE: desc previously said "Issue GETX" — copy-paste error; this action
// sends CoherenceRequestType:UPGRADE, not GETX.
action(c_issueUPGRADE, "c", desc="Issue UPGRADE") {
  peek(mandatoryQueue_in, RubyRequest) {
    enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) {
      out_msg.Address := address;
      out_msg.Type := CoherenceRequestType:UPGRADE;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                                l2_select_low_bit, l2_select_num_bits));
      DPRINTF(RubySlicc, "address: %s, destination: %s\n",
              address, out_msg.Destination);
      out_msg.MessageSize := MessageSizeType:Control;
      out_msg.Prefetch := in_msg.Prefetch;
      out_msg.AccessMode := in_msg.AccessMode;
    }
  }
}
|
|
|
|
// Reply with the cached data block to the requestor of a forwarded request.
action(d_sendDataToRequestor, "d", desc="send data to requestor") {
  peek(requestIntraChipL1Network_in, RequestMsg) {
    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
      assert(is_valid(cache_entry));
      out_msg.Address := address;
      out_msg.Type := CoherenceResponseType:DATA;
      out_msg.DataBlk := cache_entry.DataBlk;
      out_msg.Dirty := cache_entry.Dirty;
      out_msg.Sender := machineID;
      out_msg.Destination.add(in_msg.Requestor);
      out_msg.MessageSize := MessageSizeType:Response_Data;
    }
  }
}

// Also copy the (possibly dirty) block back to the L2 when downgrading M->S.
action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") {
  enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
    assert(is_valid(cache_entry));
    out_msg.Address := address;
    out_msg.Type := CoherenceResponseType:DATA;
    out_msg.DataBlk := cache_entry.DataBlk;
    out_msg.Dirty := cache_entry.Dirty;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Response_Data;
  }
}

// Same as d_sendDataToRequestor, but the block now lives in the TBE
// (cache entry was already deallocated during replacement).
action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") {
  peek(requestIntraChipL1Network_in, RequestMsg) {
    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
      assert(is_valid(tbe));
      out_msg.Address := address;
      out_msg.Type := CoherenceResponseType:DATA;
      out_msg.DataBlk := tbe.DataBlk;
      out_msg.Dirty := tbe.Dirty;
      out_msg.Sender := machineID;
      out_msg.Destination.add(in_msg.Requestor);
      out_msg.MessageSize := MessageSizeType:Response_Data;
    }
  }
}

// Same as d2_sendDataToL2, but sourced from the TBE during a replacement race.
action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") {
  enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
    assert(is_valid(tbe));
    out_msg.Address := address;
    out_msg.Type := CoherenceResponseType:DATA;
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.Dirty := tbe.Dirty;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Response_Data;
  }
}
|
|
|
|
// Send a plain invalidation ACK back to whoever requested it.
action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") {
  peek(requestIntraChipL1Network_in, RequestMsg) {
    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
      out_msg.Address := address;
      out_msg.Type := CoherenceResponseType:ACK;
      out_msg.Sender := machineID;
      out_msg.Destination.add(in_msg.Requestor);
      out_msg.MessageSize := MessageSizeType:Response_Control;
    }
  }
}

// Write the cached block back to the L2 (invalidation of an M block).
action(f_sendDataToL2, "f", desc="send data to the L2 cache") {
  enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
    assert(is_valid(cache_entry));
    out_msg.Address := address;
    out_msg.Type := CoherenceResponseType:DATA;
    out_msg.DataBlk := cache_entry.DataBlk;
    out_msg.Dirty := cache_entry.Dirty;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Writeback_Data;
  }
}

// Write the block back to the L2 from the TBE (replacement already freed
// the cache entry).
action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") {
  enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
    assert(is_valid(tbe));
    out_msg.Address := address;
    out_msg.Type := CoherenceResponseType:DATA;
    out_msg.DataBlk := tbe.DataBlk;
    out_msg.Dirty := tbe.Dirty;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Writeback_Data;
  }
}
|
|
|
|
// Acknowledge an invalidation: send a single-count ACK to the requestor.
// NOTE: desc previously said "send data to the L2 cache" — copy-paste error;
// this action sends CoherenceResponseType:ACK (AckCount=1), no data.
action(fi_sendInvAck, "fi", desc="send invalidation ack to requestor") {
  peek(requestIntraChipL1Network_in, RequestMsg) {
    enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
      out_msg.Address := address;
      out_msg.Type := CoherenceResponseType:ACK;
      out_msg.Sender := machineID;
      out_msg.Destination.add(in_msg.Requestor);
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.AckCount := 1;
    }
  }
}
|
|
|
|
// Notify the CPU (for LL/SC and x86 locked-op correctness) that the block
// is being evicted/invalidated; gated by the send_evictions parameter.
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
  if (send_evictions) {
    DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
    sequencer.evictionCallback(address);
  }
}

// Issue a PUTX writeback to the owning L2 bank; carries data only when dirty.
action(g_issuePUTX, "g", desc="send data to the L2 cache") {
  enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) {
    assert(is_valid(cache_entry));
    out_msg.Address := address;
    out_msg.Type := CoherenceRequestType:PUTX;
    out_msg.DataBlk := cache_entry.DataBlk;
    out_msg.Dirty := cache_entry.Dirty;
    out_msg.Requestor:= machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    if (cache_entry.Dirty) {
      out_msg.MessageSize := MessageSizeType:Writeback_Data;
    } else {
      out_msg.MessageSize := MessageSizeType:Writeback_Control;
    }
  }
}
|
|
|
|
// Tell the L2 the transaction finished so it can unblock the directory entry.
action(j_sendUnblock, "j", desc="send unblock to the L2 cache") {
  enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
    out_msg.Address := address;
    out_msg.Type := CoherenceResponseType:UNBLOCK;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Response_Control;
    DPRINTF(RubySlicc, "%s\n", address);

  }
}

// Unblock variant telling the L2 this L1 now holds the block exclusively.
action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") {
  enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
    out_msg.Address := address;
    out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK;
    out_msg.Sender := machineID;
    out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
                                              l2_select_low_bit, l2_select_num_bits));
    out_msg.MessageSize := MessageSizeType:Response_Control;
    DPRINTF(RubySlicc, "%s\n", address);

  }
}
|
|
|
|
// Complete a load: hand the data block to the sequencer.
action(h_load_hit, "h", desc="If not prefetch, notify sequencer the load completed.") {
  assert(is_valid(cache_entry));
  DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
  sequencer.readCallback(address, cache_entry.DataBlk);
}

// Complete a store: let the sequencer apply the write, then mark dirty.
action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") {
  assert(is_valid(cache_entry));
  DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
  sequencer.writeCallback(address, cache_entry.DataBlk);
  cache_entry.Dirty := true;
}

// Allocate a TBE for a new transaction, snapshotting the current block data
// and dirty bit (needed if a forwarded request races with a writeback).
action(i_allocateTBE, "i", desc="Allocate TBE (isPrefetch=0, number of invalidates=0)") {
  check_allocate(L1_TBEs);
  assert(is_valid(cache_entry));
  L1_TBEs.allocate(address);
  set_tbe(L1_TBEs[address]);
  tbe.isPrefetch := false;
  tbe.Dirty := cache_entry.Dirty;
  tbe.DataBlk := cache_entry.DataBlk;
}
|
|
|
|
// Consume the head of the CPU demand queue.
action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
  mandatoryQueue_in.dequeue();
}

// Consume the head of the forwarded-request queue, recording network delay.
action(l_popRequestQueue, "l", desc="Pop incoming request queue and profile the delay within this virtual network") {
  profileMsgDelay(2, requestIntraChipL1Network_in.dequeue_getDelayCycles());
}

// Consume the head of the response queue, recording network delay.
action(o_popIncomingResponseQueue, "o", desc="Pop Incoming Response queue and profile the delay within this virtual network") {
  profileMsgDelay(3, responseIntraChipL1Network_in.dequeue_getDelayCycles());
}

// Release the TBE when the transaction completes.
action(s_deallocateTBE, "s", desc="Deallocate TBE") {
  L1_TBEs.deallocate(address);
  unset_tbe();
}
|
|
|
|
// Install the data (and dirty bit) from an incoming response into the cache.
action(u_writeDataToL1Cache, "u", desc="Write data to cache") {
  peek(responseIntraChipL1Network_in, ResponseMsg) {
    assert(is_valid(cache_entry));
    cache_entry.DataBlk := in_msg.DataBlk;
    cache_entry.Dirty := in_msg.Dirty;
  }
}

// Subtract this message's AckCount from the TBE's outstanding-ack counter
// (AckCount may be negative for "expect N acks" notifications).
action(q_updateAckCount, "q", desc="Update ack count") {
  peek(responseIntraChipL1Network_in, ResponseMsg) {
    assert(is_valid(tbe));
    tbe.pendingAcks := tbe.pendingAcks - in_msg.AckCount;
    APPEND_TRANSITION_COMMENT(in_msg.AckCount);
    APPEND_TRANSITION_COMMENT(" p: ");
    APPEND_TRANSITION_COMMENT(tbe.pendingAcks);
  }
}
|
|
|
|
// Free the block's entry from whichever L1 array holds it.
action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
  if (L1DcacheMemory.isTagPresent(address)) {
    L1DcacheMemory.deallocate(address);
  } else {
    L1IcacheMemory.deallocate(address);
  }
  unset_cache_entry();
}

// Allocate a D-cache entry for the block (no-op if one already exists).
action(oo_allocateL1DCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") {
  if (is_invalid(cache_entry)) {
    set_cache_entry(L1DcacheMemory.allocate(address, new Entry));
  }
}

// Allocate an I-cache entry for the block (no-op if one already exists).
action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") {
  if (is_invalid(cache_entry)) {
    set_cache_entry(L1IcacheMemory.allocate(address, new Entry));
  }
}
|
|
|
|
// Core of this patch: instead of recycling (which lets younger requests
// overtake and can starve older ones), park the CPU request in a per-address
// wait queue until the block returns to a stable state.
action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle L1 request queue") {
  stall_and_wait(mandatoryQueue_in, address);
}

// Wake all requests stalled on this address; invoked whenever the block
// reaches a stable state.
action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
  wakeUpBuffers(address);
}
|
|
|
|
// Record an instruction-cache demand miss in the I-cache stats.
action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") {
  peek(mandatoryQueue_in, RubyRequest) {
    L1IcacheMemory.profileMiss(in_msg);
  }
}

// Record a data-cache demand miss in the D-cache stats.
action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") {
  peek(mandatoryQueue_in, RubyRequest) {
    L1DcacheMemory.profileMiss(in_msg);
  }
}
|
|
|
|
//*****************************************************
// TRANSITIONS
//*****************************************************

// Transitions for Load/Store/Replacement/WriteBack from transient states
// Any CPU request that finds its block in a transient state is parked via
// stall_and_wait; it is re-examined when kd_wakeUpDependents fires on this
// address (preserves age order, avoids starvation — the point of this patch).
transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) {
  z_stallAndWaitMandatoryQueue;
}
|
|
|
|
// Transitions from Idle

// Replacing an idle/absent block just frees the tag — nothing to write back.
transition({NP,I}, L1_Replacement) {
  ff_deallocateL1CacheBlock;
}

// Load miss: allocate in the D-cache, open a transaction, ask L2 for shared.
transition({NP,I}, Load, IS) {
  oo_allocateL1DCacheBlock;
  i_allocateTBE;
  a_issueGETS;
  uu_profileDataMiss;
  k_popMandatoryQueue;
}

// Ifetch miss: allocate in the I-cache, open a transaction, ask L2.
transition({NP,I}, Ifetch, IS) {
  pp_allocateL1ICacheBlock;
  i_allocateTBE;
  ai_issueGETINSTR;
  uu_profileInstMiss;
  k_popMandatoryQueue;
}

// Store miss: allocate in the D-cache, open a transaction, ask L2 for exclusive.
transition({NP,I}, Store, IM) {
  oo_allocateL1DCacheBlock;
  i_allocateTBE;
  b_issueGETX;
  uu_profileDataMiss;
  k_popMandatoryQueue;
}

// Invalidation of a block we don't hold: still must ack so counts balance.
transition({NP, I}, Inv) {
  fi_sendInvAck;
  l_popRequestQueue;
}
|
|
|
|
// Transitions from Shared

// Read hits complete locally.
transition(S, {Load,Ifetch}) {
  h_load_hit;
  k_popMandatoryQueue;
}

// Store to a shared block: request upgrade to exclusive from the L2.
transition(S, Store, SM) {
  i_allocateTBE;
  c_issueUPGRADE;
  uu_profileDataMiss;
  k_popMandatoryQueue;
}

// Clean shared block can be dropped silently (L2 holds a copy).
transition(S, L1_Replacement, I) {
  forward_eviction_to_cpu;
  ff_deallocateL1CacheBlock;
}

// Invalidation: drop the block and ack.
transition(S, Inv, I) {
  forward_eviction_to_cpu;
  fi_sendInvAck;
  l_popRequestQueue;
}
|
|
|
|
// Transitions from Exclusive

// Read hits complete locally.
transition(E, {Load, Ifetch}) {
  h_load_hit;
  k_popMandatoryQueue;
}

// Store to an exclusive block upgrades silently to Modified.
transition(E, Store, M) {
  hh_store_hit;
  k_popMandatoryQueue;
}

// Replacement of an E block is written back via PUTX; hold data in a TBE
// until the WB_Ack in case a forwarded request races in.
transition(E, L1_Replacement, M_I) {
  // silent E replacement??
  forward_eviction_to_cpu;
  i_allocateTBE;
  g_issuePUTX; // send data, but hold in case forwarded request
  ff_deallocateL1CacheBlock;
}

// Invalidation of a clean exclusive block: ack only, no data needed.
transition(E, Inv, I) {
  // don't send data
  forward_eviction_to_cpu;
  fi_sendInvAck;
  l_popRequestQueue;
}

// Another core wants exclusive: hand it the (clean) data and invalidate.
transition(E, Fwd_GETX, I) {
  forward_eviction_to_cpu;
  d_sendDataToRequestor;
  l_popRequestQueue;
}

// Another core wants shared: send data to it and to the L2, drop to S.
transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) {
  d_sendDataToRequestor;
  d2_sendDataToL2;
  l_popRequestQueue;
}
|
|
|
|
// Transitions from Modified

// Read hits complete locally.
transition(M, {Load, Ifetch}) {
  h_load_hit;
  k_popMandatoryQueue;
}

// Store hits complete locally; block stays Modified.
transition(M, Store) {
  hh_store_hit;
  k_popMandatoryQueue;
}

// Replacement of a dirty block: PUTX writeback, hold data in a TBE until ack.
transition(M, L1_Replacement, M_I) {
  forward_eviction_to_cpu;
  i_allocateTBE;
  g_issuePUTX; // send data, but hold in case forwarded request
  ff_deallocateL1CacheBlock;
}

// Writeback acknowledged: transaction done; wake any stalled requests
// now that the block is back in a stable state.
transition(M_I, WB_Ack, I) {
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

// Invalidation of a dirty block: must write the data back to the L2.
transition(M, Inv, I) {
  forward_eviction_to_cpu;
  f_sendDataToL2;
  l_popRequestQueue;
}

// Inv raced with our writeback: satisfy it from the TBE, then just sink
// the eventual WB_Ack.
transition(M_I, Inv, SINK_WB_ACK) {
  ft_sendDataToL2_fromTBE;
  l_popRequestQueue;
}

// Another core wants exclusive: forward the dirty data and invalidate.
transition(M, Fwd_GETX, I) {
  forward_eviction_to_cpu;
  d_sendDataToRequestor;
  l_popRequestQueue;
}

// Another core wants shared: send data to it and to the L2, drop to S.
transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) {
  d_sendDataToRequestor;
  d2_sendDataToL2;
  l_popRequestQueue;
}

// Forwarded GETX raced with our writeback: serve from TBE, sink the WB_Ack.
transition(M_I, Fwd_GETX, SINK_WB_ACK) {
  dt_sendDataToRequestor_fromTBE;
  l_popRequestQueue;
}

// Forwarded GETS raced with our writeback: serve requestor and L2 from TBE.
transition(M_I, {Fwd_GETS, Fwd_GET_INSTR}, SINK_WB_ACK) {
  dt_sendDataToRequestor_fromTBE;
  d2t_sendDataToL2_fromTBE;
  l_popRequestQueue;
}
|
|
|
|
// Transitions from IS

// Inv while a GETS is outstanding: the data that eventually arrives must be
// used once and discarded (IS_I), since the directory didn't block.
transition({IS, IS_I}, Inv, IS_I) {
  fi_sendInvAck;
  l_popRequestQueue;
}

// GETS satisfied with no outstanding acks: install, complete load, wake
// any requests stalled on this now-stable block.
transition(IS, Data_all_Acks, S) {
  u_writeDataToL1Cache;
  h_load_hit;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

// Same, but an Inv was observed first: deliver the load once, end Invalid.
transition(IS_I, Data_all_Acks, I) {
  u_writeDataToL1Cache;
  h_load_hit;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

// Data came from a peer L1: must also unblock the directory ourselves.
transition(IS, DataS_fromL1, S) {
  u_writeDataToL1Cache;
  j_sendUnblock;
  h_load_hit;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

// Peer-L1 data after an Inv: use once, unblock directory, end Invalid.
transition(IS_I, DataS_fromL1, I) {
  u_writeDataToL1Cache;
  j_sendUnblock;
  h_load_hit;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

// directory is blocked when sending exclusive data
// Exclusive data grant: install in E and send the exclusive unblock.
transition(IS_I, Data_Exclusive, E) {
  u_writeDataToL1Cache;
  h_load_hit;
  jj_sendExclusiveUnblock;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

transition(IS, Data_Exclusive, E) {
  u_writeDataToL1Cache;
  h_load_hit;
  jj_sendExclusiveUnblock;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}
|
|
|
|
// Transitions from IM

// Inv while a GETX is outstanding: ack it; an SM block loses its shared
// copy and reverts to IM (will get fresh data with the GETX response).
transition({IM, SM}, Inv, IM) {
  fi_sendInvAck;
  l_popRequestQueue;
}

// GETX data arrived but invalidation acks are still outstanding: install
// data, account for acks, keep waiting in SM.
transition(IM, Data, SM) {
  u_writeDataToL1Cache;
  q_updateAckCount;
  o_popIncomingResponseQueue;
}

// GETX data with all acks: install, complete the store, unblock directory,
// wake requests stalled on this now-stable block.
transition(IM, Data_all_Acks, M) {
  u_writeDataToL1Cache;
  hh_store_hit;
  jj_sendExclusiveUnblock;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}

// transitions from SM

// Count an invalidation ack; not the last one yet.
transition({SM, IM}, Ack) {
  q_updateAckCount;
  o_popIncomingResponseQueue;
}

// Final ack: complete the store and move to Modified.
transition(SM, Ack_all, M) {
  jj_sendExclusiveUnblock;
  hh_store_hit;
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}
|
|
|
|
// Waiting only to sink a WB_Ack: acknowledge any invalidation meanwhile.
transition(SINK_WB_ACK, Inv){
  fi_sendInvAck;
  l_popRequestQueue;
}

// The expected WB_Ack arrives: close the transaction and wake stalled
// requests on this address.
transition(SINK_WB_ACK, WB_Ack, I){
  s_deallocateTBE;
  o_popIncomingResponseQueue;
  kd_wakeUpDependents;
}
|
|
}
|