ruby: new atomics implementation

This patch changes the way that Ruby handles atomic RMW instructions. This implementation, unlike the prior one, is protocol independent. It works by locking an address from the sequencer immediately after the read portion of an RMW completes. When that address is locked, the coherence controller will only satisfy requests coming from one port (e.g., the mandatory queue) and will ignore all others. After the write portion completes, the line is unlocked. This should also work with multi-line atomics, as long as the blocks are always acquired in the same order.
This commit is contained in:
Derek Hower
2010-01-19 17:11:36 -06:00
parent 279f179bab
commit 07ea0891f1
14 changed files with 59 additions and 216 deletions

View File

@@ -238,7 +238,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// Response IntraChip L1 Network - response msg to this L1 cache
in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) {
if (responseIntraChipL1Network_in.isReady()) {
peek(responseIntraChipL1Network_in, ResponseMsg) {
peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
trigger(Event:Data_Exclusive, in_msg.Address);
@@ -271,7 +271,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// Request InterChip network - request from this L1 cache to the shared L2
in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) {
if(requestIntraChipL1Network_in.isReady()) {
peek(requestIntraChipL1Network_in, RequestMsg) {
peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
if (in_msg.Type == CoherenceRequestType:INV) {
trigger(Event:Inv, in_msg.Address);
@@ -292,7 +292,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// Mandatory Queue between Node's CPU and its L1 caches
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, CacheMsg) {
peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
// Check for data access to blocks in I-cache and ifetchs to blocks in D-cache

View File

@@ -138,7 +138,7 @@ machine(L1Cache, "MI Example L1 Cache")
in_port(forwardRequestNetwork_in, RequestMsg, forwardToCache) {
if (forwardRequestNetwork_in.isReady()) {
peek(forwardRequestNetwork_in, RequestMsg) {
peek(forwardRequestNetwork_in, RequestMsg, block_on="Address") {
if (in_msg.Type == CoherenceRequestType:GETX) {
trigger(Event:Fwd_GETX, in_msg.Address);
}
@@ -160,7 +160,7 @@ machine(L1Cache, "MI Example L1 Cache")
in_port(responseNetwork_in, ResponseMsg, responseToCache) {
if (responseNetwork_in.isReady()) {
peek(responseNetwork_in, ResponseMsg) {
peek(responseNetwork_in, ResponseMsg, block_on="Address") {
if (in_msg.Type == CoherenceResponseType:DATA) {
trigger(Event:Data, in_msg.Address);
}
@@ -174,7 +174,7 @@ machine(L1Cache, "MI Example L1 Cache")
// Mandatory Queue
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, CacheMsg) {
peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
if (cacheMemory.isTagPresent(in_msg.LineAddress) == false &&

View File

@@ -306,7 +306,7 @@ machine(L1Cache, "Directory protocol")
// Request Network
in_port(requestNetwork_in, RequestMsg, requestToL1Cache) {
if (requestNetwork_in.isReady()) {
peek(requestNetwork_in, RequestMsg) {
peek(requestNetwork_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
DEBUG_EXPR("MRM_DEBUG: L1 received");
DEBUG_EXPR(in_msg.Type);
@@ -338,7 +338,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT
// Response Network
in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) {
if (responseToL1Cache_in.isReady()) {
peek(responseToL1Cache_in, ResponseMsg) {
peek(responseToL1Cache_in, ResponseMsg, block_on="Address") {
if (in_msg.Type == CoherenceResponseType:ACK) {
trigger(Event:Ack, in_msg.Address);
} else if (in_msg.Type == CoherenceResponseType:DATA) {
@@ -356,7 +356,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT
// Mandatory Queue between Node's CPU and its L1 caches
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, CacheMsg) {
peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
// Check for data access to blocks in I-cache and ifetchs to blocks in D-cache

View File

@@ -64,6 +64,11 @@ public:
(m_prio_heap.peekMin().m_time <= g_eventQueue_ptr->getTime()));
}
// Pulls the head (minimum) node out of m_prio_heap and hands its message
// back to enqueue() with a second argument of 1, instead of consuming it.
// The SLICC-generated peek code calls this when the head message's
// block_on address is mapped to a different queue in m_block_map.
// NOTE(review): the literal 1 is presumably a one-cycle delay — confirm
// against enqueue()'s signature.
void delayHead() {
MessageBufferNode node = m_prio_heap.extractMin();
enqueue(node.m_msgptr, 1);
}
bool areNSlotsAvailable(int n);
int getPriority() { return m_priority_rank; }
void setPriority(int rank) { m_priority_rank = rank; }

View File

@@ -13,7 +13,7 @@ RubySystem.reset
# default values
num_cores = 2
l1_cache_size_kb = 32768
l1_cache_size_bytes = 32768
l1_cache_assoc = 8
l1_cache_latency = 1
num_memories = 2

View File

@@ -68,8 +68,8 @@ assert((protocol == "MESI_CMP_directory" or protocol == "MOESI_CMP_directory"),
require protocol+".rb"
num_cores.times { |n|
icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb*1024, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb*1024, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
iface_ports << sequencer
if protocol == "MOESI_CMP_directory"
@@ -87,7 +87,7 @@ num_cores.times { |n|
end
}
num_l2_banks.times { |n|
cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
cache = SetAssociativeCache.new("l2u_"+n.to_s, (l2_cache_size_kb*1024)/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",

View File

@@ -385,12 +385,12 @@ class DMAController < NetPort
end
class Cache < LibRubyObject
param :size_kb, Integer
param :size, Integer
param :latency, Integer
param :controller, NetPort
def initialize(obj_name, size_kb, latency)
def initialize(obj_name, size, latency)
super(obj_name)
self.size_kb = size_kb
self.size = size
self.latency = latency
# controller must be set manually by the configuration script
# because there is a cyclic dependence
@@ -406,8 +406,8 @@ class SetAssociativeCache < Cache
# when an integer, it represents the number of cycles for a hit
# when a float, it represents the cache access time in ns
# when set to "auto", libruby will attempt to find a realistic latency by running CACTI
def initialize(obj_name, size_kb, latency, assoc, replacement_policy)
super(obj_name, size_kb, latency)
def initialize(obj_name, size, latency, assoc, replacement_policy)
super(obj_name, size, latency)
self.assoc = assoc
self.replacement_policy = replacement_policy
end
@@ -415,7 +415,7 @@ class SetAssociativeCache < Cache
def calculateLatency()
if self.latency == "auto"
cacti_args = Array.new()
cacti_args << (self.size_kb*1024) << RubySystem.block_size_bytes << self.assoc
cacti_args << (self.size*1024) << RubySystem.block_size_bytes << self.assoc
cacti_args << 1 << 0 << 0 << 0 << 1
cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8
cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1

View File

@@ -58,11 +58,8 @@ RubyRequestType string_to_RubyRequestType(std::string str)
ostream& operator<<(ostream& out, const RubyRequestType& obj)
{
cerr << "in op" << endl;
out << RubyRequestType_to_string(obj);
cerr << "flushing" << endl;
out << flush;
cerr << "done" << endl;
return out;
}

View File

@@ -21,9 +21,8 @@ public:
virtual const string toString() const = 0; // returns text version of controller type
virtual const string getName() const = 0; // return instance name
virtual const MachineType getMachineType() const = 0;
virtual void set_atomic(Address addr) = 0;
virtual void clear_atomic(Address addr) = 0;
virtual void reset_atomics() = 0;
virtual void blockOnQueue(Address, MessageBuffer*) = 0;
virtual void unblock(Address) = 0;
virtual void print(ostream & out) const = 0;
virtual void printStats(ostream & out) const = 0;

View File

@@ -66,8 +66,6 @@ void Sequencer::init(const vector<string> & argv)
m_instCache_ptr = NULL;
m_dataCache_ptr = NULL;
m_controller = NULL;
m_atomic_reads = 0;
m_atomic_writes = 0;
for (size_t i=0; i<argv.size(); i+=2) {
if ( argv[i] == "controller") {
m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
@@ -285,15 +283,15 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
(request->ruby_request.type == RubyRequestType_RMW_Write) ||
(request->ruby_request.type == RubyRequestType_Locked_Read) ||
(request->ruby_request.type == RubyRequestType_Locked_Write));
// POLINA: the assumption is that atomics are only on data cache and not instruction cache
if (request->ruby_request.type == RubyRequestType_Locked_Read) {
m_dataCache_ptr->setLocked(address, m_version);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Read) {
m_controller->set_atomic(address);
m_controller->blockOnQueue(address, m_mandatory_q_ptr);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
m_controller->clear_atomic(address);
m_controller->unblock(address);
}
hitCallback(request, data);
@@ -438,42 +436,12 @@ void Sequencer::issueRequest(const RubyRequest& request) {
CacheRequestType ctype;
switch(request.type) {
case RubyRequestType_IFETCH:
if (m_atomic_reads > 0 && m_atomic_writes == 0) {
m_controller->reset_atomics();
m_atomic_writes = 0;
m_atomic_reads = 0;
}
else if (m_atomic_writes > 0) {
assert(m_atomic_reads > m_atomic_writes);
cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
assert(false);
}
ctype = CacheRequestType_IFETCH;
break;
case RubyRequestType_LD:
if (m_atomic_reads > 0 && m_atomic_writes == 0) {
m_controller->reset_atomics();
m_atomic_writes = 0;
m_atomic_reads = 0;
}
else if (m_atomic_writes > 0) {
assert(m_atomic_reads > m_atomic_writes);
cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
assert(false);
}
ctype = CacheRequestType_LD;
break;
case RubyRequestType_ST:
if (m_atomic_reads > 0 && m_atomic_writes == 0) {
m_controller->reset_atomics();
m_atomic_writes = 0;
m_atomic_reads = 0;
}
else if (m_atomic_writes > 0) {
assert(m_atomic_reads > m_atomic_writes);
cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but only received: " << m_atomic_writes << endl;
assert(false);
}
ctype = CacheRequestType_ST;
break;
case RubyRequestType_Locked_Read:
@@ -481,18 +449,9 @@ void Sequencer::issueRequest(const RubyRequest& request) {
ctype = CacheRequestType_ATOMIC;
break;
case RubyRequestType_RMW_Read:
assert(m_atomic_writes == 0);
m_atomic_reads++;
ctype = CacheRequestType_ATOMIC;
break;
case RubyRequestType_RMW_Write:
assert(m_atomic_reads > 0);
assert(m_atomic_writes < m_atomic_reads);
m_atomic_writes++;
if (m_atomic_reads == m_atomic_writes) {
m_atomic_reads = 0;
m_atomic_writes = 0;
}
ctype = CacheRequestType_ATOMIC;
break;
default:

View File

@@ -128,8 +128,6 @@ private:
// Global outstanding request count, across all request tables
int m_outstanding_count;
bool m_deadlock_check_scheduled;
int m_atomic_reads;
int m_atomic_writes;
int m_store_waiting_on_load_cycles;
int m_store_waiting_on_store_cycles;

View File

@@ -29,8 +29,8 @@ from slicc.ast.StatementAST import StatementAST
from slicc.symbols import Var
class PeekStatementAST(StatementAST):
def __init__(self, slicc, queue_name, type_ast, statements, method):
super(PeekStatementAST, self).__init__(slicc)
def __init__(self, slicc, queue_name, type_ast, pairs, statements, method):
super(PeekStatementAST, self).__init__(slicc, pairs)
self.queue_name = queue_name
self.type_ast = type_ast
@@ -63,6 +63,17 @@ class PeekStatementAST(StatementAST):
in_msg_ptr = dynamic_cast<const $mtid *>(($qcode).${{self.method}}());
assert(in_msg_ptr != NULL);
''')
if self.pairs.has_key("block_on"):
address_field = self.pairs['block_on']
code('''
if ( (m_is_blocking == true) &&
(m_block_map.count(in_msg_ptr->m_$address_field) == 1) ) {
if (m_block_map[in_msg_ptr->m_$address_field] != &$qcode) {
$qcode.delayHead();
continue;
}
}
''')
# The other statements
self.statements.generate(code, return_type)

View File

@@ -514,8 +514,8 @@ class SLICC(Grammar):
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[6], p[8])
def p_statement__peek(self, p):
"statement : PEEK '(' var ',' type ')' statements"
p[0] = ast.PeekStatementAST(self, p[3], p[5], p[7], "peek")
"statement : PEEK '(' var ',' type pairs ')' statements"
p[0] = ast.PeekStatementAST(self, p[3], p[5], p[6], p[8], "peek")
def p_statement__copy_head(self, p):
"statement : COPY_HEAD '(' var ',' var pairs ')' SEMI"

View File

@@ -185,11 +185,10 @@ public:
void print(ostream& out) const;
void printConfig(ostream& out) const;
void wakeup();
void set_atomic(Address addr);
void clear_atomic(Address addr);
void reset_atomics();
void printStats(ostream& out) const { s_profiler.dumpStats(out); }
void clearStats() { s_profiler.clearStats(); }
void blockOnQueue(Address addr, MessageBuffer* port);
void unblock(Address addr);
private:
''')
@@ -198,16 +197,6 @@ private:
for param in self.config_parameters:
code('int m_${{param.ident}};')
if self.ident == "L1Cache":
code('''
int servicing_atomic;
Address locked_read_request1;
Address locked_read_request2;
Address locked_read_request3;
Address locked_read_request4;
int read_counter;
''')
code('''
int m_number_of_TBEs;
@@ -221,6 +210,8 @@ map< string, string > m_cfg;
NodeID m_version;
Network* m_net_ptr;
MachineID m_machineID;
bool m_is_blocking;
map< Address, MessageBuffer* > m_block_map;
${ident}_Profiler s_profiler;
static int m_num_controllers;
// Internal functions
@@ -297,15 +288,6 @@ $c_ident::$c_ident(const string &name)
{
''')
code.indent()
if self.ident == "L1Cache":
code('''
servicing_atomic = 0;
locked_read_request1 = Address(-1);
locked_read_request2 = Address(-1);
locked_read_request3 = Address(-1);
locked_read_request4 = Address(-1);
read_counter = 0;
''')
code('m_num_controllers++;')
for var in self.objects:
@@ -515,6 +497,17 @@ const MachineType $c_ident::getMachineType() const{
return MachineType_${ident};
}
void $c_ident::blockOnQueue(Address addr, MessageBuffer* port) {
m_is_blocking = true;
m_block_map[addr] = port;
}
void $c_ident::unblock(Address addr) {
m_block_map.erase(addr);
if (m_block_map.size() == 0) {
m_is_blocking = false;
}
}
void $c_ident::print(ostream& out) const { out << "[$c_ident " << m_version << "]"; }
void $c_ident::printConfig(ostream& out) const {
@@ -580,54 +573,12 @@ void ${ident}_Controller::wakeup()
# InPorts
#
# Find the position of the mandatory queue in the vector so
# that we can print it out first
mandatory_q = None
if self.ident == "L1Cache":
for i,port in enumerate(self.in_ports):
assert "c_code_in_port" in port
if str(port).find("mandatoryQueue_in") >= 0:
assert mandatory_q is None
mandatory_q = port
assert mandatory_q is not None
# print out the mandatory queue here
port = mandatory_q
code('// ${ident}InPort $port')
output = port["c_code_in_port"]
code('$output')
for port in self.in_ports:
# don't print out mandatory queue twice
if port == mandatory_q:
continue
if ident == "L1Cache":
if (str(port).find("forwardRequestNetwork_in") >= 0 or str(port).find("requestNetwork_in") >= 0 or str(port).find("requestIntraChipL1Network_in") >= 0):
code('''
bool postpone = false;
if ((((*m_L1Cache_forwardToCache_ptr)).isReady())) {
const RequestMsg* in_msg_ptr;
in_msg_ptr = dynamic_cast<const RequestMsg*>(((*m_L1Cache_forwardToCache_ptr)).peek());
if ((((servicing_atomic > 0) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address || locked_read_request1 == ((*in_msg_ptr)).m_Address)))) {
postpone = true;
}
}
if (!postpone) {
''')
code.indent()
code('// ${ident}InPort $port')
code('${{port["c_code_in_port"]}}')
code.dedent()
if ident == "L1Cache":
if (str(port).find("forwardRequestNetwork_in") >= 0 or str(port).find("requestNetwork_in") >= 0 or str(port).find("requestIntraChipL1Network_in") >= 0):
code.dedent()
code('}')
code.indent()
code('')
code.dedent()
@@ -638,83 +589,6 @@ if (!postpone) {
}
''')
if self.ident == "L1Cache":
code('''
void ${ident}_Controller::set_atomic(Address addr)
{
servicing_atomic++;
switch (servicing_atomic) {
case(1):
assert(locked_read_request1 == Address(-1));
locked_read_request1 = addr;
break;
case(2):
assert(locked_read_request2 == Address(-1));
locked_read_request2 = addr;
break;
case(3):
assert(locked_read_request3 == Address(-1));
locked_read_request3 = addr;
break;
case(4):
assert(locked_read_request4 == Address(-1));
locked_read_request4 = addr;
break;
default:
assert(0);
}
}
void ${ident}_Controller::clear_atomic(Address addr)
{
assert(servicing_atomic > 0);
if (addr == locked_read_request1)
locked_read_request1 = Address(-1);
else if (addr == locked_read_request2)
locked_read_request2 = Address(-1);
else if (addr == locked_read_request3)
locked_read_request3 = Address(-1);
else if (addr == locked_read_request4)
locked_read_request4 = Address(-1);
else
assert(0);
servicing_atomic--;
}
void ${ident}_Controller::reset_atomics()
{
servicing_atomic = 0;
locked_read_request1 = Address(-1);
locked_read_request2 = Address(-1);
locked_read_request3 = Address(-1);
locked_read_request4 = Address(-1);
}
''')
else:
code('''
void ${ident}_Controller::reset_atomics()
{
assert(0);
}
void ${ident}_Controller::set_atomic(Address addr)
{
assert(0);
}
void ${ident}_Controller::clear_atomic(Address addr)
{
assert(0);
}
''')
code.write(path, "%s_Wakeup.cc" % self.ident)
def printCSwitch(self, path):