mem-ruby,configs: Add GLC Atomic Latency VIPER Parameter (#110)

Added a GLC atomic latency parameter (glc-atomic-latency) that is applied
when enqueueing response messages for atomics performed directly in the
TCC. This latency is added on top of the L2 response latency
(TCC_latency) and represents the cost of performing an atomic within the
L2.

With this change, the TCC response queue will receive enqueues with
varying latencies, since GLC atomic responses carry the added GLC atomic
latency while data responses do not. Because the queue enforces strict
FIFO ordering (which these mixed latencies would violate), this change
also adds an optional parameter, bypassStrictFIFO, to the SLICC enqueue
function, which allows the strict FIFO requirement to be overridden for
individual messages on a case-by-case basis. This parameter is currently
used only in the TCC's atomic response enqueue call.

Change-Id: Iabd52cbd2c0cc385c1fb3fe7bcd0cc64bdb40aac
This commit is contained in:
Daniel Kouchekinia
2023-07-23 13:57:06 -07:00
committed by GitHub
parent 0dd4334622
commit 984499329d
6 changed files with 46 additions and 12 deletions

View File

@@ -489,6 +489,9 @@ def define_options(parser):
help="Size of the mandatory queue in the GPU scalar "
"cache controller",
)
parser.add_argument(
"--glc-atomic-latency", type=int, default=1, help="GLC Atomic Latency"
)
def construct_dirs(options, system, ruby_system, network):
@@ -875,6 +878,7 @@ def construct_tccs(options, system, ruby_system, network):
tcc_cntrl.create(options, ruby_system, system)
tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
tcc_cntrl.l2_response_latency = options.TCC_latency
tcc_cntrl.glc_atomic_latency = options.glc_atomic_latency
tcc_cntrl_nodes.append(tcc_cntrl)
tcc_cntrl.WB = options.WB_L2
tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units

View File

@@ -62,7 +62,8 @@ MessageBuffer::MessageBuffer(const Params &p)
m_max_dequeue_rate(p.max_dequeue_rate), m_dequeues_this_cy(0),
m_time_last_time_size_checked(0),
m_time_last_time_enqueue(0), m_time_last_time_pop(0),
m_last_arrival_time(0), m_strict_fifo(p.ordered),
m_last_arrival_time(0), m_last_message_strict_fifo_bypassed(false),
m_strict_fifo(p.ordered),
m_randomization(p.randomization),
m_allow_zero_latency(p.allow_zero_latency),
m_routing_priority(p.routing_priority),
@@ -214,7 +215,8 @@ random_time()
}
void
MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta,
bool bypassStrictFIFO)
{
// record current time incase we have a pop that also adjusts my size
if (m_time_last_time_enqueue < current_time) {
@@ -252,7 +254,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
// Check the arrival time
assert(arrival_time >= current_time);
if (m_strict_fifo) {
if (m_strict_fifo &&
!(bypassStrictFIFO || m_last_message_strict_fifo_bypassed)) {
if (arrival_time < m_last_arrival_time) {
panic("FIFO ordering violated: %s name: %s current time: %d "
"delta: %d arrival_time: %d last arrival_time: %d\n",
@@ -266,6 +269,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
m_last_arrival_time = arrival_time;
}
m_last_message_strict_fifo_bypassed = bypassStrictFIFO;
// compute the delay cycles and set enqueue time
Message* msg_ptr = message.get();
assert(msg_ptr != NULL);

View File

@@ -123,7 +123,8 @@ class MessageBuffer : public SimObject
const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); }
void enqueue(MsgPtr message, Tick curTime, Tick delta);
void enqueue(MsgPtr message, Tick curTime, Tick delta,
bool bypassStrictFIFO = false);
// Defer enqueueing a message to a later cycle by putting it aside and not
// enqueueing it in this cycle
@@ -271,6 +272,9 @@ class MessageBuffer : public SimObject
uint64_t m_msg_counter;
int m_priority_rank;
bool m_last_message_strict_fifo_bypassed;
const bool m_strict_fifo;
const MessageRandomization m_randomization;
const bool m_allow_zero_latency;

View File

@@ -36,6 +36,7 @@ machine(MachineType:TCC, "TCC Cache")
bool WB; /*is this cache Writeback?*/
Cycles l2_request_latency := 50;
Cycles l2_response_latency := 20;
Cycles glc_atomic_latency := 0;
// From the TCPs or SQCs
MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
@@ -510,7 +511,7 @@ machine(MachineType:TCC, "TCC Cache")
action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
peek(coreRequestNetwork_in, CPURequestMsg) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency + glc_atomic_latency, true) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysResp;
out_msg.Destination.clear();

View File

@@ -31,12 +31,21 @@ from slicc.symbols import Var
class EnqueueStatementAST(StatementAST):
def __init__(self, slicc, queue_name, type_ast, lexpr, statements):
def __init__(
self,
slicc,
queue_name,
type_ast,
lexpr,
bypass_strict_fifo,
statements,
):
super().__init__(slicc)
self.queue_name = queue_name
self.type_ast = type_ast
self.latexpr = lexpr
self.bypass_strict_fifo = bypass_strict_fifo
self.statements = statements
def __repr__(self):
@@ -76,10 +85,17 @@ class EnqueueStatementAST(StatementAST):
if self.latexpr != None:
ret_type, rcode = self.latexpr.inline(True)
code(
"(${{self.queue_name.var.code}}).enqueue("
"out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));"
)
if self.bypass_strict_fifo != None:
bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False)
code(
"(${{self.queue_name.var.code}}).enqueue("
"out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);"
)
else:
code(
"(${{self.queue_name.var.code}}).enqueue("
"out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));"
)
else:
code(
"(${{self.queue_name.var.code}}).enqueue(out_msg, "

View File

@@ -633,11 +633,15 @@ class SLICC(Grammar):
def p_statement__enqueue(self, p):
"statement : ENQUEUE '(' var ',' type ')' statements"
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, p[7])
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, None, p[7])
def p_statement__enqueue_latency(self, p):
"statement : ENQUEUE '(' var ',' type ',' expr ')' statements"
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9])
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], None, p[9])
def p_statement__enqueue_latency_bypass_strict_fifo(self, p):
"statement : ENQUEUE '(' var ',' type ',' expr ',' expr ')' statements"
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9], p[11])
def p_statement__defer_enqueueing(self, p):
"statement : DEFER_ENQUEUEING '(' var ',' type ')' statements"