From 984499329d5b70c1841c39e545878303b4a5ddd5 Mon Sep 17 00:00:00 2001 From: Daniel Kouchekinia Date: Sun, 23 Jul 2023 13:57:06 -0700 Subject: [PATCH] mem-ruby,configs: Add GLC Atomic Latency VIPER Parameter (#110) Added a GLC atomic latency parameter (glc-atomic-latency) used when enqueueing response messages for atomics performed directly in the TCC. This latency is applied in addition to the L2 response latency (TCC_latency) and represents the latency of performing an atomic within the L2. With this change, the TCC response queue will receive enqueues with varying latencies, as GLC atomic responses will carry the added GLC atomic latency while data responses will not. Because the queue enforces strict FIFO ordering (which these mixed latencies would violate), this change also adds an optional parameter, bypassStrictFIFO, to the SLICC enqueue function, which allows overriding the strict FIFO requirement for individual messages on a case-by-case basis. This parameter is currently used only in the TCC's atomic response enqueue call. 
Change-Id: Iabd52cbd2c0cc385c1fb3fe7bcd0cc64bdb40aac --- configs/ruby/GPU_VIPER.py | 4 ++++ src/mem/ruby/network/MessageBuffer.cc | 11 +++++++--- src/mem/ruby/network/MessageBuffer.hh | 6 +++++- src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 3 ++- src/mem/slicc/ast/EnqueueStatementAST.py | 26 +++++++++++++++++++----- src/mem/slicc/parser.py | 8 ++++++-- 6 files changed, 46 insertions(+), 12 deletions(-) diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index ee8d570498..1e95964a40 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -489,6 +489,9 @@ def define_options(parser): help="Size of the mandatory queue in the GPU scalar " "cache controller", ) + parser.add_argument( + "--glc-atomic-latency", type=int, default=1, help="GLC Atomic Latency" + ) def construct_dirs(options, system, ruby_system, network): @@ -875,6 +878,7 @@ def construct_tccs(options, system, ruby_system, network): tcc_cntrl.create(options, ruby_system, system) tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency tcc_cntrl.l2_response_latency = options.TCC_latency + tcc_cntrl.glc_atomic_latency = options.glc_atomic_latency tcc_cntrl_nodes.append(tcc_cntrl) tcc_cntrl.WB = options.WB_L2 tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc index 9a6500978e..9a4439a538 100644 --- a/src/mem/ruby/network/MessageBuffer.cc +++ b/src/mem/ruby/network/MessageBuffer.cc @@ -62,7 +62,8 @@ MessageBuffer::MessageBuffer(const Params &p) m_max_dequeue_rate(p.max_dequeue_rate), m_dequeues_this_cy(0), m_time_last_time_size_checked(0), m_time_last_time_enqueue(0), m_time_last_time_pop(0), - m_last_arrival_time(0), m_strict_fifo(p.ordered), + m_last_arrival_time(0), m_last_message_strict_fifo_bypassed(false), + m_strict_fifo(p.ordered), m_randomization(p.randomization), m_allow_zero_latency(p.allow_zero_latency), m_routing_priority(p.routing_priority), @@ -214,7 +215,8 @@ 
random_time() } void -MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) +MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool bypassStrictFIFO) { // record current time incase we have a pop that also adjusts my size if (m_time_last_time_enqueue < current_time) { @@ -252,7 +254,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) // Check the arrival time assert(arrival_time >= current_time); - if (m_strict_fifo) { + if (m_strict_fifo && + !(bypassStrictFIFO || m_last_message_strict_fifo_bypassed)) { if (arrival_time < m_last_arrival_time) { panic("FIFO ordering violated: %s name: %s current time: %d " "delta: %d arrival_time: %d last arrival_time: %d\n", @@ -266,6 +269,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) m_last_arrival_time = arrival_time; } + m_last_message_strict_fifo_bypassed = bypassStrictFIFO; + // compute the delay cycles and set enqueue time Message* msg_ptr = message.get(); assert(msg_ptr != NULL); diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh index 279599340a..03a0454433 100644 --- a/src/mem/ruby/network/MessageBuffer.hh +++ b/src/mem/ruby/network/MessageBuffer.hh @@ -123,7 +123,8 @@ class MessageBuffer : public SimObject const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); } - void enqueue(MsgPtr message, Tick curTime, Tick delta); + void enqueue(MsgPtr message, Tick curTime, Tick delta, + bool bypassStrictFIFO = false); // Defer enqueueing a message to a later cycle by putting it aside and not // enqueueing it in this cycle @@ -271,6 +272,9 @@ class MessageBuffer : public SimObject uint64_t m_msg_counter; int m_priority_rank; + + bool m_last_message_strict_fifo_bypassed; + const bool m_strict_fifo; const MessageRandomization m_randomization; const bool m_allow_zero_latency; diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index a83bee0fa5..31fc484973 
100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -36,6 +36,7 @@ machine(MachineType:TCC, "TCC Cache") bool WB; /*is this cache Writeback?*/ Cycles l2_request_latency := 50; Cycles l2_response_latency := 20; + Cycles glc_atomic_latency := 0; // From the TCPs or SQCs MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request"; @@ -510,7 +511,7 @@ machine(MachineType:TCC, "TCC Cache") action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") { peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency + glc_atomic_latency, true) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:TDSysResp; out_msg.Destination.clear(); diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py index 148cc3a223..e9bda61911 100644 --- a/src/mem/slicc/ast/EnqueueStatementAST.py +++ b/src/mem/slicc/ast/EnqueueStatementAST.py @@ -31,12 +31,21 @@ from slicc.symbols import Var class EnqueueStatementAST(StatementAST): - def __init__(self, slicc, queue_name, type_ast, lexpr, statements): + def __init__( + self, + slicc, + queue_name, + type_ast, + lexpr, + bypass_strict_fifo, + statements, + ): super().__init__(slicc) self.queue_name = queue_name self.type_ast = type_ast self.latexpr = lexpr + self.bypass_strict_fifo = bypass_strict_fifo self.statements = statements def __repr__(self): @@ -76,10 +85,17 @@ class EnqueueStatementAST(StatementAST): if self.latexpr != None: ret_type, rcode = self.latexpr.inline(True) - code( - "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));" - ) + if self.bypass_strict_fifo != None: + bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False) + code( + "(${{self.queue_name.var.code}}).enqueue(" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), 
$bypass_strict_fifo_code);" + ) + else: + code( + "(${{self.queue_name.var.code}}).enqueue(" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));" + ) else: code( "(${{self.queue_name.var.code}}).enqueue(out_msg, " diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py index 155eb07f7a..d84eab211c 100644 --- a/src/mem/slicc/parser.py +++ b/src/mem/slicc/parser.py @@ -633,11 +633,15 @@ class SLICC(Grammar): def p_statement__enqueue(self, p): "statement : ENQUEUE '(' var ',' type ')' statements" - p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, p[7]) + p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, None, p[7]) def p_statement__enqueue_latency(self, p): "statement : ENQUEUE '(' var ',' type ',' expr ')' statements" - p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9]) + p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], None, p[9]) + + def p_statement__enqueue_latency_bypass_strict_fifo(self, p): + "statement : ENQUEUE '(' var ',' type ',' expr ',' expr ')' statements" + p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9], p[11]) def p_statement__defer_enqueueing(self, p): "statement : DEFER_ENQUEUEING '(' var ',' type ')' statements"