diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py
index ee8d570498..1e95964a40 100644
--- a/configs/ruby/GPU_VIPER.py
+++ b/configs/ruby/GPU_VIPER.py
@@ -489,6 +489,9 @@ def define_options(parser):
         help="Size of the mandatory queue in the GPU scalar "
         "cache controller",
     )
+    parser.add_argument(
+        "--glc-atomic-latency", type=int, default=1, help="GLC Atomic Latency"
+    )
 
 
 def construct_dirs(options, system, ruby_system, network):
@@ -875,6 +878,7 @@ def construct_tccs(options, system, ruby_system, network):
         tcc_cntrl.create(options, ruby_system, system)
         tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
         tcc_cntrl.l2_response_latency = options.TCC_latency
+        tcc_cntrl.glc_atomic_latency = options.glc_atomic_latency
         tcc_cntrl_nodes.append(tcc_cntrl)
         tcc_cntrl.WB = options.WB_L2
         tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc
index 9a6500978e..9a4439a538 100644
--- a/src/mem/ruby/network/MessageBuffer.cc
+++ b/src/mem/ruby/network/MessageBuffer.cc
@@ -62,7 +62,8 @@ MessageBuffer::MessageBuffer(const Params &p)
       m_max_dequeue_rate(p.max_dequeue_rate), m_dequeues_this_cy(0),
       m_time_last_time_size_checked(0), m_time_last_time_enqueue(0),
       m_time_last_time_pop(0),
-      m_last_arrival_time(0), m_strict_fifo(p.ordered),
+      m_last_arrival_time(0), m_last_message_strict_fifo_bypassed(false),
+      m_strict_fifo(p.ordered),
       m_randomization(p.randomization),
       m_allow_zero_latency(p.allow_zero_latency),
       m_routing_priority(p.routing_priority),
@@ -214,7 +215,8 @@ random_time()
 }
 
 void
-MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
+MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta,
+                       bool bypassStrictFIFO)
 {
     // record current time incase we have a pop that also adjusts my size
     if (m_time_last_time_enqueue < current_time) {
@@ -252,7 +254,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
 
     // Check the arrival time
     assert(arrival_time >= current_time);
-    if (m_strict_fifo) {
+    if (m_strict_fifo &&
+        !(bypassStrictFIFO || m_last_message_strict_fifo_bypassed)) {
         if (arrival_time < m_last_arrival_time) {
             panic("FIFO ordering violated: %s name: %s current time: %d "
                   "delta: %d arrival_time: %d last arrival_time: %d\n",
@@ -266,6 +269,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
         m_last_arrival_time = arrival_time;
     }
 
+    m_last_message_strict_fifo_bypassed = bypassStrictFIFO;
+
    // compute the delay cycles and set enqueue time
     Message* msg_ptr = message.get();
     assert(msg_ptr != NULL);
diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh
index 279599340a..03a0454433 100644
--- a/src/mem/ruby/network/MessageBuffer.hh
+++ b/src/mem/ruby/network/MessageBuffer.hh
@@ -123,7 +123,8 @@ class MessageBuffer : public SimObject
 
     const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); }
 
-    void enqueue(MsgPtr message, Tick curTime, Tick delta);
+    void enqueue(MsgPtr message, Tick curTime, Tick delta,
+                 bool bypassStrictFIFO = false);
 
     // Defer enqueueing a message to a later cycle by putting it aside and not
    // enqueueing it in this cycle
@@ -271,6 +272,9 @@ class MessageBuffer : public SimObject
 
     uint64_t m_msg_counter;
     int m_priority_rank;
+
+    bool m_last_message_strict_fifo_bypassed;
+
     const bool m_strict_fifo;
     const MessageRandomization m_randomization;
     const bool m_allow_zero_latency;
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index a83bee0fa5..31fc484973 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -36,6 +36,7 @@ machine(MachineType:TCC, "TCC Cache")
   bool WB; /*is this cache Writeback?*/
   Cycles l2_request_latency := 50;
   Cycles l2_response_latency := 20;
+  Cycles glc_atomic_latency := 0;
 
   // From the TCPs or SQCs
   MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
@@ -510,7 +511,7 @@ machine(MachineType:TCC, "TCC Cache")
 
   action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
     peek(coreRequestNetwork_in, CPURequestMsg) {
-      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency + glc_atomic_latency, true) {
         out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:TDSysResp;
         out_msg.Destination.clear();
diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py
index 148cc3a223..e9bda61911 100644
--- a/src/mem/slicc/ast/EnqueueStatementAST.py
+++ b/src/mem/slicc/ast/EnqueueStatementAST.py
@@ -31,12 +31,21 @@ from slicc.symbols import Var
 
 
 class EnqueueStatementAST(StatementAST):
-    def __init__(self, slicc, queue_name, type_ast, lexpr, statements):
+    def __init__(
+        self,
+        slicc,
+        queue_name,
+        type_ast,
+        lexpr,
+        bypass_strict_fifo,
+        statements,
+    ):
         super().__init__(slicc)
 
         self.queue_name = queue_name
         self.type_ast = type_ast
         self.latexpr = lexpr
+        self.bypass_strict_fifo = bypass_strict_fifo
         self.statements = statements
 
     def __repr__(self):
@@ -76,10 +85,17 @@
 
         if self.latexpr != None:
             ret_type, rcode = self.latexpr.inline(True)
-            code(
-                "(${{self.queue_name.var.code}}).enqueue("
-                "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));"
-            )
+            if self.bypass_strict_fifo is not None:
+                bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False)
+                code(
+                    "(${{self.queue_name.var.code}}).enqueue("
+                    "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);"
+                )
+            else:
+                code(
+                    "(${{self.queue_name.var.code}}).enqueue("
+                    "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));"
+                )
         else:
             code(
                 "(${{self.queue_name.var.code}}).enqueue(out_msg, "
diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py
index 155eb07f7a..d84eab211c 100644
--- a/src/mem/slicc/parser.py
+++ b/src/mem/slicc/parser.py
@@ -633,11 +633,15 @@ class SLICC(Grammar):
     def p_statement__enqueue(self, p):
         "statement : ENQUEUE '(' var ',' type ')' statements"
-        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, p[7])
+        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, None, p[7])
 
     def p_statement__enqueue_latency(self, p):
         "statement : ENQUEUE '(' var ',' type ',' expr ')' statements"
-        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9])
+        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], None, p[9])
+
+    def p_statement__enqueue_latency_bypass_strict_fifo(self, p):
+        "statement : ENQUEUE '(' var ',' type ',' expr ',' expr ')' statements"
+        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9], p[11])
 
     def p_statement__defer_enqueueing(self, p):
         "statement : DEFER_ENQUEUEING '(' var ',' type ')' statements"
         p[0] = ast.DeferEnqueueingStatementAST(self, p[3], p[5], p[7])