diff --git a/build_opts/ARM_MESI_Three_Level_HTM b/build_opts/ARM_MESI_Three_Level_HTM new file mode 100644 index 0000000000..fd7c164940 --- /dev/null +++ b/build_opts/ARM_MESI_Three_Level_HTM @@ -0,0 +1,6 @@ +# Copyright (c) 2019 ARM Limited +# All rights reserved. + +TARGET_ISA = 'arm' +CPU_MODELS = 'TimingSimpleCPU,O3CPU' +PROTOCOL = 'MESI_Three_Level_HTM' diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py new file mode 100644 index 0000000000..89ca93c61d --- /dev/null +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -0,0 +1,337 @@ +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# Copyright (c) 2009,2015 Advanced Micro Devices, Inc. +# Copyright (c) 2013 Mark D. Hill and David A. Wood +# Copyright (c) 2020 ARM Limited +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
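For context: files under build_opts name gem5 build configurations, and by the usual convention the file name also selects the build directory, so a binary with this protocol compiled in is typically produced with something like scons build/ARM_MESI_Three_Level_HTM/gem5.opt (the CPU models and Ruby protocol listed above are baked in at compile time; the exact invocation depends on the checkout).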
+ +import math +import m5 +from m5.objects import * +from m5.defines import buildEnv +from .Ruby import create_topology, create_directories +from .Ruby import send_evicts +from common import FileSystemConfig + +# +# Declare caches used by the protocol +# +class L0Cache(RubyCache): pass +class L1Cache(RubyCache): pass +class L2Cache(RubyCache): pass + +def define_options(parser): + parser.add_option("--num-clusters", type = "int", default = 1, + help = "number of clusters in the design; each cluster has a\ + shared L2 cache that is private to that cluster") + parser.add_option("--l0i_size", type="string", default="4096B") + parser.add_option("--l0d_size", type="string", default="4096B") + parser.add_option("--l0i_assoc", type="int", default=1) + parser.add_option("--l0d_assoc", type="int", default=1) + parser.add_option("--l0_transitions_per_cycle", type="int", default=32) + parser.add_option("--l1_transitions_per_cycle", type="int", default=32) + parser.add_option("--l2_transitions_per_cycle", type="int", default=4) + parser.add_option("--enable-prefetch", action="store_true", default=False,\ + help="Enable Ruby hardware prefetcher") + return + +def create_system(options, full_system, system, dma_ports, bootmem, + ruby_system): + + if buildEnv['PROTOCOL'] != 'MESI_Three_Level_HTM': + fatal("This script requires the MESI_Three_Level_HTM protocol to be\ + built.") + + cpu_sequencers = [] + + # + # The ruby network creation expects the list of nodes in the system to be + # consistent with the NetDest list. Therefore the l1 controller nodes + # must be listed before the directory nodes and directory nodes before + # dma nodes, etc. + # + l0_cntrl_nodes = [] + l1_cntrl_nodes = [] + l2_cntrl_nodes = [] + dma_cntrl_nodes = [] + + assert (options.num_cpus % options.num_clusters == 0) + # integer division so the result can be used with range() under Python 3 + num_cpus_per_cluster = options.num_cpus // options.num_clusters + + assert (options.num_l2caches % options.num_clusters == 0) + num_l2caches_per_cluster = options.num_l2caches // options.num_clusters + + l2_bits = int(math.log(num_l2caches_per_cluster, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + l2_index_start = block_size_bits + l2_bits + + # + # Must create the individual controllers before the network to ensure the + # controller constructors are called before the network constructor + # + for i in range(options.num_clusters): + for j in range(num_cpus_per_cluster): + # + # First create the Ruby objects associated with this cpu + # + l0i_cache = L0Cache(size = options.l0i_size, + assoc = options.l0i_assoc, + is_icache = True, + start_index_bit = block_size_bits, + replacement_policy = LRURP()) + + l0d_cache = L0Cache(size = options.l0d_size, + assoc = options.l0d_assoc, + is_icache = False, + start_index_bit = block_size_bits, + replacement_policy = LRURP()) + + # the ruby random tester reuses num_cpus to specify the + # number of cpu ports connected to the tester object, which + # is stored in system.cpu. because there is only ever one + # tester object, num_cpus is not necessarily equal to the + # size of system.cpu; therefore if len(system.cpu) == 1 + # we use system.cpu[0] to set the clk_domain, thereby ensuring + # we don't index off the end of the cpu list. 
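(An aside on the address-bit arithmetic in the hunk above: the low block_size_bits of an address are the line offset, the next l2_bits select the L2 bank within a cluster, and L2 set indexing starts above both at l2_index_start. A standalone sketch with illustrative values, not part of the patch:

    import math

    # Illustrative values: 8 CPUs in 2 clusters, 8 L2 banks, 64 B lines.
    num_cpus, num_clusters = 8, 2
    num_l2caches, cacheline_size = 8, 64

    num_cpus_per_cluster = num_cpus // num_clusters          # 4
    num_l2caches_per_cluster = num_l2caches // num_clusters  # 4

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))     # 2 bank-select bits
    block_size_bits = int(math.log(cacheline_size, 2))       # 6 line-offset bits
    l2_index_start = block_size_bits + l2_bits               # set index starts at bit 8

    print(l2_bits, block_size_bits, l2_index_start)          # -> 2 6 8

The hunk continues below.)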
+ if len(system.cpu) == 1: + clk_domain = system.cpu[0].clk_domain + else: + clk_domain = system.cpu[i].clk_domain + + # Ruby prefetcher + prefetcher = RubyPrefetcher( + num_streams=16, + unit_filter = 256, + nonunit_filter = 256, + train_misses = 5, + num_startup_pfs = 4, + cross_page = True + ) + + l0_cntrl = L0Cache_Controller( + version = i * num_cpus_per_cluster + j, + Icache = l0i_cache, Dcache = l0d_cache, + transitions_per_cycle = options.l0_transitions_per_cycle, + prefetcher = prefetcher, + enable_prefetch = options.enable_prefetch, + send_evictions = send_evicts(options), + clk_domain = clk_domain, + ruby_system = ruby_system) + + cpu_seq = RubyHTMSequencer(version = i * num_cpus_per_cluster + j, + icache = l0i_cache, + clk_domain = clk_domain, + dcache = l0d_cache, + ruby_system = ruby_system) + + l0_cntrl.sequencer = cpu_seq + + l1_cache = L1Cache(size = options.l1d_size, + assoc = options.l1d_assoc, + start_index_bit = block_size_bits, + is_icache = False) + + l1_cntrl = L1Cache_Controller( + version = i * num_cpus_per_cluster + j, + cache = l1_cache, l2_select_num_bits = l2_bits, + cluster_id = i, + transitions_per_cycle = options.l1_transitions_per_cycle, + ruby_system = ruby_system) + + exec("ruby_system.l0_cntrl%d = l0_cntrl" + % ( i * num_cpus_per_cluster + j)) + exec("ruby_system.l1_cntrl%d = l1_cntrl" + % ( i * num_cpus_per_cluster + j)) + + # + # Add controllers and sequencers to the appropriate lists + # + cpu_sequencers.append(cpu_seq) + l0_cntrl_nodes.append(l0_cntrl) + l1_cntrl_nodes.append(l1_cntrl) + + # Connect the L0 and L1 controllers + l0_cntrl.prefetchQueue = MessageBuffer() + l0_cntrl.mandatoryQueue = MessageBuffer() + l0_cntrl.bufferToL1 = MessageBuffer(ordered = True) + l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1 + l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True) + l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1 + + # Connect the L1 controllers and the network + l1_cntrl.requestToL2 = MessageBuffer() + l1_cntrl.requestToL2.master = ruby_system.network.slave + l1_cntrl.responseToL2 = MessageBuffer() + l1_cntrl.responseToL2.master = ruby_system.network.slave + l1_cntrl.unblockToL2 = MessageBuffer() + l1_cntrl.unblockToL2.master = ruby_system.network.slave + + l1_cntrl.requestFromL2 = MessageBuffer() + l1_cntrl.requestFromL2.slave = ruby_system.network.master + l1_cntrl.responseFromL2 = MessageBuffer() + l1_cntrl.responseFromL2.slave = ruby_system.network.master + + + for j in range(num_l2caches_per_cluster): + l2_cache = L2Cache(size = options.l2_size, + assoc = options.l2_assoc, + start_index_bit = l2_index_start) + + l2_cntrl = L2Cache_Controller( + version = i * num_l2caches_per_cluster + j, + L2cache = l2_cache, cluster_id = i, + transitions_per_cycle =\ + options.l2_transitions_per_cycle, + ruby_system = ruby_system) + + exec("ruby_system.l2_cntrl%d = l2_cntrl" + % (i * num_l2caches_per_cluster + j)) + l2_cntrl_nodes.append(l2_cntrl) + + # Connect the L2 controllers and the network + l2_cntrl.DirRequestFromL2Cache = MessageBuffer() + l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave + l2_cntrl.L1RequestFromL2Cache = MessageBuffer() + l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave + l2_cntrl.responseFromL2Cache = MessageBuffer() + l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave + + l2_cntrl.unblockToL2Cache = MessageBuffer() + l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master + l2_cntrl.L1RequestToL2Cache = MessageBuffer() + l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master + 
l2_cntrl.responseToL2Cache = MessageBuffer() + l2_cntrl.responseToL2Cache.slave = ruby_system.network.master + + # Run each of the ruby memory controllers at a ratio of the frequency of + # the ruby system + # clk_divider value is a fix to pass regression. + ruby_system.memctrl_clk_domain = DerivedClockDomain( + clk_domain = ruby_system.clk_domain, clk_divider = 3) + + mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories( + options, bootmem, ruby_system, system) + dir_cntrl_nodes = mem_dir_cntrl_nodes[:] + if rom_dir_cntrl_node is not None: + dir_cntrl_nodes.append(rom_dir_cntrl_node) + for dir_cntrl in dir_cntrl_nodes: + # Connect the directory controllers and the network + dir_cntrl.requestToDir = MessageBuffer() + dir_cntrl.requestToDir.slave = ruby_system.network.master + dir_cntrl.responseToDir = MessageBuffer() + dir_cntrl.responseToDir.slave = ruby_system.network.master + dir_cntrl.responseFromDir = MessageBuffer() + dir_cntrl.responseFromDir.master = ruby_system.network.slave + dir_cntrl.requestToMemory = MessageBuffer() + dir_cntrl.responseFromMemory = MessageBuffer() + + for i, dma_port in enumerate(dma_ports): + # + # Create the Ruby objects associated with the dma controller + # + dma_seq = DMASequencer(version = i, ruby_system = ruby_system) + + dma_cntrl = DMA_Controller(version = i, + dma_sequencer = dma_seq, + transitions_per_cycle = options.ports, + ruby_system = ruby_system) + + exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) + exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i) + dma_cntrl_nodes.append(dma_cntrl) + + # Connect the dma controller to the network + dma_cntrl.mandatoryQueue = MessageBuffer() + dma_cntrl.responseFromDir = MessageBuffer(ordered = True) + dma_cntrl.responseFromDir.slave = ruby_system.network.master + dma_cntrl.requestToDir = MessageBuffer() + dma_cntrl.requestToDir.master = ruby_system.network.slave + + all_cntrls = l0_cntrl_nodes + \ + l1_cntrl_nodes + \ + l2_cntrl_nodes + \ + dir_cntrl_nodes + \ + dma_cntrl_nodes + + # Create the io controller and the sequencer + if full_system: + io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system) + ruby_system._io_port = io_seq + io_controller = DMA_Controller(version = len(dma_ports), + dma_sequencer = io_seq, + ruby_system = ruby_system) + ruby_system.io_controller = io_controller + + # Connect the dma controller to the network + io_controller.mandatoryQueue = MessageBuffer() + io_controller.responseFromDir = MessageBuffer(ordered = True) + io_controller.responseFromDir.slave = ruby_system.network.master + io_controller.requestToDir = MessageBuffer() + io_controller.requestToDir.master = ruby_system.network.slave + + all_cntrls = all_cntrls + [io_controller] + # Register configuration with filesystem + else: + for i in range(options.num_clusters): + for j in range(num_cpus_per_cluster): + FileSystemConfig.register_cpu(physical_package_id = 0, + core_siblings = range(options.num_cpus), + core_id = i*num_cpus_per_cluster+j, + thread_siblings = []) + + FileSystemConfig.register_cache(level = 0, + idu_type = 'Instruction', + size = options.l0i_size, + line_size =\ + options.cacheline_size, + assoc = 1, + cpus = [i*num_cpus_per_cluster+j]) + FileSystemConfig.register_cache(level = 0, + idu_type = 'Data', + size = options.l0d_size, + line_size =\ + options.cacheline_size, + assoc = 1, + cpus = [i*num_cpus_per_cluster+j]) + + FileSystemConfig.register_cache(level = 1, + idu_type = 'Unified', + size = options.l1d_size, + line_size = options.cacheline_size, + assoc = 
options.l1d_assoc, + cpus = [i*num_cpus_per_cluster+j]) + + FileSystemConfig.register_cache(level = 2, + idu_type = 'Unified', + size = str(MemorySize(options.l2_size) * \ + num_l2caches_per_cluster)+'B', + line_size = options.cacheline_size, + assoc = options.l2_assoc, + cpus = [n for n in range(i*num_cpus_per_cluster, \ + (i+1)*num_cpus_per_cluster)]) + + ruby_system.network.number_of_virtual_networks = 3 + topology = create_topology(all_cntrls, options) + return (cpu_sequencers, mem_dir_cntrl_nodes, topology) diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript index fc90f8a624..b31416d64e 100644 --- a/src/mem/ruby/SConscript +++ b/src/mem/ruby/SConscript @@ -138,4 +138,5 @@ MakeInclude('system/Sequencer.hh') # <# include "mem/ruby/protocol/header.hh"> in any file # generated_dir = Dir('protocol') MakeInclude('system/GPUCoalescer.hh') +MakeInclude('system/HTMSequencer.hh') MakeInclude('system/VIPERCoalescer.hh') diff --git a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm index 0f5a7ac2b1..7344ca1d2b 100644 --- a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm @@ -130,6 +130,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") Ack_all, desc="Last ack for processor"; WB_Ack, desc="Ack for replacement"; + + // hardware transactional memory + L0_DataCopy, desc="Data Block from L0. Should remain in M state."; + + // L0 cache received the invalidation message and has + // sent a NAK (because of htm abort) saying that the data + // in L1 is the latest value. + L0_DataNak, desc="L0 received INV message, specifies its data is also stale"; } // TYPES @@ -361,6 +369,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") if(in_msg.Class == CoherenceClass:INV_DATA) { trigger(Event:L0_DataAck, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:NAK) { + trigger(Event:L0_DataNak, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:PUTX_COPY) { + trigger(Event:L0_DataCopy, in_msg.addr, cache_entry, tbe); } else if (in_msg.Class == CoherenceClass:INV_ACK) { trigger(Event:L0_Ack, in_msg.addr, cache_entry, tbe); } else { @@ -808,18 +820,6 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") k_popL0RequestQueue; } - transition(EE, Load, E) { - hh_xdata_to_l0; - uu_profileHit; - k_popL0RequestQueue; - } - - transition(MM, Load, M) { - hh_xdata_to_l0; - uu_profileHit; - k_popL0RequestQueue; - } - transition({S,SS}, Store, SM) { i_allocateTBE; c_issueUPGRADE; @@ -1034,7 +1034,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } - transition(SM, L0_Invalidate_Else, SM_IL0) { + transition(SM, {Inv,L0_Invalidate_Else}, SM_IL0) { forward_eviction_to_L0_else; } @@ -1093,4 +1093,55 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") transition({S_IL0, M_IL0, E_IL0, MM_IL0}, {Inv, Fwd_GETX, Fwd_GETS}) { z2_stallAndWaitL2Queue; } + + // hardware transactional memory + + // If a transaction has aborted, the L0 could re-request + // data which is in E or EE state in L1. + transition({EE,E}, Load, E) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // If a transaction has aborted, the L0 could re-request + // data which is in M or MM state in L1. + transition({MM,M}, Load, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // If a transaction has aborted, the L0 could re-request + // data which is in M state in L1. 
+ transition({E,M}, Store, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // A transaction may have tried to modify a cache block in M state with + // non-speculative (pre-transactional) data. This needs to be copied + // to the L1 before any further modifications occur at the L0. + transition({M,E}, L0_DataCopy, M) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition({M_IL0, E_IL0}, L0_DataCopy, M_IL0) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + // A NAK from the L0 means that the L0 invalidated its + // modified line (due to an abort), so it is necessary + // to use the L1's correct version instead + transition({M_IL0, E_IL0}, L0_DataNak, MM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(I, L1_Replacement) { + ff_deallocateCacheBlock; + } } diff --git a/src/mem/ruby/protocol/MESI_Three_Level-msg.sm b/src/mem/ruby/protocol/MESI_Three_Level-msg.sm index e738b8a127..a16e374fd6 100644 --- a/src/mem/ruby/protocol/MESI_Three_Level-msg.sm +++ b/src/mem/ruby/protocol/MESI_Three_Level-msg.sm @@ -48,6 +48,7 @@ enumeration(CoherenceClass, desc="...") { INV_OWN, desc="Invalidate (own)"; INV_ELSE, desc="Invalidate (else)"; PUTX, desc="Replacement message"; + PUTX_COPY, desc="Data block to be copied into L1; L0 will remain in M state"; WB_ACK, desc="Writeback ack"; @@ -59,6 +60,7 @@ enumeration(CoherenceClass, desc="...") { DATA, desc="Data block for L1 cache in S state"; DATA_EXCLUSIVE, desc="Data block for L1 cache in M/E state"; ACK, desc="Generic invalidate ack"; + NAK, desc="Used by L0 to tell L1 that it cannot provide the latest value"; // This is a special case in which the L1 cache lost permissions to the // shared block before it got the data. So the L0 cache can use the data diff --git a/src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm b/src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm new file mode 100644 index 0000000000..a6e4fafb5e --- /dev/null +++ b/src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm @@ -0,0 +1,1606 @@ +/* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(MachineType:L0Cache, "MESI Directory L0 Cache") + : HTMSequencer * sequencer; + CacheMemory * Icache; + CacheMemory * Dcache; + Cycles request_latency := 2; + Cycles response_latency := 2; + bool send_evictions; + + RubyPrefetcher * prefetcher; + bool enable_prefetch := "False"; + + // From this node's L0 cache to the network + MessageBuffer * bufferToL1, network="To"; + + // To this node's L0 cache FROM the network + MessageBuffer * bufferFromL1, network="From"; + + // Message queue between this controller and the processor + MessageBuffer * mandatoryQueue; + + // Request Buffer for prefetches + MessageBuffer * prefetchQueue; +{ + // hardware transactional memory + bool htmTransactionalState, default="false"; + bool htmFailed, default="false"; + int htmUid, default=0; + HtmFailedInCacheReason htmFailedRc, default=HtmFailedInCacheReason_NO_FAIL; + + // STATES + state_declaration(State, desc="Cache states", default="L0Cache_State_I") { + // Base states + + // The cache entry has not been allocated. + I, AccessPermission:Invalid, desc="Invalid"; + + // The cache entry is in shared mode. The processor can read this entry + // but it cannot write to it. + S, AccessPermission:Read_Only, desc="Shared"; + + // The cache entry is in exclusive mode. The processor can read this + // entry. It can write to this entry without informing the directory. + // On writing, the entry moves to M state. + E, AccessPermission:Read_Only, desc="Exclusive"; + + // The processor has read and write permissions on this entry. + M, AccessPermission:Read_Write, desc="Modified"; + + // Transient States + + // The cache controller has requested an instruction. It will be stored + // in the shared state so that the processor can read it. + Inst_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet"; + + // The cache controller has requested that this entry be fetched in + // shared state so that the processor can read it. + IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet"; + + // The cache controller has requested that this entry be fetched in + // modify state so that the processor can read/write it. + IM, AccessPermission:Busy, desc="Issued GETX, have not seen response yet"; + + // The cache controller had read permission over the entry. But now the + // processor needs to write to it. So, the controller has requested + // write permission. 
+ SM, AccessPermission:Read_Only, desc="Issued GETX, have not seen response yet"; + + // Transient states in which block is being prefetched + PF_Inst_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet"; + PF_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet"; + PF_IE, AccessPermission:Busy, desc="Issued GETX, have not seen response yet"; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + // Events from core + Load, desc="Load request from the home processor"; + Ifetch, desc="I-fetch request from the home processor"; + Store, desc="Store request from the home processor"; + + // invalidations from L1 (due to self or other core) + InvOwn, desc="Invalidate request from L1 (own)"; + InvElse, desc="Invalidate request from L1 (else)"; + + // internally generated request + L0_Replacement, desc="L0 Replacement", format="!r"; + + // requests forwarded from other processors + Fwd_GETX, desc="GETX from other processor"; + Fwd_GETS, desc="GETS from other processor"; + Fwd_GET_INSTR, desc="GET_INSTR from other processor"; + + // data arrives from L1 cache + Data, desc="Data for processor"; + Data_Exclusive, desc="Data for processor"; + Data_Stale, desc="Data for processor, but not for storage"; + + Ack, desc="Ack for processor"; + + WB_Ack, desc="Ack for replacement"; + + Failed_SC, desc="Store conditional request that will fail"; + + // Prefetch events (generated by prefetcher) + PF_L0_Replacement, desc="L0 Replacement caused by prefetcher", format="!pr"; + PF_Load, desc="Load request from prefetcher"; + PF_Ifetch, desc="Instruction fetch request from prefetcher"; + PF_Store, desc="Exclusive load request from prefetcher"; + PF_Bad_Addr, desc="Throw away prefetch request due to bad address generation"; + + // hardware transactional memory + HTM_Abort, desc="Abort HTM transaction and rollback cache to pre-transactional state"; + HTM_Start, desc="Place cache in HTM transactional state"; + HTM_Commit, desc="Commit speculative loads/stores and place cache in normal state"; + HTM_Cancel, desc="Fail HTM transaction explicitly without aborting"; + HTM_notifyCMD, desc="Notify core via HTM CMD that HTM transaction has failed"; + HTM_notifyLD, desc="Notify core via LD that HTM transaction has failed"; + HTM_notifyST, desc="Notify core via ST that HTM transaction has failed"; + } + + // TYPES + + // CacheEntry + structure(Entry, desc="...", interface="AbstractCacheEntry" ) { + State CacheState, desc="cache state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, default="false", desc="data is dirty"; + bool isPrefetched, default="false", desc="Set if this block was prefetched"; + + // hardware transactional memory + // read/write set state + void setInHtmReadSet(bool), external="yes"; + void setInHtmWriteSet(bool), external="yes"; + bool getInHtmReadSet(), external="yes"; + bool getInHtmWriteSet(), external="yes"; + + // override invalidateEntry + void invalidateEntry() { + CacheState := State:I; + Dirty := false; + } + } + + // TBE fields + structure(TBE, desc="...") { + Addr addr, desc="Physical address for this TBE"; + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Buffer for the data block"; + bool Dirty, default="false", desc="data is dirty"; + int pendingAcks, default="0", desc="number of pending acks"; + } + + structure(TBETable, external="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + TBE getNullEntry(); + } + + TBETable TBEs, template="", 
constructor="m_number_of_TBEs"; + + Tick clockEdge(); + Cycles ticksToCycles(Tick t); + void set_cache_entry(AbstractCacheEntry a); + void unset_cache_entry(); + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpBuffers(Addr a); + void wakeUpAllBuffers(Addr a); + void profileMsgDelay(int virtualNetworkType, Cycles c); + MachineID mapAddressToMachine(Addr addr, MachineType mtype); + + // inclusive cache returns L0 entries only + Entry getCacheEntry(Addr addr), return_by_pointer="yes" { + Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]); + if(is_valid(Dcache_entry)) { + return Dcache_entry; + } + + Entry Icache_entry := static_cast(Entry, "pointer", Icache[addr]); + return Icache_entry; + } + + Entry getDCacheEntry(Addr addr), return_by_pointer="yes" { + Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]); + return Dcache_entry; + } + + Entry getICacheEntry(Addr addr), return_by_pointer="yes" { + Entry Icache_entry := static_cast(Entry, "pointer", Icache[addr]); + return Icache_entry; + } + + State getState(TBE tbe, Entry cache_entry, Addr addr) { + assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false); + + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:I; + } + + void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { + assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false); + + // MUST CHANGE + if(is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Addr addr) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(tbe.TBEState)); + return L0Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(cache_entry.CacheState)); + return L0Cache_State_to_permission(cache_entry.CacheState); + } + + DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent); + return AccessPermission:NotPresent; + } + + void functionalRead(Addr addr, Packet *pkt) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + testAndRead(addr, tbe.DataBlk, pkt); + } else { + testAndRead(addr, getCacheEntry(addr).DataBlk, pkt); + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.DataBlk, pkt); + return num_functional_writes; + } + + num_functional_writes := num_functional_writes + + testAndWrite(addr, getCacheEntry(addr).DataBlk, pkt); + return num_functional_writes; + } + + void setAccessPermission(Entry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L0Cache_State_to_permission(state)); + } + } + + Event mandatory_request_type_to_event(RubyRequestType type) { + if (type == RubyRequestType:LD) { + return Event:Load; + } else if (type == RubyRequestType:IFETCH) { + return Event:Ifetch; + } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC) + || (type == RubyRequestType:Store_Conditional)) { + return Event:Store; + } else { + error("Invalid RubyRequestType"); + } + } + + Event prefetch_request_type_to_event(RubyRequestType type) { + if (type == RubyRequestType:LD) { + return Event:PF_Load; + } else if (type == RubyRequestType:IFETCH) { + return 
Event:PF_Ifetch; + } else if (type == RubyRequestType:ST) { + return Event:PF_Store; + } else { + error("Invalid RubyRequestType"); + } + } + + int getPendingAcks(TBE tbe) { + return tbe.pendingAcks; + } + + out_port(requestNetwork_out, CoherenceMsg, bufferToL1); + out_port(optionalQueue_out, RubyRequest, prefetchQueue); + + void enqueuePrefetch(Addr address, RubyRequestType type) { + enqueue(optionalQueue_out, RubyRequest, 1) { + out_msg.LineAddress := address; + out_msg.Type := type; + out_msg.Prefetch := PrefetchBit:Yes; + out_msg.AccessMode := RubyAccessMode:Supervisor; + } + } + + // Prefetch queue between the controller and the prefetcher + // As per Spracklen et al. (HPCA 2005), the prefetch queue should be + // implemented as a LIFO structure. The structure would allow for fast + // searches of all entries in the queue, not just the head msg. All + // msgs in the structure can be invalidated if a demand miss matches. + in_port(optionalQueue_in, RubyRequest, prefetchQueue, desc="...", rank = 2) { + if (optionalQueue_in.isReady(clockEdge())) { + peek(optionalQueue_in, RubyRequest) { + // first check for valid address + MachineID mid := mapAddressToMachine(in_msg.LineAddress, MachineType:Directory); + NodeID nid := machineIDToNodeID(mid); + int nidint := IDToInt(nid); + int numDirs := machineCount(MachineType:Directory); + if (nidint >= numDirs) { + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + trigger(Event:PF_Bad_Addr, in_msg.LineAddress, cache_entry, tbe); + } else if (in_msg.Type == RubyRequestType:IFETCH) { + // Instruction Prefetch + Entry icache_entry := getICacheEntry(in_msg.LineAddress); + if (is_valid(icache_entry)) { + // The block to be prefetched is already present in the + // cache. This request will be made benign and cause the + // prefetch queue to be popped. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + icache_entry, TBEs[in_msg.LineAddress]); + } + + // Check to see if it is in the L0-D + Entry cache_entry := getDCacheEntry(in_msg.LineAddress); + if (is_valid(cache_entry)) { + // The block is in the wrong L0 cache. We should drop + // this request. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + cache_entry, TBEs[in_msg.LineAddress]); + } + + if (Icache.cacheAvail(in_msg.LineAddress)) { + // L0-I doesn't have the line, but we have space for it + // in the L0-I, so let's see if the L1 has it + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + icache_entry, TBEs[in_msg.LineAddress]); + } else { + // No room in the L0-I, so we need to make room in the L0-I + Addr addr := Icache.cacheProbe(in_msg.LineAddress); + check_on_cache_probe(optionalQueue_in, addr); + + trigger(Event:PF_L0_Replacement, addr, + getICacheEntry(addr), + TBEs[addr]); + } + } else { + // Data prefetch + Entry cache_entry := getDCacheEntry(in_msg.LineAddress); + if (is_valid(cache_entry)) { + // The block to be prefetched is already present in the + // cache. This request will be made benign and cause the + // prefetch queue to be popped. + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + cache_entry, TBEs[in_msg.LineAddress]); + } + + // Check to see if it is in the L0-I + Entry icache_entry := getICacheEntry(in_msg.LineAddress); + if (is_valid(icache_entry)) { + // The block is in the wrong L0. Just drop the prefetch + // request. 
+ trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + icache_entry, TBEs[in_msg.LineAddress]); + } + + if (Dcache.cacheAvail(in_msg.LineAddress)) { + // L0-D doesn't have the line, but we have space for it in + // the L0-D, so let's see if the L1 has it + trigger(prefetch_request_type_to_event(in_msg.Type), + in_msg.LineAddress, + cache_entry, TBEs[in_msg.LineAddress]); + } else { + // No room in the L0-D, so we need to make room in the L0-D + Addr addr := Dcache.cacheProbe(in_msg.LineAddress); + check_on_cache_probe(optionalQueue_in, addr); + + trigger(Event:PF_L0_Replacement, addr, + getDCacheEntry(addr), + TBEs[addr]); + } + } + } + } + + // Messages for this L0 cache from the L1 cache + in_port(messgeBuffer_in, CoherenceMsg, bufferFromL1, rank = 1) { + if (messgeBuffer_in.isReady(clockEdge())) { + peek(messgeBuffer_in, CoherenceMsg, block_on="addr") { + assert(in_msg.Dest == machineID); + + Entry cache_entry := getCacheEntry(in_msg.addr); + TBE tbe := TBEs[in_msg.addr]; + + if(in_msg.Class == CoherenceClass:DATA_EXCLUSIVE) { + trigger(Event:Data_Exclusive, in_msg.addr, cache_entry, tbe); + } else if(in_msg.Class == CoherenceClass:DATA) { + trigger(Event:Data, in_msg.addr, cache_entry, tbe); + } else if(in_msg.Class == CoherenceClass:STALE_DATA) { + trigger(Event:Data_Stale, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:ACK) { + trigger(Event:Ack, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:WB_ACK) { + trigger(Event:WB_Ack, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:INV_OWN) { + trigger(Event:InvOwn, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:INV_ELSE) { + trigger(Event:InvElse, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:GETX || + in_msg.Class == CoherenceClass:UPGRADE) { + // upgrade transforms to GETX due to race + trigger(Event:Fwd_GETX, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:GETS) { + trigger(Event:Fwd_GETS, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:GET_INSTR) { + trigger(Event:Fwd_GET_INSTR, in_msg.addr, cache_entry, tbe); + } else { + error("Invalid forwarded request type"); + } + } + } + } + + // Mandatory queue between the node's CPU and its L0 caches + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) { + if (mandatoryQueue_in.isReady(clockEdge())) { + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + + // hardware transactional memory support begins here + + // If this cache controller is in a transactional state/mode, + // ensure that its failure status is something recognisable. + if (htmFailed) { + assert(htmFailedRc == HtmFailedInCacheReason:FAIL_SELF || + htmFailedRc == HtmFailedInCacheReason:FAIL_REMOTE || + htmFailedRc == HtmFailedInCacheReason:FAIL_OTHER); + } + + // HTM_Start commands set a new htmUid + // This is used for debugging and sanity checks + if (in_msg.Type == RubyRequestType:HTM_Start) { + assert (htmUid != in_msg.htmTransactionUid); + htmUid := in_msg.htmTransactionUid; + } + + // If the incoming memory request was generated within a transaction, + // ensure that the request's htmUid matches the htmUid of this + // cache controller. A mismatch here is fatal and implies there was + // a reordering that should never have taken place. 
+ if (in_msg.htmFromTransaction && + (htmUid != in_msg.htmTransactionUid)) { + DPRINTF(HtmMem, + "mandatoryQueue_in: (%u) 0x%lx mismatch between cache htmUid=%u and message htmUid=%u\n", + in_msg.Type, in_msg.LineAddress, htmUid, in_msg.htmTransactionUid); + } + + // special/rare case which hopefully won't occur + if (htmFailed && in_msg.Type == RubyRequestType:HTM_Start) { + error("cannot handle this special HTM case yet"); + } + + // The transaction is to be aborted-- + // Aborting a transaction returns the cache to a non-transactional + // state/mode, resets the read/write sets, and invalidates any + // speculatively written lines. + if (in_msg.Type == RubyRequestType:HTM_Abort) { + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + trigger(Event:HTM_Abort, in_msg.LineAddress, cache_entry, tbe); + } + // The transaction has failed but not yet aborted-- + // case 1: + // If memory request is transactional but the transaction has failed, + // it is necessary to inform the CPU of the failure. + // case 2: + // If load/store memory request is transactional and cache is not + // in transactional state, it's likely that the transaction aborted + // and Ruby is still receiving scheduled memory operations. + // The solution is to make these requests benign. + else if ((in_msg.htmFromTransaction && htmFailed) || (in_msg.htmFromTransaction && !isHtmCmdRequest(in_msg.Type) && !htmTransactionalState)) { + if (isHtmCmdRequest(in_msg.Type)) { + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + trigger(Event:HTM_notifyCMD, in_msg.LineAddress, cache_entry, tbe); + } else if (isDataReadRequest(in_msg.Type)) { + Entry cache_entry := getDCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs[in_msg.LineAddress]; + trigger(Event:HTM_notifyLD, in_msg.LineAddress, cache_entry, tbe); + } else if (isWriteRequest(in_msg.Type)) { + Entry cache_entry := getDCacheEntry(in_msg.LineAddress); + TBE tbe := TBEs[in_msg.LineAddress]; + trigger(Event:HTM_notifyST, in_msg.LineAddress, cache_entry, tbe); + } else { + error("unknown message type"); + } + } + // The transaction has not failed and this is + // one of three HTM commands-- + // (1) start a transaction + // (2) commit a transaction + // (3) cancel/fail a transaction (but don't yet abort it) + else if (isHtmCmdRequest(in_msg.Type) && in_msg.Type != RubyRequestType:HTM_Abort) { + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + if (in_msg.Type == RubyRequestType:HTM_Start) { + DPRINTF(HtmMem, + "mandatoryQueue_in: Starting htm transaction htmUid=%u\n", + htmUid); + trigger(Event:HTM_Start, in_msg.LineAddress, cache_entry, tbe); + } else if (in_msg.Type == RubyRequestType:HTM_Commit) { + DPRINTF(HtmMem, + "mandatoryQueue_in: Committing transaction htmUid=%d\n", + htmUid); + trigger(Event:HTM_Commit, in_msg.LineAddress, cache_entry, tbe); + } else if (in_msg.Type == RubyRequestType:HTM_Cancel) { + DPRINTF(HtmMem, + "mandatoryQueue_in: Cancelling transaction htmUid=%d\n", + htmUid); + trigger(Event:HTM_Cancel, in_msg.LineAddress, cache_entry, tbe); + } + } + // end: hardware transactional memory + else if (in_msg.Type == RubyRequestType:IFETCH) { + // Check for data access to blocks in I-cache and ifetches to blocks in D-cache + // *** INSTRUCTION ACCESS *** + + Entry Icache_entry := getICacheEntry(in_msg.LineAddress); + if (is_valid(Icache_entry)) { + // The tag matches for the L0, so the L0 
asks the L1 for it. + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, + Icache_entry, TBEs[in_msg.LineAddress]); + } else { + + // Check to see if it is in the OTHER L0 + Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress); + if (is_valid(Dcache_entry)) { + // The block is in the wrong L0, put the request on the queue to the private L1 + trigger(Event:L0_Replacement, in_msg.LineAddress, + Dcache_entry, TBEs[in_msg.LineAddress]); + } + + if (Icache.cacheAvail(in_msg.LineAddress)) { + // L0 doesn't have the line, but we have space for it + // in the L0, so let's see if the L1 has it + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, + Icache_entry, TBEs[in_msg.LineAddress]); + } else { + // No room in the L0, so we need to make room in the L0 + // Check if the line we want to evict is not locked + Addr addr := Icache.cacheProbe(in_msg.LineAddress); + check_on_cache_probe(mandatoryQueue_in, addr); + trigger(Event:L0_Replacement, addr, + getICacheEntry(addr), + TBEs[addr]); + } + } + } else { + // *** DATA ACCESS *** + Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress); + + // early out for failed store conditionals + + if (in_msg.Type == RubyRequestType:Store_Conditional) { + if (!sequencer.llscCheckMonitor(in_msg.LineAddress)) { + trigger(Event:Failed_SC, in_msg.LineAddress, + Dcache_entry, TBEs[in_msg.LineAddress]); + } + } + + if (is_valid(Dcache_entry)) { + // The tag matches for the L0, so the L0 asks the L1 for it + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, + Dcache_entry, TBEs[in_msg.LineAddress]); + } else { + // if the request is not valid, the store conditional will fail + if (in_msg.Type == RubyRequestType:Store_Conditional) { + // if the line is not valid, it can't be locked + trigger(Event:Failed_SC, in_msg.LineAddress, + Dcache_entry, TBEs[in_msg.LineAddress]); + } else { + // Check to see if it is in the OTHER L0 + Entry Icache_entry := getICacheEntry(in_msg.LineAddress); + if (is_valid(Icache_entry)) { + // The block is in the wrong L0, put the request on the queue to the private L1 + trigger(Event:L0_Replacement, in_msg.LineAddress, + Icache_entry, TBEs[in_msg.LineAddress]); + } + + if (Dcache.cacheAvail(in_msg.LineAddress)) { + // L0 doesn't have the line, but we have space for it + // in the L0, so let's see if the L1 has it + trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, + Dcache_entry, TBEs[in_msg.LineAddress]); + } else { + // No room in the L0, so we need to make room in the L0 + // Check if the line we want to evict is not locked + Addr addr := Dcache.cacheProbe(in_msg.LineAddress); + check_on_cache_probe(mandatoryQueue_in, addr); + trigger(Event:L0_Replacement, addr, + getDCacheEntry(addr), + TBEs[addr]); + } + } + } + } + } + } + } + + // ACTIONS + action(a_issueGETS, "a", desc="Issue GETS") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.addr := address; + out_msg.Class := CoherenceClass:GETS; + out_msg.Sender := machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Dest); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(b_issueGETX, "b", desc="Issue GETX") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.addr := address; + out_msg.Class := CoherenceClass:GETX; 
+ out_msg.Sender := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Dest); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(c_issueUPGRADE, "c", desc="Issue UPGRADE") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.addr := address; + out_msg.Class := CoherenceClass:UPGRADE; + out_msg.Sender := machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Dest); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(f_sendDataToL1, "f", desc="Send data to the L1 cache") { + // hardware transactional memory + // Cannot write speculative data to L1 cache + if (cache_entry.getInHtmWriteSet()) { + // If in HTM write set then send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } else { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Class := CoherenceClass:INV_DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + cache_entry.Dirty := false; + } + } + + action(fi_sendInvAck, "fi", desc="Send invalidate ack to the L1 cache") { + peek(messgeBuffer_in, CoherenceMsg) { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.addr := address; + out_msg.Class := CoherenceClass:INV_ACK; + out_msg.Sender := machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(forward_eviction_to_cpu, "\cc", desc="Send eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + + action(g_issuePUTE, "\ge", desc="Relinquish line to the L1 cache") { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Class := CoherenceClass:PUTX; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + + action(g_issuePUTM, "\gm", desc="Send modified line to the L1 cache") { + if (!cache_entry.getInHtmWriteSet()) { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Class := CoherenceClass:PUTX; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + out_msg.DataBlk := cache_entry.DataBlk; + } + } + } + + action(h_load_hit, "hd", desc="Notify sequencer the load completed (cache hit)") { + 
assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + Dcache.setMRU(cache_entry); + sequencer.readCallback(address, cache_entry.DataBlk); + } + + action(h_ifetch_hit, "hi", desc="Notify sequencer the ifetch completed (cache hit)") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + Icache.setMRU(cache_entry); + sequencer.readCallback(address, cache_entry.DataBlk); + } + + // The action name uses a counterintuitive _hit prefix when it is only + // called due to a cache miss. It is technically now a hit after having + // serviced the miss. + action(hx_load_hit, "hxd", desc="Notify sequencer the load completed (cache miss)") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + Dcache.setMRU(cache_entry); + sequencer.readCallback(address, cache_entry.DataBlk, true); + } + + // The action name uses a counterintuitive _hit prefix when it is only + // called due to a cache miss. It is technically now a hit after having + // serviced the miss. + action(hx_ifetch_hit, "hxi", desc="Notify sequencer the ifetch completed (cache miss)") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + Icache.setMRU(cache_entry); + sequencer.readCallback(address, cache_entry.DataBlk, true); + } + + action(hh_store_hit, "\h", desc="Notify sequencer that store completed (cache hit)") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + Dcache.setMRU(cache_entry); + sequencer.writeCallback(address, cache_entry.DataBlk); + cache_entry.Dirty := true; + } + + // The action name uses a counterintuitive _hit prefix when it is only + // called due to a cache miss. It is technically now a hit after having + // serviced the miss. 
+ action(hhx_store_hit, "\hx", desc="Notify sequencer that store completed (cache miss)") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + Dcache.setMRU(cache_entry); + sequencer.writeCallback(address, cache_entry.DataBlk, true); + cache_entry.Dirty := true; + } + + action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := cache_entry.Dirty; + tbe.DataBlk := cache_entry.DataBlk; + } + + action(k_popMandatoryQueue, "k", desc="Pop mandatory queue") { + mandatoryQueue_in.dequeue(clockEdge()); + } + + action(l_popRequestQueue, "l", + desc="Pop incoming request queue and profile the delay within this virtual network") { + Tick delay := messgeBuffer_in.dequeue(clockEdge()); + profileMsgDelay(2, ticksToCycles(delay)); + } + + action(o_popIncomingResponseQueue, "o", + desc="Pop Incoming Response queue and profile the delay within this virtual network") { + Tick delay := messgeBuffer_in.dequeue(clockEdge()); + profileMsgDelay(1, ticksToCycles(delay)); + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(u_writeDataToCache, "u", desc="Write data to cache") { + peek(messgeBuffer_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + } + } + + action(u_writeInstToCache, "ui", desc="Write instruction to cache") { + peek(messgeBuffer_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + } + } + + action(ff_deallocateCacheBlock, "\f", + desc="Deallocate L0 cache block.") { + if (Dcache.isTagPresent(address)) { + Dcache.deallocate(address); + } else { + Icache.deallocate(address); + } + unset_cache_entry(); + } + + action(oo_allocateDCacheBlock, "\o", desc="Set L0 D-cache tag equal to tag of block B") { + if (is_invalid(cache_entry)) { + set_cache_entry(Dcache.allocate(address, new Entry)); + } + } + + action(pp_allocateICacheBlock, "\p", desc="Set L0 I-cache tag equal to tag of block B") { + if (is_invalid(cache_entry)) { + set_cache_entry(Icache.allocate(address, new Entry)); + } + } + + action(z_stallAndWaitMandatoryQueue, "\z", desc="Stall cpu request queue") { + stall_and_wait(mandatoryQueue_in, address); + } + + action(kd_wakeUpDependents, "kd", desc="Wake-up dependents") { + wakeUpAllBuffers(address); + } + + action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") { + ++Icache.demand_misses; + } + + action(uu_profileInstHit, "\uih", desc="Profile the demand hit") { + ++Icache.demand_hits; + } + + action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") { + ++Dcache.demand_misses; + } + + action(uu_profileDataHit, "\udh", desc="Profile the demand hit") { + ++Dcache.demand_hits; + } + + // store conditionals + + action(hhc_storec_fail, "\hc", + desc="Notify sequencer that store conditional failed") { + sequencer.writeCallbackScFail(address, cache_entry.DataBlk); + } + + // prefetching + + action(pa_issuePfGETS, "pa", desc="Issue prefetch GETS") { + peek(optionalQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.addr := address; + out_msg.Class := CoherenceClass:GETS; + out_msg.Sender := machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Dest); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Prefetch := 
in_msg.Prefetch; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(pb_issuePfGETX, "pb", desc="Issue prefetch GETX") { + peek(optionalQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.addr := address; + out_msg.Class := CoherenceClass:GETX; + out_msg.Sender := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Dest); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Prefetch := in_msg.Prefetch; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(pq_popPrefetchQueue, "\pq", desc="Pop the prefetch request queue") { + optionalQueue_in.dequeue(clockEdge()); + } + + action(mp_markPrefetched, "mp", desc="Write data from response queue to cache") { + assert(is_valid(cache_entry)); + cache_entry.isPrefetched := true; + } + + action(po_observeMiss, "\po", desc="Inform the prefetcher about a cache miss") { + peek(mandatoryQueue_in, RubyRequest) { + if (enable_prefetch) { + prefetcher.observeMiss(in_msg.LineAddress, in_msg.Type); + } + } + } + + action(ppm_observePfMiss, "\ppm", + desc="Inform the prefetcher about a cache miss with in-flight prefetch") { + peek(mandatoryQueue_in, RubyRequest) { + prefetcher.observePfMiss(in_msg.LineAddress); + } + } + + action(pph_observePfHit, "\pph", + desc="Inform the prefetcher if a cache hit was the result of a prefetch") { + peek(mandatoryQueue_in, RubyRequest) { + if (cache_entry.isPrefetched) { + prefetcher.observePfHit(in_msg.LineAddress); + cache_entry.isPrefetched := false; + } + } + } + + action(z_stallAndWaitOptionalQueue, "\pz", desc="recycle prefetch request queue") { + stall_and_wait(optionalQueue_in, address); + } + + // hardware transactional memory + + action(hars_htmAddToReadSet, "\hars", desc="add to HTM read set") { + peek(mandatoryQueue_in, RubyRequest) { + if (htmTransactionalState && in_msg.htmFromTransaction) { + assert(!htmFailed); + if (!cache_entry.getInHtmReadSet()) { + DPRINTF(HtmMem, + "Adding 0x%lx to transactional read set htmUid=%u.\n", + address, htmUid); + cache_entry.setInHtmReadSet(true); + } + } + } + } + + action(haws_htmAddToWriteSet, "\haws", desc="add to HTM write set") { + peek(mandatoryQueue_in, RubyRequest) { + if (htmTransactionalState && in_msg.htmFromTransaction) { + assert(!htmFailed); + assert(!((cache_entry.getInHtmWriteSet() == false) && + (cache_entry.CacheState == State:IM))); + assert(!((cache_entry.getInHtmWriteSet() == false) && + (cache_entry.CacheState == State:SM))); + // ON DEMAND write-back + // if modified and not in write set, + // write back and retain M state + if((cache_entry.CacheState == State:M) && + !cache_entry.getInHtmWriteSet()) { + // code copied from issuePUTX + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Class := CoherenceClass:PUTX_COPY; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + if (!cache_entry.getInHtmWriteSet()) { + DPRINTF(HtmMem, + "Adding 0x%lx to transactional write set htmUid=%u.\n", + address, htmUid); + cache_entry.setInHtmWriteSet(true); + } + } + } + } + + action(hfts_htmFailTransactionSize, "\hfts^", + desc="Fail transaction due to cache associativity/capacity 
conflict") { + if (htmTransactionalState && + (cache_entry.getInHtmReadSet() || cache_entry.getInHtmWriteSet())) { + DPRINTF(HtmMem, + "Failure of a transaction due to cache associativity/capacity: rs=%s, ws=%s, addr=0x%lx, htmUid=%u\n", + cache_entry.getInHtmReadSet(), cache_entry.getInHtmWriteSet(), + address, htmUid); + htmFailed := true; + htmFailedRc := HtmFailedInCacheReason:FAIL_SELF; + } + } + + action(hftm_htmFailTransactionMem, "\hftm^", + desc="Fail transaction due to memory conflict") { + if (htmTransactionalState && + (cache_entry.getInHtmReadSet() || cache_entry.getInHtmWriteSet())) { + DPRINTF(HtmMem, + "Failure of a transaction due to memory conflict: rs=%s, ws=%s, addr=0x%lx, htmUid=%u\n", + cache_entry.getInHtmReadSet(), cache_entry.getInHtmWriteSet(), + address, htmUid); + htmFailed := true; + htmFailedRc := HtmFailedInCacheReason:FAIL_REMOTE; + } + } + + action(hvu_htmVerifyUid, "\hvu", + desc="Ensure cache htmUid is equivalent to message htmUid") { + peek(mandatoryQueue_in, RubyRequest) { + if (htmUid != in_msg.htmTransactionUid) { + DPRINTF(HtmMem, "cache's htmUid=%u and request's htmUid=%u\n", + htmUid, in_msg.htmTransactionUid); + error("mismatch between cache's htmUid and request's htmUid"); + } + } + } + + action(hcs_htmCommandSucceed, "\hcs", + desc="Notify sequencer HTM command succeeded") { + peek(mandatoryQueue_in, RubyRequest) { + assert(is_invalid(cache_entry) && is_invalid(tbe)); + DPRINTF(RubySlicc, "htm command successful\n"); + sequencer.htmCallback(in_msg.LineAddress, + HtmCallbackMode:HTM_CMD, HtmFailedInCacheReason:NO_FAIL); + } + } + + action(hcs_htmCommandFail, "\hcf", + desc="Notify sequencer HTM command failed") { + peek(mandatoryQueue_in, RubyRequest) { + assert(is_invalid(cache_entry) && is_invalid(tbe)); + DPRINTF(RubySlicc, "htm command failure\n"); + sequencer.htmCallback(in_msg.LineAddress, + HtmCallbackMode:HTM_CMD, htmFailedRc); + } + } + + action(hcs_htmLoadFail, "\hlf", + desc="Notify sequencer HTM transactional load failed") { + peek(mandatoryQueue_in, RubyRequest) { + DPRINTF(RubySlicc, "htm transactional load failure\n"); + sequencer.htmCallback(in_msg.LineAddress, + HtmCallbackMode:LD_FAIL, htmFailedRc); + } + } + + action(hcs_htmStoreFail, "\hsf", + desc="Notify sequencer HTM transactional store failed") { + peek(mandatoryQueue_in, RubyRequest) { + DPRINTF(RubySlicc, "htm transactional store failure\n"); + sequencer.htmCallback(in_msg.LineAddress, + HtmCallbackMode:ST_FAIL, htmFailedRc); + } + } + + action(hat_htmAbortTransaction, "\hat", + desc="Abort HTM transaction and rollback cache to pre-transactional state") { + assert(is_invalid(cache_entry) && is_invalid(tbe)); + assert (htmTransactionalState); + Dcache.htmAbortTransaction(); + htmTransactionalState := false; + htmFailed := false; + sequencer.llscClearLocalMonitor(); + DPRINTF(RubySlicc, "Aborted htm transaction\n"); + } + + action(hst_htmStartTransaction, "\hst", + desc="Place cache in HTM transactional state") { + assert(is_invalid(cache_entry) && is_invalid(tbe)); + assert (!htmTransactionalState); + htmTransactionalState := true; + htmFailedRc := HtmFailedInCacheReason:NO_FAIL; + sequencer.llscClearLocalMonitor(); + DPRINTF(RubySlicc, "Started htm transaction\n"); + } + + action(hct_htmCommitTransaction, "\hct", + desc="Commit speculative loads/stores and place cache in normal state") { + assert(is_invalid(cache_entry) && is_invalid(tbe)); + assert (htmTransactionalState); + assert (!htmFailed); + Dcache.htmCommitTransaction(); + sequencer.llscClearLocalMonitor(); + 
htmTransactionalState := false; + DPRINTF(RubySlicc, "Committed htm transaction\n"); + } + + action(hcnt_htmCancelTransaction, "\hcnt", + desc="Fail HTM transaction explicitly without aborting") { + assert(is_invalid(cache_entry) && is_invalid(tbe)); + assert (htmTransactionalState); + htmFailed := true; + htmFailedRc := HtmFailedInCacheReason:FAIL_OTHER; + DPRINTF(RubySlicc, "Cancelled htm transaction\n"); + } + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // Transitions for Load/Store/Replacement/WriteBack from transient states + transition({Inst_IS, IS, IM, SM}, {Load, Ifetch, Store, L0_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + + // Transitions from Idle + transition(I, Load, IS) { + oo_allocateDCacheBlock; + i_allocateTBE; + hars_htmAddToReadSet; + a_issueGETS; + uu_profileDataMiss; + po_observeMiss; + k_popMandatoryQueue; + } + + transition(I, Ifetch, Inst_IS) { + pp_allocateICacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileInstMiss; + po_observeMiss; + k_popMandatoryQueue; + } + + transition(I, Store, IM) { + oo_allocateDCacheBlock; + i_allocateTBE; + haws_htmAddToWriteSet; + b_issueGETX; + uu_profileDataMiss; + po_observeMiss; + k_popMandatoryQueue; + } + + transition({I, Inst_IS}, {InvOwn, InvElse}) { + forward_eviction_to_cpu; + fi_sendInvAck; + l_popRequestQueue; + } + + transition({IS, IM}, InvOwn) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + fi_sendInvAck; + l_popRequestQueue; + } + + transition({IS, IM}, InvElse) { + hftm_htmFailTransactionMem; + forward_eviction_to_cpu; + fi_sendInvAck; + l_popRequestQueue; + } + + transition(SM, InvOwn, IM) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + fi_sendInvAck; + l_popRequestQueue; + } + + transition(SM, InvElse, IM) { + hftm_htmFailTransactionMem; + forward_eviction_to_cpu; + fi_sendInvAck; + l_popRequestQueue; + } + + // Transitions from Shared + transition({S,E,M}, Load) { + hars_htmAddToReadSet; + h_load_hit; + uu_profileDataHit; + pph_observePfHit; + k_popMandatoryQueue; + } + + transition({S,E,M}, Ifetch) { + h_ifetch_hit; + uu_profileInstHit; + pph_observePfHit; + k_popMandatoryQueue; + } + + transition(S, Store, SM) { + i_allocateTBE; + haws_htmAddToWriteSet; + c_issueUPGRADE; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition(S, {L0_Replacement,PF_L0_Replacement}, I) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + ff_deallocateCacheBlock; + } + + transition(S, InvOwn, I) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(S, InvElse, I) { + hftm_htmFailTransactionMem; + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + // Transitions from Exclusive + transition({E,M}, Store, M) { + haws_htmAddToWriteSet; + hh_store_hit; + uu_profileDataHit; + pph_observePfHit; + k_popMandatoryQueue; + } + + transition(E, {L0_Replacement,PF_L0_Replacement}, I) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + g_issuePUTE; + ff_deallocateCacheBlock; + } + + transition(E, {InvElse, Fwd_GETX}, I) { + hftm_htmFailTransactionMem; + // don't send data + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(E, InvOwn, I) { + hfts_htmFailTransactionSize; + // don't send data + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + +
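The Store transitions above all funnel through haws_htmAddToWriteSet, whose on-demand write-back is what makes rollback possible: a line already in M gets a copy of its current data pushed down to L1 (PUTX_COPY) before the first transactional store, while L0 keeps the line in M. A minimal, self-contained C++ sketch of that rule, using hypothetical types rather than the SLICC-generated ones:

    #include <cstdint>
    #include <functional>

    // Hypothetical stand-in for an L0 line; 'data' models the DataBlk.
    struct L0Line {
        bool modified = false;    // line is in M
        bool inWriteSet = false;  // already transactionally written
        uint64_t data = 0;
    };

    // Before the first transactional store to a Modified line, push a copy
    // of the pre-transactional data down to L1 (the PUTX_COPY message) and
    // keep the L0 copy in M; an abort can then recover the old value by
    // simply invalidating the L0 copy.
    void addToWriteSet(L0Line &line,
                       const std::function<void(uint64_t)> &writebackToL1)
    {
        if (line.modified && !line.inWriteSet)
            writebackToL1(line.data);  // L1 now holds the rollback copy
        line.inWriteSet = true;
    }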
transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) { + f_sendDataToL1; + l_popRequestQueue; + } + + // Transitions from Modified + transition(M, {L0_Replacement,PF_L0_Replacement}, I) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + g_issuePUTM; + ff_deallocateCacheBlock; + } + + transition(M, InvOwn, I) { + hfts_htmFailTransactionSize; + forward_eviction_to_cpu; + f_sendDataToL1; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(M, {InvElse, Fwd_GETX}, I) { + hftm_htmFailTransactionMem; + forward_eviction_to_cpu; + f_sendDataToL1; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) { + hftm_htmFailTransactionMem; + f_sendDataToL1; + l_popRequestQueue; + } + + transition(IS, Data, S) { + u_writeDataToCache; + hx_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, Data_Exclusive, E) { + u_writeDataToCache; + hx_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, Data_Stale, I) { + hftm_htmFailTransactionMem; + u_writeDataToCache; + forward_eviction_to_cpu; + hx_load_hit; + s_deallocateTBE; + ff_deallocateCacheBlock; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(Inst_IS, Data, S) { + u_writeInstToCache; + hx_ifetch_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(Inst_IS, Data_Exclusive, E) { + u_writeInstToCache; + hx_ifetch_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(Inst_IS, Data_Stale, I) { + u_writeInstToCache; + hx_ifetch_hit; + s_deallocateTBE; + ff_deallocateCacheBlock; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition({IM,SM}, Data_Exclusive, M) { + u_writeDataToCache; + hhx_store_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + // store conditionals + + transition({I,S,E,M}, Failed_SC) { + // IS,IM,SM don't handle store conditionals + hhc_storec_fail; + k_popMandatoryQueue; + } + + // prefetcher + + transition({Inst_IS, IS, IM, SM, PF_Inst_IS, PF_IS, PF_IE}, PF_L0_Replacement) { + z_stallAndWaitOptionalQueue; + } + + transition({PF_Inst_IS, PF_IS}, {Store, L0_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + + transition({PF_IE}, {Load, Ifetch, L0_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + + transition({S,E,M,Inst_IS,IS,IM,SM,PF_Inst_IS,PF_IS,PF_IE}, + {PF_Load, PF_Store, PF_Ifetch}) { + pq_popPrefetchQueue; + } + + transition(I, PF_Load, PF_IS) { + oo_allocateDCacheBlock; + i_allocateTBE; + pa_issuePfGETS; + pq_popPrefetchQueue; + } + + transition(PF_IS, Load, IS) { + hars_htmAddToReadSet; + uu_profileDataMiss; + ppm_observePfMiss; + k_popMandatoryQueue; + } + + transition(I, PF_Ifetch, PF_Inst_IS) { + pp_allocateICacheBlock; + i_allocateTBE; + pa_issuePfGETS; + pq_popPrefetchQueue; + } + + transition(PF_Inst_IS, Ifetch, Inst_IS) { + uu_profileInstMiss; + ppm_observePfMiss; + k_popMandatoryQueue; + } + + transition(I, PF_Store, PF_IE) { + oo_allocateDCacheBlock; + i_allocateTBE; + pb_issuePfGETX; + pq_popPrefetchQueue; + } + + transition(PF_IE, Store, IM) { + haws_htmAddToWriteSet; + uu_profileDataMiss; + ppm_observePfMiss; + k_popMandatoryQueue; + } + + transition({PF_Inst_IS, PF_IS, PF_IE}, {InvOwn, InvElse}) { + fi_sendInvAck; + l_popRequestQueue; + } + + transition(PF_IS, Data, S) { + u_writeDataToCache; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + 
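The InvOwn/InvElse event split running through the transitions above is what classifies aborts: an invalidation triggered by this core's own L1/L2 replacement hitting a read/write-set line fails the transaction with FAIL_SELF (associativity/capacity), while an invalidation performed on behalf of another requester fails it with FAIL_REMOTE. A standalone sketch of that mapping, again with hypothetical types:

    // Hypothetical stand-ins for the SLICC-generated failure codes and the
    // per-line HTM flags kept in AbstractCacheEntry.
    enum class HtmFailReason { NO_FAIL, FAIL_SELF, FAIL_REMOTE };

    struct LineHtmState {
        bool inReadSet = false;
        bool inWriteSet = false;
    };

    // Invalidating a line only dooms the transaction when the line is in
    // the read or write set; who caused it decides the reported reason.
    HtmFailReason failReasonOnInvalidation(const LineHtmState &line,
                                           bool ownReplacement /* InvOwn */)
    {
        if (!line.inReadSet && !line.inWriteSet)
            return HtmFailReason::NO_FAIL;
        return ownReplacement ? HtmFailReason::FAIL_SELF
                              : HtmFailReason::FAIL_REMOTE;
    }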
transition(PF_IS, Data_Exclusive, E) { + u_writeDataToCache; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(PF_IS, Data_Stale, I) { + u_writeDataToCache; + s_deallocateTBE; + mp_markPrefetched; + ff_deallocateCacheBlock; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(PF_Inst_IS, Data, S) { + u_writeInstToCache; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(PF_Inst_IS, Data_Exclusive, E) { + u_writeInstToCache; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(PF_IE, Data_Exclusive, E) { + u_writeDataToCache; + s_deallocateTBE; + mp_markPrefetched; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(I, PF_Bad_Addr) { + pq_popPrefetchQueue; + } + + // hardware transactional memory + + transition(I, HTM_Abort) { + hvu_htmVerifyUid; + hat_htmAbortTransaction; + hcs_htmCommandSucceed; + k_popMandatoryQueue; + } + + transition(I, HTM_Start) { + hvu_htmVerifyUid; + hst_htmStartTransaction; + hcs_htmCommandSucceed; + k_popMandatoryQueue; + } + + transition(I, HTM_Commit) { + hvu_htmVerifyUid; + hct_htmCommitTransaction; + hcs_htmCommandSucceed; + k_popMandatoryQueue; + } + + transition(I, HTM_Cancel) { + hvu_htmVerifyUid; + hcnt_htmCancelTransaction; + hcs_htmCommandSucceed; + k_popMandatoryQueue; + } + + transition(I, HTM_notifyCMD) { + hvu_htmVerifyUid; + hcf_htmCommandFail; + k_popMandatoryQueue; + } + + transition({I,S,E,M,IS,IM,SM,PF_IS,PF_IE}, HTM_notifyLD) { + hvu_htmVerifyUid; + hlf_htmLoadFail; + k_popMandatoryQueue; + } + + transition({I,S,E,M,IS,IM,SM,PF_IS,PF_IE}, HTM_notifyST) { + hvu_htmVerifyUid; + hsf_htmStoreFail; + k_popMandatoryQueue; + } + + transition(I, {L0_Replacement,PF_L0_Replacement}) { + ff_deallocateCacheBlock; + } +} diff --git a/src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc b/src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc new file mode 100644 index 0000000000..4ec31b5692 --- /dev/null +++ b/src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc @@ -0,0 +1,9 @@ +protocol "MESI_Three_Level_HTM"; +include "RubySlicc_interfaces.slicc"; +include "MESI_Two_Level-msg.sm"; +include "MESI_Three_Level-msg.sm"; +include "MESI_Three_Level_HTM-L0cache.sm"; +include "MESI_Three_Level-L1cache.sm"; +include "MESI_Two_Level-L2cache.sm"; +include "MESI_Two_Level-dir.sm"; +include "MESI_Two_Level-dma.sm"; diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index f1d17c85e7..ea61350ff3 100644 --- a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited + * Copyright (c) 2020 ARM Limited * All rights reserved.
* * The license below extends only to copyright in the software and shall @@ -167,6 +167,31 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") { Release, desc="Release operation"; Acquire, desc="Acquire opertion"; AcquireRelease, desc="Acquire and Release opertion"; + HTM_Start, desc="hardware memory transaction: begin"; + HTM_Commit, desc="hardware memory transaction: commit"; + HTM_Cancel, desc="hardware memory transaction: cancel"; + HTM_Abort, desc="hardware memory transaction: abort"; +} + +bool isWriteRequest(RubyRequestType type); +bool isDataReadRequest(RubyRequestType type); +bool isReadRequest(RubyRequestType type); +bool isHtmCmdRequest(RubyRequestType type); + +// hardware transactional memory +RubyRequestType htmCmdToRubyRequestType(Packet *pkt); + +enumeration(HtmCallbackMode, desc="...", default="HtmCallbackMode_NULL") { + HTM_CMD, desc="htm command"; + LD_FAIL, desc="htm transaction failed - inform via read"; + ST_FAIL, desc="htm transaction failed - inform via write"; +} + +enumeration(HtmFailedInCacheReason, desc="...", default="HtmFailedInCacheReason_NO_FAIL") { + NO_FAIL, desc="no failure in cache"; + FAIL_SELF, desc="failed due to the local cache's replacement policy"; + FAIL_REMOTE, desc="failed due to a remote invalidation"; + FAIL_OTHER, desc="failed due to other circumstances"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 71716f9fed..9c64732a87 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -132,12 +132,18 @@ structure (Sequencer, external = "yes") { // ll/sc support void writeCallbackScFail(Addr, DataBlock); bool llscCheckMonitor(Addr); + void llscClearLocalMonitor(); void evictionCallback(Addr); void recordRequestType(SequencerRequestType); bool checkResourceAvailable(CacheResourceType, Addr); } +structure (HTMSequencer, interface="Sequencer", external = "yes") { + // hardware transactional memory + void htmCallback(Addr, HtmCallbackMode, HtmFailedInCacheReason); +} + structure(RubyRequest, desc="...", interface="Message", external="yes") { Addr LineAddress, desc="Line address for this request"; Addr PhysicalAddress, desc="Physical address for this request"; @@ -152,6 +158,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") { int wfid, desc="Writethrough wavefront"; uint64_t instSeqNum, desc="Instruction sequence number"; PacketPtr pkt, desc="Packet associated with this request"; + bool htmFromTransaction, desc="Memory request originates within an HTM transaction"; + int htmTransactionUid, desc="Used to identify the unique HTM transaction that produced this request"; } structure(AbstractCacheEntry, primitive="yes", external = "yes") { @@ -185,6 +193,10 @@ structure (CacheMemory, external = "yes") { void recordRequestType(CacheRequestType, Addr); bool checkResourceAvailable(CacheResourceType, Addr); + // hardware transactional memory + void htmCommitTransaction(); + void htmAbortTransaction(); + int getCacheSize(); int getNumBlocks(); Addr getAddressAtIdx(int); diff --git a/src/mem/ruby/protocol/SConsopts b/src/mem/ruby/protocol/SConsopts index 4a309e6f0f..104d4257d0 100644 --- a/src/mem/ruby/protocol/SConsopts +++ b/src/mem/ruby/protocol/SConsopts @@ -38,6 +38,7 @@ all_protocols.extend([ 'MOESI_AMD_Base', 'MESI_Two_Level', 'MESI_Three_Level', + 'MESI_Three_Level_HTM', 'MI_example', 'MOESI_CMP_directory', 'MOESI_CMP_token', diff
--git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc b/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc index b98425d7eb..c669b1c437 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. * @@ -37,6 +49,8 @@ AbstractCacheEntry::AbstractCacheEntry() : ReplaceableEntry() m_Address = 0; m_locked = -1; m_last_touch_tick = 0; + m_htmInReadSet = false; + m_htmInWriteSet = false; } AbstractCacheEntry::~AbstractCacheEntry() @@ -81,3 +95,27 @@ AbstractCacheEntry::isLocked(int context) const m_Address, m_locked, context); return m_locked == context; } + +void +AbstractCacheEntry::setInHtmReadSet(bool val) +{ + m_htmInReadSet = val; +} + +void +AbstractCacheEntry::setInHtmWriteSet(bool val) +{ + m_htmInWriteSet = val; +} + +bool +AbstractCacheEntry::getInHtmReadSet() const +{ + return m_htmInReadSet; +} + +bool +AbstractCacheEntry::getInHtmWriteSet() const +{ + return m_htmInWriteSet; +} diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh index 056486c059..aa37cc52fa 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. * @@ -90,6 +102,18 @@ class AbstractCacheEntry : public ReplaceableEntry // Set the last access Tick. 
void setLastAccess(Tick tick) { m_last_touch_tick = tick; } + + // hardware transactional memory + void setInHtmReadSet(bool val); + void setInHtmWriteSet(bool val); + bool getInHtmReadSet() const; + bool getInHtmWriteSet() const; + virtual void invalidateEntry() {} + + private: + // hardware transactional memory + bool m_htmInReadSet; + bool m_htmInWriteSet; }; inline std::ostream& diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index b3e239686a..ed8dbbb0a4 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2009 Mark D. Hill and David A. Wood * All rights reserved. * @@ -57,6 +69,8 @@ class RubyRequest : public Message DataBlock m_WTData; int m_wfid; uint64_t m_instSeqNum; + bool m_htmFromTransaction; + uint64_t m_htmTransactionUid; RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, @@ -71,7 +85,9 @@ class RubyRequest : public Message m_Prefetch(_pb), data(_data), m_pkt(_pkt), - m_contextId(_core_id) + m_contextId(_core_id), + m_htmFromTransaction(false), + m_htmTransactionUid(0) { m_LineAddress = makeLineAddress(m_PhysicalAddress); } @@ -96,7 +112,9 @@ class RubyRequest : public Message m_writeMask(_wm_size,_wm_mask), m_WTData(_Data), m_wfid(_proc_id), - m_instSeqNum(_instSeqNum) + m_instSeqNum(_instSeqNum), + m_htmFromTransaction(false), + m_htmTransactionUid(0) { m_LineAddress = makeLineAddress(m_PhysicalAddress); } @@ -122,7 +140,9 @@ class RubyRequest : public Message m_writeMask(_wm_size,_wm_mask,_atomicOps), m_WTData(_Data), m_wfid(_proc_id), - m_instSeqNum(_instSeqNum) + m_instSeqNum(_instSeqNum), + m_htmFromTransaction(false), + m_htmTransactionUid(0) { m_LineAddress = makeLineAddress(m_PhysicalAddress); } diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh index e3d4f0b0bb..8ff8884aae 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
@@ -85,6 +97,75 @@ inline int max_tokens() return 1024; } +inline bool +isWriteRequest(RubyRequestType type) +{ + if ((type == RubyRequestType_ST) || + (type == RubyRequestType_ATOMIC) || + (type == RubyRequestType_RMW_Read) || + (type == RubyRequestType_RMW_Write) || + (type == RubyRequestType_Store_Conditional) || + (type == RubyRequestType_Locked_RMW_Read) || + (type == RubyRequestType_Locked_RMW_Write) || + (type == RubyRequestType_FLUSH)) { + return true; + } else { + return false; + } +} + +inline bool +isDataReadRequest(RubyRequestType type) +{ + if ((type == RubyRequestType_LD) || + (type == RubyRequestType_Load_Linked)) { + return true; + } else { + return false; + } +} + +inline bool +isReadRequest(RubyRequestType type) +{ + if (isDataReadRequest(type) || + (type == RubyRequestType_IFETCH)) { + return true; + } else { + return false; + } +} + +inline bool +isHtmCmdRequest(RubyRequestType type) +{ + if ((type == RubyRequestType_HTM_Start) || + (type == RubyRequestType_HTM_Commit) || + (type == RubyRequestType_HTM_Cancel) || + (type == RubyRequestType_HTM_Abort)) { + return true; + } else { + return false; + } +} + +inline RubyRequestType +htmCmdToRubyRequestType(const Packet *pkt) +{ + if (pkt->req->isHTMStart()) { + return RubyRequestType_HTM_Start; + } else if (pkt->req->isHTMCommit()) { + return RubyRequestType_HTM_Commit; + } else if (pkt->req->isHTMCancel()) { + return RubyRequestType_HTM_Cancel; + } else if (pkt->req->isHTMAbort()) { + return RubyRequestType_HTM_Abort; + } + else { + panic("invalid ruby packet type\n"); + } +} + /** * This function accepts an address, a data block and a packet. If the address * range for the data block contains the address which the packet needs to diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index efdd77de94..b3f2c615be 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
@@ -31,6 +43,7 @@ #include "base/intmath.hh" #include "base/logging.hh" +#include "debug/HtmMem.hh" #include "debug/RubyCache.hh" #include "debug/RubyCacheTrace.hh" #include "debug/RubyResourceStalls.hh" @@ -479,6 +492,23 @@ CacheMemory::clearLocked(Addr address) entry->clearLocked(); } +void +CacheMemory::clearLockedAll(int context) +{ + // iterate through every set and way to get a cache line + for (auto i = m_cache.begin(); i != m_cache.end(); ++i) { + std::vector<AbstractCacheEntry*> set = *i; + for (auto j = set.begin(); j != set.end(); ++j) { + AbstractCacheEntry *line = *j; + if (line && line->isLocked(context)) { + DPRINTF(RubyCache, "Clear Lock for addr: %#x\n", + line->m_Address); + line->clearLocked(); + } + } + } +} + bool CacheMemory::isLocked(Addr address, int context) { @@ -578,6 +608,34 @@ CacheMemory::regStats() .desc("number of stalls caused by data array") .flags(Stats::nozero) ; + + htmTransCommitReadSet + .init(8) + .name(name() + ".htm_transaction_committed_read_set") + .desc("read set size of a committed transaction") + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + + htmTransCommitWriteSet + .init(8) + .name(name() + ".htm_transaction_committed_write_set") + .desc("write set size of a committed transaction") + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + + htmTransAbortReadSet + .init(8) + .name(name() + ".htm_transaction_aborted_read_set") + .desc("read set size of an aborted transaction") + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + + htmTransAbortWriteSet + .init(8) + .name(name() + ".htm_transaction_aborted_write_set") + .desc("write set size of an aborted transaction") + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; } // assumption: SLICC generated files will only call this function @@ -655,3 +713,69 @@ CacheMemory::isBlockNotBusy(int64_t cache_set, int64_t loc) { return (m_cache[cache_set][loc]->m_Permission != AccessPermission_Busy); } + +/* hardware transactional memory */ + +void +CacheMemory::htmAbortTransaction() +{ + uint64_t htmReadSetSize = 0; + uint64_t htmWriteSetSize = 0; + + // iterate through every set and way to get a cache line + for (auto i = m_cache.begin(); i != m_cache.end(); ++i) + { + std::vector<AbstractCacheEntry*> set = *i; + + for (auto j = set.begin(); j != set.end(); ++j) + { + AbstractCacheEntry *line = *j; + + if (line != nullptr) { + htmReadSetSize += (line->getInHtmReadSet() ? 1 : 0); + htmWriteSetSize += (line->getInHtmWriteSet() ? 1 : 0); + if (line->getInHtmWriteSet()) { + line->invalidateEntry(); + } + line->setInHtmWriteSet(false); + line->setInHtmReadSet(false); + line->clearLocked(); + } + } + } + + htmTransAbortReadSet.sample(htmReadSetSize); + htmTransAbortWriteSet.sample(htmWriteSetSize); + DPRINTF(HtmMem, "htmAbortTransaction: read set=%u write set=%u\n", + htmReadSetSize, htmWriteSetSize); +} + +void +CacheMemory::htmCommitTransaction() +{ + uint64_t htmReadSetSize = 0; + uint64_t htmWriteSetSize = 0; + + // iterate through every set and way to get a cache line + for (auto i = m_cache.begin(); i != m_cache.end(); ++i) + { + std::vector<AbstractCacheEntry*> set = *i; + + for (auto j = set.begin(); j != set.end(); ++j) + { + AbstractCacheEntry *line = *j; + if (line != nullptr) { + htmReadSetSize += (line->getInHtmReadSet() ? 1 : 0); + htmWriteSetSize += (line->getInHtmWriteSet() ?
1 : 0); + line->setInHtmWriteSet(false); + line->setInHtmReadSet(false); + line->clearLocked(); + } + } + } + + htmTransCommitReadSet.sample(htmReadSetSize); + htmTransCommitWriteSet.sample(htmWriteSetSize); + DPRINTF(HtmMem, "htmCommitTransaction: read set=%u write set=%u\n", + htmReadSetSize, htmWriteSetSize); +} diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index 7d82d5f49e..0899e681f2 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. @@ -121,6 +133,7 @@ class CacheMemory : public SimObject // provided by the AbstractCacheEntry class. void setLocked (Addr addr, int context); void clearLocked (Addr addr); + void clearLockedAll (int context); bool isLocked (Addr addr, int context); // Print cache contents @@ -131,6 +144,10 @@ class CacheMemory : public SimObject bool checkResourceAvailable(CacheResourceType res, Addr addr); void recordRequestType(CacheRequestType requestType, Addr addr); + // hardware transactional memory + void htmAbortTransaction(); + void htmCommitTransaction(); + public: Stats::Scalar m_demand_hits; Stats::Scalar m_demand_misses; @@ -150,6 +167,12 @@ class CacheMemory : public SimObject Stats::Scalar numTagArrayStalls; Stats::Scalar numDataArrayStalls; + // hardware transactional memory + Stats::Histogram htmTransCommitReadSet; + Stats::Histogram htmTransCommitWriteSet; + Stats::Histogram htmTransAbortReadSet; + Stats::Histogram htmTransAbortWriteSet; + int getCacheSize() const { return m_cache_size; } int getCacheAssoc() const { return m_cache_assoc; } int getNumBlocks() const { return m_cache_num_sets * m_cache_assoc; } diff --git a/src/mem/ruby/system/HTMSequencer.cc b/src/mem/ruby/system/HTMSequencer.cc new file mode 100644 index 0000000000..d2cfa07515 --- /dev/null +++ b/src/mem/ruby/system/HTMSequencer.cc @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mem/ruby/system/HTMSequencer.hh" + +#include "debug/HtmMem.hh" +#include "debug/RubyPort.hh" +#include "mem/ruby/slicc_interface/RubySlicc_Util.hh" +#include "sim/system.hh" + +using namespace std; + +HtmCacheFailure +HTMSequencer::htmRetCodeConversion( + const HtmFailedInCacheReason ruby_ret_code) +{ + switch (ruby_ret_code) { + case HtmFailedInCacheReason_NO_FAIL: + return HtmCacheFailure::NO_FAIL; + case HtmFailedInCacheReason_FAIL_SELF: + return HtmCacheFailure::FAIL_SELF; + case HtmFailedInCacheReason_FAIL_REMOTE: + return HtmCacheFailure::FAIL_REMOTE; + case HtmFailedInCacheReason_FAIL_OTHER: + return HtmCacheFailure::FAIL_OTHER; + default: + panic("Invalid htm return code\n"); + } +} + +HTMSequencer * +RubyHTMSequencerParams::create() +{ + return new HTMSequencer(this); +} + +HTMSequencer::HTMSequencer(const RubyHTMSequencerParams *p) + : Sequencer(p) +{ + m_htmstart_tick = 0; + m_htmstart_instruction = 0; +} + +HTMSequencer::~HTMSequencer() +{ +} + +void +HTMSequencer::htmCallback(Addr address, + const HtmCallbackMode mode, + const HtmFailedInCacheReason htm_return_code) +{ + // mode=0: HTM command + // mode=1: transaction failed - inform via LD + // mode=2: transaction failed - inform via ST + + if (mode == HtmCallbackMode_HTM_CMD) { + SequencerRequest* request = nullptr; + + assert(m_htmCmdRequestTable.size() > 0); + + request = m_htmCmdRequestTable.front(); + m_htmCmdRequestTable.pop_front(); + + assert(isHtmCmdRequest(request->m_type)); + + PacketPtr pkt = request->pkt; + delete request; + + // valid responses have zero as the payload + uint8_t* dataptr = pkt->getPtr<uint8_t>(); + memset(dataptr, 0, pkt->getSize()); + *dataptr = (uint8_t) htm_return_code; + + // record stats + if (htm_return_code == HtmFailedInCacheReason_NO_FAIL) { + if (pkt->req->isHTMStart()) { + m_htmstart_tick = pkt->req->time(); + m_htmstart_instruction = pkt->req->getInstCount(); + DPRINTF(HtmMem, "htmStart - htmUid=%u\n", + pkt->getHtmTransactionUid()); + } else if (pkt->req->isHTMCommit()) { + Tick transaction_ticks
= pkt->req->time() - m_htmstart_tick; + Cycles transaction_cycles = ticksToCycles(transaction_ticks); + m_htm_transaction_cycles.sample(transaction_cycles); + m_htmstart_tick = 0; + Counter transaction_instructions = + pkt->req->getInstCount() - m_htmstart_instruction; + m_htm_transaction_instructions.sample( + transaction_instructions); + m_htmstart_instruction = 0; + DPRINTF(HtmMem, "htmCommit - htmUid=%u\n", + pkt->getHtmTransactionUid()); + } else if (pkt->req->isHTMAbort()) { + HtmFailureFaultCause cause = pkt->req->getHtmAbortCause(); + assert(cause != HtmFailureFaultCause::INVALID); + auto cause_idx = static_cast<int>(cause); + m_htm_transaction_abort_cause[cause_idx]++; + DPRINTF(HtmMem, "htmAbort - reason=%s - htmUid=%u\n", + htmFailureToStr(cause), + pkt->getHtmTransactionUid()); + } + } else { + DPRINTF(HtmMem, "HTM_CMD: fail - htmUid=%u\n", + pkt->getHtmTransactionUid()); + } + + rubyHtmCallback(pkt, htm_return_code); + testDrainComplete(); + } else if (mode == HtmCallbackMode_LD_FAIL || + mode == HtmCallbackMode_ST_FAIL) { + // transaction failed + assert(address == makeLineAddress(address)); + assert(m_RequestTable.find(address) != m_RequestTable.end()); + + auto &seq_req_list = m_RequestTable[address]; + while (!seq_req_list.empty()) { + SequencerRequest &request = seq_req_list.front(); + + PacketPtr pkt = request.pkt; + markRemoved(); + + // TODO - atomics + + // store conditionals should indicate failure + if (request.m_type == RubyRequestType_Store_Conditional) { + pkt->req->setExtraData(0); + } + + DPRINTF(HtmMem, "%s_FAIL: size=%d - " + "addr=0x%lx - htmUid=%d\n", + (mode == HtmCallbackMode_LD_FAIL) ? "LD" : "ST", + pkt->getSize(), + address, pkt->getHtmTransactionUid()); + + rubyHtmCallback(pkt, htm_return_code); + testDrainComplete(); + pkt = nullptr; + seq_req_list.pop_front(); + } + // free all outstanding requests corresponding to this address + if (seq_req_list.empty()) { + m_RequestTable.erase(address); + } + } else { + panic("unrecognised HTM callback mode\n"); + } +} + +void +HTMSequencer::regStats() +{ + Sequencer::regStats(); + + // hardware transactional memory + m_htm_transaction_cycles + .init(10) + .name(name() + ".htm_transaction_cycles") + .desc("number of cycles spent in an outer transaction") + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + m_htm_transaction_instructions + .init(10) + .name(name() + ".htm_transaction_instructions") + .desc("number of instructions spent in an outer transaction") + .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan) + ; + auto num_causes = static_cast<int>(HtmFailureFaultCause::NUM_CAUSES); + m_htm_transaction_abort_cause + .init(num_causes) + .name(name() + ".htm_transaction_abort_cause") + .desc("cause of htm transaction abort") + .flags(Stats::total | Stats::pdf | Stats::dist | Stats::nozero) + ; + + for (unsigned cause_idx = 0; cause_idx < num_causes; ++cause_idx) { + m_htm_transaction_abort_cause.subname( + cause_idx, + htmFailureToStr(HtmFailureFaultCause(cause_idx))); + } +} + +void +HTMSequencer::rubyHtmCallback(PacketPtr pkt, + const HtmFailedInCacheReason htm_return_code) +{ + // The packet was destined for memory and has not yet been turned + // into a response + assert(system->isMemAddr(pkt->getAddr()) || system->isDeviceMemAddr(pkt)); + assert(pkt->isRequest()); + + // First retrieve the request port from the sender State + RubyPort::SenderState *senderState = + safe_cast<RubyPort::SenderState *>(pkt->popSenderState()); + + MemSlavePort *port = safe_cast<MemSlavePort *>(senderState->port); + assert(port != nullptr); +
delete senderState; + + DPRINTF(HtmMem, "HTM callback: start=%d, commit=%d, " + "cancel=%d, rc=%d\n", + pkt->req->isHTMStart(), pkt->req->isHTMCommit(), + pkt->req->isHTMCancel(), htm_return_code); + + // turn packet around to go back to requester if response expected + if (pkt->needsResponse()) { + DPRINTF(RubyPort, "Sending packet back over port\n"); + pkt->makeHtmTransactionalReqResponse( + htmRetCodeConversion(htm_return_code)); + port->schedTimingResp(pkt, curTick()); + } else { + delete pkt; + } + + trySendRetries(); +} + +void +HTMSequencer::wakeup() +{ + Sequencer::wakeup(); + + // Check for deadlock of any of the requests + Cycles current_time = curCycle(); + + // hardware transactional memory commands + std::deque<SequencerRequest*>::iterator htm = + m_htmCmdRequestTable.begin(); + std::deque<SequencerRequest*>::iterator htm_end = + m_htmCmdRequestTable.end(); + + for (; htm != htm_end; ++htm) { + SequencerRequest* request = *htm; + if (current_time - request->issue_time < m_deadlock_threshold) + continue; + + panic("Possible Deadlock detected. Aborting!\n" + "version: %d m_htmCmdRequestTable: %d " + "current time: %u issue_time: %d difference: %d\n", + m_version, m_htmCmdRequestTable.size(), + current_time * clockPeriod(), + request->issue_time * clockPeriod(), + (current_time * clockPeriod()) - + (request->issue_time * clockPeriod())); + } +} + +bool +HTMSequencer::empty() const +{ + return Sequencer::empty() && m_htmCmdRequestTable.empty(); +} + +template <class VALUE> +std::ostream & +operator<<(ostream &out, const std::deque<VALUE> &queue) +{ + auto i = queue.begin(); + auto end = queue.end(); + + out << "["; + for (; i != end; ++i) + out << " " << *i; + out << " ]"; + + return out; +} + +void +HTMSequencer::print(ostream& out) const +{ + Sequencer::print(out); + + out << "+ [HTMSequencer: " << m_version + << ", htm cmd request table: " << m_htmCmdRequestTable + << "]"; +} + +// Insert the request in the request table. Return RequestStatus_Aliased +// if the entry was already present. +RequestStatus +HTMSequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type, + RubyRequestType secondary_type) +{ + if (isHtmCmdRequest(primary_type)) { + // for the moment, allow just one HTM cmd into the cache controller. + // Later this can be adjusted for optimization, e.g. + // back-to-back HTM_Starts. + if ((m_htmCmdRequestTable.size() > 0) && !pkt->req->isHTMAbort()) + return RequestStatus_BufferFull; + + // insert request into HtmCmd queue + SequencerRequest* htmReq = + new SequencerRequest(pkt, primary_type, secondary_type, + curCycle()); + assert(htmReq); + m_htmCmdRequestTable.push_back(htmReq); + return RequestStatus_Ready; + } else { + return Sequencer::insertRequest(pkt, primary_type, secondary_type); + } +} diff --git a/src/mem/ruby/system/HTMSequencer.hh b/src/mem/ruby/system/HTMSequencer.hh new file mode 100644 index 0000000000..5add836ef0 --- /dev/null +++ b/src/mem/ruby/system/HTMSequencer.hh @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder.
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __MEM_RUBY_SYSTEM_HTMSEQUENCER_HH__ +#define __MEM_RUBY_SYSTEM_HTMSEQUENCER_HH__ + +#include <deque> +#include <iostream> + +#include "mem/htm.hh" +#include "mem/ruby/protocol/HtmCallbackMode.hh" +#include "mem/ruby/protocol/HtmFailedInCacheReason.hh" +#include "mem/ruby/system/RubyPort.hh" +#include "mem/ruby/system/Sequencer.hh" +#include "params/RubyHTMSequencer.hh" + +class HTMSequencer : public Sequencer +{ + public: + HTMSequencer(const RubyHTMSequencerParams *p); + ~HTMSequencer(); + + // callback to acknowledge HTM requests and + // notify cpu core when htm transaction fails in cache + void htmCallback(Addr, + const HtmCallbackMode, + const HtmFailedInCacheReason); + + bool empty() const override; + void print(std::ostream& out) const override; + void regStats() override; + void wakeup() override; + + private: + /** + * Htm return code conversion + * + * This helper is a hack meant to convert the autogenerated ruby + * enum (HtmFailedInCacheReason) to the manually defined one + * (HtmCacheFailure). This is needed since the cpu code would + * otherwise have to include the ruby generated headers in order + * to handle the htm return code. + */ + HtmCacheFailure htmRetCodeConversion(const HtmFailedInCacheReason rc); + + void rubyHtmCallback(PacketPtr pkt, const HtmFailedInCacheReason fail_r); + + RequestStatus insertRequest(PacketPtr pkt, + RubyRequestType primary_type, + RubyRequestType secondary_type) override; + + // Private copy constructor and assignment operator + HTMSequencer(const HTMSequencer& obj); + HTMSequencer& operator=(const HTMSequencer& obj); + + // table/queue for hardware transactional memory commands + // these do not have an address so a deque/queue is used instead.
+ std::deque<SequencerRequest*> m_htmCmdRequestTable; + + Tick m_htmstart_tick; + Counter m_htmstart_instruction; + + //! Histogram of cycle latencies of HTM transactions + Stats::Histogram m_htm_transaction_cycles; + //! Histogram of instruction lengths of HTM transactions + Stats::Histogram m_htm_transaction_instructions; + //! Causes for HTM transaction aborts + Stats::Vector m_htm_transaction_abort_cause; +}; + +inline std::ostream& +operator<<(std::ostream& out, const HTMSequencer& obj) +{ + obj.print(out); + out << std::flush; + return out; +} + +#endif // __MEM_RUBY_SYSTEM_HTMSEQUENCER_HH__ diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 4510e3a833..bb86e6038e 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013,2019 ARM Limited + * Copyright (c) 2012-2013,2020 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -169,6 +169,7 @@ bool RubyPort::MemMasterPort::recvTimingResp(PacketPtr pkt) { // got a response from a device assert(pkt->isResponse()); + assert(!pkt->htmTransactionFailedInCache()); // First we must retrieve the request port from the sender State RubyPort::SenderState *senderState = @@ -253,6 +254,7 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt) // pio port. if (pkt->cmd != MemCmd::MemSyncReq) { if (!isPhysMemAddress(pkt)) { + assert(!pkt->req->isHTMCmd()); assert(ruby_port->memMasterPort.isConnected()); DPRINTF(RubyPort, "Request address %#x assumed to be a " "pio address\n", pkt->getAddr()); @@ -638,7 +640,6 @@ RubyPort::PioMasterPort::recvRangeChange() } } - int RubyPort::functionalWrite(Packet *func_pkt) { diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript index 7496971c82..a5d2fb11ea 100644 --- a/src/mem/ruby/system/SConscript +++ b/src/mem/ruby/system/SConscript @@ -56,6 +56,7 @@ Source('CacheRecorder.cc') Source('DMASequencer.cc') if env['BUILD_GPU']: Source('GPUCoalescer.cc') +Source('HTMSequencer.cc') Source('RubyPort.cc') Source('RubyPortProxy.cc') Source('RubySystem.cc') diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 42398e23e8..75c58d600e 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -55,6 +55,7 @@ #include "mem/ruby/protocol/PrefetchBit.hh" #include "mem/ruby/protocol/RubyAccessMode.hh" #include "mem/ruby/slicc_interface/RubyRequest.hh" +#include "mem/ruby/slicc_interface/RubySlicc_Util.hh" #include "mem/ruby/system/RubySystem.hh" #include "sim/system.hh" @@ -148,6 +149,12 @@ Sequencer::llscCheckMonitor(const Addr address) } } +void +Sequencer::llscClearLocalMonitor() +{ + m_dataCache_ptr->clearLockedAll(m_version); +} + void Sequencer::wakeup() { @@ -243,7 +250,8 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type, // Check if there is any outstanding request for the same cache line. auto &seq_req_list = m_RequestTable[line_addr]; // Create a default entry - seq_req_list.emplace_back(pkt, primary_type, secondary_type, curCycle()); + seq_req_list.emplace_back(pkt, primary_type, + secondary_type, curCycle()); m_outstanding_count++; if (seq_req_list.size() > 1) { @@ -569,7 +577,10 @@ Sequencer::empty() const RequestStatus Sequencer::makeRequest(PacketPtr pkt) { - if (m_outstanding_count >= m_max_outstanding_requests) { + // HTM abort signals must be allowed to reach the Sequencer + // the same cycle they are issued. They cannot be retried.
+ if ((m_outstanding_count >= m_max_outstanding_requests) && + !pkt->req->isHTMAbort()) { return RequestStatus_BufferFull; } @@ -590,7 +601,7 @@ Sequencer::makeRequest(PacketPtr pkt) if (pkt->isWrite()) { DPRINTF(RubySequencer, "Issuing SC\n"); primary_type = RubyRequestType_Store_Conditional; -#ifdef PROTOCOL_MESI_Three_Level +#if defined (PROTOCOL_MESI_Three_Level) || defined (PROTOCOL_MESI_Three_Level_HTM) secondary_type = RubyRequestType_Store_Conditional; #else secondary_type = RubyRequestType_ST; @@ -629,7 +640,10 @@ Sequencer::makeRequest(PacketPtr pkt) // primary_type = secondary_type = RubyRequestType_ST; } else if (pkt->isRead()) { - if (pkt->req->isInstFetch()) { + // hardware transactional memory commands + if (pkt->req->isHTMCmd()) { + primary_type = secondary_type = htmCmdToRubyRequestType(pkt); + } else if (pkt->req->isInstFetch()) { primary_type = secondary_type = RubyRequestType_IFETCH; } else { bool storeCheck = false; @@ -706,6 +720,14 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) printAddress(msg->getPhysicalAddress()), RubyRequestType_to_string(secondary_type)); + + // hardware transactional memory + // If the request originates in a transaction, + // then mark the Ruby message as such. + if (pkt->isHtmTransactional()) { + msg->m_htmFromTransaction = true; + msg->m_htmTransactionUid = pkt->getHtmTransactionUid(); + } + Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(secondary_type)); assert(latency > 0); diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index ebca56834f..92fdab690f 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -92,7 +92,7 @@ class Sequencer : public RubyPort DataBlock& data); // Public Methods - void wakeup(); // Used only for deadlock detection + virtual void wakeup(); // Used only for deadlock detection void resetStats() override; void collateStats(); void regStats() override; @@ -114,7 +114,7 @@ class Sequencer : public RubyPort const Cycles firstResponseTime = Cycles(0)); RequestStatus makeRequest(PacketPtr pkt) override; - bool empty() const; + virtual bool empty() const; int outstandingCount() const override { return m_outstanding_count; } bool isDeadlockEventScheduled() const override @@ -123,7 +123,7 @@ class Sequencer : public RubyPort void descheduleDeadlockEvent() override { deschedule(deadlockCheckEvent); } - void print(std::ostream& out) const; + virtual void print(std::ostream& out) const; void markRemoved(); void evictionCallback(Addr address); @@ -194,16 +194,22 @@ class Sequencer : public RubyPort Cycles forwardRequestTime, Cycles firstResponseTime); - RequestStatus insertRequest(PacketPtr pkt, RubyRequestType primary_type, - RubyRequestType secondary_type); - // Private copy constructor and assignment operator Sequencer(const Sequencer& obj); Sequencer& operator=(const Sequencer& obj); + protected: + // RequestTable contains both read and write requests, handles aliasing + std::unordered_map<Addr, std::list<SequencerRequest>> m_RequestTable; + + Cycles m_deadlock_threshold; + + virtual RequestStatus insertRequest(PacketPtr pkt, + RubyRequestType primary_type, + RubyRequestType secondary_type); + private: int m_max_outstanding_requests; - Cycles m_deadlock_threshold; CacheMemory* m_dataCache_ptr; CacheMemory* m_instCache_ptr; @@ -215,9 +221,6 @@ class Sequencer : public RubyPort Cycles m_data_cache_hit_latency; Cycles m_inst_cache_hit_latency; - // RequestTable contains both read and write requests, handles aliasing - std::unordered_map<Addr, std::list<SequencerRequest>> m_RequestTable;
- // Global outstanding request count, across all request tables int m_outstanding_count; bool m_deadlock_check_scheduled; @@ -294,6 +297,13 @@ class Sequencer : public RubyPort * @return a boolean indicating if the line address was found. */ bool llscCheckMonitor(const Addr); + + + /** + * Removes all addresses from the local monitor. + * This is independent of this Sequencer object's version id. + */ + void llscClearLocalMonitor(); }; inline std::ostream& diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index f97224dfcf..47c5b41ce0 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -1,4 +1,5 @@ # Copyright (c) 2009 Advanced Micro Devices, Inc. +# Copyright (c) 2020 ARM Limited # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -70,6 +71,11 @@ class RubySequencer(RubyPort): # 99 is the dummy default value coreid = Param.Int(99, "CorePair core id") +class RubyHTMSequencer(RubySequencer): + type = 'RubyHTMSequencer' + cxx_class = 'HTMSequencer' + cxx_header = "mem/ruby/system/HTMSequencer.hh" + class DMASequencer(RubyPort): type = 'DMASequencer' cxx_header = "mem/ruby/system/DMASequencer.hh" diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 987f3b5f13..7f92d872d2 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 ARM Limited +# Copyright (c) 2019-2020 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -54,6 +54,7 @@ python_class_map = { "CacheMemory": "RubyCache", "WireBuffer": "RubyWireBuffer", "Sequencer": "RubySequencer", + "HTMSequencer": "RubyHTMSequencer", "GPUCoalescer" : "RubyGPUCoalescer", "VIPERCoalescer" : "VIPERCoalescer", "DirectoryMemory": "RubyDirectoryMemory",
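Seen end to end, commit and abort differ in only one step of the cache sweep implemented in CacheMemory::htmCommitTransaction/htmAbortTransaction above: both clear the per-line read/write-set flags and LL/SC locks, and abort additionally invalidates every write-set line so the pre-transactional copies held at L1 become visible again. A condensed C++ sketch of that sweep, with a hypothetical Line type standing in for AbstractCacheEntry (the real code also samples the set sizes into the Stats histograms registered earlier):

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for AbstractCacheEntry.
    struct Line {
        bool valid = true;
        bool inHtmReadSet = false;
        bool inHtmWriteSet = false;
        void invalidate() { valid = false; }
    };

    // One sweep serves both outcomes: flags and locks are cleared either
    // way; only an abort discards the speculative data written inside the
    // transaction, exposing the rollback copies kept at L1.
    void endTransaction(std::vector<Line> &lines, bool abort)
    {
        std::size_t readSet = 0, writeSet = 0;
        for (Line &line : lines) {
            readSet += line.inHtmReadSet ? 1 : 0;
            writeSet += line.inHtmWriteSet ? 1 : 0;
            if (abort && line.inHtmWriteSet)
                line.invalidate();
            line.inHtmReadSet = false;
            line.inHtmWriteSet = false;
        }
        // The real CacheMemory samples readSet/writeSet into histograms.
        (void)readSet; (void)writeSet;
    }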