# Copyright (c) 2011-2015 Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# Configure the TLB hierarchy
# Places which would probably need to be modified if you
# want a different hierarchy are specified by a '
# comment

import sys

import m5
from m5.objects import *


def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
    """Return source text that, when eval'd, constructs one TLB object.

    Only the level number is substituted here (via ``%(level)d``); the
    ``options``/``gpu_ctrl`` references inside the string are resolved at
    eval time, so the string must be eval'd in a scope where those names
    are visible (see create_TLB_Coalescer()).

    :param options: parsed command-line options; the per-level knobs
        (L<level>TLBentries, L<level>TLBassoc, latencies, ...) are read
        off it when the string is eval'd.
    :param level: 1-based TLB hierarchy level used to pick option names.
    :param gpu_ctrl: GPU device, referenced only by the full-system TLB.
    :param full_system: select VegaGPUTLB (FS) vs. X86GPUTLB (SE mode).
    """
    if full_system:
        constructor_call = (
            "VegaGPUTLB(\
            gpu_device = gpu_ctrl, \
            size = options.L%(level)dTLBentries, \
            assoc = options.L%(level)dTLBassoc, \
            hitLatency = options.L%(level)dAccessLatency,\
            missLatency1 = options.L%(level)dMissLatency,\
            missLatency2 = options.L%(level)dMissLatency,\
            maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
            clk_domain = SrcClockDomain(\
                clock = options.gpu_clock,\
                voltage_domain = VoltageDomain(\
                    voltage = options.gpu_voltage)))" % locals()
        )
    else:
        constructor_call = (
            "X86GPUTLB(size = options.L%(level)dTLBentries, \
            assoc = options.L%(level)dTLBassoc, \
            hitLatency = options.L%(level)dAccessLatency,\
            missLatency2 = options.L%(level)dMissLatency,\
            maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
            accessDistance = options.L%(level)dAccessDistanceStat,\
            clk_domain = SrcClockDomain(\
                clock = options.gpu_clock,\
                voltage_domain = VoltageDomain(\
                    voltage = options.gpu_voltage)))" % locals()
        )
    return constructor_call


def Coalescer_constructor(options, level, full_system):
    """Return source text that, when eval'd, constructs one TLB coalescer.

    Same eval-in-caller's-scope contract as TLB_constructor().  Note the
    full-system coalescer additionally takes the tlb_level parameter.
    """
    if full_system:
        constructor_call = (
            "VegaTLBCoalescer(probesPerCycle = \
            options.L%(level)dProbesPerCycle, \
            tlb_level = %(level)d ,\
            coalescingWindow = options.L%(level)dCoalescingWindow,\
            disableCoalescing = options.L%(level)dDisableCoalescing,\
            clk_domain = SrcClockDomain(\
                clock = options.gpu_clock,\
                voltage_domain = VoltageDomain(\
                    voltage = options.gpu_voltage)))" % locals()
        )
    else:
        constructor_call = (
            "TLBCoalescer(probesPerCycle = \
            options.L%(level)dProbesPerCycle, \
            coalescingWindow = options.L%(level)dCoalescingWindow,\
            disableCoalescing = options.L%(level)dDisableCoalescing,\
            clk_domain = SrcClockDomain(\
                clock = options.gpu_clock,\
                voltage_domain = VoltageDomain(\
                    voltage = options.gpu_voltage)))" % locals()
        )
    return constructor_call


def create_TLB_Coalescer(
    options,
    my_level,
    my_index,
    tlb_name,
    coalescer_name,
    gpu_ctrl=None,
    full_system=False,
):
    # arguments: options, TLB level, number of private structures for this
    # Level, TLB name and Coalescer name
    #
    # The constructor strings are eval'd *here* so that they can resolve
    # ``options`` and ``gpu_ctrl`` from this function's local scope.
    # ``tlb_name``/``coalescer_name`` are the per-type lists that get
    # populated in place.
    for i in range(my_index):
        tlb_name.append(
            eval(TLB_constructor(options, my_level, gpu_ctrl, full_system))
        )
        coalescer_name.append(
            eval(Coalescer_constructor(options, my_level, full_system))
        )


def config_tlb_hierarchy(
    options, system, shader_idx, gpu_ctrl=None, full_system=False
):
    """Build the GPU TLB/coalescer hierarchy and wire it into ``system``.

    Creates three levels of TLBs (per-type L1s for SQC/scalar/vector data,
    a shared L2 and a shared L3), attaches them to ``system`` as
    ``<name>_tlb`` / ``<name>_coalescer`` arrays, and connects all the
    ports.  Returns the modified ``system``.

    :param options: parsed command-line options.
    :param system: the gem5 System the TLB objects are attached to.
    :param shader_idx: index of the shader in ``system.cpu``.
    :param gpu_ctrl: GPU device (full system only).
    :param full_system: True for FS mode (Vega TLBs, page-table walkers).
    """
    n_cu = options.num_compute_units
    # L1 D-TLB sharing degree is selected by --TLB-config.
    if options.TLB_config == "perLane":
        num_TLBs = 64 * n_cu
    elif options.TLB_config == "mono":
        num_TLBs = 1
    elif options.TLB_config == "perCU":
        num_TLBs = n_cu
    elif options.TLB_config == "2CU":
        num_TLBs = n_cu >> 1
    else:
        print("Bad option for TLB Configuration.")
        sys.exit(1)

    # If the sim calls for a fixed total L1 TLB size across CUs, override
    # the per-TLB entries option.  This is loop-invariant, so it is done
    # once up front.  Integer division: entry counts must stay ints
    # (plain '/' would yield a float under Python 3).
    if options.tot_L1TLB_size:
        options.L1TLBentries = options.tot_L1TLB_size // num_TLBs
        if options.L1TLBassoc > options.L1TLBentries:
            options.L1TLBassoc = options.L1TLBentries

    # -------------------------------------------------------------------------
    # A visual representation of the TLB hierarchy
    # for ease of configuration
    # < Modify here the width and the number of levels if you want a different
    # configuration >
    # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
    # for this level
    L1 = [
        {
            "name": "sqc",
            "width": options.num_sqc,
            "TLBarray": [],
            "CoalescerArray": [],
        },
        {
            "name": "scalar",
            "width": options.num_scalar_cache,
            "TLBarray": [],
            "CoalescerArray": [],
        },
        {
            "name": "l1",
            "width": num_TLBs,
            "TLBarray": [],
            "CoalescerArray": [],
        },
    ]
    L2 = [{"name": "l2", "width": 1, "TLBarray": [], "CoalescerArray": []}]
    L3 = [{"name": "l3", "width": 1, "TLBarray": [], "CoalescerArray": []}]
    TLB_hierarchy = [L1, L2, L3]

    # -------------------------------------------------------------------------
    # Create the hiearchy
    # Call the appropriate constructors and add objects to the system
    for i in range(len(TLB_hierarchy)):
        hierarchy_level = TLB_hierarchy[i]
        level = i + 1
        for TLB_type in hierarchy_level:
            TLB_index = TLB_type["width"]
            TLB_array = TLB_type["TLBarray"]
            Coalescer_array = TLB_type["CoalescerArray"]
            # call the constructors for the TLB and the Coalescer
            create_TLB_Coalescer(
                options,
                level,
                TLB_index,
                TLB_array,
                Coalescer_array,
                gpu_ctrl,
                full_system,
            )

            # add the different TLB levels to the system
            # Modify here if you want to make the TLB hierarchy a child of
            # the shader.
            setattr(system, TLB_type["name"] + "_tlb", TLB_array)
            setattr(system, TLB_type["name"] + "_coalescer", Coalescer_array)

    # ===========================================================
    # Specify the TLB hierarchy (i.e., port connections)
    # All TLBs but the last level TLB need to have a memSidePort
    # ===========================================================

    # Each TLB is connected with its Coalescer through a single port.
    # There is a one-to-one mapping of TLBs to Coalescers at a given level
    # This won't be modified no matter what the hierarchy looks like.
    for i in range(len(TLB_hierarchy)):
        hierarchy_level = TLB_hierarchy[i]
        level = i + 1
        for TLB_type in hierarchy_level:
            name = TLB_type["name"]
            for index in range(TLB_type["width"]):
                coalescer = getattr(system, name + "_coalescer")[index]
                tlb = getattr(system, name + "_tlb")[index]
                coalescer.mem_side_ports[0] = tlb.cpu_side_ports[0]

    # Connect the cpuSidePort of all the coalescers in level 1
    # < Modify here if you want a different configuration >
    for TLB_type in L1:
        name = TLB_type["name"]
        num_TLBs = TLB_type["width"]
        if name == "l1":  # L1 D-TLBs
            tlb_per_cu = num_TLBs // n_cu
            for cu_idx in range(n_cu):
                if tlb_per_cu:
                    # One or more private D-TLBs per CU.
                    for tlb in range(tlb_per_cu):
                        system.cpu[shader_idx].CUs[cu_idx].translation_port[
                            tlb
                        ] = system.l1_coalescer[
                            cu_idx * tlb_per_cu + tlb
                        ].cpu_side_ports[0]
                else:
                    # D-TLB shared by several CUs; integer division keeps
                    # the coalescer index / port id integral ('/' would
                    # produce floats under Python 3).
                    system.cpu[shader_idx].CUs[cu_idx].translation_port[
                        tlb_per_cu
                    ] = system.l1_coalescer[
                        cu_idx // (n_cu // num_TLBs)
                    ].cpu_side_ports[cu_idx % (n_cu // num_TLBs)]
        elif name == "sqc":  # I-TLB
            for index in range(n_cu):
                sqc_tlb_index = index // options.cu_per_sqc
                sqc_tlb_port_id = index % options.cu_per_sqc
                system.cpu[shader_idx].CUs[index].sqc_tlb_port = (
                    system.sqc_coalescer[sqc_tlb_index].cpu_side_ports[
                        sqc_tlb_port_id
                    ]
                )
        elif name == "scalar":  # Scalar D-TLB
            for index in range(n_cu):
                scalar_tlb_index = index // options.cu_per_scalar_cache
                scalar_tlb_port_id = index % options.cu_per_scalar_cache
                system.cpu[shader_idx].CUs[index].scalar_tlb_port = (
                    system.scalar_coalescer[scalar_tlb_index].cpu_side_ports[
                        scalar_tlb_port_id
                    ]
                )

    # Connect the memSidePorts of all the TLBs with the
    # cpuSidePorts of the Coalescers of the next level
    # < Modify here if you want a different configuration >
    # L1 <-> L2
    l2_coalescer_index = 0
    for TLB_type in L1:
        name = TLB_type["name"]
        for index in range(TLB_type["width"]):
            tlb = getattr(system, name + "_tlb")[index]
            tlb.mem_side_ports[0] = system.l2_coalescer[0].cpu_side_ports[
                l2_coalescer_index
            ]
            l2_coalescer_index += 1
    # L2 <-> L3
    system.l2_tlb[0].mem_side_ports[0] = system.l3_coalescer[0].cpu_side_ports[
        0
    ]

    # L3 TLB Vega page table walker to memory for full system only
    if full_system:
        for TLB_type in L3:
            name = TLB_type["name"]
            for index in range(TLB_type["width"]):
                system._dma_ports.append(
                    getattr(system, name + "_tlb")[index].walker
                )

    return system