diff --git a/configs/common/GPUTLBConfig.py b/configs/common/GPUTLBConfig.py index f6aaccfecc..740c748d05 100644 --- a/configs/common/GPUTLBConfig.py +++ b/configs/common/GPUTLBConfig.py @@ -34,41 +34,69 @@ import m5 from m5.objects import * -def TLB_constructor(level): +def TLB_constructor(options, level, gpu_ctrl=None, full_system=False): - constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \ - assoc = options.L%(level)dTLBassoc, \ - hitLatency = options.L%(level)dAccessLatency,\ - missLatency2 = options.L%(level)dMissLatency,\ - maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\ - accessDistance = options.L%(level)dAccessDistanceStat,\ - clk_domain = SrcClockDomain(\ - clock = options.gpu_clock,\ - voltage_domain = VoltageDomain(\ - voltage = options.gpu_voltage)))" % locals() - return constructor_call - -def Coalescer_constructor(level): - - constructor_call = "TLBCoalescer(probesPerCycle = \ - options.L%(level)dProbesPerCycle, \ - coalescingWindow = options.L%(level)dCoalescingWindow,\ - disableCoalescing = options.L%(level)dDisableCoalescing,\ + if full_system: + constructor_call = "VegaGPUTLB(\ + gpu_device = gpu_ctrl, \ + size = options.L%(level)dTLBentries, \ + assoc = options.L%(level)dTLBassoc, \ + hitLatency = options.L%(level)dAccessLatency,\ + missLatency1 = options.L%(level)dMissLatency,\ + missLatency2 = options.L%(level)dMissLatency,\ + maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\ + clk_domain = SrcClockDomain(\ + clock = options.gpu_clock,\ + voltage_domain = VoltageDomain(\ + voltage = options.gpu_voltage)))" % locals() + else: + constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \ + assoc = options.L%(level)dTLBassoc, \ + hitLatency = options.L%(level)dAccessLatency,\ + missLatency2 = options.L%(level)dMissLatency,\ + maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\ + accessDistance = options.L%(level)dAccessDistanceStat,\ clk_domain = SrcClockDomain(\ clock = options.gpu_clock,\ voltage_domain = VoltageDomain(\ voltage = options.gpu_voltage)))" % locals() return constructor_call +def Coalescer_constructor(options, level, full_system): + + if full_system: + constructor_call = "VegaTLBCoalescer(probesPerCycle = \ + options.L%(level)dProbesPerCycle, \ + tlb_level = %(level)d ,\ + coalescingWindow = options.L%(level)dCoalescingWindow,\ + disableCoalescing = options.L%(level)dDisableCoalescing,\ + clk_domain = SrcClockDomain(\ + clock = options.gpu_clock,\ + voltage_domain = VoltageDomain(\ + voltage = options.gpu_voltage)))" % locals() + else: + constructor_call = "TLBCoalescer(probesPerCycle = \ + options.L%(level)dProbesPerCycle, \ + coalescingWindow = options.L%(level)dCoalescingWindow,\ + disableCoalescing = options.L%(level)dDisableCoalescing,\ + clk_domain = SrcClockDomain(\ + clock = options.gpu_clock,\ + voltage_domain = VoltageDomain(\ + voltage = options.gpu_voltage)))" % locals() + return constructor_call + def create_TLB_Coalescer(options, my_level, my_index, tlb_name, - coalescer_name): + coalescer_name, gpu_ctrl=None, full_system=False): # arguments: options, TLB level, number of private structures for this # Level, TLB name and Coalescer name for i in range(my_index): - tlb_name.append(eval(TLB_constructor(my_level))) - coalescer_name.append(eval(Coalescer_constructor(my_level))) + tlb_name.append( + eval(TLB_constructor(options, my_level, gpu_ctrl, full_system))) + coalescer_name.append( + eval(Coalescer_constructor(options, my_level, full_system))) -def config_tlb_hierarchy(options, system, shader_idx): +def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None, + full_system=False): n_cu = options.num_compute_units if options.TLB_config == "perLane": @@ -121,7 +149,7 @@ def config_tlb_hierarchy(options, system, shader_idx): options.L1TLBassoc = options.L1TLBentries # call the constructors for the TLB and the Coalescer create_TLB_Coalescer(options, level, TLB_index,\ - TLB_array, Coalescer_array) + TLB_array, Coalescer_array, gpu_ctrl, full_system) system_TLB_name = TLB_type['name'] + '_tlb' system_Coalescer_name = TLB_type['name'] + '_coalescer' @@ -198,8 +226,17 @@ def config_tlb_hierarchy(options, system, shader_idx): system.l2_coalescer[0].cpu_side_ports[%d]' % \ (name, index, l2_coalescer_index)) l2_coalescer_index += 1 + # L2 <-> L3 system.l2_tlb[0].mem_side_ports[0] = \ system.l3_coalescer[0].cpu_side_ports[0] + # L3 TLB Vega page table walker to memory for full system only + if full_system: + for TLB_type in L3: + name = TLB_type['name'] + for index in range(TLB_type['width']): + exec('system._dma_ports.append(system.%s_tlb[%d].walker)' % \ + (name, index)) + return system diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 3eb8dbc403..8c9895f1c9 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -33,6 +33,7 @@ from m5.util import panic from common.Benchmarks import * from common.FSConfig import * +from common import GPUTLBConfig from common import Simulation from ruby import Ruby @@ -90,6 +91,10 @@ def makeGpuFSSystem(args): shader = createGPU(system, args) connectGPU(system, args) + # The shader core will be whatever is after the CPU cores are accounted for + shader_idx = args.num_cpus + system.cpu.append(shader) + # This arbitrary address is something in the X86 I/O hole hsapp_gpu_map_paddr = 0xe00000000 hsapp_pt_walker = VegaPagetableWalker() @@ -150,9 +155,12 @@ def makeGpuFSSystem(args): device_ih.pio = system.iobus.mem_side_ports pm4_pkt_proc.pio = system.iobus.mem_side_ports - # Create Ruby system using Ruby.py for now - #Ruby.create_system(args, True, system, system.iobus, - # system._dma_ports) + # Full system needs special TLBs for SQC, Scalar, and vector data ports + args.full_system = True + GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx, + system.pc.south_bridge.gpu, True) + + # Create Ruby system using disjoint VIPER topology system.ruby = Disjoint_VIPER() system.ruby.create(args, system, system.iobus, system._dma_ports) @@ -161,6 +169,10 @@ def makeGpuFSSystem(args): voltage_domain = system.voltage_domain) for (i, cpu) in enumerate(system.cpu): + # Break once we reach the shader "CPU" + if i == args.num_cpus: + break + # # Tie the cpu ports to the correct ruby system ports # @@ -170,14 +182,9 @@ def makeGpuFSSystem(args): system.ruby._cpu_ports[i].connectCpuPorts(cpu) - for i in range(len(system.cpu)): for j in range(len(system.cpu[i].isa)): system.cpu[i].isa[j].vendor_string = "AuthenticAMD" - # The shader core will be whatever is after the CPU cores are accounted for - shader_idx = args.num_cpus - system.cpu.append(shader) - gpu_port_idx = len(system.ruby._cpu_ports) \ - args.num_compute_units - args.num_sqc \ - args.num_scalar_cache