From 5eb4f5fe11862eba5a92f66848c95ce607dfdbc2 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 20 Oct 2021 12:52:42 -0500 Subject: [PATCH] configs: Breakup GPU_VIPER create_system code In order to have more fine grained control over which SLICC controllers are part of which Ruby network in a disjoint configuration, the create_system function in GPU_VIPER is broken up into multiple construct calls for each SLICC machine type in the protocol. By default this does not change anything functionally. A future config will use the construct calls to explicitly set which network (CPU or GPU) the controller is in. Change-Id: Ic038b300c5c3732e96992ef4bfe14e43fa0ea824 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/51847 Reviewed-by: Jason Lowe-Power Reviewed-by: Matt Sinclair Maintainer: Jason Lowe-Power Tested-by: kokoro --- configs/ruby/GPU_VIPER.py | 536 ++++++++++++++++++++++---------------- 1 file changed, 306 insertions(+), 230 deletions(-) diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index e315543d9f..b8a427104f 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -395,40 +395,13 @@ def define_options(parser): help="Size of the mandatory queue in the GPU scalar " "cache controller") +def construct_dirs(options, system, ruby_system, network): -def create_system(options, full_system, system, dma_devices, bootmem, - ruby_system, cpus): - if buildEnv['PROTOCOL'] != 'GPU_VIPER': - panic("This script requires the GPU_VIPER protocol to be built.") - - cpu_sequencers = [] - - # - # The ruby network creation expects the list of nodes in the system to be - # consistent with the NetDest list. Therefore the l1 controller nodes - # must be listed before the directory nodes and directory nodes before - # dma nodes, etc. - # - cp_cntrl_nodes = [] - tcp_cntrl_nodes = [] - sqc_cntrl_nodes = [] - tcc_cntrl_nodes = [] dir_cntrl_nodes = [] - l3_cntrl_nodes = [] - - # - # Must create the individual controllers before the network to ensure the - # controller constructors are called before the network constructor - # # For an odd number of CPUs, still create the right number of controllers TCC_bits = int(math.log(options.num_tccs, 2)) - # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu - # Clusters - crossbar_bw = None - mainCluster = None - if options.numa_high_bit: numa_bit = options.numa_high_bit else: @@ -439,12 +412,6 @@ def create_system(options, full_system, system, dma_devices, bootmem, block_size_bits = int(math.log(options.cacheline_size, 2)) numa_bit = block_size_bits + dir_bits - 1 - if hasattr(options, 'bw_scalor') and options.bw_scalor > 0: - #Assuming a 2GHz clock - crossbar_bw = 16 * options.num_compute_units * options.bw_scalor - mainCluster = Cluster(intBW=crossbar_bw) - else: - mainCluster = Cluster(intBW=8) # 16 GB/s for i in range(options.num_dirs): dir_ranges = [] for r in system.mem_ranges: @@ -462,19 +429,19 @@ def create_system(options, full_system, system, dma_devices, bootmem, # Connect the Directory controller to the ruby network dir_cntrl.requestFromCores = MessageBuffer(ordered = True) - dir_cntrl.requestFromCores.in_port = ruby_system.network.out_port + dir_cntrl.requestFromCores.in_port = network.out_port dir_cntrl.responseFromCores = MessageBuffer() - dir_cntrl.responseFromCores.in_port = ruby_system.network.out_port + dir_cntrl.responseFromCores.in_port = network.out_port dir_cntrl.unblockFromCores = MessageBuffer() - dir_cntrl.unblockFromCores.in_port = ruby_system.network.out_port + dir_cntrl.unblockFromCores.in_port = network.out_port dir_cntrl.probeToCore = MessageBuffer() - dir_cntrl.probeToCore.out_port = ruby_system.network.in_port + dir_cntrl.probeToCore.out_port = network.in_port dir_cntrl.responseToCore = MessageBuffer() - dir_cntrl.responseToCore.out_port = ruby_system.network.in_port + dir_cntrl.responseToCore.out_port = network.in_port dir_cntrl.triggerQueue = MessageBuffer(ordered = True) dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True) @@ -482,10 +449,10 @@ def create_system(options, full_system, system, dma_devices, bootmem, dir_cntrl.responseFromMemory = MessageBuffer() dir_cntrl.requestFromDMA = MessageBuffer(ordered=True) - dir_cntrl.requestFromDMA.in_port = ruby_system.network.out_port + dir_cntrl.requestFromDMA.in_port = network.out_port dir_cntrl.responseToDMA = MessageBuffer() - dir_cntrl.responseToDMA.out_port = ruby_system.network.in_port + dir_cntrl.responseToDMA.out_port = network.in_port dir_cntrl.requestToMemory = MessageBuffer() dir_cntrl.responseFromMemory = MessageBuffer() @@ -493,13 +460,13 @@ def create_system(options, full_system, system, dma_devices, bootmem, exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) - mainCluster.add(dir_cntrl) + return dir_cntrl_nodes + +def construct_corepairs(options, system, ruby_system, network): + + cpu_sequencers = [] + cp_cntrl_nodes = [] - cpuCluster = None - if hasattr(options, 'bw_scalor') and options.bw_scalor > 0: - cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw) - else: - cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s for i in range((options.num_cpus + 1) // 2): cp_cntrl = CPCntrl() @@ -513,23 +480,285 @@ def create_system(options, full_system, system, dma_devices, bootmem, # Connect the CP controllers and the network cp_cntrl.requestFromCore = MessageBuffer() - cp_cntrl.requestFromCore.out_port = ruby_system.network.in_port + cp_cntrl.requestFromCore.out_port = network.in_port cp_cntrl.responseFromCore = MessageBuffer() - cp_cntrl.responseFromCore.out_port = ruby_system.network.in_port + cp_cntrl.responseFromCore.out_port = network.in_port cp_cntrl.unblockFromCore = MessageBuffer() - cp_cntrl.unblockFromCore.out_port = ruby_system.network.in_port + cp_cntrl.unblockFromCore.out_port = network.in_port cp_cntrl.probeToCore = MessageBuffer() - cp_cntrl.probeToCore.in_port = ruby_system.network.out_port + cp_cntrl.probeToCore.in_port = network.out_port cp_cntrl.responseToCore = MessageBuffer() - cp_cntrl.responseToCore.in_port = ruby_system.network.out_port + cp_cntrl.responseToCore.in_port = network.out_port cp_cntrl.mandatoryQueue = MessageBuffer() cp_cntrl.triggerQueue = MessageBuffer(ordered = True) + cp_cntrl_nodes.append(cp_cntrl) + + return (cpu_sequencers, cp_cntrl_nodes) + +def construct_tcps(options, system, ruby_system, network): + + tcp_sequencers = [] + tcp_cntrl_nodes = [] + + # For an odd number of CPUs, still create the right number of controllers + TCC_bits = int(math.log(options.num_tccs, 2)) + + for i in range(options.num_compute_units): + + tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, + issue_latency = 1, + number_of_TBEs = 2560) + # TBEs set to max outstanding requests + tcp_cntrl.create(options, ruby_system, system) + tcp_cntrl.WB = options.WB_L1 + tcp_cntrl.disableL1 = options.noL1 + tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency + tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency + + exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i) + # + # Add controllers and sequencers to the appropriate lists + # + tcp_sequencers.append(tcp_cntrl.coalescer) + tcp_cntrl_nodes.append(tcp_cntrl) + + # Connect the TCP controller to the ruby network + tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) + tcp_cntrl.requestFromTCP.out_port = network.in_port + + tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) + tcp_cntrl.responseFromTCP.out_port = network.in_port + + tcp_cntrl.unblockFromCore = MessageBuffer() + tcp_cntrl.unblockFromCore.out_port = network.in_port + + tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) + tcp_cntrl.probeToTCP.in_port = network.out_port + + tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) + tcp_cntrl.responseToTCP.in_port = network.out_port + + tcp_cntrl.mandatoryQueue = MessageBuffer() + + return (tcp_sequencers, tcp_cntrl_nodes) + +def construct_sqcs(options, system, ruby_system, network): + + sqc_sequencers = [] + sqc_cntrl_nodes = [] + + # For an odd number of CPUs, still create the right number of controllers + TCC_bits = int(math.log(options.num_tccs, 2)) + + for i in range(options.num_sqc): + + sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl.create(options, ruby_system, system) + + exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i) + # + # Add controllers and sequencers to the appropriate lists + # + sqc_sequencers.append(sqc_cntrl.sequencer) + sqc_cntrl_nodes.append(sqc_cntrl) + + # Connect the SQC controller to the ruby network + sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True) + sqc_cntrl.requestFromSQC.out_port = network.in_port + + sqc_cntrl.probeToSQC = MessageBuffer(ordered = True) + sqc_cntrl.probeToSQC.in_port = network.out_port + + sqc_cntrl.responseToSQC = MessageBuffer(ordered = True) + sqc_cntrl.responseToSQC.in_port = network.out_port + + sqc_cntrl.mandatoryQueue = MessageBuffer() + + return (sqc_sequencers, sqc_cntrl_nodes) + +def construct_scalars(options, system, ruby_system, network): + + scalar_sequencers = [] + scalar_cntrl_nodes = [] + + # For an odd number of CPUs, still create the right number of controllers + TCC_bits = int(math.log(options.num_tccs, 2)) + + for i in range(options.num_scalar_cache): + scalar_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + scalar_cntrl.create(options, ruby_system, system) + + exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i) + + scalar_sequencers.append(scalar_cntrl.sequencer) + scalar_cntrl_nodes.append(scalar_cntrl) + + scalar_cntrl.requestFromSQC = MessageBuffer(ordered = True) + scalar_cntrl.requestFromSQC.out_port = network.in_port + + scalar_cntrl.probeToSQC = MessageBuffer(ordered = True) + scalar_cntrl.probeToSQC.in_port = network.out_port + + scalar_cntrl.responseToSQC = MessageBuffer(ordered = True) + scalar_cntrl.responseToSQC.in_port = network.out_port + + scalar_cntrl.mandatoryQueue = \ + MessageBuffer(buffer_size=options.scalar_buffer_size) + + return (scalar_sequencers, scalar_cntrl_nodes) + +def construct_cmdprocs(options, system, ruby_system, network): + + cmdproc_sequencers = [] + cmdproc_cntrl_nodes = [] + + # For an odd number of CPUs, still create the right number of controllers + TCC_bits = int(math.log(options.num_tccs, 2)) + + for i in range(options.num_cp): + + tcp_ID = options.num_compute_units + i + sqc_ID = options.num_sqc + i + + tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, + issue_latency = 1, + number_of_TBEs = 2560) + # TBEs set to max outstanding requests + tcp_cntrl.createCP(options, ruby_system, system) + tcp_cntrl.WB = options.WB_L1 + tcp_cntrl.disableL1 = options.noL1 + tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency + tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency + + exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID) + # + # Add controllers and sequencers to the appropriate lists + # + cmdproc_sequencers.append(tcp_cntrl.sequencer) + cmdproc_cntrl_nodes.append(tcp_cntrl) + + # Connect the CP (TCP) controllers to the ruby network + tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) + tcp_cntrl.requestFromTCP.out_port = network.in_port + + tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) + tcp_cntrl.responseFromTCP.out_port = network.in_port + + tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True) + tcp_cntrl.unblockFromCore.out_port = network.in_port + + tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) + tcp_cntrl.probeToTCP.in_port = network.out_port + + tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) + tcp_cntrl.responseToTCP.in_port = network.out_port + + tcp_cntrl.mandatoryQueue = MessageBuffer() + + sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl.create(options, ruby_system, system) + + exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID) + # + # Add controllers and sequencers to the appropriate lists + # + cmdproc_sequencers.append(sqc_cntrl.sequencer) + cmdproc_cntrl_nodes.append(sqc_cntrl) + + return (cmdproc_sequencers, cmdproc_cntrl_nodes) + +def construct_tccs(options, system, ruby_system, network): + + tcc_cntrl_nodes = [] + + for i in range(options.num_tccs): + + tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency) + tcc_cntrl.create(options, ruby_system, system) + tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency + tcc_cntrl.l2_response_latency = options.TCC_latency + tcc_cntrl_nodes.append(tcc_cntrl) + tcc_cntrl.WB = options.WB_L2 + tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units + # the number_of_TBEs is inclusive of TBEs below + + # Connect the TCC controllers to the ruby network + tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True) + tcc_cntrl.requestFromTCP.in_port = network.out_port + + tcc_cntrl.responseToCore = MessageBuffer(ordered = True) + tcc_cntrl.responseToCore.out_port = network.in_port + + tcc_cntrl.probeFromNB = MessageBuffer() + tcc_cntrl.probeFromNB.in_port = network.out_port + + tcc_cntrl.responseFromNB = MessageBuffer() + tcc_cntrl.responseFromNB.in_port = network.out_port + + tcc_cntrl.requestToNB = MessageBuffer(ordered = True) + tcc_cntrl.requestToNB.out_port = network.in_port + + tcc_cntrl.responseToNB = MessageBuffer() + tcc_cntrl.responseToNB.out_port = network.in_port + + tcc_cntrl.unblockToNB = MessageBuffer() + tcc_cntrl.unblockToNB.out_port = network.in_port + + tcc_cntrl.triggerQueue = MessageBuffer(ordered = True) + + exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i) + + return tcc_cntrl_nodes + +def create_system(options, full_system, system, dma_devices, bootmem, + ruby_system, cpus): + if buildEnv['PROTOCOL'] != 'GPU_VIPER': + panic("This script requires the GPU_VIPER protocol to be built.") + + cpu_sequencers = [] + + # + # Must create the individual controllers before the network to ensure the + # controller constructors are called before the network constructor + # + + # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu + # Clusters + crossbar_bw = None + mainCluster = None + cpuCluster = None + gpuCluster = None + + if hasattr(options, 'bw_scalor') and options.bw_scalor > 0: + #Assuming a 2GHz clock + crossbar_bw = 16 * options.num_compute_units * options.bw_scalor + mainCluster = Cluster(intBW = crossbar_bw) + cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw) + gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw) + else: + mainCluster = Cluster(intBW = 8) # 16 GB/s + cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s + gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s + + + # Create CPU directory controllers + dir_cntrl_nodes = \ + construct_dirs(options, system, ruby_system, ruby_system.network) + for dir_cntrl in dir_cntrl_nodes: + mainCluster.add(dir_cntrl) + + + # Create CPU core pairs + (cp_sequencers, cp_cntrl_nodes) = \ + construct_corepairs(options, system, ruby_system, ruby_system.network) + cpu_sequencers.extend(cp_sequencers) + for cp_cntrl in cp_cntrl_nodes: cpuCluster.add(cp_cntrl) # Register CPUs and caches for each CorePair and directory (SE mode only) @@ -584,191 +813,38 @@ def create_system(options, full_system, system, dma_devices, bootmem, cpus = [n for n in range(options.num_cpus)]) - gpuCluster = None - if hasattr(options, 'bw_scalor') and options.bw_scalor > 0: - gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw) - else: - gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s - for i in range(options.num_compute_units): - - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) - # TBEs set to max outstanding requests - tcp_cntrl.create(options, ruby_system, system) - tcp_cntrl.WB = options.WB_L1 - tcp_cntrl.disableL1 = options.noL1 - tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency - tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency - - exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(tcp_cntrl.coalescer) - tcp_cntrl_nodes.append(tcp_cntrl) - - # Connect the TCP controller to the ruby network - tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.requestFromTCP.out_port = ruby_system.network.in_port - - tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseFromTCP.out_port = ruby_system.network.in_port - - tcp_cntrl.unblockFromCore = MessageBuffer() - tcp_cntrl.unblockFromCore.out_port = ruby_system.network.in_port - - tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) - tcp_cntrl.probeToTCP.in_port = ruby_system.network.out_port - - tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseToTCP.in_port = ruby_system.network.out_port - - tcp_cntrl.mandatoryQueue = MessageBuffer() - + # Create TCPs + (tcp_sequencers, tcp_cntrl_nodes) = \ + construct_tcps(options, system, ruby_system, ruby_system.network) + cpu_sequencers.extend(tcp_sequencers) + for tcp_cntrl in tcp_cntrl_nodes: gpuCluster.add(tcp_cntrl) - for i in range(options.num_sqc): - - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - sqc_cntrl.create(options, ruby_system, system) - - exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(sqc_cntrl.sequencer) - - # Connect the SQC controller to the ruby network - sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.requestFromSQC.out_port = ruby_system.network.in_port - - sqc_cntrl.probeToSQC = MessageBuffer(ordered = True) - sqc_cntrl.probeToSQC.in_port = ruby_system.network.out_port - - sqc_cntrl.responseToSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseToSQC.in_port = ruby_system.network.out_port - - sqc_cntrl.mandatoryQueue = MessageBuffer() - - # SQC also in GPU cluster + # Create SQCs + (sqc_sequencers, sqc_cntrl_nodes) = \ + construct_sqcs(options, system, ruby_system, ruby_system.network) + cpu_sequencers.extend(sqc_sequencers) + for sqc_cntrl in sqc_cntrl_nodes: gpuCluster.add(sqc_cntrl) - for i in range(options.num_scalar_cache): - scalar_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - scalar_cntrl.create(options, ruby_system, system) - - exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i) - - cpu_sequencers.append(scalar_cntrl.sequencer) - - scalar_cntrl.requestFromSQC = MessageBuffer(ordered = True) - scalar_cntrl.requestFromSQC.out_port = ruby_system.network.in_port - - scalar_cntrl.probeToSQC = MessageBuffer(ordered = True) - scalar_cntrl.probeToSQC.in_port = ruby_system.network.out_port - - scalar_cntrl.responseToSQC = MessageBuffer(ordered = True) - scalar_cntrl.responseToSQC.in_port = ruby_system.network.out_port - - scalar_cntrl.mandatoryQueue = \ - MessageBuffer(buffer_size=options.scalar_buffer_size) - + # Create Scalars + (scalar_sequencers, scalar_cntrl_nodes) = \ + construct_scalars(options, system, ruby_system, ruby_system.network) + cpu_sequencers.extend(scalar_sequencers) + for scalar_cntrl in scalar_cntrl_nodes: gpuCluster.add(scalar_cntrl) - for i in range(options.num_cp): + # Create command processors + (cmdproc_sequencers, cmdproc_cntrl_nodes) = \ + construct_cmdprocs(options, system, ruby_system, ruby_system.network) + cpu_sequencers.extend(cmdproc_sequencers) + for cmdproc_cntrl in cmdproc_cntrl_nodes: + gpuCluster.add(cmdproc_cntrl) - tcp_ID = options.num_compute_units + i - sqc_ID = options.num_sqc + i - - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) - # TBEs set to max outstanding requests - tcp_cntrl.createCP(options, ruby_system, system) - tcp_cntrl.WB = options.WB_L1 - tcp_cntrl.disableL1 = options.noL1 - tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency - tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency - - exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(tcp_cntrl.sequencer) - tcp_cntrl_nodes.append(tcp_cntrl) - - # Connect the CP (TCP) controllers to the ruby network - tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.requestFromTCP.out_port = ruby_system.network.in_port - - tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseFromTCP.out_port = ruby_system.network.in_port - - tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True) - tcp_cntrl.unblockFromCore.out_port = ruby_system.network.in_port - - tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) - tcp_cntrl.probeToTCP.in_port = ruby_system.network.out_port - - tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseToTCP.in_port = ruby_system.network.out_port - - tcp_cntrl.mandatoryQueue = MessageBuffer() - - gpuCluster.add(tcp_cntrl) - - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - sqc_cntrl.create(options, ruby_system, system) - - exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(sqc_cntrl.sequencer) - - # SQC also in GPU cluster - gpuCluster.add(sqc_cntrl) - - for i in range(options.num_tccs): - - tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency) - tcc_cntrl.create(options, ruby_system, system) - tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency - tcc_cntrl.l2_response_latency = options.TCC_latency - tcc_cntrl_nodes.append(tcc_cntrl) - tcc_cntrl.WB = options.WB_L2 - tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units - # the number_of_TBEs is inclusive of TBEs below - - # Connect the TCC controllers to the ruby network - tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcc_cntrl.requestFromTCP.in_port = ruby_system.network.out_port - - tcc_cntrl.responseToCore = MessageBuffer(ordered = True) - tcc_cntrl.responseToCore.out_port = ruby_system.network.in_port - - tcc_cntrl.probeFromNB = MessageBuffer() - tcc_cntrl.probeFromNB.in_port = ruby_system.network.out_port - - tcc_cntrl.responseFromNB = MessageBuffer() - tcc_cntrl.responseFromNB.in_port = ruby_system.network.out_port - - tcc_cntrl.requestToNB = MessageBuffer(ordered = True) - tcc_cntrl.requestToNB.out_port = ruby_system.network.in_port - - tcc_cntrl.responseToNB = MessageBuffer() - tcc_cntrl.responseToNB.out_port = ruby_system.network.in_port - - tcc_cntrl.unblockToNB = MessageBuffer() - tcc_cntrl.unblockToNB.out_port = ruby_system.network.in_port - - tcc_cntrl.triggerQueue = MessageBuffer(ordered = True) - - exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i) - - # connect all of the wire buffers between L3 and dirs up - # TCC cntrls added to the GPU cluster + # Create TCCs + tcc_cntrl_nodes = \ + construct_tccs(options, system, ruby_system, ruby_system.network) + for tcc_cntrl in tcc_cntrl_nodes: gpuCluster.add(tcc_cntrl) for i, dma_device in enumerate(dma_devices):