configs: Break up GPU_VIPER create_system code
In order to have finer-grained control over which SLICC controllers are part of which Ruby network in a disjoint configuration, the create_system function in GPU_VIPER is broken up into multiple construct calls, one for each SLICC machine type in the protocol. By default this does not change anything functionally. A future config will use the construct calls to explicitly set which network (CPU or GPU) the controller is in. Change-Id: Ic038b300c5c3732e96992ef4bfe14e43fa0ea824 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/51847 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -395,40 +395,13 @@ def define_options(parser):
|
||||
help="Size of the mandatory queue in the GPU scalar "
|
||||
"cache controller")
|
||||
|
||||
def construct_dirs(options, system, ruby_system, network):
|
||||
|
||||
def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
ruby_system, cpus):
|
||||
if buildEnv['PROTOCOL'] != 'GPU_VIPER':
|
||||
panic("This script requires the GPU_VIPER protocol to be built.")
|
||||
|
||||
cpu_sequencers = []
|
||||
|
||||
#
|
||||
# The ruby network creation expects the list of nodes in the system to be
|
||||
# consistent with the NetDest list. Therefore the l1 controller nodes
|
||||
# must be listed before the directory nodes and directory nodes before
|
||||
# dma nodes, etc.
|
||||
#
|
||||
cp_cntrl_nodes = []
|
||||
tcp_cntrl_nodes = []
|
||||
sqc_cntrl_nodes = []
|
||||
tcc_cntrl_nodes = []
|
||||
dir_cntrl_nodes = []
|
||||
l3_cntrl_nodes = []
|
||||
|
||||
#
|
||||
# Must create the individual controllers before the network to ensure the
|
||||
# controller constructors are called before the network constructor
|
||||
#
|
||||
|
||||
# For an odd number of CPUs, still create the right number of controllers
|
||||
TCC_bits = int(math.log(options.num_tccs, 2))
|
||||
|
||||
# This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
|
||||
# Clusters
|
||||
crossbar_bw = None
|
||||
mainCluster = None
|
||||
|
||||
if options.numa_high_bit:
|
||||
numa_bit = options.numa_high_bit
|
||||
else:
|
||||
@@ -439,12 +412,6 @@ def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
block_size_bits = int(math.log(options.cacheline_size, 2))
|
||||
numa_bit = block_size_bits + dir_bits - 1
|
||||
|
||||
if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
|
||||
#Assuming a 2GHz clock
|
||||
crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
|
||||
mainCluster = Cluster(intBW=crossbar_bw)
|
||||
else:
|
||||
mainCluster = Cluster(intBW=8) # 16 GB/s
|
||||
for i in range(options.num_dirs):
|
||||
dir_ranges = []
|
||||
for r in system.mem_ranges:
|
||||
@@ -462,19 +429,19 @@ def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
|
||||
# Connect the Directory controller to the ruby network
|
||||
dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
|
||||
dir_cntrl.requestFromCores.in_port = ruby_system.network.out_port
|
||||
dir_cntrl.requestFromCores.in_port = network.out_port
|
||||
|
||||
dir_cntrl.responseFromCores = MessageBuffer()
|
||||
dir_cntrl.responseFromCores.in_port = ruby_system.network.out_port
|
||||
dir_cntrl.responseFromCores.in_port = network.out_port
|
||||
|
||||
dir_cntrl.unblockFromCores = MessageBuffer()
|
||||
dir_cntrl.unblockFromCores.in_port = ruby_system.network.out_port
|
||||
dir_cntrl.unblockFromCores.in_port = network.out_port
|
||||
|
||||
dir_cntrl.probeToCore = MessageBuffer()
|
||||
dir_cntrl.probeToCore.out_port = ruby_system.network.in_port
|
||||
dir_cntrl.probeToCore.out_port = network.in_port
|
||||
|
||||
dir_cntrl.responseToCore = MessageBuffer()
|
||||
dir_cntrl.responseToCore.out_port = ruby_system.network.in_port
|
||||
dir_cntrl.responseToCore.out_port = network.in_port
|
||||
|
||||
dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
|
||||
dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
|
||||
@@ -482,10 +449,10 @@ def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
dir_cntrl.responseFromMemory = MessageBuffer()
|
||||
|
||||
dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
|
||||
dir_cntrl.requestFromDMA.in_port = ruby_system.network.out_port
|
||||
dir_cntrl.requestFromDMA.in_port = network.out_port
|
||||
|
||||
dir_cntrl.responseToDMA = MessageBuffer()
|
||||
dir_cntrl.responseToDMA.out_port = ruby_system.network.in_port
|
||||
dir_cntrl.responseToDMA.out_port = network.in_port
|
||||
|
||||
dir_cntrl.requestToMemory = MessageBuffer()
|
||||
dir_cntrl.responseFromMemory = MessageBuffer()
|
||||
@@ -493,13 +460,13 @@ def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
|
||||
dir_cntrl_nodes.append(dir_cntrl)
|
||||
|
||||
mainCluster.add(dir_cntrl)
|
||||
return dir_cntrl_nodes
|
||||
|
||||
def construct_corepairs(options, system, ruby_system, network):
|
||||
|
||||
cpu_sequencers = []
|
||||
cp_cntrl_nodes = []
|
||||
|
||||
cpuCluster = None
|
||||
if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
|
||||
cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
|
||||
else:
|
||||
cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
|
||||
for i in range((options.num_cpus + 1) // 2):
|
||||
|
||||
cp_cntrl = CPCntrl()
|
||||
@@ -513,23 +480,285 @@ def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
|
||||
# Connect the CP controllers and the network
|
||||
cp_cntrl.requestFromCore = MessageBuffer()
|
||||
cp_cntrl.requestFromCore.out_port = ruby_system.network.in_port
|
||||
cp_cntrl.requestFromCore.out_port = network.in_port
|
||||
|
||||
cp_cntrl.responseFromCore = MessageBuffer()
|
||||
cp_cntrl.responseFromCore.out_port = ruby_system.network.in_port
|
||||
cp_cntrl.responseFromCore.out_port = network.in_port
|
||||
|
||||
cp_cntrl.unblockFromCore = MessageBuffer()
|
||||
cp_cntrl.unblockFromCore.out_port = ruby_system.network.in_port
|
||||
cp_cntrl.unblockFromCore.out_port = network.in_port
|
||||
|
||||
cp_cntrl.probeToCore = MessageBuffer()
|
||||
cp_cntrl.probeToCore.in_port = ruby_system.network.out_port
|
||||
cp_cntrl.probeToCore.in_port = network.out_port
|
||||
|
||||
cp_cntrl.responseToCore = MessageBuffer()
|
||||
cp_cntrl.responseToCore.in_port = ruby_system.network.out_port
|
||||
cp_cntrl.responseToCore.in_port = network.out_port
|
||||
|
||||
cp_cntrl.mandatoryQueue = MessageBuffer()
|
||||
cp_cntrl.triggerQueue = MessageBuffer(ordered = True)
|
||||
|
||||
cp_cntrl_nodes.append(cp_cntrl)
|
||||
|
||||
return (cpu_sequencers, cp_cntrl_nodes)
|
||||
|
||||
def construct_tcps(options, system, ruby_system, network):
    """Create one TCP controller per compute unit and connect it to ``network``.

    Args:
        options: Parsed options. Reads ``num_tccs``, ``num_compute_units``,
            ``WB_L1``, ``noL1`` and ``TCP_latency``.
        system: Forwarded unchanged to ``TCPCntrl.create``.
        ruby_system: Forwarded to ``TCPCntrl.create``; each controller is
            also attached to it as the attribute ``tcp_cntrl<i>``.
        network: Object whose ``in_port`` / ``out_port`` the controller's
            message buffers are connected to.

    Returns:
        A tuple ``(tcp_sequencers, tcp_cntrl_nodes)``: the ``coalescer`` of
        every controller, and the controllers themselves, in creation order.
    """
    tcp_sequencers = []
    tcp_cntrl_nodes = []

    # log2 of the TCC count, handed to each controller as
    # TCC_select_num_bits.
    TCC_bits = int(math.log(options.num_tccs, 2))

    for i in range(options.num_compute_units):
        # TBEs set to max outstanding requests
        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        # Attach the controller under a per-index attribute name. setattr
        # performs the same attribute assignment the generated exec string
        # did, without building and executing source text.
        setattr(ruby_system, "tcp_cntrl%d" % i, tcp_cntrl)

        # Add controllers and sequencers to the appropriate lists
        tcp_sequencers.append(tcp_cntrl.coalescer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network. The statement
        # order is kept as in the original code.
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.out_port = network.in_port

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.out_port = network.in_port

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.out_port = network.in_port

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.in_port = network.out_port

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.in_port = network.out_port

        # The mandatory queue is not connected to the network here.
        tcp_cntrl.mandatoryQueue = MessageBuffer()

    return (tcp_sequencers, tcp_cntrl_nodes)
|
||||
|
||||
def construct_sqcs(options, system, ruby_system, network):
    """Create ``options.num_sqc`` SQC controllers and connect them to ``network``.

    Args:
        options: Parsed options. Reads ``num_tccs`` and ``num_sqc`` (and is
            forwarded to ``SQCCntrl.create``).
        system: Forwarded unchanged to ``SQCCntrl.create``.
        ruby_system: Forwarded to ``SQCCntrl.create``; each controller is
            also attached to it as the attribute ``sqc_cntrl<i>``.
        network: Object whose ``in_port`` / ``out_port`` the controller's
            message buffers are connected to.

    Returns:
        A tuple ``(sqc_sequencers, sqc_cntrl_nodes)``: the ``sequencer`` of
        every controller, and the controllers themselves, in creation order.
    """
    sqc_sequencers = []
    sqc_cntrl_nodes = []

    # log2 of the TCC count, handed to each controller as
    # TCC_select_num_bits.
    TCC_bits = int(math.log(options.num_tccs, 2))

    for i in range(options.num_sqc):
        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        # Attach the controller under a per-index attribute name; setattr
        # replaces the equivalent exec-built assignment statement.
        setattr(ruby_system, "sqc_cntrl%d" % i, sqc_cntrl)

        # Add controllers and sequencers to the appropriate lists
        sqc_sequencers.append(sqc_cntrl.sequencer)
        sqc_cntrl_nodes.append(sqc_cntrl)

        # Connect the SQC controller to the ruby network. The statement
        # order is kept as in the original code.
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        sqc_cntrl.requestFromSQC.out_port = network.in_port

        sqc_cntrl.probeToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.probeToSQC.in_port = network.out_port

        sqc_cntrl.responseToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.responseToSQC.in_port = network.out_port

        # The mandatory queue is not connected to the network here.
        sqc_cntrl.mandatoryQueue = MessageBuffer()

    return (sqc_sequencers, sqc_cntrl_nodes)
|
||||
|
||||
def construct_scalars(options, system, ruby_system, network):
    """Create the GPU scalar cache controllers and connect them to ``network``.

    The scalar caches are instances of ``SQCCntrl``; they differ from the
    controllers built for the SQCs only in their attribute name on
    ``ruby_system`` and in the size of their mandatory queue.

    Args:
        options: Parsed options. Reads ``num_tccs``, ``num_scalar_cache``
            and ``scalar_buffer_size`` (and is forwarded to
            ``SQCCntrl.create``).
        system: Forwarded unchanged to ``SQCCntrl.create``.
        ruby_system: Forwarded to ``SQCCntrl.create``; each controller is
            also attached to it as the attribute ``scalar_cntrl<i>``.
        network: Object whose ``in_port`` / ``out_port`` the controller's
            message buffers are connected to.

    Returns:
        A tuple ``(scalar_sequencers, scalar_cntrl_nodes)``: the
        ``sequencer`` of every controller, and the controllers themselves,
        in creation order.
    """
    scalar_sequencers = []
    scalar_cntrl_nodes = []

    # log2 of the TCC count, handed to each controller as
    # TCC_select_num_bits.
    TCC_bits = int(math.log(options.num_tccs, 2))

    for i in range(options.num_scalar_cache):
        scalar_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        scalar_cntrl.create(options, ruby_system, system)

        # Attach the controller under a per-index attribute name; setattr
        # replaces the equivalent exec-built assignment statement.
        setattr(ruby_system, "scalar_cntrl%d" % i, scalar_cntrl)

        scalar_sequencers.append(scalar_cntrl.sequencer)
        scalar_cntrl_nodes.append(scalar_cntrl)

        # Connect the scalar cache controller to the network. The statement
        # order is kept as in the original code.
        scalar_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        scalar_cntrl.requestFromSQC.out_port = network.in_port

        scalar_cntrl.probeToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.probeToSQC.in_port = network.out_port

        scalar_cntrl.responseToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.responseToSQC.in_port = network.out_port

        # Unlike the other controllers, the mandatory queue size is taken
        # from the scalar_buffer_size option.
        scalar_cntrl.mandatoryQueue = \
            MessageBuffer(buffer_size=options.scalar_buffer_size)

    return (scalar_sequencers, scalar_cntrl_nodes)
|
||||
|
||||
def construct_cmdprocs(options, system, ruby_system, network):
    """Create the command-processor TCP/SQC controller pairs.

    For each of the ``options.num_cp`` command processors, one ``TCPCntrl``
    (set up through ``createCP``) and one ``SQCCntrl`` are created. Only the
    TCP controller's message buffers are connected to ``network`` here.

    Args:
        options: Parsed options. Reads ``num_tccs``, ``num_cp``,
            ``num_compute_units``, ``num_sqc``, ``WB_L1``, ``noL1`` and
            ``TCP_latency``.
        system: Forwarded unchanged to the controllers' create methods.
        ruby_system: Forwarded to the create methods; controllers are also
            attached to it as ``tcp_cntrl<num_compute_units + i>`` and
            ``sqc_cntrl<num_sqc + i>``.
        network: Object whose ``in_port`` / ``out_port`` the TCP
            controller's message buffers are connected to.

    Returns:
        A tuple ``(cmdproc_sequencers, cmdproc_cntrl_nodes)``. For each
        command processor the TCP entry precedes the SQC entry in both
        lists.
    """
    cmdproc_sequencers = []
    cmdproc_cntrl_nodes = []

    # log2 of the TCC count, handed to each controller as
    # TCC_select_num_bits.
    TCC_bits = int(math.log(options.num_tccs, 2))

    for i in range(options.num_cp):
        # Attribute indices start after num_compute_units / num_sqc so the
        # names do not collide with tcp_cntrl<k> / sqc_cntrl<k> for k below
        # those counts.
        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        # TBEs set to max outstanding requests
        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        tcp_cntrl.createCP(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        # setattr replaces the equivalent exec-built assignment statement.
        setattr(ruby_system, "tcp_cntrl%d" % tcp_ID, tcp_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cmdproc_sequencers.append(tcp_cntrl.sequencer)
        cmdproc_cntrl_nodes.append(tcp_cntrl)

        # Connect the CP (TCP) controllers to the ruby network. The
        # statement order is kept as in the original code.
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.out_port = network.in_port

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.out_port = network.in_port

        tcp_cntrl.unblockFromCore = MessageBuffer(ordered=True)
        tcp_cntrl.unblockFromCore.out_port = network.in_port

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.in_port = network.out_port

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.in_port = network.out_port

        # The mandatory queue is not connected to the network here.
        tcp_cntrl.mandatoryQueue = MessageBuffer()

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % sqc_ID, sqc_cntrl)

        # Add controllers and sequencers to the appropriate lists
        # NOTE(review): no message buffers are created or connected for
        # this SQC controller in this function -- confirm that is intended.
        cmdproc_sequencers.append(sqc_cntrl.sequencer)
        cmdproc_cntrl_nodes.append(sqc_cntrl)

    return (cmdproc_sequencers, cmdproc_cntrl_nodes)
|
||||
|
||||
def construct_tccs(options, system, ruby_system, network):
    """Create ``options.num_tccs`` TCC controllers and connect them to ``network``.

    Args:
        options: Parsed options. Reads ``num_tccs``, ``TCC_latency``,
            ``gpu_to_dir_latency``, ``WB_L2`` and ``num_compute_units``.
        system: Forwarded unchanged to ``TCCCntrl.create``.
        ruby_system: Forwarded to ``TCCCntrl.create``; each controller is
            also attached to it as the attribute ``tcc_cntrl<i>``.
        network: Object whose ``in_port`` / ``out_port`` the controller's
            message buffers are connected to.

    Returns:
        The list of created TCC controllers, in creation order.
    """
    tcc_cntrl_nodes = []

    for i in range(options.num_tccs):
        tcc_cntrl = TCCCntrl(l2_response_latency=options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        # Re-applies the value already passed to the constructor above;
        # kept so the assignment sequence matches the original code.
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl_nodes.append(tcc_cntrl)
        tcc_cntrl.WB = options.WB_L2
        # the number_of_TBEs is inclusive of TBEs below
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units

        # Connect the TCC controllers to the ruby network. The statement
        # order is kept as in the original code.
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcc_cntrl.requestFromTCP.in_port = network.out_port

        tcc_cntrl.responseToCore = MessageBuffer(ordered=True)
        tcc_cntrl.responseToCore.out_port = network.in_port

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.in_port = network.out_port

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.in_port = network.out_port

        tcc_cntrl.requestToNB = MessageBuffer(ordered=True)
        tcc_cntrl.requestToNB.out_port = network.in_port

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.out_port = network.in_port

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.out_port = network.in_port

        # The trigger queue is not connected to the network here.
        tcc_cntrl.triggerQueue = MessageBuffer(ordered=True)

        # Attach the controller under a per-index attribute name; setattr
        # replaces the equivalent exec-built assignment statement.
        setattr(ruby_system, "tcc_cntrl%d" % i, tcc_cntrl)

    return tcc_cntrl_nodes
|
||||
|
||||
def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
ruby_system, cpus):
|
||||
if buildEnv['PROTOCOL'] != 'GPU_VIPER':
|
||||
panic("This script requires the GPU_VIPER protocol to be built.")
|
||||
|
||||
cpu_sequencers = []
|
||||
|
||||
#
|
||||
# Must create the individual controllers before the network to ensure the
|
||||
# controller constructors are called before the network constructor
|
||||
#
|
||||
|
||||
# This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
|
||||
# Clusters
|
||||
crossbar_bw = None
|
||||
mainCluster = None
|
||||
cpuCluster = None
|
||||
gpuCluster = None
|
||||
|
||||
if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
|
||||
#Assuming a 2GHz clock
|
||||
crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
|
||||
mainCluster = Cluster(intBW = crossbar_bw)
|
||||
cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
|
||||
gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
|
||||
else:
|
||||
mainCluster = Cluster(intBW = 8) # 16 GB/s
|
||||
cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
|
||||
gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
|
||||
|
||||
|
||||
# Create CPU directory controllers
|
||||
dir_cntrl_nodes = \
|
||||
construct_dirs(options, system, ruby_system, ruby_system.network)
|
||||
for dir_cntrl in dir_cntrl_nodes:
|
||||
mainCluster.add(dir_cntrl)
|
||||
|
||||
|
||||
# Create CPU core pairs
|
||||
(cp_sequencers, cp_cntrl_nodes) = \
|
||||
construct_corepairs(options, system, ruby_system, ruby_system.network)
|
||||
cpu_sequencers.extend(cp_sequencers)
|
||||
for cp_cntrl in cp_cntrl_nodes:
|
||||
cpuCluster.add(cp_cntrl)
|
||||
|
||||
# Register CPUs and caches for each CorePair and directory (SE mode only)
|
||||
@@ -584,191 +813,38 @@ def create_system(options, full_system, system, dma_devices, bootmem,
|
||||
cpus = [n for n in
|
||||
range(options.num_cpus)])
|
||||
|
||||
gpuCluster = None
|
||||
if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
|
||||
gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
|
||||
else:
|
||||
gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
|
||||
for i in range(options.num_compute_units):
|
||||
|
||||
tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
|
||||
issue_latency = 1,
|
||||
number_of_TBEs = 2560)
|
||||
# TBEs set to max outstanding requests
|
||||
tcp_cntrl.create(options, ruby_system, system)
|
||||
tcp_cntrl.WB = options.WB_L1
|
||||
tcp_cntrl.disableL1 = options.noL1
|
||||
tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
|
||||
tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency
|
||||
|
||||
exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i)
|
||||
#
|
||||
# Add controllers and sequencers to the appropriate lists
|
||||
#
|
||||
cpu_sequencers.append(tcp_cntrl.coalescer)
|
||||
tcp_cntrl_nodes.append(tcp_cntrl)
|
||||
|
||||
# Connect the TCP controller to the ruby network
|
||||
tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.requestFromTCP.out_port = ruby_system.network.in_port
|
||||
|
||||
tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.responseFromTCP.out_port = ruby_system.network.in_port
|
||||
|
||||
tcp_cntrl.unblockFromCore = MessageBuffer()
|
||||
tcp_cntrl.unblockFromCore.out_port = ruby_system.network.in_port
|
||||
|
||||
tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.probeToTCP.in_port = ruby_system.network.out_port
|
||||
|
||||
tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.responseToTCP.in_port = ruby_system.network.out_port
|
||||
|
||||
tcp_cntrl.mandatoryQueue = MessageBuffer()
|
||||
|
||||
# Create TCPs
|
||||
(tcp_sequencers, tcp_cntrl_nodes) = \
|
||||
construct_tcps(options, system, ruby_system, ruby_system.network)
|
||||
cpu_sequencers.extend(tcp_sequencers)
|
||||
for tcp_cntrl in tcp_cntrl_nodes:
|
||||
gpuCluster.add(tcp_cntrl)
|
||||
|
||||
for i in range(options.num_sqc):
|
||||
|
||||
sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
|
||||
sqc_cntrl.create(options, ruby_system, system)
|
||||
|
||||
exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i)
|
||||
#
|
||||
# Add controllers and sequencers to the appropriate lists
|
||||
#
|
||||
cpu_sequencers.append(sqc_cntrl.sequencer)
|
||||
|
||||
# Connect the SQC controller to the ruby network
|
||||
sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
|
||||
sqc_cntrl.requestFromSQC.out_port = ruby_system.network.in_port
|
||||
|
||||
sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
|
||||
sqc_cntrl.probeToSQC.in_port = ruby_system.network.out_port
|
||||
|
||||
sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
|
||||
sqc_cntrl.responseToSQC.in_port = ruby_system.network.out_port
|
||||
|
||||
sqc_cntrl.mandatoryQueue = MessageBuffer()
|
||||
|
||||
# SQC also in GPU cluster
|
||||
# Create SQCs
|
||||
(sqc_sequencers, sqc_cntrl_nodes) = \
|
||||
construct_sqcs(options, system, ruby_system, ruby_system.network)
|
||||
cpu_sequencers.extend(sqc_sequencers)
|
||||
for sqc_cntrl in sqc_cntrl_nodes:
|
||||
gpuCluster.add(sqc_cntrl)
|
||||
|
||||
for i in range(options.num_scalar_cache):
|
||||
scalar_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
|
||||
scalar_cntrl.create(options, ruby_system, system)
|
||||
|
||||
exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i)
|
||||
|
||||
cpu_sequencers.append(scalar_cntrl.sequencer)
|
||||
|
||||
scalar_cntrl.requestFromSQC = MessageBuffer(ordered = True)
|
||||
scalar_cntrl.requestFromSQC.out_port = ruby_system.network.in_port
|
||||
|
||||
scalar_cntrl.probeToSQC = MessageBuffer(ordered = True)
|
||||
scalar_cntrl.probeToSQC.in_port = ruby_system.network.out_port
|
||||
|
||||
scalar_cntrl.responseToSQC = MessageBuffer(ordered = True)
|
||||
scalar_cntrl.responseToSQC.in_port = ruby_system.network.out_port
|
||||
|
||||
scalar_cntrl.mandatoryQueue = \
|
||||
MessageBuffer(buffer_size=options.scalar_buffer_size)
|
||||
|
||||
# Create Scalars
|
||||
(scalar_sequencers, scalar_cntrl_nodes) = \
|
||||
construct_scalars(options, system, ruby_system, ruby_system.network)
|
||||
cpu_sequencers.extend(scalar_sequencers)
|
||||
for scalar_cntrl in scalar_cntrl_nodes:
|
||||
gpuCluster.add(scalar_cntrl)
|
||||
|
||||
for i in range(options.num_cp):
|
||||
# Create command processors
|
||||
(cmdproc_sequencers, cmdproc_cntrl_nodes) = \
|
||||
construct_cmdprocs(options, system, ruby_system, ruby_system.network)
|
||||
cpu_sequencers.extend(cmdproc_sequencers)
|
||||
for cmdproc_cntrl in cmdproc_cntrl_nodes:
|
||||
gpuCluster.add(cmdproc_cntrl)
|
||||
|
||||
tcp_ID = options.num_compute_units + i
|
||||
sqc_ID = options.num_sqc + i
|
||||
|
||||
tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
|
||||
issue_latency = 1,
|
||||
number_of_TBEs = 2560)
|
||||
# TBEs set to max outstanding requests
|
||||
tcp_cntrl.createCP(options, ruby_system, system)
|
||||
tcp_cntrl.WB = options.WB_L1
|
||||
tcp_cntrl.disableL1 = options.noL1
|
||||
tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
|
||||
tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency
|
||||
|
||||
exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID)
|
||||
#
|
||||
# Add controllers and sequencers to the appropriate lists
|
||||
#
|
||||
cpu_sequencers.append(tcp_cntrl.sequencer)
|
||||
tcp_cntrl_nodes.append(tcp_cntrl)
|
||||
|
||||
# Connect the CP (TCP) controllers to the ruby network
|
||||
tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.requestFromTCP.out_port = ruby_system.network.in_port
|
||||
|
||||
tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.responseFromTCP.out_port = ruby_system.network.in_port
|
||||
|
||||
tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.unblockFromCore.out_port = ruby_system.network.in_port
|
||||
|
||||
tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.probeToTCP.in_port = ruby_system.network.out_port
|
||||
|
||||
tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
|
||||
tcp_cntrl.responseToTCP.in_port = ruby_system.network.out_port
|
||||
|
||||
tcp_cntrl.mandatoryQueue = MessageBuffer()
|
||||
|
||||
gpuCluster.add(tcp_cntrl)
|
||||
|
||||
sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
|
||||
sqc_cntrl.create(options, ruby_system, system)
|
||||
|
||||
exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID)
|
||||
#
|
||||
# Add controllers and sequencers to the appropriate lists
|
||||
#
|
||||
cpu_sequencers.append(sqc_cntrl.sequencer)
|
||||
|
||||
# SQC also in GPU cluster
|
||||
gpuCluster.add(sqc_cntrl)
|
||||
|
||||
for i in range(options.num_tccs):
|
||||
|
||||
tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency)
|
||||
tcc_cntrl.create(options, ruby_system, system)
|
||||
tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
|
||||
tcc_cntrl.l2_response_latency = options.TCC_latency
|
||||
tcc_cntrl_nodes.append(tcc_cntrl)
|
||||
tcc_cntrl.WB = options.WB_L2
|
||||
tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
|
||||
# the number_of_TBEs is inclusive of TBEs below
|
||||
|
||||
# Connect the TCC controllers to the ruby network
|
||||
tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
|
||||
tcc_cntrl.requestFromTCP.in_port = ruby_system.network.out_port
|
||||
|
||||
tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
|
||||
tcc_cntrl.responseToCore.out_port = ruby_system.network.in_port
|
||||
|
||||
tcc_cntrl.probeFromNB = MessageBuffer()
|
||||
tcc_cntrl.probeFromNB.in_port = ruby_system.network.out_port
|
||||
|
||||
tcc_cntrl.responseFromNB = MessageBuffer()
|
||||
tcc_cntrl.responseFromNB.in_port = ruby_system.network.out_port
|
||||
|
||||
tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
|
||||
tcc_cntrl.requestToNB.out_port = ruby_system.network.in_port
|
||||
|
||||
tcc_cntrl.responseToNB = MessageBuffer()
|
||||
tcc_cntrl.responseToNB.out_port = ruby_system.network.in_port
|
||||
|
||||
tcc_cntrl.unblockToNB = MessageBuffer()
|
||||
tcc_cntrl.unblockToNB.out_port = ruby_system.network.in_port
|
||||
|
||||
tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)
|
||||
|
||||
exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i)
|
||||
|
||||
# connect all of the wire buffers between L3 and dirs up
|
||||
# TCC cntrls added to the GPU cluster
|
||||
# Create TCCs
|
||||
tcc_cntrl_nodes = \
|
||||
construct_tccs(options, system, ruby_system, ruby_system.network)
|
||||
for tcc_cntrl in tcc_cntrl_nodes:
|
||||
gpuCluster.add(tcc_cntrl)
|
||||
|
||||
for i, dma_device in enumerate(dma_devices):
|
||||
|
||||
Reference in New Issue
Block a user