configs, tests: Replace optparse with argparse
JIRA: https://gem5.atlassian.net/browse/GEM5-543

Change-Id: I997d6a4e45319a74e21bd0d61d4af6118474c849
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/44513
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -37,47 +37,47 @@ import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
from m5.util import addToPath
|
||||
import os, optparse, sys
|
||||
import os, argparse, sys
|
||||
|
||||
m5.util.addToPath('../configs/')
|
||||
|
||||
from ruby import Ruby
|
||||
from common import Options
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
|
||||
# add the gpu specific options expected by the gpu and gpu_RfO
|
||||
parser.add_option("-u", "--num-compute-units", type="int", default=8,
|
||||
help="number of compute units in the GPU")
|
||||
parser.add_option("--num-cp", type="int", default=0,
|
||||
help="Number of GPU Command Processors (CP)")
|
||||
parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
|
||||
"per CU")
|
||||
parser.add_option("--wf-size", type="int", default=64,
|
||||
help="Wavefront size(in workitems)")
|
||||
parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
|
||||
"WF slots per SIMD")
|
||||
parser.add_argument("-u", "--num-compute-units", type=int, default=8,
|
||||
help="number of compute units in the GPU")
|
||||
parser.add_argument("--num-cp", type=int, default=0,
|
||||
help="Number of GPU Command Processors (CP)")
|
||||
parser.add_argument("--simds-per-cu", type=int, default=4, help="SIMD units" \
|
||||
"per CU")
|
||||
parser.add_argument("--wf-size", type=int, default=64,
|
||||
help="Wavefront size(in workitems)")
|
||||
parser.add_argument("--wfs-per-simd", type=int, default=10, help="Number of " \
|
||||
"WF slots per SIMD")
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
Ruby.define_options(parser)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
#
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
#
|
||||
options.l1d_size="256B"
|
||||
options.l1i_size="256B"
|
||||
options.l2_size="512B"
|
||||
options.l3_size="1kB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.l3_assoc=2
|
||||
options.num_compute_units=8
|
||||
options.num_sqc=2
|
||||
args.l1d_size="256B"
|
||||
args.l1i_size="256B"
|
||||
args.l2_size="512B"
|
||||
args.l3_size="1kB"
|
||||
args.l1d_assoc=2
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=2
|
||||
args.l3_assoc=2
|
||||
args.num_compute_units=8
|
||||
args.num_sqc=2
|
||||
|
||||
# Check for the GPU_RfO protocol. Other GPU protocols are non-SC and will
|
||||
# not work with the Ruby random tester.
|
||||
@@ -87,14 +87,14 @@ assert(buildEnv['PROTOCOL'] == 'GPU_RfO')
|
||||
# create the tester and system, including ruby
|
||||
#
|
||||
tester = RubyTester(check_flush = False, checks_to_complete = 100,
|
||||
wakeup_frequency = 10, num_cpus = options.num_cpus)
|
||||
wakeup_frequency = 10, num_cpus = args.num_cpus)
|
||||
|
||||
# We set the testers as cpu for ruby to find the correct clock domains
|
||||
# for the L1 Objects.
|
||||
system = System(cpu = tester)
|
||||
|
||||
# Dummy voltage domain for all our clock domains
|
||||
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
|
||||
system.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
@@ -105,8 +105,8 @@ system.mem_ranges = AddrRange('256MB')
|
||||
# is stored in system.cpu. because there is only ever one
|
||||
# tester object, num_cpus is not necessarily equal to the
|
||||
# size of system.cpu
|
||||
cpu_list = [ system.cpu ] * options.num_cpus
|
||||
Ruby.create_system(options, False, system, cpus=cpu_list)
|
||||
cpu_list = [ system.cpu ] * args.num_cpus
|
||||
Ruby.create_system(args, False, system, cpus=cpu_list)
|
||||
|
||||
# Create a separate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
|
||||
@@ -37,7 +37,7 @@ import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
from m5.util import addToPath
|
||||
import os, optparse, sys, math, glob
|
||||
import os, argparse, sys, math, glob
|
||||
|
||||
m5.util.addToPath('../configs/')
|
||||
|
||||
@@ -45,26 +45,6 @@ from ruby import Ruby
|
||||
from common import Options
|
||||
from common import GPUTLBOptions, GPUTLBConfig
|
||||
|
||||
########################## Script Options ########################
|
||||
def setOption(parser, opt_str, value = 1):
    """Overwrite the stored value of the option named ``opt_str``.

    parser  -- option parser exposing ``has_option``, ``get_option`` and a
               ``values`` object holding one attribute per option dest.
    opt_str -- option string as registered with the parser (e.g. "--foo").
    value   -- value to store; defaults to 1.

    Raises Exception if ``opt_str`` is not a registered option.
    """
    # check to make sure the option actually exists
    if not parser.has_option(opt_str):
        raise Exception("cannot find %s in list of possible options" % opt_str)

    opt = parser.get_option(opt_str)
    # set the value directly on the values object; building the assignment
    # as source text and exec()-ing it would parse `value` as code, which
    # breaks for plain strings such as "512B"
    setattr(parser.values, opt.dest, value)
|
||||
|
||||
def getOption(parser, opt_str):
    """Return the stored value of the option named ``opt_str``.

    parser  -- option parser exposing ``has_option``, ``get_option`` and a
               ``values`` object holding one attribute per option dest.
    opt_str -- option string as registered with the parser (e.g. "--foo").

    Raises Exception if ``opt_str`` is not a registered option.
    """
    # check to make sure the option actually exists
    if not parser.has_option(opt_str):
        raise Exception("cannot find %s in list of possible options" % opt_str)

    opt = parser.get_option(opt_str)
    # get the value; read the attribute directly because a name assigned
    # inside exec() is not visible as a local of the enclosing function
    return getattr(parser.values, opt.dest)
|
||||
|
||||
def run_test(root):
|
||||
"""gpu test requires a specialized run_test implementation to set up the
|
||||
mmio space."""
|
||||
@@ -79,100 +59,129 @@ def run_test(root):
|
||||
exit_event = m5.simulate(maxtick)
|
||||
print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
Options.addSEOptions(parser)
|
||||
|
||||
parser.add_option("-k", "--kernel-files",
|
||||
help="file(s) containing GPU kernel code (colon separated)")
|
||||
parser.add_option("-u", "--num-compute-units", type="int", default=2,
|
||||
help="number of GPU compute units"),
|
||||
parser.add_option("--num-cp", type="int", default=0,
|
||||
help="Number of GPU Command Processors (CP)")
|
||||
parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
|
||||
"per CU")
|
||||
parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \
|
||||
"sharing an SQC (icache, and thus icache TLB)")
|
||||
parser.add_option("--wf-size", type="int", default=64,
|
||||
help="Wavefront size(in workitems)")
|
||||
parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \
|
||||
"WF slots per SIMD")
|
||||
parser.add_option("--sp-bypass-path-length", type="int", default=4, \
|
||||
help="Number of stages of bypass path in vector ALU for Single "\
|
||||
"Precision ops")
|
||||
parser.add_option("--dp-bypass-path-length", type="int", default=4, \
|
||||
help="Number of stages of bypass path in vector ALU for Double "\
|
||||
"Precision ops")
|
||||
parser.add_option("--issue-period", type="int", default=4, \
|
||||
help="Number of cycles per vector instruction issue period")
|
||||
parser.add_option("--glbmem-wr-bus-width", type="int", default=32, \
|
||||
help="VGPR to Coalescer (Global Memory) data bus width in bytes")
|
||||
parser.add_option("--glbmem-rd-bus-width", type="int", default=32, \
|
||||
help="Coalescer to VGPR (Global Memory) data bus width in bytes")
|
||||
parser.add_option("--shr-mem-pipes-per-cu", type="int", default=1, \
|
||||
help="Number of Shared Memory pipelines per CU")
|
||||
parser.add_option("--glb-mem-pipes-per-cu", type="int", default=1, \
|
||||
help="Number of Global Memory pipelines per CU")
|
||||
parser.add_option("--vreg-file-size", type="int", default=2048,
|
||||
help="number of physical vector registers per SIMD")
|
||||
parser.add_option("--bw-scalor", type="int", default=0,
|
||||
help="bandwidth scalor for scalability analysis")
|
||||
parser.add_option("--CPUClock", type="string", default="2GHz",
|
||||
help="CPU clock")
|
||||
parser.add_option("--GPUClock", type="string", default="1GHz",
|
||||
help="GPU clock")
|
||||
parser.add_option("--cpu-voltage", action="store", type="string",
|
||||
default='1.0V',
|
||||
help = """CPU voltage domain""")
|
||||
parser.add_option("--gpu-voltage", action="store", type="string",
|
||||
default='1.0V',
|
||||
help = """CPU voltage domain""")
|
||||
parser.add_option("--CUExecPolicy", type="string", default="OLDEST-FIRST",
|
||||
help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
|
||||
parser.add_option("--xact-cas-mode", action="store_true",
|
||||
help="enable load_compare mode (transactional CAS)")
|
||||
parser.add_option("--SegFaultDebug",action="store_true",
|
||||
help="checks for GPU seg fault before TLB access")
|
||||
parser.add_option("--LocalMemBarrier",action="store_true",
|
||||
help="Barrier does not wait for writethroughs to complete")
|
||||
parser.add_option("--countPages", action="store_true",
|
||||
help="Count Page Accesses and output in per-CU output files")
|
||||
parser.add_option("--TLB-prefetch", type="int", help = "prefetch depth for"\
|
||||
"TLBs")
|
||||
parser.add_option("--pf-type", type="string", help="type of prefetch: "\
|
||||
"PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
|
||||
parser.add_option("--pf-stride", type="int", help="set prefetch stride")
|
||||
parser.add_option("--numLdsBanks", type="int", default=32,
|
||||
help="number of physical banks per LDS module")
|
||||
parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
|
||||
help="number of cycles per LDS bank conflict")
|
||||
parser.add_argument(
|
||||
"-k", "--kernel-files",
|
||||
help="file(s) containing GPU kernel code (colon separated)")
|
||||
parser.add_argument(
|
||||
"-u", "--num-compute-units", type=int, default=2,
|
||||
help="number of GPU compute units"),
|
||||
parser.add_argument(
|
||||
"--num-cp", type=int, default=0,
|
||||
help="Number of GPU Command Processors (CP)")
|
||||
parser.add_argument(
|
||||
"--simds-per-cu", type=int, default=4, help="SIMD units" \
|
||||
"per CU")
|
||||
parser.add_argument(
|
||||
"--cu-per-sqc", type=int, default=4, help="number of CUs" \
|
||||
"sharing an SQC (icache, and thus icache TLB)")
|
||||
parser.add_argument(
|
||||
"--wf-size", type=int, default=64,
|
||||
help="Wavefront size(in workitems)")
|
||||
parser.add_argument(
|
||||
"--wfs-per-simd", type=int, default=8, help="Number of " \
|
||||
"WF slots per SIMD")
|
||||
parser.add_argument(
|
||||
"--sp-bypass-path-length", type=int, default=4,
|
||||
help="Number of stages of bypass path in vector ALU for Single "
|
||||
"Precision ops")
|
||||
parser.add_argument(
|
||||
"--dp-bypass-path-length", type=int, default=4,
|
||||
help="Number of stages of bypass path in vector ALU for Double "
|
||||
"Precision ops")
|
||||
parser.add_argument(
|
||||
"--issue-period", type=int, default=4,
|
||||
help="Number of cycles per vector instruction issue period")
|
||||
parser.add_argument(
|
||||
"--glbmem-wr-bus-width", type=int, default=32,
|
||||
help="VGPR to Coalescer (Global Memory) data bus width in bytes")
|
||||
parser.add_argument(
|
||||
"--glbmem-rd-bus-width", type=int, default=32,
|
||||
help="Coalescer to VGPR (Global Memory) data bus width in bytes")
|
||||
parser.add_argument(
|
||||
"--shr-mem-pipes-per-cu", type=int, default=1, \
|
||||
help="Number of Shared Memory pipelines per CU")
|
||||
parser.add_argument(
|
||||
"--glb-mem-pipes-per-cu", type=int, default=1, \
|
||||
help="Number of Global Memory pipelines per CU")
|
||||
parser.add_argument(
|
||||
"--vreg-file-size", type=int, default=2048,
|
||||
help="number of physical vector registers per SIMD")
|
||||
parser.add_argument(
|
||||
"--bw-scalor", type=int, default=0,
|
||||
help="bandwidth scalor for scalability analysis")
|
||||
parser.add_argument(
|
||||
"--CPUClock", type=str, default="2GHz",
|
||||
help="CPU clock")
|
||||
parser.add_argument(
|
||||
"--GPUClock", type=str, default="1GHz",
|
||||
help="GPU clock")
|
||||
parser.add_argument(
|
||||
"--cpu-voltage", action="store", type=str,
|
||||
default='1.0V',
|
||||
help = """CPU voltage domain""")
|
||||
parser.add_argument(
|
||||
"--gpu-voltage", action="store", type=str,
|
||||
default='1.0V',
|
||||
help = """CPU voltage domain""")
|
||||
parser.add_argument(
|
||||
"--CUExecPolicy", type=str, default="OLDEST-FIRST",
|
||||
help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
|
||||
parser.add_argument(
|
||||
"--xact-cas-mode", action="store_true",
|
||||
help="enable load_compare mode (transactional CAS)")
|
||||
parser.add_argument(
|
||||
"--SegFaultDebug",action="store_true",
|
||||
help="checks for GPU seg fault before TLB access")
|
||||
parser.add_argument(
|
||||
"--LocalMemBarrier",action="store_true",
|
||||
help="Barrier does not wait for writethroughs to complete")
|
||||
parser.add_argument(
|
||||
"--countPages", action="store_true",
|
||||
help="Count Page Accesses and output in per-CU output files")
|
||||
parser.add_argument(
|
||||
"--TLB-prefetch", type=int, help = "prefetch depth for"\
|
||||
"TLBs")
|
||||
parser.add_argument(
|
||||
"--pf-type", type=str, help="type of prefetch: "\
|
||||
"PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
|
||||
parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
|
||||
parser.add_argument(
|
||||
"--numLdsBanks", type=int, default=32,
|
||||
help="number of physical banks per LDS module")
|
||||
parser.add_argument(
|
||||
"--ldsBankConflictPenalty", type=int, default=1,
|
||||
help="number of cycles per LDS bank conflict")
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
Ruby.define_options(parser)
|
||||
|
||||
GPUTLBOptions.tlb_options(parser)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
# The GPU cache coherence protocols only work with the backing store
|
||||
setOption(parser, "--access-backing-store")
|
||||
args.access_backing_store = True
|
||||
|
||||
# Currently, the sqc (I-Cache of GPU) is shared by
|
||||
# multiple compute units(CUs). The protocol works just fine
|
||||
# even if sqc is not shared. Overriding this option here
|
||||
# so that the user need not explicitly set this (assuming
|
||||
# sharing sqc is the common usage)
|
||||
n_cu = options.num_compute_units
|
||||
num_sqc = int(math.ceil(float(n_cu) / options.cu_per_sqc))
|
||||
options.num_sqc = num_sqc # pass this to Ruby
|
||||
n_cu = args.num_compute_units
|
||||
num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
|
||||
args.num_sqc = num_sqc # pass this to Ruby
|
||||
|
||||
########################## Creating the GPU system ########################
|
||||
# shader is the GPU
|
||||
shader = Shader(n_wf = options.wfs_per_simd,
|
||||
shader = Shader(n_wf = args.wfs_per_simd,
|
||||
clk_domain = SrcClockDomain(
|
||||
clock = options.GPUClock,
|
||||
clock = args.GPUClock,
|
||||
voltage_domain = VoltageDomain(
|
||||
voltage = options.gpu_voltage)),
|
||||
voltage = args.gpu_voltage)),
|
||||
timing = True)
|
||||
|
||||
# GPU_RfO(Read For Ownership) implements SC/TSO memory model.
|
||||
@@ -190,51 +199,51 @@ else:
|
||||
|
||||
# Switching off per-lane TLB by default
|
||||
per_lane = False
|
||||
if options.TLB_config == "perLane":
|
||||
if args.TLB_config == "perLane":
|
||||
per_lane = True
|
||||
|
||||
# List of compute units; one GPU can have multiple compute units
|
||||
compute_units = []
|
||||
for i in range(n_cu):
|
||||
compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
|
||||
num_SIMDs = options.simds_per_cu,
|
||||
wfSize = options.wf_size,
|
||||
num_SIMDs = args.simds_per_cu,
|
||||
wfSize = args.wf_size,
|
||||
spbypass_pipe_length = \
|
||||
options.sp_bypass_path_length,
|
||||
args.sp_bypass_path_length,
|
||||
dpbypass_pipe_length = \
|
||||
options.dp_bypass_path_length,
|
||||
issue_period = options.issue_period,
|
||||
args.dp_bypass_path_length,
|
||||
issue_period = args.issue_period,
|
||||
coalescer_to_vrf_bus_width = \
|
||||
options.glbmem_rd_bus_width,
|
||||
args.glbmem_rd_bus_width,
|
||||
vrf_to_coalescer_bus_width = \
|
||||
options.glbmem_wr_bus_width,
|
||||
args.glbmem_wr_bus_width,
|
||||
num_global_mem_pipes = \
|
||||
options.glb_mem_pipes_per_cu,
|
||||
args.glb_mem_pipes_per_cu,
|
||||
num_shared_mem_pipes = \
|
||||
options.shr_mem_pipes_per_cu,
|
||||
n_wf = options.wfs_per_simd,
|
||||
execPolicy = options.CUExecPolicy,
|
||||
xactCasMode = options.xact_cas_mode,
|
||||
debugSegFault = options.SegFaultDebug,
|
||||
args.shr_mem_pipes_per_cu,
|
||||
n_wf = args.wfs_per_simd,
|
||||
execPolicy = args.CUExecPolicy,
|
||||
xactCasMode = args.xact_cas_mode,
|
||||
debugSegFault = args.SegFaultDebug,
|
||||
functionalTLB = True,
|
||||
localMemBarrier = options.LocalMemBarrier,
|
||||
countPages = options.countPages,
|
||||
localMemBarrier = args.LocalMemBarrier,
|
||||
countPages = args.countPages,
|
||||
localDataStore = \
|
||||
LdsState(banks = options.numLdsBanks,
|
||||
LdsState(banks = args.numLdsBanks,
|
||||
bankConflictPenalty = \
|
||||
options.ldsBankConflictPenalty)))
|
||||
args.ldsBankConflictPenalty)))
|
||||
wavefronts = []
|
||||
vrfs = []
|
||||
for j in range(options.simds_per_cu):
|
||||
for j in range(args.simds_per_cu):
|
||||
for k in range(int(shader.n_wf)):
|
||||
wavefronts.append(Wavefront(simdId = j, wf_slot_id = k))
|
||||
vrfs.append(VectorRegisterFile(simd_id=j,
|
||||
num_regs_per_simd=options.vreg_file_size))
|
||||
num_regs_per_simd=args.vreg_file_size))
|
||||
compute_units[-1].wavefronts = wavefronts
|
||||
compute_units[-1].vector_register_file = vrfs
|
||||
if options.TLB_prefetch:
|
||||
compute_units[-1].prefetch_depth = options.TLB_prefetch
|
||||
compute_units[-1].prefetch_prev_type = options.pf_type
|
||||
if args.TLB_prefetch:
|
||||
compute_units[-1].prefetch_depth = args.TLB_prefetch
|
||||
compute_units[-1].prefetch_prev_type = args.pf_type
|
||||
|
||||
# attach the LDS and the CU to the bus (actually a Bridge)
|
||||
compute_units[-1].ldsPort = compute_units[-1].ldsBus.slave
|
||||
@@ -245,7 +254,7 @@ shader.CUs = compute_units
|
||||
|
||||
# this is a uniprocessor only test, thus the shader is the second index in the
|
||||
# list of "system.cpus"
|
||||
options.num_cpus = 1
|
||||
args.num_cpus = 1
|
||||
shader_idx = 1
|
||||
cpu = TimingSimpleCPU(cpu_id=0)
|
||||
|
||||
@@ -258,12 +267,12 @@ dispatcher = GpuDispatcher()
|
||||
cpu_list = [cpu] + [shader] + [dispatcher]
|
||||
|
||||
system = System(cpu = cpu_list,
|
||||
mem_ranges = [AddrRange(options.mem_size)],
|
||||
mem_ranges = [AddrRange(args.mem_size)],
|
||||
mem_mode = 'timing',
|
||||
workload = SEWorkload())
|
||||
|
||||
# Dummy voltage domain for all our clock domains
|
||||
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
|
||||
system.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
@@ -274,15 +283,15 @@ system.cpu[0].clk_domain = SrcClockDomain(clock = '2GHz',
|
||||
system.voltage_domain)
|
||||
|
||||
# configure the TLB hierarchy
|
||||
GPUTLBConfig.config_tlb_hierarchy(options, system, shader_idx)
|
||||
GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)
|
||||
|
||||
# create Ruby system
|
||||
system.piobus = IOXBar(width=32, response_latency=0,
|
||||
frontend_latency=0, forward_latency=0)
|
||||
Ruby.create_system(options, None, system)
|
||||
Ruby.create_system(args, None, system)
|
||||
|
||||
# Create a separate clock for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
# create the interrupt controller
|
||||
@@ -303,10 +312,10 @@ system.ruby._cpu_ports[0].mem_master_port = system.piobus.slave
|
||||
# per compute unit and one sequencer per SQC for the math to work out
|
||||
# correctly.
|
||||
gpu_port_idx = len(system.ruby._cpu_ports) \
|
||||
- options.num_compute_units - options.num_sqc
|
||||
gpu_port_idx = gpu_port_idx - options.num_cp * 2
|
||||
- args.num_compute_units - args.num_sqc
|
||||
gpu_port_idx = gpu_port_idx - args.num_cp * 2
|
||||
|
||||
wavefront_size = options.wf_size
|
||||
wavefront_size = args.wf_size
|
||||
for i in range(n_cu):
|
||||
# The pipeline issues wavefront_size number of uncoalesced requests
|
||||
# in one GPU issue cycle. Hence wavefront_size mem ports.
|
||||
@@ -316,14 +325,14 @@ for i in range(n_cu):
|
||||
gpu_port_idx += 1
|
||||
|
||||
for i in range(n_cu):
|
||||
if i > 0 and not i % options.cu_per_sqc:
|
||||
if i > 0 and not i % args.cu_per_sqc:
|
||||
gpu_port_idx += 1
|
||||
system.cpu[shader_idx].CUs[i].sqc_port = \
|
||||
system.ruby._cpu_ports[gpu_port_idx].slave
|
||||
gpu_port_idx = gpu_port_idx + 1
|
||||
|
||||
# Current regression tests do not support the command processor
|
||||
assert(options.num_cp == 0)
|
||||
assert(args.num_cp == 0)
|
||||
|
||||
# connect dispatcher to the system.piobus
|
||||
dispatcher.pio = system.piobus.master
|
||||
|
||||
@@ -29,34 +29,34 @@ import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
from m5.util import addToPath
|
||||
import os, optparse, sys
|
||||
import os, argparse, sys
|
||||
|
||||
m5.util.addToPath('../configs/')
|
||||
|
||||
from ruby import Ruby
|
||||
from common import Options
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
Ruby.define_options(parser)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
#
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
#
|
||||
options.l1d_size="256B"
|
||||
options.l1i_size="256B"
|
||||
options.l2_size="512B"
|
||||
options.l3_size="1kB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.l3_assoc=2
|
||||
options.ports=32
|
||||
args.l1d_size="256B"
|
||||
args.l1i_size="256B"
|
||||
args.l2_size="512B"
|
||||
args.l3_size="1kB"
|
||||
args.l1d_assoc=2
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=2
|
||||
args.l3_assoc=2
|
||||
args.ports=32
|
||||
|
||||
# MAX CORES IS 8 with the false sharing method
|
||||
nb_cores = 8
|
||||
@@ -66,8 +66,8 @@ cpus = [ MemTest(percent_functional=50,
|
||||
percent_uncacheable=0, suppress_func_errors=True) \
|
||||
for i in range(nb_cores) ]
|
||||
|
||||
# overwrite options.num_cpus with the nb_cores value
|
||||
options.num_cpus = nb_cores
|
||||
# overwrite args.num_cpus with the nb_cores value
|
||||
args.num_cpus = nb_cores
|
||||
|
||||
# system simulated
|
||||
system = System(cpu = cpus)
|
||||
@@ -87,10 +87,10 @@ for cpu in cpus:
|
||||
|
||||
system.mem_ranges = AddrRange('256MB')
|
||||
|
||||
Ruby.create_system(options, False, system)
|
||||
Ruby.create_system(args, False, system)
|
||||
|
||||
# Create a separate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
assert(len(cpus) == len(system.ruby._cpu_ports))
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import m5, os, optparse, sys
|
||||
import m5, os, argparse, sys
|
||||
from m5.objects import *
|
||||
m5.util.addToPath('../configs/')
|
||||
from common.Benchmarks import SysConfig
|
||||
@@ -33,28 +33,28 @@ from ruby import Ruby
|
||||
from common import Options
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
Ruby.define_options(parser)
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
options.l1d_size="32kB"
|
||||
options.l1i_size="32kB"
|
||||
options.l2_size="4MB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.num_cpus = 2
|
||||
args.l1d_size="32kB"
|
||||
args.l1i_size="32kB"
|
||||
args.l2_size="4MB"
|
||||
args.l1d_assoc=2
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=2
|
||||
args.num_cpus = 2
|
||||
|
||||
#the system
|
||||
mdesc = SysConfig(disks = ['linux-x86.img'])
|
||||
system = FSConfig.makeLinuxX86System('timing', options.num_cpus,
|
||||
system = FSConfig.makeLinuxX86System('timing', args.num_cpus,
|
||||
mdesc=mdesc, Ruby=True)
|
||||
system.kernel = SysPaths.binary('x86_64-vmlinux-2.6.22.9')
|
||||
# Dummy voltage domain for all our clock domains
|
||||
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
|
||||
|
||||
system.kernel = FSConfig.binary('x86_64-vmlinux-2.6.22.9.smp')
|
||||
system.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
@@ -62,12 +62,12 @@ system.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
system.cpu_clk_domain = SrcClockDomain(clock = '2GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
system.cpu = [TimingSimpleCPU(cpu_id=i, clk_domain = system.cpu_clk_domain)
|
||||
for i in range(options.num_cpus)]
|
||||
for i in range(args.num_cpus)]
|
||||
|
||||
Ruby.create_system(options, True, system, system.iobus, system._dma_ports)
|
||||
Ruby.create_system(args, True, system, system.iobus, system._dma_ports)
|
||||
|
||||
# Create a separate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
# Connect the ruby io port to the PIO bus,
|
||||
|
||||
@@ -29,34 +29,34 @@ import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
from m5.util import addToPath
|
||||
import os, optparse, sys
|
||||
import os, argparse, sys
|
||||
|
||||
m5.util.addToPath('../configs/')
|
||||
|
||||
from ruby import Ruby
|
||||
from common import Options
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addNoISAOptions(parser)
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
Ruby.define_options(parser)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
#
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
#
|
||||
options.l1d_size="256B"
|
||||
options.l1i_size="256B"
|
||||
options.l2_size="512B"
|
||||
options.l3_size="1kB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.l3_assoc=2
|
||||
options.ports=32
|
||||
args.l1d_size="256B"
|
||||
args.l1i_size="256B"
|
||||
args.l2_size="512B"
|
||||
args.l3_size="1kB"
|
||||
args.l1d_assoc=2
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=2
|
||||
args.l3_assoc=2
|
||||
args.ports=32
|
||||
|
||||
# Turn on flush check for the hammer protocol
|
||||
check_flush = False
|
||||
@@ -67,14 +67,14 @@ if buildEnv['PROTOCOL'] == 'MOESI_hammer':
|
||||
# create the tester and system, including ruby
|
||||
#
|
||||
tester = RubyTester(check_flush = check_flush, checks_to_complete = 100,
|
||||
wakeup_frequency = 10, num_cpus = options.num_cpus)
|
||||
wakeup_frequency = 10, num_cpus = args.num_cpus)
|
||||
|
||||
# We set the testers as cpu for ruby to find the correct clock domains
|
||||
# for the L1 Objects.
|
||||
system = System(cpu = tester)
|
||||
|
||||
# Dummy voltage domain for all our clock domains
|
||||
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
|
||||
system.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
@@ -85,14 +85,14 @@ system.mem_ranges = AddrRange('256MB')
|
||||
# is stored in system.cpu. because there is only ever one
|
||||
# tester object, num_cpus is not necessarily equal to the
|
||||
# size of system.cpu
|
||||
cpu_list = [ system.cpu ] * options.num_cpus
|
||||
Ruby.create_system(options, False, system, cpus=cpu_list)
|
||||
cpu_list = [ system.cpu ] * args.num_cpus
|
||||
Ruby.create_system(args, False, system, cpus=cpu_list)
|
||||
|
||||
# Create a separate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
assert(options.num_cpus == len(system.ruby._cpu_ports))
|
||||
assert(args.num_cpus == len(system.ruby._cpu_ports))
|
||||
|
||||
tester.num_cpus = len(system.ruby._cpu_ports)
|
||||
|
||||
|
||||
@@ -28,39 +28,39 @@ import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
from m5.util import addToPath
|
||||
import os, optparse, sys
|
||||
import os, argparse, sys
|
||||
|
||||
m5.util.addToPath('../configs/')
|
||||
|
||||
from common import Options
|
||||
from ruby import Ruby
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
Ruby.define_options(parser)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
#
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
#
|
||||
options.l1d_size="256B"
|
||||
options.l1i_size="256B"
|
||||
options.l2_size="512B"
|
||||
options.l3_size="1kB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.l3_assoc=2
|
||||
args.l1d_size="256B"
|
||||
args.l1i_size="256B"
|
||||
args.l2_size="512B"
|
||||
args.l3_size="1kB"
|
||||
args.l1d_assoc=2
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=2
|
||||
args.l3_assoc=2
|
||||
|
||||
nb_cores = 4
|
||||
cpus = [ TimingSimpleCPU(cpu_id=i) for i in range(nb_cores) ]
|
||||
|
||||
# overwrite the num_cpus to equal nb_cores
|
||||
options.num_cpus = nb_cores
|
||||
args.num_cpus = nb_cores
|
||||
|
||||
# system simulated
|
||||
system = System(cpu = cpus, clk_domain = SrcClockDomain(clock = '1GHz'))
|
||||
@@ -69,12 +69,12 @@ system = System(cpu = cpus, clk_domain = SrcClockDomain(clock = '1GHz'))
|
||||
# CPUs frequency
|
||||
system.cpu.clk_domain = SrcClockDomain(clock = '2GHz')
|
||||
|
||||
Ruby.create_system(options, False, system)
|
||||
Ruby.create_system(args, False, system)
|
||||
|
||||
# Create a separate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock)
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock)
|
||||
|
||||
assert(options.num_cpus == len(system.ruby._cpu_ports))
|
||||
assert(args.num_cpus == len(system.ruby._cpu_ports))
|
||||
|
||||
for (i, cpu) in enumerate(system.cpu):
|
||||
# create the interrupt controller
|
||||
|
||||
@@ -28,41 +28,41 @@ import m5
|
||||
from m5.objects import *
|
||||
from m5.defines import buildEnv
|
||||
from m5.util import addToPath
|
||||
import os, optparse, sys
|
||||
import os, argparse, sys
|
||||
|
||||
m5.util.addToPath('../configs/')
|
||||
|
||||
from ruby import Ruby
|
||||
from common import Options
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
|
||||
# Add the ruby specific and protocol specific options
|
||||
Ruby.define_options(parser)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
#
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
#
|
||||
options.l1d_size="256B"
|
||||
options.l1i_size="256B"
|
||||
options.l2_size="512B"
|
||||
options.l3_size="1kB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.l3_assoc=2
|
||||
args.l1d_size="256B"
|
||||
args.l1i_size="256B"
|
||||
args.l2_size="512B"
|
||||
args.l3_size="1kB"
|
||||
args.l1d_assoc=2
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=2
|
||||
args.l3_assoc=2
|
||||
|
||||
# this is a uniprocessor only test
|
||||
options.num_cpus = 1
|
||||
args.num_cpus = 1
|
||||
cpu = TimingSimpleCPU(cpu_id=0)
|
||||
system = System(cpu = cpu)
|
||||
|
||||
# Dummy voltage domain for all our clock domains
|
||||
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
|
||||
system.clk_domain = SrcClockDomain(clock = '1GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
@@ -72,10 +72,10 @@ system.cpu.clk_domain = SrcClockDomain(clock = '2GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
system.mem_ranges = AddrRange('256MB')
|
||||
Ruby.create_system(options, False, system)
|
||||
Ruby.create_system(args, False, system)
|
||||
|
||||
# Create a separate clock for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
|
||||
system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
assert(len(system.ruby._cpu_ports) == 1)
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
import optparse
|
||||
import argparse
|
||||
import m5
|
||||
from m5.objects import *
|
||||
from m5.proxy import *
|
||||
@@ -164,29 +164,29 @@ class BaseSystem(object, metaclass=ABCMeta):
|
||||
|
||||
if self.use_ruby:
|
||||
# Add the ruby specific and protocol specific options
|
||||
parser = optparse.OptionParser()
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
Ruby.define_options(parser)
|
||||
(options, args) = parser.parse_args()
|
||||
args, extra = parser.parse_known_args()
|
||||
|
||||
# Set the default cache size and associativity to be very
|
||||
# small to encourage races between requests and writebacks.
|
||||
options.l1d_size="32kB"
|
||||
options.l1i_size="32kB"
|
||||
options.l2_size="4MB"
|
||||
options.l1d_assoc=4
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=8
|
||||
options.num_cpus = self.num_cpus
|
||||
options.num_dirs = 2
|
||||
args.l1d_size="32kB"
|
||||
args.l1i_size="32kB"
|
||||
args.l2_size="4MB"
|
||||
args.l1d_assoc=4
|
||||
args.l1i_assoc=2
|
||||
args.l2_assoc=8
|
||||
args.num_cpus = self.num_cpus
|
||||
args.num_dirs = 2
|
||||
|
||||
bootmem = getattr(system, '_bootmem', None)
|
||||
Ruby.create_system(options, True, system, system.iobus,
|
||||
Ruby.create_system(args, True, system, system.iobus,
|
||||
system._dma_ports, bootmem)
|
||||
|
||||
# Create a separate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(
|
||||
clock = options.ruby_clock,
|
||||
clock = args.ruby_clock,
|
||||
voltage_domain = system.voltage_domain)
|
||||
for i, cpu in enumerate(system.cpu):
|
||||
if not cpu.switched_out:
|
||||
|
||||
@@ -43,7 +43,6 @@ parser.add_argument('--cpu-type', choices=['atomic', 'kvm', 'o3', 'simple',])
|
||||
parser.add_argument('--num-cpus', type=int)
|
||||
parser.add_argument('--boot-type', choices=['init', 'systemd',])
|
||||
|
||||
#(options, args) = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
|
||||
# create the system we are going to simulate
|
||||
|
||||
Reference in New Issue
Block a user