configs, tests: Replace optparse with argparse

JIRA: https://gem5.atlassian.net/browse/GEM5-543

Change-Id: I997d6a4e45319a74e21bd0d61d4af6118474c849
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/44513
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
2021-04-16 17:42:34 +01:00
parent 09b9512acd
commit a2c9213a31
43 changed files with 1940 additions and 1953 deletions
--- a/tests/configs/gpu-randomtest-ruby.py
+++ b/tests/configs/gpu-randomtest-ruby.py
@@ -37,47 +37,47 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys

 m5.util.addToPath('../configs/')

 from ruby import Ruby
 from common import Options

-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)

 # add the gpu specific options expected by the the gpu and gpu_RfO
-parser.add_option("-u", "--num-compute-units", type="int", default=8,
-                  help="number of compute units in the GPU")
-parser.add_option("--num-cp", type="int", default=0,
-                  help="Number of GPU Command Processors (CP)")
-parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
-                  "per CU")
-parser.add_option("--wf-size", type="int", default=64,
-                  help="Wavefront size(in workitems)")
-parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
-                  "WF slots per SIMD")
+parser.add_argument("-u", "--num-compute-units", type=int, default=8,
+                    help="number of compute units in the GPU")
+parser.add_argument("--num-cp", type=int, default=0,
+                    help="Number of GPU Command Processors (CP)")
+parser.add_argument("--simds-per-cu", type=int, default=4, help="SIMD units" \
+                    "per CU")
+parser.add_argument("--wf-size", type=int, default=64,
+                    help="Wavefront size(in workitems)")
+parser.add_argument("--wfs-per-simd", type=int, default=10, help="Number of " \
+                    "WF slots per SIMD")

 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)

-(options, args) = parser.parse_args()
+args = parser.parse_args()

 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-options.num_compute_units=8
-options.num_sqc=2
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
+args.num_compute_units=8
+args.num_sqc=2

 # Check to for the GPU_RfO protocol.  Other GPU protocols are non-SC and will
 # not work with the Ruby random tester.
@@ -87,14 +87,14 @@ assert(buildEnv['PROTOCOL'] == 'GPU_RfO')
 # create the tester and system, including ruby
 #
 tester = RubyTester(check_flush = False, checks_to_complete = 100,
-                    wakeup_frequency = 10, num_cpus = options.num_cpus)
+                    wakeup_frequency = 10, num_cpus = args.num_cpus)

 # We set the testers as cpu for ruby to find the correct clock domains
 # for the L1 Objects.
 system = System(cpu = tester)

 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                   voltage_domain = system.voltage_domain)

@@ -105,8 +105,8 @@ system.mem_ranges = AddrRange('256MB')
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options, False, system, cpus=cpu_list)
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, False, system, cpus=cpu_list)

 # Create a separate clock domain for Ruby
 system.ruby.clk_domain = SrcClockDomain(clock = '1GHz',
--- a/tests/configs/gpu-ruby.py
+++ b/tests/configs/gpu-ruby.py
@@ -37,7 +37,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys, math, glob
+import os, argparse, sys, math, glob

 m5.util.addToPath('../configs/')

@@ -45,26 +45,6 @@ from ruby import Ruby
 from common import Options
 from common import GPUTLBOptions, GPUTLBConfig

-########################## Script Options ########################
-def setOption(parser, opt_str, value = 1):
-    # check to make sure the option actually exists
-    if not parser.has_option(opt_str):
-        raise Exception("cannot find %s in list of possible options" % opt_str)
-
-    opt = parser.get_option(opt_str)
-    # set the value
-    exec("parser.values.%s = %s" % (opt.dest, value))
-
-def getOption(parser, opt_str):
-    # check to make sure the option actually exists
-    if not parser.has_option(opt_str):
-        raise Exception("cannot find %s in list of possible options" % opt_str)
-
-    opt = parser.get_option(opt_str)
-    # get the value
-    exec("return_value = parser.values.%s" % opt.dest)
-    return return_value
-
 def run_test(root):
    """gpu test requires a specialized run_test implementation to set up the
    mmio space."""
@@ -79,100 +59,129 @@ def run_test(root):
    exit_event = m5.simulate(maxtick)
    print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())

-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Options.addSEOptions(parser)

-parser.add_option("-k", "--kernel-files",
-                  help="file(s) containing GPU kernel code (colon separated)")
-parser.add_option("-u", "--num-compute-units", type="int", default=2,
-                  help="number of GPU compute units"),
-parser.add_option("--num-cp", type="int", default=0,
-                  help="Number of GPU Command Processors (CP)")
-parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
-                  "per CU")
-parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \
-                  "sharing an SQC (icache, and thus icache TLB)")
-parser.add_option("--wf-size", type="int", default=64,
-                  help="Wavefront size(in workitems)")
-parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \
-                  "WF slots per SIMD")
-parser.add_option("--sp-bypass-path-length", type="int", default=4, \
-                  help="Number of stages of bypass path in vector ALU for Single "\
-                  "Precision ops")
-parser.add_option("--dp-bypass-path-length", type="int", default=4, \
-                  help="Number of stages of bypass path in vector ALU for Double "\
-                  "Precision ops")
-parser.add_option("--issue-period", type="int", default=4, \
-                  help="Number of cycles per vector instruction issue period")
-parser.add_option("--glbmem-wr-bus-width", type="int", default=32, \
-                  help="VGPR to Coalescer (Global Memory) data bus width in bytes")
-parser.add_option("--glbmem-rd-bus-width", type="int", default=32, \
-                  help="Coalescer to VGPR (Global Memory) data bus width in bytes")
-parser.add_option("--shr-mem-pipes-per-cu", type="int", default=1, \
-                  help="Number of Shared Memory pipelines per CU")
-parser.add_option("--glb-mem-pipes-per-cu", type="int", default=1, \
-                  help="Number of Global Memory pipelines per CU")
-parser.add_option("--vreg-file-size", type="int", default=2048,
-                  help="number of physical vector registers per SIMD")
-parser.add_option("--bw-scalor", type="int", default=0,
-                  help="bandwidth scalor for scalability analysis")
-parser.add_option("--CPUClock", type="string", default="2GHz",
-                  help="CPU clock")
-parser.add_option("--GPUClock", type="string", default="1GHz",
-                  help="GPU clock")
-parser.add_option("--cpu-voltage", action="store", type="string",
-                  default='1.0V',
-                  help = """CPU  voltage domain""")
-parser.add_option("--gpu-voltage", action="store", type="string",
-                  default='1.0V',
-                  help = """CPU  voltage domain""")
-parser.add_option("--CUExecPolicy", type="string", default="OLDEST-FIRST",
-                  help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
-parser.add_option("--xact-cas-mode", action="store_true",
-                  help="enable load_compare mode (transactional CAS)")
-parser.add_option("--SegFaultDebug",action="store_true",
-                 help="checks for GPU seg fault before TLB access")
-parser.add_option("--LocalMemBarrier",action="store_true",
-                 help="Barrier does not wait for writethroughs to complete")
-parser.add_option("--countPages", action="store_true",
-                 help="Count Page Accesses and output in per-CU output files")
-parser.add_option("--TLB-prefetch", type="int", help = "prefetch depth for"\
-                  "TLBs")
-parser.add_option("--pf-type", type="string", help="type of prefetch: "\
-                  "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
-parser.add_option("--pf-stride", type="int", help="set prefetch stride")
-parser.add_option("--numLdsBanks", type="int", default=32,
-                  help="number of physical banks per LDS module")
-parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
-                  help="number of cycles per LDS bank conflict")
+parser.add_argument(
+    "-k", "--kernel-files",
+    help="file(s) containing GPU kernel code (colon separated)")
+parser.add_argument(
+    "-u", "--num-compute-units", type=int, default=2,
+    help="number of GPU compute units"),
+parser.add_argument(
+    "--num-cp", type=int, default=0,
+    help="Number of GPU Command Processors (CP)")
+parser.add_argument(
+    "--simds-per-cu", type=int, default=4, help="SIMD units" \
+    "per CU")
+parser.add_argument(
+    "--cu-per-sqc", type=int, default=4, help="number of CUs" \
+    "sharing an SQC (icache, and thus icache TLB)")
+parser.add_argument(
+    "--wf-size", type=int, default=64,
+    help="Wavefront size(in workitems)")
+parser.add_argument(
+    "--wfs-per-simd", type=int, default=8, help="Number of " \
+    "WF slots per SIMD")
+parser.add_argument(
+    "--sp-bypass-path-length", type=int, default=4,
+    help="Number of stages of bypass path in vector ALU for Single "
+    "Precision ops")
+parser.add_argument(
+    "--dp-bypass-path-length", type=int, default=4,
+    help="Number of stages of bypass path in vector ALU for Double "
+    "Precision ops")
+parser.add_argument(
+    "--issue-period", type=int, default=4,
+    help="Number of cycles per vector instruction issue period")
+parser.add_argument(
+    "--glbmem-wr-bus-width", type=int, default=32,
+    help="VGPR to Coalescer (Global Memory) data bus width in bytes")
+parser.add_argument(
+    "--glbmem-rd-bus-width", type=int, default=32,
+    help="Coalescer to VGPR (Global Memory) data bus width in bytes")
+parser.add_argument(
+    "--shr-mem-pipes-per-cu", type=int, default=1, \
+    help="Number of Shared Memory pipelines per CU")
+parser.add_argument(
+    "--glb-mem-pipes-per-cu", type=int, default=1, \
+    help="Number of Global Memory pipelines per CU")
+parser.add_argument(
+    "--vreg-file-size", type=int, default=2048,
+    help="number of physical vector registers per SIMD")
+parser.add_argument(
+    "--bw-scalor", type=int, default=0,
+    help="bandwidth scalor for scalability analysis")
+parser.add_argument(
+    "--CPUClock", type=str, default="2GHz",
+    help="CPU clock")
+parser.add_argument(
+    "--GPUClock", type=str, default="1GHz",
+    help="GPU clock")
+parser.add_argument(
+    "--cpu-voltage", action="store", type=str,
+    default='1.0V',
+    help = """CPU  voltage domain""")
+parser.add_argument(
+    "--gpu-voltage", action="store", type=str,
+    default='1.0V',
+    help = """CPU  voltage domain""")
+parser.add_argument(
+    "--CUExecPolicy", type=str, default="OLDEST-FIRST",
+    help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
+parser.add_argument(
+    "--xact-cas-mode", action="store_true",
+    help="enable load_compare mode (transactional CAS)")
+parser.add_argument(
+    "--SegFaultDebug",action="store_true",
+    help="checks for GPU seg fault before TLB access")
+parser.add_argument(
+    "--LocalMemBarrier",action="store_true",
+    help="Barrier does not wait for writethroughs to complete")
+parser.add_argument(
+    "--countPages", action="store_true",
+    help="Count Page Accesses and output in per-CU output files")
+parser.add_argument(
+    "--TLB-prefetch", type=int, help = "prefetch depth for"\
+    "TLBs")
+parser.add_argument(
+    "--pf-type", type=str, help="type of prefetch: "\
+    "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
+parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
+parser.add_argument(
+    "--numLdsBanks", type=int, default=32,
+    help="number of physical banks per LDS module")
+parser.add_argument(
+    "--ldsBankConflictPenalty", type=int, default=1,
+    help="number of cycles per LDS bank conflict")

 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)

 GPUTLBOptions.tlb_options(parser)

-(options, args) = parser.parse_args()
+args = parser.parse_args()

 # The GPU cache coherence protocols only work with the backing store
-setOption(parser, "--access-backing-store")
+args.access_backing_store = True

 # Currently, the sqc (I-Cache of GPU) is shared by
 # multiple compute units(CUs). The protocol works just fine
 # even if sqc is not shared. Overriding this option here
 # so that the user need not explicitly set this (assuming
 # sharing sqc is the common usage)
-n_cu = options.num_compute_units
-num_sqc = int(math.ceil(float(n_cu) / options.cu_per_sqc))
-options.num_sqc = num_sqc # pass this to Ruby
+n_cu = args.num_compute_units
+num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
+args.num_sqc = num_sqc # pass this to Ruby

 ########################## Creating the GPU system ########################
 # shader is the GPU
-shader = Shader(n_wf = options.wfs_per_simd,
+shader = Shader(n_wf = args.wfs_per_simd,
                clk_domain = SrcClockDomain(
-                    clock = options.GPUClock,
+                    clock = args.GPUClock,
                    voltage_domain = VoltageDomain(
-                        voltage = options.gpu_voltage)),
+                        voltage = args.gpu_voltage)),
                timing = True)

 # GPU_RfO(Read For Ownership) implements SC/TSO memory model.
@@ -190,51 +199,51 @@ else:

 # Switching off per-lane TLB by default
 per_lane = False
-if options.TLB_config == "perLane":
+if args.TLB_config == "perLane":
    per_lane = True

 # List of compute units; one GPU can have multiple compute units
 compute_units = []
 for i in range(n_cu):
    compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
-                                     num_SIMDs = options.simds_per_cu,
-                                     wfSize = options.wf_size,
+                                     num_SIMDs = args.simds_per_cu,
+                                     wfSize = args.wf_size,
                                     spbypass_pipe_length = \
-                                     options.sp_bypass_path_length,
+                                     args.sp_bypass_path_length,
                                     dpbypass_pipe_length = \
-                                     options.dp_bypass_path_length,
-                                     issue_period = options.issue_period,
+                                     args.dp_bypass_path_length,
+                                     issue_period = args.issue_period,
                                     coalescer_to_vrf_bus_width = \
-                                     options.glbmem_rd_bus_width,
+                                     args.glbmem_rd_bus_width,
                                     vrf_to_coalescer_bus_width = \
-                                     options.glbmem_wr_bus_width,
+                                     args.glbmem_wr_bus_width,
                                     num_global_mem_pipes = \
-                                     options.glb_mem_pipes_per_cu,
+                                     args.glb_mem_pipes_per_cu,
                                     num_shared_mem_pipes = \
-                                     options.shr_mem_pipes_per_cu,
-                                     n_wf = options.wfs_per_simd,
-                                     execPolicy = options.CUExecPolicy,
-                                     xactCasMode = options.xact_cas_mode,
-                                     debugSegFault = options.SegFaultDebug,
+                                     args.shr_mem_pipes_per_cu,
+                                     n_wf = args.wfs_per_simd,
+                                     execPolicy = args.CUExecPolicy,
+                                     xactCasMode = args.xact_cas_mode,
+                                     debugSegFault = args.SegFaultDebug,
                                     functionalTLB = True,
-                                     localMemBarrier = options.LocalMemBarrier,
-                                     countPages = options.countPages,
+                                     localMemBarrier = args.LocalMemBarrier,
+                                     countPages = args.countPages,
                                     localDataStore = \
-                                     LdsState(banks = options.numLdsBanks,
+                                     LdsState(banks = args.numLdsBanks,
                                              bankConflictPenalty = \
-                                              options.ldsBankConflictPenalty)))
+                                              args.ldsBankConflictPenalty)))
    wavefronts = []
    vrfs = []
-    for j in range(options.simds_per_cu):
+    for j in range(args.simds_per_cu):
        for k in range(int(shader.n_wf)):
            wavefronts.append(Wavefront(simdId = j, wf_slot_id = k))
        vrfs.append(VectorRegisterFile(simd_id=j,
-                              num_regs_per_simd=options.vreg_file_size))
+                              num_regs_per_simd=args.vreg_file_size))
    compute_units[-1].wavefronts = wavefronts
    compute_units[-1].vector_register_file = vrfs
-    if options.TLB_prefetch:
-        compute_units[-1].prefetch_depth = options.TLB_prefetch
-        compute_units[-1].prefetch_prev_type = options.pf_type
+    if args.TLB_prefetch:
+        compute_units[-1].prefetch_depth = args.TLB_prefetch
+        compute_units[-1].prefetch_prev_type = args.pf_type

    # attach the LDS and the CU to the bus (actually a Bridge)
    compute_units[-1].ldsPort = compute_units[-1].ldsBus.slave
@@ -245,7 +254,7 @@ shader.CUs = compute_units

 # this is a uniprocessor only test, thus the shader is the second index in the
 # list of "system.cpus"
-options.num_cpus = 1
+args.num_cpus = 1
 shader_idx = 1
 cpu = TimingSimpleCPU(cpu_id=0)

@@ -258,12 +267,12 @@ dispatcher = GpuDispatcher()
 cpu_list = [cpu] + [shader] + [dispatcher]

 system = System(cpu = cpu_list,
-                mem_ranges = [AddrRange(options.mem_size)],
+                mem_ranges = [AddrRange(args.mem_size)],
                mem_mode = 'timing',
                workload = SEWorkload())

 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                   voltage_domain = system.voltage_domain)

@@ -274,15 +283,15 @@ system.cpu[0].clk_domain = SrcClockDomain(clock = '2GHz',
                                          system.voltage_domain)

 # configure the TLB hierarchy
-GPUTLBConfig.config_tlb_hierarchy(options, system, shader_idx)
+GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)

 # create Ruby system
 system.piobus = IOXBar(width=32, response_latency=0,
                       frontend_latency=0, forward_latency=0)
-Ruby.create_system(options, None, system)
+Ruby.create_system(args, None, system)

 # Create a separate clock for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                        voltage_domain = system.voltage_domain)

 # create the interrupt controller
@@ -303,10 +312,10 @@ system.ruby._cpu_ports[0].mem_master_port = system.piobus.slave
 # per compute unit and one sequencer per SQC for the math to work out
 # correctly.
 gpu_port_idx = len(system.ruby._cpu_ports) \
-               - options.num_compute_units - options.num_sqc
-gpu_port_idx = gpu_port_idx - options.num_cp * 2
+               - args.num_compute_units - args.num_sqc
+gpu_port_idx = gpu_port_idx - args.num_cp * 2

-wavefront_size = options.wf_size
+wavefront_size = args.wf_size
 for i in range(n_cu):
    # The pipeline issues wavefront_size number of uncoalesced requests
    # in one GPU issue cycle. Hence wavefront_size mem ports.
@@ -316,14 +325,14 @@ for i in range(n_cu):
    gpu_port_idx += 1

 for i in range(n_cu):
-    if i > 0 and not i % options.cu_per_sqc:
+    if i > 0 and not i % args.cu_per_sqc:
        gpu_port_idx += 1
    system.cpu[shader_idx].CUs[i].sqc_port = \
            system.ruby._cpu_ports[gpu_port_idx].slave
 gpu_port_idx = gpu_port_idx + 1

 # Current regression tests do not support the command processor
-assert(options.num_cp == 0)
+assert(args.num_cp == 0)

 # connect dispatcher to the system.piobus
 dispatcher.pio = system.piobus.master
--- a/tests/configs/memtest-ruby.py
+++ b/tests/configs/memtest-ruby.py
@@ -29,34 +29,34 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys

 m5.util.addToPath('../configs/')

 from ruby import Ruby
 from common import Options

-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)

 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)

-(options, args) = parser.parse_args()
+args = parser.parse_args()

 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-options.ports=32
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
+args.ports=32

 #MAX CORES IS 8 with the fals sharing method
 nb_cores = 8
@@ -66,8 +66,8 @@ cpus = [ MemTest(percent_functional=50,
                 percent_uncacheable=0, suppress_func_errors=True) \
         for i in range(nb_cores) ]

-# overwrite options.num_cpus with the nb_cores value
-options.num_cpus = nb_cores
+# overwrite args.num_cpus with the nb_cores value
+args.num_cpus = nb_cores

 # system simulated
 system = System(cpu = cpus)
@@ -87,10 +87,10 @@ for cpu in cpus:

 system.mem_ranges = AddrRange('256MB')

-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)

 # Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                        voltage_domain = system.voltage_domain)

 assert(len(cpus) == len(system.ruby._cpu_ports))
--- a/tests/configs/pc-simple-timing-ruby.py
+++ b/tests/configs/pc-simple-timing-ruby.py
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-import m5, os, optparse, sys
+import m5, os, argparse, sys
 from m5.objects import *
 m5.util.addToPath('../configs/')
 from common.Benchmarks import SysConfig
@@ -33,28 +33,28 @@ from ruby import Ruby
 from common import Options

 # Add the ruby specific and protocol specific options
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Ruby.define_options(parser)
-(options, args) = parser.parse_args()
+args = parser.parse_args()

 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
-options.l1d_size="32kB"
-options.l1i_size="32kB"
-options.l2_size="4MB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.num_cpus = 2
+args.l1d_size="32kB"
+args.l1i_size="32kB"
+args.l2_size="4MB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.num_cpus = 2

 #the system
 mdesc = SysConfig(disks = ['linux-x86.img'])
-system = FSConfig.makeLinuxX86System('timing', options.num_cpus,
+system = FSConfig.makeLinuxX86System('timing', args.num_cpus,
                                     mdesc=mdesc, Ruby=True)
 system.kernel = SysPaths.binary('x86_64-vmlinux-2.6.22.9')
 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)

 system.kernel = FSConfig.binary('x86_64-vmlinux-2.6.22.9.smp')
 system.clk_domain = SrcClockDomain(clock = '1GHz',
@@ -62,12 +62,12 @@ system.clk_domain = SrcClockDomain(clock = '1GHz',
 system.cpu_clk_domain = SrcClockDomain(clock = '2GHz',
                                       voltage_domain = system.voltage_domain)
 system.cpu = [TimingSimpleCPU(cpu_id=i, clk_domain = system.cpu_clk_domain)
-              for i in range(options.num_cpus)]
+              for i in range(args.num_cpus)]

-Ruby.create_system(options, True, system, system.iobus, system._dma_ports)
+Ruby.create_system(args, True, system, system.iobus, system._dma_ports)

 # Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                        voltage_domain = system.voltage_domain)

 # Connect the ruby io port to the PIO bus,
--- a/tests/configs/rubytest-ruby.py
+++ b/tests/configs/rubytest-ruby.py
@@ -29,34 +29,34 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys

 m5.util.addToPath('../configs/')

 from ruby import Ruby
 from common import Options

-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addNoISAOptions(parser)

 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)

-(options, args) = parser.parse_args()
+args = parser.parse_args()

 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-options.ports=32
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
+args.ports=32

 # Turn on flush check for the hammer protocol
 check_flush = False
@@ -67,14 +67,14 @@ if buildEnv['PROTOCOL'] == 'MOESI_hammer':
 # create the tester and system, including ruby
 #
 tester = RubyTester(check_flush = check_flush, checks_to_complete = 100,
-                    wakeup_frequency = 10, num_cpus = options.num_cpus)
+                    wakeup_frequency = 10, num_cpus = args.num_cpus)

 # We set the testers as cpu for ruby to find the correct clock domains
 # for the L1 Objects.
 system = System(cpu = tester)

 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                   voltage_domain = system.voltage_domain)

@@ -85,14 +85,14 @@ system.mem_ranges = AddrRange('256MB')
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options, False, system, cpus=cpu_list)
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, False, system, cpus=cpu_list)

 # Create a separate clock domain for Ruby
 system.ruby.clk_domain = SrcClockDomain(clock = '1GHz',
                                        voltage_domain = system.voltage_domain)

-assert(options.num_cpus == len(system.ruby._cpu_ports))
+assert(args.num_cpus == len(system.ruby._cpu_ports))

 tester.num_cpus = len(system.ruby._cpu_ports)

--- a/tests/configs/simple-timing-mp-ruby.py
+++ b/tests/configs/simple-timing-mp-ruby.py
@@ -28,39 +28,39 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys

 m5.util.addToPath('../configs/')

 from common import Options
 from ruby import Ruby

-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)

 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)

-(options, args) = parser.parse_args()
+args = parser.parse_args()

 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2

 nb_cores = 4
 cpus = [ TimingSimpleCPU(cpu_id=i) for i in range(nb_cores) ]

 # overwrite the num_cpus to equal nb_cores
-options.num_cpus = nb_cores
+args.num_cpus = nb_cores

 # system simulated
 system = System(cpu = cpus, clk_domain = SrcClockDomain(clock = '1GHz'))
@@ -69,12 +69,12 @@ system = System(cpu = cpus, clk_domain = SrcClockDomain(clock = '1GHz'))
 # CPUs frequency
 system.cpu.clk_domain = SrcClockDomain(clock = '2GHz')

-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)

 # Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock)
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock)

-assert(options.num_cpus == len(system.ruby._cpu_ports))
+assert(args.num_cpus == len(system.ruby._cpu_ports))

 for (i, cpu) in enumerate(system.cpu):
    # create the interrupt controller
--- a/tests/configs/simple-timing-ruby.py
+++ b/tests/configs/simple-timing-ruby.py
@@ -28,41 +28,41 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys

 m5.util.addToPath('../configs/')

 from ruby import Ruby
 from common import Options

-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)

 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)

-(options, args) = parser.parse_args()
+args = parser.parse_args()

 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2

 # this is a uniprocessor only test
-options.num_cpus = 1
+args.num_cpus = 1
 cpu = TimingSimpleCPU(cpu_id=0)
 system = System(cpu = cpu)

 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                   voltage_domain = system.voltage_domain)

@@ -72,10 +72,10 @@ system.cpu.clk_domain = SrcClockDomain(clock = '2GHz',
                                       voltage_domain = system.voltage_domain)

 system.mem_ranges = AddrRange('256MB')
-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)

 # Create a separate clock for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                        voltage_domain = system.voltage_domain)

 assert(len(system.ruby._cpu_ports) == 1)
--- a/tests/gem5/configs/base_config.py
+++ b/tests/gem5/configs/base_config.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from abc import ABCMeta, abstractmethod
-import optparse
+import argparse
 import m5
 from m5.objects import *
 from m5.proxy import *
@@ -164,29 +164,29 @@ class BaseSystem(object, metaclass=ABCMeta):

        if self.use_ruby:
            # Add the ruby specific and protocol specific options
-            parser = optparse.OptionParser()
+            parser = argparse.ArgumentParser()
            Options.addCommonOptions(parser)
            Ruby.define_options(parser)
-            (options, args) = parser.parse_args()
+            args, extra = parser.parse_known_args()

            # Set the default cache size and associativity to be very
            # small to encourage races between requests and writebacks.
-            options.l1d_size="32kB"
-            options.l1i_size="32kB"
-            options.l2_size="4MB"
-            options.l1d_assoc=4
-            options.l1i_assoc=2
-            options.l2_assoc=8
-            options.num_cpus = self.num_cpus
-            options.num_dirs = 2
+            args.l1d_size="32kB"
+            args.l1i_size="32kB"
+            args.l2_size="4MB"
+            args.l1d_assoc=4
+            args.l1i_assoc=2
+            args.l2_assoc=8
+            args.num_cpus = self.num_cpus
+            args.num_dirs = 2

            bootmem = getattr(system, '_bootmem', None)
-            Ruby.create_system(options, True, system, system.iobus,
+            Ruby.create_system(args, True, system, system.iobus,
                               system._dma_ports, bootmem)

            # Create a seperate clock domain for Ruby
            system.ruby.clk_domain = SrcClockDomain(
-                clock = options.ruby_clock,
+                clock = args.ruby_clock,
                voltage_domain = system.voltage_domain)
            for i, cpu in enumerate(system.cpu):
                if not cpu.switched_out:
--- a/tests/gem5/x86-boot-tests/run_exit.py
+++ b/tests/gem5/x86-boot-tests/run_exit.py
@@ -43,7 +43,6 @@ parser.add_argument('--cpu-type', choices=['atomic', 'kvm', 'o3', 'simple',])
 parser.add_argument('--num-cpus', type=int)
 parser.add_argument('--boot-type', choices=['init', 'systemd',])

-#(options, args) = parser.parse_args()
 args = parser.parse_args()

 # create the system we are going to simulate