stdlib, python: Add warning message and clarify binary vs metric units (#1479)
This PR changes memory and cache sizes in various parts of the gem5 codebase to use binary units (e.g. KiB) instead of metric units (e.g. kB). This makes the codebase more consistent, as gem5 automatically converts memory and cache sizes that are in metric units to binary units. This PR also adds a warning message to let users know when an auto-conversion from base 10 to base 2 units occurs. There were a few places in configs and in the comments of various files where I didn't change the metric units, as I couldn't figure out where the parameters with those units were being used.
This commit is contained in:
@@ -55,7 +55,7 @@ class SysConfig:
|
||||
if self.memsize:
|
||||
return self.memsize
|
||||
else:
|
||||
return "128MB"
|
||||
return "128MiB"
|
||||
|
||||
def disks(self):
|
||||
if self.disknames:
|
||||
@@ -77,8 +77,8 @@ class SysConfig:
|
||||
# The first defined machine is the test system, the others are driving systems
|
||||
|
||||
Benchmarks = {
|
||||
"PovrayBench": [SysConfig("povray-bench.rcS", "512MB", ["povray.img"])],
|
||||
"PovrayAutumn": [SysConfig("povray-autumn.rcS", "512MB", ["povray.img"])],
|
||||
"PovrayBench": [SysConfig("povray-bench.rcS", "512MiB", ["povray.img"])],
|
||||
"PovrayAutumn": [SysConfig("povray-autumn.rcS", "512MiB", ["povray.img"])],
|
||||
"NetperfStream": [
|
||||
SysConfig("netperf-stream-client.rcS"),
|
||||
SysConfig("netperf-server.rcS"),
|
||||
@@ -97,55 +97,55 @@ Benchmarks = {
|
||||
SysConfig("netperf-server.rcS"),
|
||||
],
|
||||
"SurgeStandard": [
|
||||
SysConfig("surge-server.rcS", "512MB"),
|
||||
SysConfig("surge-client.rcS", "256MB"),
|
||||
SysConfig("surge-server.rcS", "512MiB"),
|
||||
SysConfig("surge-client.rcS", "256MiB"),
|
||||
],
|
||||
"SurgeSpecweb": [
|
||||
SysConfig("spec-surge-server.rcS", "512MB"),
|
||||
SysConfig("spec-surge-client.rcS", "256MB"),
|
||||
SysConfig("spec-surge-server.rcS", "512MiB"),
|
||||
SysConfig("spec-surge-client.rcS", "256MiB"),
|
||||
],
|
||||
"Nhfsstone": [
|
||||
SysConfig("nfs-server-nhfsstone.rcS", "512MB"),
|
||||
SysConfig("nfs-server-nhfsstone.rcS", "512MiB"),
|
||||
SysConfig("nfs-client-nhfsstone.rcS"),
|
||||
],
|
||||
"Nfs": [
|
||||
SysConfig("nfs-server.rcS", "900MB"),
|
||||
SysConfig("nfs-server.rcS", "900MiB"),
|
||||
SysConfig("nfs-client-dbench.rcS"),
|
||||
],
|
||||
"NfsTcp": [
|
||||
SysConfig("nfs-server.rcS", "900MB"),
|
||||
SysConfig("nfs-server.rcS", "900MiB"),
|
||||
SysConfig("nfs-client-tcp.rcS"),
|
||||
],
|
||||
"IScsiInitiator": [
|
||||
SysConfig("iscsi-client.rcS", "512MB"),
|
||||
SysConfig("iscsi-server.rcS", "512MB"),
|
||||
SysConfig("iscsi-client.rcS", "512MiB"),
|
||||
SysConfig("iscsi-server.rcS", "512MiB"),
|
||||
],
|
||||
"IScsiTarget": [
|
||||
SysConfig("iscsi-server.rcS", "512MB"),
|
||||
SysConfig("iscsi-client.rcS", "512MB"),
|
||||
SysConfig("iscsi-server.rcS", "512MiB"),
|
||||
SysConfig("iscsi-client.rcS", "512MiB"),
|
||||
],
|
||||
"Validation": [
|
||||
SysConfig("iscsi-server.rcS", "512MB"),
|
||||
SysConfig("iscsi-client.rcS", "512MB"),
|
||||
SysConfig("iscsi-server.rcS", "512MiB"),
|
||||
SysConfig("iscsi-client.rcS", "512MiB"),
|
||||
],
|
||||
"Ping": [SysConfig("ping-server.rcS"), SysConfig("ping-client.rcS")],
|
||||
"ValAccDelay": [SysConfig("devtime.rcS", "512MB")],
|
||||
"ValAccDelay2": [SysConfig("devtimewmr.rcS", "512MB")],
|
||||
"ValMemLat": [SysConfig("micro_memlat.rcS", "512MB")],
|
||||
"ValMemLat2MB": [SysConfig("micro_memlat2mb.rcS", "512MB")],
|
||||
"ValMemLat8MB": [SysConfig("micro_memlat8mb.rcS", "512MB")],
|
||||
"ValMemLat": [SysConfig("micro_memlat8.rcS", "512MB")],
|
||||
"ValTlbLat": [SysConfig("micro_tlblat.rcS", "512MB")],
|
||||
"ValSysLat": [SysConfig("micro_syscall.rcS", "512MB")],
|
||||
"ValCtxLat": [SysConfig("micro_ctx.rcS", "512MB")],
|
||||
"ValStream": [SysConfig("micro_stream.rcS", "512MB")],
|
||||
"ValStreamScale": [SysConfig("micro_streamscale.rcS", "512MB")],
|
||||
"ValStreamCopy": [SysConfig("micro_streamcopy.rcS", "512MB")],
|
||||
"MutexTest": [SysConfig("mutex-test.rcS", "128MB")],
|
||||
"ValAccDelay": [SysConfig("devtime.rcS", "512MiB")],
|
||||
"ValAccDelay2": [SysConfig("devtimewmr.rcS", "512MiB")],
|
||||
"ValMemLat": [SysConfig("micro_memlat.rcS", "512MiB")],
|
||||
"ValMemLat2MB": [SysConfig("micro_memlat2mb.rcS", "512MiB")],
|
||||
"ValMemLat8MB": [SysConfig("micro_memlat8mb.rcS", "512MiB")],
|
||||
"ValMemLat": [SysConfig("micro_memlat8.rcS", "512MiB")],
|
||||
"ValTlbLat": [SysConfig("micro_tlblat.rcS", "512MiB")],
|
||||
"ValSysLat": [SysConfig("micro_syscall.rcS", "512MiB")],
|
||||
"ValCtxLat": [SysConfig("micro_ctx.rcS", "512MiB")],
|
||||
"ValStream": [SysConfig("micro_stream.rcS", "512MiB")],
|
||||
"ValStreamScale": [SysConfig("micro_streamscale.rcS", "512MiB")],
|
||||
"ValStreamCopy": [SysConfig("micro_streamcopy.rcS", "512MiB")],
|
||||
"MutexTest": [SysConfig("mutex-test.rcS", "128MiB")],
|
||||
"ArmAndroid-GB": [
|
||||
SysConfig(
|
||||
"null.rcS",
|
||||
"256MB",
|
||||
"256MiB",
|
||||
["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.clean.img"],
|
||||
None,
|
||||
"android-gingerbread",
|
||||
@@ -154,7 +154,7 @@ Benchmarks = {
|
||||
"bbench-gb": [
|
||||
SysConfig(
|
||||
"bbench-gb.rcS",
|
||||
"256MB",
|
||||
"256MiB",
|
||||
["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.img"],
|
||||
None,
|
||||
"android-gingerbread",
|
||||
@@ -163,7 +163,7 @@ Benchmarks = {
|
||||
"ArmAndroid-ICS": [
|
||||
SysConfig(
|
||||
"null.rcS",
|
||||
"256MB",
|
||||
"256MiB",
|
||||
["ARMv7a-ICS-Android.SMP.nolock.clean.img"],
|
||||
None,
|
||||
"android-ics",
|
||||
@@ -172,7 +172,7 @@ Benchmarks = {
|
||||
"bbench-ics": [
|
||||
SysConfig(
|
||||
"bbench-ics.rcS",
|
||||
"256MB",
|
||||
"256MiB",
|
||||
["ARMv7a-ICS-Android.SMP.nolock.img"],
|
||||
None,
|
||||
"android-ics",
|
||||
|
||||
@@ -137,8 +137,8 @@ def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
|
||||
self.t1000.attachOnChipIO(self.membus)
|
||||
self.t1000.attachIO(self.iobus)
|
||||
self.mem_ranges = [
|
||||
AddrRange(Addr("1MB"), size="64MB"),
|
||||
AddrRange(Addr("2GB"), size="256MB"),
|
||||
AddrRange(Addr("1MiB"), size="64MiB"),
|
||||
AddrRange(Addr("2GiB"), size="256MiB"),
|
||||
]
|
||||
self.bridge.mem_side_port = self.iobus.cpu_side_ports
|
||||
self.bridge.cpu_side_port = self.membus.mem_side_ports
|
||||
@@ -174,21 +174,21 @@ def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
|
||||
# ROM for OBP/Reset/Hypervisor
|
||||
self.rom = SimpleMemory(
|
||||
image_file=binary("t1000_rom.bin"),
|
||||
range=AddrRange(0xFFF0000000, size="8MB"),
|
||||
range=AddrRange(0xFFF0000000, size="8MiB"),
|
||||
)
|
||||
# nvram
|
||||
self.nvram = SimpleMemory(
|
||||
image_file=binary("nvram1"), range=AddrRange(0x1F11000000, size="8kB")
|
||||
image_file=binary("nvram1"), range=AddrRange(0x1F11000000, size="8KiB")
|
||||
)
|
||||
# hypervisor description
|
||||
self.hypervisor_desc = SimpleMemory(
|
||||
image_file=binary("1up-hv.bin"),
|
||||
range=AddrRange(0x1F12080000, size="8kB"),
|
||||
range=AddrRange(0x1F12080000, size="8KiB"),
|
||||
)
|
||||
# partition description
|
||||
self.partition_desc = SimpleMemory(
|
||||
image_file=binary("1up-md.bin"),
|
||||
range=AddrRange(0x1F12000000, size="8kB"),
|
||||
range=AddrRange(0x1F12000000, size="8KiB"),
|
||||
)
|
||||
|
||||
self.rom.port = self.membus.mem_side_ports
|
||||
@@ -423,7 +423,7 @@ def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None):
|
||||
self.iobus = IOXBar()
|
||||
self.membus = MemBus()
|
||||
self.bridge = Bridge(delay="50ns")
|
||||
self.mem_ranges = [AddrRange("1GB")]
|
||||
self.mem_ranges = [AddrRange("1GiB")]
|
||||
self.bridge.mem_side_port = self.iobus.cpu_side_ports
|
||||
self.bridge.cpu_side_port = self.membus.mem_side_ports
|
||||
self.disks = makeCowDisks(mdesc.disks())
|
||||
@@ -469,7 +469,7 @@ def connectX86ClassicSystem(x86_sys, numCPUs):
|
||||
x86_sys.bridge.cpu_side_port = x86_sys.membus.mem_side_ports
|
||||
# Allow the bridge to pass through:
|
||||
# 1) kernel configured PCI device memory map address: address range
|
||||
# [0xC0000000, 0xFFFF0000). (The upper 64kB are reserved for m5ops.)
|
||||
# [0xC0000000, 0xFFFF0000). (The upper 64KiB are reserved for m5ops.)
|
||||
# 2) the bridge to pass through the IO APIC (two pages, already contained in 1),
|
||||
# 3) everything in the IO address range up to the local APIC, and
|
||||
# 4) then the entire PCI address space and beyond.
|
||||
@@ -526,22 +526,22 @@ def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
|
||||
# Physical memory
|
||||
# On the PC platform, the memory region 0xC0000000-0xFFFFFFFF is reserved
|
||||
# for various devices. Hence, if the physical memory size is greater than
|
||||
# 3GB, we need to split it into two parts.
|
||||
# 3GiB, we need to split it into two parts.
|
||||
excess_mem_size = convert.toMemorySize(mdesc.mem()) - convert.toMemorySize(
|
||||
"3GB"
|
||||
"3GiB"
|
||||
)
|
||||
if excess_mem_size <= 0:
|
||||
self.mem_ranges = [AddrRange(mdesc.mem())]
|
||||
else:
|
||||
warn(
|
||||
"Physical memory size specified is %s which is greater than "
|
||||
"3GB. Twice the number of memory controllers would be "
|
||||
"3GiB. Twice the number of memory controllers would be "
|
||||
"created." % (mdesc.mem())
|
||||
)
|
||||
|
||||
self.mem_ranges = [
|
||||
AddrRange("3GB"),
|
||||
AddrRange(Addr("4GB"), size=excess_mem_size),
|
||||
AddrRange("3GiB"),
|
||||
AddrRange(Addr("4GiB"), size=excess_mem_size),
|
||||
]
|
||||
|
||||
# Platform
|
||||
@@ -663,16 +663,16 @@ def makeLinuxX86System(
|
||||
# Build up the x86 system and then specialize it for Linux
|
||||
self = makeX86System(mem_mode, numCPUs, mdesc, X86FsLinux(), Ruby)
|
||||
|
||||
# We assume below that there's at least 1MB of memory. We'll require 2
|
||||
# We assume below that there's at least 1MiB of memory. We'll require 2
|
||||
# just to avoid corner cases.
|
||||
phys_mem_size = sum([r.size() for r in self.mem_ranges])
|
||||
assert phys_mem_size >= 0x200000
|
||||
assert len(self.mem_ranges) <= 2
|
||||
|
||||
entries = [
|
||||
# Mark the first megabyte of memory as reserved
|
||||
X86E820Entry(addr=0, size="639kB", range_type=1),
|
||||
X86E820Entry(addr=0x9FC00, size="385kB", range_type=2),
|
||||
# Mark the first mebibyte of memory as reserved
|
||||
X86E820Entry(addr=0, size="639KiB", range_type=1),
|
||||
X86E820Entry(addr=0x9FC00, size="385KiB", range_type=2),
|
||||
# Mark the rest of physical memory as available
|
||||
X86E820Entry(
|
||||
addr=0x100000,
|
||||
@@ -681,7 +681,7 @@ def makeLinuxX86System(
|
||||
),
|
||||
]
|
||||
|
||||
# Mark [mem_size, 3GB) as reserved if memory less than 3GB, which force
|
||||
# Mark [mem_size, 3GiB) as reserved if memory is less than 3GiB, which forces
|
||||
# IO devices to be mapped to [0xC0000000, 0xFFFF0000). Requests to this
|
||||
# specific range can pass through the bridge to the iobus.
|
||||
if len(self.mem_ranges) == 1:
|
||||
@@ -693,10 +693,10 @@ def makeLinuxX86System(
|
||||
)
|
||||
)
|
||||
|
||||
# Reserve the last 16kB of the 32-bit address space for the m5op interface
|
||||
entries.append(X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2))
|
||||
# Reserve the last 64KiB of the 32-bit address space for the m5op interface
|
||||
entries.append(X86E820Entry(addr=0xFFFF0000, size="64KiB", range_type=2))
|
||||
|
||||
# In case the physical memory is greater than 3GB, we split it into two
|
||||
# In case the physical memory is greater than 3GiB, we split it into two
|
||||
# parts and add a separate e820 entry for the second part. This entry
|
||||
# starts at 0x100000000, which is the first address after the space
|
||||
# reserved for devices.
|
||||
|
||||
@@ -202,7 +202,7 @@ def register_node(cpu_list, mem, node_number):
|
||||
file_append((nodedir, "cpumap"), hex_mask(cpu_list))
|
||||
file_append(
|
||||
(nodedir, "meminfo"),
|
||||
"Node %d MemTotal: %dkB"
|
||||
"Node %d MemTotal: %dKiB"
|
||||
% (node_number, toMemorySize(str(mem)) / kibi),
|
||||
)
|
||||
|
||||
|
||||
@@ -300,10 +300,10 @@ def add_options(parser):
|
||||
# address range for each of the serial links
|
||||
parser.add_argument(
|
||||
"--serial-link-addr-range",
|
||||
default="1GB",
|
||||
default="1GiB",
|
||||
type=str,
|
||||
help="memory range for each of the serial links.\
|
||||
Default: 1GB",
|
||||
Default: 1GiB",
|
||||
)
|
||||
|
||||
# *****************************PERFORMANCE MONITORING*********************
|
||||
@@ -390,10 +390,10 @@ def add_options(parser):
|
||||
# HMC device - vault capacity or size
|
||||
parser.add_argument(
|
||||
"--hmc-dev-vault-size",
|
||||
default="256MB",
|
||||
default="256MiB",
|
||||
type=str,
|
||||
help="vault storage capacity in bytes. Default:\
|
||||
256MB",
|
||||
256MiB",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mem-type",
|
||||
|
||||
@@ -98,7 +98,7 @@ parser.add_argument(
|
||||
"--mem-size",
|
||||
action="store",
|
||||
type=str,
|
||||
default="16MB",
|
||||
default="16MiB",
|
||||
help="Specify the memory size",
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -161,7 +161,7 @@ def is_pow2(num):
|
||||
# assume we start every range at 0
|
||||
max_range = int(mem_range.end)
|
||||
|
||||
# start at a size of 4 kByte, and go up till we hit the max, increase
|
||||
# start at a size of 4 KiB, and go up till we hit the max, increase
|
||||
# the step every time we hit a power of two
|
||||
min_range = 4096
|
||||
ranges = [min_range]
|
||||
@@ -295,17 +295,17 @@ class L3Cache(Cache):
|
||||
|
||||
# note that everything is in the same clock domain, 2.0 GHz as
|
||||
# specified above
|
||||
system.l1cache = L1_DCache(size="64kB")
|
||||
system.l1cache = L1_DCache(size="64KiB")
|
||||
system.monitor.mem_side_port = system.l1cache.cpu_side
|
||||
|
||||
system.l2cache = L2Cache(size="512kB", writeback_clean=True)
|
||||
system.l2cache = L2Cache(size="512KiB", writeback_clean=True)
|
||||
system.l2cache.xbar = L2XBar()
|
||||
system.l1cache.mem_side = system.l2cache.xbar.cpu_side_ports
|
||||
system.l2cache.cpu_side = system.l2cache.xbar.mem_side_ports
|
||||
|
||||
# make the L3 mostly exclusive, and correspondingly ensure that the L2
|
||||
# writes back also clean lines to the L3
|
||||
system.l3cache = L3Cache(size="4MB", clusivity="mostly_excl")
|
||||
system.l3cache = L3Cache(size="4MiB", clusivity="mostly_excl")
|
||||
system.l3cache.xbar = L2XBar()
|
||||
system.l2cache.mem_side = system.l3cache.xbar.cpu_side_ports
|
||||
system.l3cache.cpu_side = system.l3cache.xbar.mem_side_ports
|
||||
|
||||
@@ -116,8 +116,8 @@ system.clk_domain = SrcClockDomain(
|
||||
clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
|
||||
)
|
||||
|
||||
# We are fine with 256 MB memory for now.
|
||||
mem_range = AddrRange("256MB")
|
||||
# We are fine with 256 MiB memory for now.
|
||||
mem_range = AddrRange("256MiB")
|
||||
# Start address is 0
|
||||
system.mem_ranges = [mem_range]
|
||||
|
||||
|
||||
@@ -108,8 +108,8 @@ system.clk_domain = SrcClockDomain(
|
||||
clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
|
||||
)
|
||||
|
||||
# we are fine with 256 MB memory for now
|
||||
mem_range = AddrRange("256MB")
|
||||
# we are fine with 256 MiB memory for now
|
||||
mem_range = AddrRange("256MiB")
|
||||
system.mem_ranges = [mem_range]
|
||||
|
||||
# do not worry about reserving space for the backing store
|
||||
|
||||
@@ -308,7 +308,7 @@ def main():
|
||||
"--mem-size",
|
||||
action="store",
|
||||
type=str,
|
||||
default="2GB",
|
||||
default="2GiB",
|
||||
help="Specify the physical memory size",
|
||||
)
|
||||
parser.add_argument("--checkpoint", action="store_true")
|
||||
|
||||
@@ -52,7 +52,7 @@ class L1I(L1_ICache):
|
||||
response_latency = 1
|
||||
mshrs = 4
|
||||
tgts_per_mshr = 8
|
||||
size = "48kB"
|
||||
size = "48KiB"
|
||||
assoc = 3
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ class L1D(L1_DCache):
|
||||
response_latency = 1
|
||||
mshrs = 16
|
||||
tgts_per_mshr = 16
|
||||
size = "32kB"
|
||||
size = "32KiB"
|
||||
assoc = 2
|
||||
write_buffers = 16
|
||||
|
||||
@@ -73,14 +73,14 @@ class L2(L2Cache):
|
||||
response_latency = 5
|
||||
mshrs = 32
|
||||
tgts_per_mshr = 8
|
||||
size = "1MB"
|
||||
size = "1MiB"
|
||||
assoc = 16
|
||||
write_buffers = 8
|
||||
clusivity = "mostly_excl"
|
||||
|
||||
|
||||
class L3(Cache):
|
||||
size = "16MB"
|
||||
size = "16MiB"
|
||||
assoc = 16
|
||||
tag_latency = 20
|
||||
data_latency = 20
|
||||
|
||||
@@ -156,7 +156,7 @@ def main():
|
||||
"--mem-size",
|
||||
action="store",
|
||||
type=str,
|
||||
default="2GB",
|
||||
default="2GiB",
|
||||
help="Specify the physical memory size",
|
||||
)
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ from devices import (
|
||||
|
||||
default_disk = "aarch64-ubuntu-trusty-headless.img"
|
||||
|
||||
default_mem_size = "2GB"
|
||||
default_mem_size = "2GiB"
|
||||
|
||||
|
||||
def _to_ticks(value):
|
||||
|
||||
@@ -278,10 +278,10 @@ def main():
|
||||
parser.add_argument("--num-dirs", type=int, default=1)
|
||||
parser.add_argument("--num-l2caches", type=int, default=1)
|
||||
parser.add_argument("--num-l3caches", type=int, default=1)
|
||||
parser.add_argument("--l1d_size", type=str, default="64kB")
|
||||
parser.add_argument("--l1i_size", type=str, default="32kB")
|
||||
parser.add_argument("--l2_size", type=str, default="2MB")
|
||||
parser.add_argument("--l3_size", type=str, default="16MB")
|
||||
parser.add_argument("--l1d_size", type=str, default="64KiB")
|
||||
parser.add_argument("--l1i_size", type=str, default="32KiB")
|
||||
parser.add_argument("--l2_size", type=str, default="2MiB")
|
||||
parser.add_argument("--l3_size", type=str, default="16MiB")
|
||||
parser.add_argument("--l1d_assoc", type=int, default=2)
|
||||
parser.add_argument("--l1i_assoc", type=int, default=2)
|
||||
parser.add_argument("--l2_assoc", type=int, default=8)
|
||||
|
||||
@@ -189,7 +189,7 @@ def main():
|
||||
"--mem-size",
|
||||
action="store",
|
||||
type=str,
|
||||
default="2GB",
|
||||
default="2GiB",
|
||||
help="Specify the physical memory size",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
||||
@@ -59,12 +59,12 @@ requires(isa_required=ISA.ARM)
|
||||
cache_hierarchy = NoCache()
|
||||
|
||||
# We use a single channel DDR3_1600 memory system
|
||||
memory = SingleChannelDDR3_1600(size="32MB")
|
||||
memory = SingleChannelDDR3_1600(size="32MiB")
|
||||
|
||||
# We use a simple Timing processor with one core.
|
||||
processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1)
|
||||
|
||||
# The gem5 library simble board which can be used to run simple SE-mode
|
||||
# The gem5 library simple board which can be used to run simple SE-mode
|
||||
# simulations.
|
||||
board = SimpleBoard(
|
||||
clk_freq="3GHz",
|
||||
|
||||
@@ -67,11 +67,11 @@ from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierar
|
||||
|
||||
# Here we setup the parameters of the l1 and l2 caches.
|
||||
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
|
||||
l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
|
||||
l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB"
|
||||
)
|
||||
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
memory = DualChannelDDR4_2400(size="2GB")
|
||||
memory = DualChannelDDR4_2400(size="2GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -66,12 +66,12 @@ from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierar
|
||||
|
||||
# Here we setup the parameters of the l1 and l2 caches.
|
||||
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
|
||||
l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
|
||||
l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB"
|
||||
)
|
||||
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="2GB")
|
||||
memory = DualChannelDDR4_2400(size="2GiB")
|
||||
|
||||
# Here we setup the processor. We use a simple TIMING processor. The config
|
||||
# script was also tested with ATOMIC processor.
|
||||
|
||||
@@ -75,7 +75,7 @@ cache_hierarchy = OctopiCache(
|
||||
is_fullsystem=True,
|
||||
)
|
||||
|
||||
memory = DualChannelDDR4_2400(size="16GB")
|
||||
memory = DualChannelDDR4_2400(size="16GiB")
|
||||
|
||||
# The number of cores must be consistent with
|
||||
# num_core_complexes and num_cores_per_core_complexes
|
||||
|
||||
@@ -64,14 +64,14 @@ requires(isa_required=ISA.RISCV)
|
||||
cache_hierarchy = NoCache()
|
||||
|
||||
# We use a single channel DDR3_1600 memory system
|
||||
memory = SingleChannelDDR3_1600(size="32MB")
|
||||
memory = SingleChannelDDR3_1600(size="32MiB")
|
||||
|
||||
# We use a simple Timing processor with one core.
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
|
||||
)
|
||||
|
||||
# The gem5 library simble board which can be used to run simple SE-mode
|
||||
# The gem5 library simple board which can be used to run simple SE-mode
|
||||
# simulations.
|
||||
board = SimpleBoard(
|
||||
clk_freq="3GHz",
|
||||
|
||||
@@ -75,14 +75,14 @@ requires(isa_required=ISA.RISCV)
|
||||
cache_hierarchy = NoCache()
|
||||
|
||||
# We use a single channel DDR3_1600 memory system
|
||||
memory = SingleChannelDDR3_1600(size="32MB")
|
||||
memory = SingleChannelDDR3_1600(size="32MiB")
|
||||
|
||||
# We use a simple Timing processor with one core.
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
|
||||
)
|
||||
|
||||
# The gem5 library simble board which can be used to run simple SE-mode
|
||||
# The gem5 library simple board which can be used to run simple SE-mode
|
||||
# simulations.
|
||||
board = SimpleBoard(
|
||||
clk_freq="3GHz",
|
||||
|
||||
@@ -94,7 +94,7 @@ cache_hierarchy = NoCache()
|
||||
# Using simple memory to take checkpoints might slightly improve the
|
||||
# performance in atomic mode. The memory structure can be changed when
|
||||
# restoring from a checkpoint, but the size of the memory must be maintained.
|
||||
memory = SingleChannelDDR3_1600(size="2GB")
|
||||
memory = SingleChannelDDR3_1600(size="2GiB")
|
||||
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.ATOMIC,
|
||||
|
||||
@@ -81,14 +81,14 @@ requires(isa_required=ISA.X86)
|
||||
# The cache hierarchy can be different from the cache hierarchy used in taking
|
||||
# the checkpoints
|
||||
cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1i_size="32kB",
|
||||
l2_size="256kB",
|
||||
l1d_size="32KiB",
|
||||
l1i_size="32KiB",
|
||||
l2_size="256KiB",
|
||||
)
|
||||
|
||||
# The memory structure can be different from the memory structure used in
|
||||
# taking the checkpoints, but the size of the memory must be maintained
|
||||
memory = DualChannelDDR4_2400(size="2GB")
|
||||
memory = DualChannelDDR4_2400(size="2GiB")
|
||||
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.TIMING,
|
||||
|
||||
@@ -50,7 +50,7 @@ from gem5.utils.requires import requires
|
||||
requires(isa_required=ISA.ARM)
|
||||
|
||||
# We need a cache as DRAMSys only accepts requests with the size of a cache line
|
||||
cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="32kB", l1i_size="32kB")
|
||||
cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="32KiB", l1i_size="32KiB")
|
||||
|
||||
# We use a single channel DDR3_1600 memory system
|
||||
memory = DRAMSysDDR3_1600(recordable=True)
|
||||
@@ -58,7 +58,7 @@ memory = DRAMSysDDR3_1600(recordable=True)
|
||||
# We use a simple Timing processor with one core.
|
||||
processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1)
|
||||
|
||||
# The gem5 library simble board which can be used to run simple SE-mode
|
||||
# The gem5 library simple board which can be used to run simple SE-mode
|
||||
# simulations.
|
||||
board = SimpleBoard(
|
||||
clk_freq="3GHz",
|
||||
|
||||
@@ -40,12 +40,12 @@ from gem5.simulate.simulator import Simulator
|
||||
memory = DRAMSysMem(
|
||||
configuration="ext/dramsys/DRAMSys/configs/ddr4-example.json",
|
||||
recordable=True,
|
||||
size="4GB",
|
||||
size="4GiB",
|
||||
)
|
||||
|
||||
generator = LinearGenerator(
|
||||
duration="250us",
|
||||
rate="40GB/s",
|
||||
rate="40GiB/s",
|
||||
num_cores=1,
|
||||
max_addr=memory.get_size(),
|
||||
)
|
||||
|
||||
@@ -94,7 +94,7 @@ cache_hierarchy = NoCache()
|
||||
# performance in atomic mode. The memory structure can be changed when
|
||||
# restoring from a checkpoint, but the size of the memory must be equal or
|
||||
# greater to that taken when creating the checkpoint.
|
||||
memory = SingleChannelDDR3_1600(size="2GB")
|
||||
memory = SingleChannelDDR3_1600(size="2GiB")
|
||||
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.ATOMIC,
|
||||
|
||||
@@ -91,14 +91,14 @@ args = parser.parse_args()
|
||||
# The cache hierarchy can be different from the cache hierarchy used in taking
|
||||
# the checkpoints
|
||||
cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1i_size="32kB",
|
||||
l2_size="256kB",
|
||||
l1d_size="32KiB",
|
||||
l1i_size="32KiB",
|
||||
l2_size="256KiB",
|
||||
)
|
||||
|
||||
# The memory structure can be different from the memory structure used in
|
||||
# taking the checkpoints, but the size of the memory must be equal or larger.
|
||||
memory = DualChannelDDR4_2400(size="2GB")
|
||||
memory = DualChannelDDR4_2400(size="2GiB")
|
||||
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.TIMING,
|
||||
|
||||
@@ -101,15 +101,15 @@ multisim.set_num_processes(3)
|
||||
for benchmark in obtain_resource("npb-benchmark-suite"):
|
||||
for num_cores in [1, 2]:
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1i_size="32kB",
|
||||
l2_size="256kB",
|
||||
l1d_size="32KiB",
|
||||
l1i_size="32KiB",
|
||||
l2_size="256KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_assoc=8,
|
||||
l2_assoc=16,
|
||||
num_l2_banks=2,
|
||||
)
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
processor = SimpleSwitchableProcessor(
|
||||
starting_core_type=CPUTypes.ATOMIC,
|
||||
switch_core_type=CPUTypes.TIMING,
|
||||
|
||||
@@ -70,7 +70,7 @@ multisim.set_num_processes(2)
|
||||
|
||||
for process_id in range(5):
|
||||
cache_hierarchy = NoCache()
|
||||
memory = SingleChannelDDR3_1600(size="32MB")
|
||||
memory = SingleChannelDDR3_1600(size="32MiB")
|
||||
processor = SimpleProcessor(
|
||||
cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1
|
||||
)
|
||||
|
||||
@@ -59,7 +59,7 @@ requires(isa_required=ISA.POWER)
|
||||
cache_hierarchy = NoCache()
|
||||
|
||||
# We use a single channel DDR4_2400 memory system
|
||||
memory = SingleChannelDDR4_2400(size="32MB")
|
||||
memory = SingleChannelDDR4_2400(size="32MiB")
|
||||
|
||||
# We use a simple ATOMIC processor with one core.
|
||||
processor = SimpleProcessor(
|
||||
|
||||
@@ -63,12 +63,12 @@ from gem5.components.cachehierarchies.classic.private_l1_private_l2_walk_cache_h
|
||||
|
||||
# Here we setup the parameters of the l1 and l2 caches.
|
||||
cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy(
|
||||
l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
|
||||
l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB"
|
||||
)
|
||||
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
|
||||
# Here we setup the processor. We use a simple processor.
|
||||
processor = SimpleProcessor(
|
||||
|
||||
@@ -64,7 +64,7 @@ args = parser.parse_args()
|
||||
# instantiate the riscv matched board with default parameters
|
||||
board = RISCVMatchedBoard(
|
||||
clk_freq="1.2GHz",
|
||||
l2_size="2MB",
|
||||
l2_size="2MiB",
|
||||
is_fs=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
Script to run GAPBS benchmarks with gem5. The script expects the
|
||||
benchmark program and the simulation size to run. The input is in the format
|
||||
<benchmark_prog> <size> <synthetic>
|
||||
The system is fixed with 2 CPU cores, MESI Two Level system cache and 3 GB
|
||||
The system is fixed with 2 CPU cores, MESI Two Level system cache and 3 GiB
|
||||
DDR4 memory. It uses the x86 board.
|
||||
|
||||
This script will count the total number of instructions executed
|
||||
@@ -102,18 +102,18 @@ from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import
|
||||
)
|
||||
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1d_size="32KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_size="32kB",
|
||||
l1i_size="32KiB",
|
||||
l1i_assoc=8,
|
||||
l2_size="256kB",
|
||||
l2_size="256KiB",
|
||||
l2_assoc=16,
|
||||
num_l2_banks=2,
|
||||
)
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
# The X86 board only supports 3 GB of main memory.
|
||||
# The X86 board only supports 3 GiB of main memory.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
Script to run NAS parallel benchmarks with gem5. The script expects the
|
||||
benchmark program to run. The input is in the format
|
||||
<benchmark_prog>.<class>.x. The system is fixed with 2 CPU cores, MESI
|
||||
Two Level system cache and 3 GB DDR4 memory. It uses the x86 board.
|
||||
Two Level system cache and 3 GiB DDR4 memory. It uses the x86 board.
|
||||
|
||||
This script will count the total number of instructions executed
|
||||
in the ROI. It also tracks how much wallclock and simulated time.
|
||||
@@ -77,8 +77,8 @@ requires(
|
||||
# Following are the list of benchmark programs for npb.
|
||||
|
||||
# We are restricting classes of NPB to A, B and C as the other classes (D and
|
||||
# F) require main memory size of more than 3 GB. The X86Board is currently
|
||||
# limited to 3 GB of memory. This limitation is explained later in line 136.
|
||||
# F) require main memory size of more than 3 GiB. The X86Board is currently
|
||||
# limited to 3 GiB of memory. This limitation is explained later in line 136.
|
||||
|
||||
# The resource disk has binaries for class D. However, only `ep` benchmark
|
||||
# works with class D in the current configuration. More information on the
|
||||
@@ -109,13 +109,13 @@ parser.add_argument(
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
# The simulation may fail in the case of `mg` with class C as it uses 3.3 GB
|
||||
# The simulation may fail in the case of `mg` with class C as it uses 3.3 GiB
|
||||
# of memory (more information is available at https://arxiv.org/abs/2010.13216).
|
||||
# We warn the user here.
|
||||
|
||||
if args.benchmark == "npb-mg-c":
|
||||
warn(
|
||||
"mg.C uses 3.3 GB of memory. Currently we are simulating 3 GB\
|
||||
"mg.C uses 3.3 GiB of memory. Currently we are simulating 3 GiB\
|
||||
of main memory in the system."
|
||||
)
|
||||
|
||||
@@ -124,7 +124,7 @@ if args.benchmark == "npb-mg-c":
|
||||
elif args.benchmark == "npb-ft-c":
|
||||
warn(
|
||||
"There is not enough memory for ft.C. Currently we are\
|
||||
simulating 3 GB of main memory in the system."
|
||||
simulating 3 GiB of main memory in the system."
|
||||
)
|
||||
|
||||
# Checking for the maximum number of instructions, if provided by the user.
|
||||
@@ -137,18 +137,18 @@ from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import
|
||||
)
|
||||
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1d_size="32KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_size="32kB",
|
||||
l1i_size="32KiB",
|
||||
l1i_assoc=8,
|
||||
l2_size="256kB",
|
||||
l2_size="256KiB",
|
||||
l2_assoc=16,
|
||||
num_l2_banks=2,
|
||||
)
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
# The X86 board only supports 3 GB of main memory.
|
||||
# The X86 board only supports 3 GiB of main memory.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
Script to run PARSEC benchmarks with gem5.
|
||||
The script expects a benchmark program name and the simulation
|
||||
size. The system is fixed with 2 CPU cores, MESI Two Level system
|
||||
cache and 3 GB DDR4 memory. It uses the x86 board.
|
||||
cache and 3 GiB DDR4 memory. It uses the x86 board.
|
||||
|
||||
This script will count the total number of instructions executed
|
||||
in the ROI. It also tracks how much wallclock and simulated time elapsed.
|
||||
@@ -124,19 +124,19 @@ from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import
|
||||
)
|
||||
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1d_size="32KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_size="32kB",
|
||||
l1i_size="32KiB",
|
||||
l1i_assoc=8,
|
||||
l2_size="256kB",
|
||||
l2_size="256KiB",
|
||||
l2_assoc=16,
|
||||
num_l2_banks=2,
|
||||
)
|
||||
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
# The X86 board only supports 3 GB of main memory.
|
||||
# The X86 board only supports 3 GiB of main memory.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
Script to run SPEC CPU2006 benchmarks with gem5.
|
||||
The script expects a benchmark program name and the simulation
|
||||
size. The system is fixed with 2 CPU cores, MESI Two Level system
|
||||
cache and 3 GB DDR4 memory. It uses the x86 board.
|
||||
cache and 3 GiB DDR4 memory. It uses the x86 board.
|
||||
|
||||
This script will count the total number of instructions executed
|
||||
in the ROI. It also tracks how much wallclock and simulated time elapsed.
|
||||
@@ -193,18 +193,18 @@ from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import
|
||||
)
|
||||
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1d_size="32KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_size="32kB",
|
||||
l1i_size="32KiB",
|
||||
l1i_assoc=8,
|
||||
l2_size="256kB",
|
||||
l2_size="256KiB",
|
||||
l2_assoc=16,
|
||||
num_l2_banks=2,
|
||||
)
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
# The X86 board only supports 3 GB of main memory.
|
||||
# The X86 board only supports 3 GiB of main memory.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
Script to run SPEC CPU2017 benchmarks with gem5.
|
||||
The script expects a benchmark program name and the simulation
|
||||
size. The system is fixed with 2 CPU cores, MESI Two Level system
|
||||
cache and 3 GB DDR4 memory. It uses the x86 board.
|
||||
cache and 3 GiB DDR4 memory. It uses the x86 board.
|
||||
|
||||
This script will count the total number of instructions executed
|
||||
in the ROI. It also tracks how much wallclock and simulated time elapsed.
|
||||
@@ -207,18 +207,18 @@ from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import
|
||||
)
|
||||
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="32kB",
|
||||
l1d_size="32KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_size="32kB",
|
||||
l1i_size="32KiB",
|
||||
l1i_assoc=8,
|
||||
l2_size="256kB",
|
||||
l2_size="256KiB",
|
||||
l2_assoc=16,
|
||||
num_l2_banks=2,
|
||||
)
|
||||
# Memory: Dual Channel DDR4 2400 DRAM device.
|
||||
# The X86 board only supports 3 GB of main memory.
|
||||
# The X86 board only supports 3 GiB of main memory.
|
||||
|
||||
memory = DualChannelDDR4_2400(size="3GB")
|
||||
memory = DualChannelDDR4_2400(size="3GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -67,17 +67,17 @@ from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import
|
||||
|
||||
# Here we setup a MESI Two Level Cache Hierarchy.
|
||||
cache_hierarchy = MESITwoLevelCacheHierarchy(
|
||||
l1d_size="16kB",
|
||||
l1d_size="16KiB",
|
||||
l1d_assoc=8,
|
||||
l1i_size="16kB",
|
||||
l1i_size="16KiB",
|
||||
l1i_assoc=8,
|
||||
l2_size="256kB",
|
||||
l2_size="256KiB",
|
||||
l2_assoc=16,
|
||||
num_l2_banks=1,
|
||||
)
|
||||
|
||||
# Setup the system memory.
|
||||
memory = SingleChannelDDR3_1600(size="3GB")
|
||||
memory = SingleChannelDDR3_1600(size="3GiB")
|
||||
|
||||
# Here we setup the processor. This is a special switchable processor in which
|
||||
# a starting core type and a switch core type must be specified. Once a
|
||||
|
||||
@@ -126,7 +126,7 @@ if __name__ == "__m5_main__":
|
||||
args.ruby = True
|
||||
args.cpu_type = "X86KvmCPU"
|
||||
args.num_cpus = 1
|
||||
args.mem_size = "3GB"
|
||||
args.mem_size = "3GiB"
|
||||
args.dgpu = True
|
||||
args.dgpu_mem_size = "16GB"
|
||||
args.dgpu_start = "0GB"
|
||||
|
||||
@@ -134,7 +134,7 @@ if __name__ == "__m5_main__":
|
||||
args.ruby = True
|
||||
args.cpu_type = "X86KvmCPU"
|
||||
args.num_cpus = 1
|
||||
args.mem_size = "3GB"
|
||||
args.mem_size = "3GiB"
|
||||
args.dgpu = True
|
||||
args.dgpu_mem_size = "16GB"
|
||||
args.dgpu_start = "0GB"
|
||||
|
||||
@@ -124,7 +124,7 @@ if __name__ == "__m5_main__":
|
||||
args.ruby = True
|
||||
args.cpu_type = "X86KvmCPU"
|
||||
args.num_cpus = 1
|
||||
args.mem_size = "3GB"
|
||||
args.mem_size = "3GiB"
|
||||
args.dgpu = True
|
||||
args.dgpu_mem_size = "16GB"
|
||||
args.dgpu_start = "0GB"
|
||||
|
||||
@@ -140,7 +140,7 @@ def runMI200GPUFS(cpu_type):
|
||||
# Defaults for MI200
|
||||
args.ruby = True
|
||||
args.cpu_type = "X86KvmCPU"
|
||||
args.mem_size = "8GB" # CPU host memory
|
||||
args.mem_size = "8GiB" # CPU host memory
|
||||
args.dgpu = True
|
||||
args.dgpu_mem_size = "16GB" # GPU device memory
|
||||
args.dgpu_start = "0GB"
|
||||
|
||||
@@ -152,7 +152,7 @@ def runMI300GPUFS(
|
||||
|
||||
# Defaults for CPU
|
||||
args.cpu_type = "X86KvmCPU"
|
||||
args.mem_size = "8GB"
|
||||
args.mem_size = "8GiB"
|
||||
|
||||
# Defaults for MI300X
|
||||
args.gpu_device = "MI300X"
|
||||
|
||||
@@ -57,8 +57,8 @@ def makeGpuFSSystem(args):
|
||||
]
|
||||
cmdline = " ".join(boot_options)
|
||||
|
||||
if MemorySize(args.mem_size) < MemorySize("2GB"):
|
||||
panic("Need at least 2GB of system memory to load amdgpu module")
|
||||
if MemorySize(args.mem_size) < MemorySize("2GiB"):
|
||||
panic("Need at least 2GiB of system memory to load amdgpu module")
|
||||
|
||||
# Use the common FSConfig to setup a Linux X86 System
|
||||
(TestCPUClass, test_mem_mode) = Simulation.getCPUClass(args.cpu_type)
|
||||
@@ -89,7 +89,7 @@ def makeGpuFSSystem(args):
|
||||
)
|
||||
|
||||
# Setup VGA ROM region
|
||||
system.shadow_rom_ranges = [AddrRange(0xC0000, size=Addr("128kB"))]
|
||||
system.shadow_rom_ranges = [AddrRange(0xC0000, size=Addr("128KiB"))]
|
||||
|
||||
# Create specified number of CPUs. GPUFS really only needs one.
|
||||
system.cpu = [
|
||||
|
||||
@@ -141,7 +141,7 @@ def runVegaGPUFS(cpu_type):
|
||||
args.ruby = True
|
||||
args.cpu_type = cpu_type
|
||||
args.num_cpus = 1
|
||||
args.mem_size = "3GB"
|
||||
args.mem_size = "3GiB"
|
||||
args.dgpu = True
|
||||
args.dgpu_mem_size = "16GB"
|
||||
args.dgpu_start = "0GB"
|
||||
|
||||
@@ -77,7 +77,7 @@ cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy(
|
||||
)
|
||||
|
||||
# Setup the system memory.
|
||||
memory = SingleChannelDDR3_1600(size="128MB")
|
||||
memory = SingleChannelDDR3_1600(size="128MiB")
|
||||
# Setup a single core Processor.
|
||||
if args.cpu_type == "atomic":
|
||||
processor = SimpleProcessor(
|
||||
|
||||
@@ -189,7 +189,7 @@ for t, m in zip(testerspec, multiplier):
|
||||
|
||||
# Define a prototype L1 cache that we scale for all successive levels
|
||||
proto_l1 = Cache(
|
||||
size="32kB",
|
||||
size="32KiB",
|
||||
assoc=4,
|
||||
tag_latency=1,
|
||||
data_latency=1,
|
||||
|
||||
@@ -218,7 +218,7 @@ else:
|
||||
|
||||
# Define a prototype L1 cache that we scale for all successive levels
|
||||
proto_l1 = Cache(
|
||||
size="32kB",
|
||||
size="32KiB",
|
||||
assoc=4,
|
||||
tag_latency=1,
|
||||
data_latency=1,
|
||||
@@ -356,7 +356,7 @@ last_subsys = getattr(system, f"l{len(cachespec)}subsys0")
|
||||
last_subsys.xbar.point_of_coherency = True
|
||||
if args.noncoherent_cache:
|
||||
system.llc = NoncoherentCache(
|
||||
size="16MB",
|
||||
size="16MiB",
|
||||
assoc=16,
|
||||
tag_latency=10,
|
||||
data_latency=10,
|
||||
|
||||
@@ -175,7 +175,7 @@ if not (args.num_dmas is None):
|
||||
# level 1: large
|
||||
# Each location corresponds to a 4-byte piece of data
|
||||
#
|
||||
args.mem_size = "1024MB"
|
||||
args.mem_size = "1024MiB"
|
||||
if args.address_range == "small":
|
||||
num_atomic_locs = 10
|
||||
num_regular_locs_per_atomic_loc = 10000
|
||||
|
||||
@@ -71,7 +71,7 @@ class L1ICache(L1Cache):
|
||||
"""Simple L1 instruction cache with default values"""
|
||||
|
||||
# Set the default size
|
||||
size = "16kB"
|
||||
size = "16KiB"
|
||||
|
||||
SimpleOpts.add_option(
|
||||
"--l1i_size", help=f"L1 instruction cache size. Default: {size}"
|
||||
@@ -92,7 +92,7 @@ class L1DCache(L1Cache):
|
||||
"""Simple L1 data cache with default values"""
|
||||
|
||||
# Set the default size
|
||||
size = "64kB"
|
||||
size = "64KiB"
|
||||
|
||||
SimpleOpts.add_option(
|
||||
"--l1d_size", help=f"L1 data cache size. Default: {size}"
|
||||
@@ -113,7 +113,7 @@ class L2Cache(Cache):
|
||||
"""Simple L2 Cache with default values"""
|
||||
|
||||
# Default parameters
|
||||
size = "256kB"
|
||||
size = "256KiB"
|
||||
assoc = 8
|
||||
tag_latency = 20
|
||||
data_latency = 20
|
||||
|
||||
@@ -39,7 +39,7 @@ system.clk_domain.clock = "1GHz"
|
||||
system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
system.mem_mode = "timing"
|
||||
system.mem_ranges = [AddrRange("512MB")]
|
||||
system.mem_ranges = [AddrRange("512MiB")]
|
||||
system.cpu = ArmTimingSimpleCPU()
|
||||
|
||||
system.membus = SystemXBar()
|
||||
|
||||
@@ -39,7 +39,7 @@ system.clk_domain.clock = "1GHz"
|
||||
system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
system.mem_mode = "timing"
|
||||
system.mem_ranges = [AddrRange("512MB")]
|
||||
system.mem_ranges = [AddrRange("512MiB")]
|
||||
system.cpu = RiscvTimingSimpleCPU()
|
||||
|
||||
system.membus = SystemXBar()
|
||||
|
||||
@@ -54,7 +54,7 @@ system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
# Set up the system
|
||||
system.mem_mode = "timing" # Use timing accesses
|
||||
system.mem_ranges = [AddrRange("512MB")] # Create an address range
|
||||
system.mem_ranges = [AddrRange("512MiB")] # Create an address range
|
||||
|
||||
# Create a simple CPU
|
||||
# You can use ISA-specific CPU models for different workloads:
|
||||
|
||||
@@ -77,7 +77,7 @@ system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
# Set up the system
|
||||
system.mem_mode = "timing" # Use timing accesses
|
||||
system.mem_ranges = [AddrRange("512MB")] # Create an address range
|
||||
system.mem_ranges = [AddrRange("512MiB")] # Create an address range
|
||||
|
||||
# Create a simple CPU
|
||||
system.cpu = X86TimingSimpleCPU()
|
||||
|
||||
@@ -46,7 +46,7 @@ system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
# Set up the system
|
||||
system.mem_mode = "timing" # Use timing accesses
|
||||
system.mem_ranges = [AddrRange("512MB")] # Create an address range
|
||||
system.mem_ranges = [AddrRange("512MiB")] # Create an address range
|
||||
|
||||
# Create a simple CPU
|
||||
system.cpu = X86TimingSimpleCPU()
|
||||
@@ -55,7 +55,7 @@ system.cpu = X86TimingSimpleCPU()
|
||||
system.membus = SystemXBar()
|
||||
|
||||
# Create a simple cache
|
||||
system.cache = SimpleCache(size="1kB")
|
||||
system.cache = SimpleCache(size="1KiB")
|
||||
|
||||
# Connect the I and D cache ports of the CPU to the memobj.
|
||||
# Since cpu_side is a vector port, each time one of these is connected, it will
|
||||
|
||||
@@ -46,7 +46,7 @@ system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
# Set up the system
|
||||
system.mem_mode = "timing" # Use timing accesses
|
||||
system.mem_ranges = [AddrRange("512MB")] # Create an address range
|
||||
system.mem_ranges = [AddrRange("512MiB")] # Create an address range
|
||||
|
||||
# Create a simple CPU
|
||||
system.cpu = X86TimingSimpleCPU()
|
||||
|
||||
@@ -127,7 +127,9 @@ class L1Cache(L1Cache_Controller):
|
||||
self.version = self.versionCount()
|
||||
# This is the cache memory object that stores the cache data and tags
|
||||
self.cacheMemory = RubyCache(
|
||||
size="16kB", assoc=8, start_index_bit=self.getBlockSizeBits(system)
|
||||
size="16KiB",
|
||||
assoc=8,
|
||||
start_index_bit=self.getBlockSizeBits(system),
|
||||
)
|
||||
self.clk_domain = cpu.clk_domain
|
||||
self.send_evictions = self.sendEvicts(cpu)
|
||||
|
||||
@@ -125,7 +125,9 @@ class L1Cache(L1Cache_Controller):
|
||||
self.version = self.versionCount()
|
||||
# This is the cache memory object that stores the cache data and tags
|
||||
self.cacheMemory = RubyCache(
|
||||
size="16kB", assoc=8, start_index_bit=self.getBlockSizeBits(system)
|
||||
size="16KiB",
|
||||
assoc=8,
|
||||
start_index_bit=self.getBlockSizeBits(system),
|
||||
)
|
||||
self.clk_domain = cpu.clk_domain
|
||||
self.send_evictions = self.sendEvicts(cpu)
|
||||
|
||||
@@ -51,7 +51,7 @@ system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
# Set up the system
|
||||
system.mem_mode = "timing" # Use timing accesses
|
||||
system.mem_ranges = [AddrRange("512MB")] # Create an address range
|
||||
system.mem_ranges = [AddrRange("512MiB")] # Create an address range
|
||||
|
||||
# Create the tester
|
||||
system.tester = RubyTester(
|
||||
@@ -59,7 +59,7 @@ system.tester = RubyTester(
|
||||
)
|
||||
|
||||
# Create a simple memory controller and connect it to the membus
|
||||
system.mem_ctrl = SimpleMemory(latency="50ns", bandwidth="0GB/s")
|
||||
system.mem_ctrl = SimpleMemory(latency="50ns", bandwidth="0GiB/s")
|
||||
system.mem_ctrl.range = system.mem_ranges[0]
|
||||
|
||||
# Create the Ruby System
|
||||
|
||||
@@ -60,7 +60,7 @@ system.clk_domain.voltage_domain = VoltageDomain()
|
||||
|
||||
# Set up the system
|
||||
system.mem_mode = "timing" # Use timing accesses
|
||||
system.mem_ranges = [AddrRange("512MB")] # Create an address range
|
||||
system.mem_ranges = [AddrRange("512MiB")] # Create an address range
|
||||
|
||||
# Create a pair of simple CPUs
|
||||
system.cpu = [X86TimingSimpleCPU() for i in range(2)]
|
||||
|
||||
@@ -104,8 +104,8 @@ system.clk_domain = SrcClockDomain(
|
||||
clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
|
||||
)
|
||||
|
||||
# we are fine with 256 MB memory for now
|
||||
mem_range = AddrRange("512MB")
|
||||
# we are fine with 512 MiB memory for now
|
||||
mem_range = AddrRange("512MiB")
|
||||
system.mem_ranges = [mem_range]
|
||||
|
||||
# do not worry about reserving space for the backing store
|
||||
|
||||
@@ -127,8 +127,8 @@ system.clk_domain = SrcClockDomain(
|
||||
# the second, larger (1024) range for NVM
|
||||
# the NVM range starts directly after the DRAM range
|
||||
system.mem_ranges = [
|
||||
AddrRange("128MB"),
|
||||
AddrRange(Addr("128MB"), size="1024MB"),
|
||||
AddrRange("128MiB"),
|
||||
AddrRange(Addr("128MiB"), size="1024MiB"),
|
||||
]
|
||||
|
||||
# do not worry about reserving space for the backing store
|
||||
|
||||
@@ -659,7 +659,7 @@ class CHI_MN(CHI_Node):
|
||||
super().__init__(ruby_system)
|
||||
|
||||
# MiscNode has internal address range starting at 0
|
||||
addr_range = AddrRange(0, size="1kB")
|
||||
addr_range = AddrRange(0, size="1KiB")
|
||||
|
||||
self._cntrl = CHI_MNController(
|
||||
ruby_system, addr_range, l1d_caches, early_nonsync_comp
|
||||
|
||||
@@ -138,7 +138,7 @@ class CPCntrl(CorePair_Controller, CntrlBase):
|
||||
|
||||
|
||||
class TCPCache(RubyCache):
|
||||
size = "16kB"
|
||||
size = "16KiB"
|
||||
assoc = 16
|
||||
dataArrayBanks = 16 # number of data banks
|
||||
tagArrayBanks = 16 # number of tag banks
|
||||
@@ -276,7 +276,7 @@ class SQCCntrl(SQC_Controller, CntrlBase):
|
||||
|
||||
|
||||
class TCC(RubyCache):
|
||||
size = MemorySize("256kB")
|
||||
size = MemorySize("256KiB")
|
||||
assoc = 16
|
||||
dataAccessLatency = 8
|
||||
tagAccessLatency = 2
|
||||
@@ -289,7 +289,7 @@ class TCC(RubyCache):
|
||||
if hasattr(options, "bw_scalor") and options.bw_scalor > 0:
|
||||
s = options.num_compute_units
|
||||
tcc_size = s * 128
|
||||
tcc_size = str(tcc_size) + "kB"
|
||||
tcc_size = str(tcc_size) + "KiB"
|
||||
self.size = MemorySize(tcc_size)
|
||||
self.dataArrayBanks = 64
|
||||
self.tagArrayBanks = 64
|
||||
@@ -443,7 +443,7 @@ def define_options(parser):
|
||||
help="number of TCC banks in the GPU",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sqc-size", type=str, default="32kB", help="SQC cache size"
|
||||
"--sqc-size", type=str, default="32KiB", help="SQC cache size"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sqc-assoc", type=int, default=8, help="SQC cache assoc"
|
||||
@@ -478,11 +478,11 @@ def define_options(parser):
|
||||
"--TCC_latency", type=int, default=16, help="TCC latency"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tcc-size", type=str, default="256kB", help="agregate tcc size"
|
||||
"--tcc-size", type=str, default="256KiB", help="agregate tcc size"
|
||||
)
|
||||
parser.add_argument("--tcc-assoc", type=int, default=16, help="tcc assoc")
|
||||
parser.add_argument(
|
||||
"--tcp-size", type=str, default="16kB", help="tcp size"
|
||||
"--tcp-size", type=str, default="16KiB", help="tcp size"
|
||||
)
|
||||
parser.add_argument("--tcp-assoc", type=int, default=16, help="tcp assoc")
|
||||
parser.add_argument(
|
||||
|
||||
@@ -51,9 +51,9 @@ parser.add_argument(
|
||||
parser.add_argument(
|
||||
"-f", "--frequency", default="1GHz", help="Frequency of each CPU"
|
||||
)
|
||||
parser.add_argument("--l1size", default="32kB")
|
||||
parser.add_argument("--l1size", default="32KiB")
|
||||
parser.add_argument("--l1latency", default=1)
|
||||
parser.add_argument("--l2size", default="256kB")
|
||||
parser.add_argument("--l2size", default="256KiB")
|
||||
parser.add_argument("--l2latency", default=10)
|
||||
parser.add_argument(
|
||||
"--rootdir",
|
||||
|
||||
@@ -49,9 +49,9 @@ parser.add_argument(
|
||||
parser.add_argument(
|
||||
"-f", "--frequency", default="1GHz", help="Frequency of each CPU"
|
||||
)
|
||||
parser.add_argument("--l1size", default="32kB")
|
||||
parser.add_argument("--l1size", default="32KiB")
|
||||
parser.add_argument("--l1latency", default="1ns")
|
||||
parser.add_argument("--l2size", default="256kB")
|
||||
parser.add_argument("--l2size", default="256KiB")
|
||||
parser.add_argument("--l2latency", default="10ns")
|
||||
parser.add_argument(
|
||||
"--rootdir",
|
||||
|
||||
Reference in New Issue
Block a user