The command executed was `black src configs tests util`. Change-Id: I8dfaa6ab04658fea37618127d6ac19270028d771 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47024 Maintainer: Bobby Bruce <bbruce@ucdavis.edu> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Tested-by: kokoro <noreply+kokoro@google.com>
342 lines
12 KiB
Python
342 lines
12 KiB
Python
# Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
#
|
|
# 3. Neither the name of the copyright holder nor the names of its
|
|
# contributors may be used to endorse or promote products derived from this
|
|
# software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
from m5.defines import buildEnv
|
|
from m5.params import *
|
|
from m5.proxy import *
|
|
from m5.SimObject import SimObject
|
|
|
|
from m5.objects.Bridge import Bridge
|
|
from m5.objects.ClockedObject import ClockedObject
|
|
from m5.objects.Device import DmaVirtDevice
|
|
from m5.objects.LdsState import LdsState
|
|
from m5.objects.Process import EmulatedDriver
|
|
from m5.objects.VegaGPUTLB import VegaPagetableWalker
|
|
|
|
|
|
class PrefetchType(Enum):
    # Names accepted by parameters of type Param.PrefetchType
    # (ComputeUnit.prefetch_prev_type is declared with this type).
    vals = [
        "PF_CU",
        "PF_PHASE",
        "PF_WF",
        "PF_STRIDE",
        "PF_END",
    ]
|
|
|
|
|
|
class GfxVersion(ScopedEnum):
    # Names accepted by parameters of type Param.GfxVersion
    # (GPUComputeDriver.gfxVersion is declared with this type).
    vals = [
        "gfx801",
        "gfx803",
        "gfx900",
        "gfx902",
    ]
|
|
|
|
|
|
class PoolManager(SimObject):
    # Abstract base for the register pool managers declared in this file
    # (SimplePoolManager and DynPoolManager derive from it).
    type = "PoolManager"
    abstract = True
    cxx_class = "gem5::PoolManager"
    cxx_header = "gpu-compute/pool_manager.hh"

    # Sizing parameters shared by every pool manager.
    min_alloc = Param.Int(4, "min number of VGPRs allocated per WF")
    pool_size = Param.Int(2048, "number of vector registers per SIMD")
|
|
|
|
|
|
# The simple pool manager only allows one workgroup to
# be executing on a CU at any given time.
class SimplePoolManager(PoolManager):
    # Adds no parameters of its own; min_alloc and pool_size come from
    # PoolManager.
    type = "SimplePoolManager"
    cxx_class = "gem5::SimplePoolManager"
    cxx_header = "gpu-compute/simple_pool_manager.hh"
|
|
|
|
|
|
# This pool manager is for allowing multiple workgroups on one CU.
class DynPoolManager(PoolManager):
    # Adds no parameters of its own; min_alloc and pool_size come from
    # PoolManager.
    type = "DynPoolManager"
    cxx_class = "gem5::DynPoolManager"
    cxx_header = "gpu-compute/dyn_pool_manager.hh"
|
|
|
|
|
|
class RegisterFile(SimObject):
    # Common base for ScalarRegisterFile and VectorRegisterFile.
    type = "RegisterFile"
    cxx_class = "gem5::RegisterFile"
    cxx_header = "gpu-compute/register_file.hh"

    # NOTE(review): the -1 default looks like an "unassigned" placeholder
    # that a config script is expected to override -- confirm.
    simd_id = Param.Int(-1, "SIMD ID associated with this Register File")
    num_regs = Param.Int(2048, "number of registers in this RF")
    wf_size = Param.Int(64, "Wavefront size (in work items)")
|
|
|
|
|
|
class ScalarRegisterFile(RegisterFile):
    # Declares no extra parameters; simd_id, num_regs and wf_size are
    # inherited from RegisterFile.
    type = "ScalarRegisterFile"
    cxx_class = "gem5::ScalarRegisterFile"
    cxx_header = "gpu-compute/scalar_register_file.hh"
|
|
|
|
|
|
class VectorRegisterFile(RegisterFile):
    # Declares no extra parameters; simd_id, num_regs and wf_size are
    # inherited from RegisterFile.
    type = "VectorRegisterFile"
    cxx_class = "gem5::VectorRegisterFile"
    cxx_header = "gpu-compute/vector_register_file.hh"
|
|
|
|
|
|
class RegisterManager(SimObject):
    # Groups the vector (VRF) and scalar (SRF) pool managers under a single
    # named policy.
    type = "RegisterManager"
    cxx_class = "gem5::RegisterManager"
    cxx_header = "gpu-compute/register_manager.hh"

    policy = Param.String("static", "Register Manager Policy")

    # Neither vector has a default, so both must be supplied by the config.
    vrf_pool_managers = VectorParam.PoolManager("VRF Pool Managers")
    srf_pool_managers = VectorParam.PoolManager("SRF Pool Managers")
|
|
|
|
|
|
class Wavefront(SimObject):
    # One wavefront slot; ComputeUnit.wavefronts is a vector of these.
    type = "Wavefront"
    cxx_class = "gem5::Wavefront"
    cxx_header = "gpu-compute/wavefront.hh"

    # Identification parameters: no defaults are declared for these two.
    simdId = Param.Int("SIMD id (0-ComputeUnit.num_SIMDs)")
    wf_slot_id = Param.Int("wavefront id (0-ComputeUnit.max_wfs)")

    wf_size = Param.Int(64, "Wavefront size (in work items)")
    max_ib_size = Param.Int(
        13, "Maximum size (in number of insts) of the instruction buffer (IB)."
    )
|
|
|
|
|
|
# Most of the default values here are obtained from the
# AMD Graphics Core Next (GCN) Architecture whitepaper.
class ComputeUnit(ClockedObject):
    # Parameter and port declarations for a single GPU compute unit (CU).
    type = "ComputeUnit"
    cxx_class = "gem5::ComputeUnit"
    cxx_header = "gpu-compute/compute_unit.hh"

    # ----- Wavefronts and execution resources -----
    wavefronts = VectorParam.Wavefront("Number of wavefronts")
    # Wavefront size is 64. This is configurable, however changing
    # this value to anything other than 64 will likely cause errors.
    wf_size = Param.Int(64, "Wavefront size (in work items)")
    num_barrier_slots = Param.Int(4, "Number of barrier slots in a CU")
    num_SIMDs = Param.Int(4, "number of SIMD units per CU")
    num_scalar_cores = Param.Int(1, "number of Scalar cores per CU")
    num_scalar_mem_pipes = Param.Int(
        1, "number of Scalar memory pipelines per CU"
    )
    simd_width = Param.Int(16, "width (number of lanes) per SIMD unit")

    # ----- Pipeline lengths -----
    operand_network_length = Param.Int(
        1, "number of pipe stages of operand network"
    )

    spbypass_pipe_length = Param.Int(
        4, "vector ALU Single Precision bypass latency"
    )

    dpbypass_pipe_length = Param.Int(
        4, "vector ALU Double Precision bypass latency"
    )
    scalar_pipe_length = Param.Int(1, "number of pipe stages per scalar ALU")
    issue_period = Param.Int(4, "number of cycles per issue period")

    # ----- Register-file bus latencies -----
    vrf_gm_bus_latency = Param.Int(
        1, "number of cycles per use of VRF to GM bus"
    )
    srf_scm_bus_latency = Param.Int(
        1, "number of cycles per use of SRF to Scalar Mem bus"
    )
    vrf_lm_bus_latency = Param.Int(
        1, "number of cycles per use of VRF to LM bus"
    )

    # ----- Memory pipelines and latencies -----
    num_global_mem_pipes = Param.Int(1, "number of global memory pipes per CU")
    num_shared_mem_pipes = Param.Int(1, "number of shared memory pipes per CU")
    n_wf = Param.Int(10, "Number of wavefront slots per SIMD")
    mem_req_latency = Param.Int(
        50,
        "Latency for request from the cu to ruby. Represents the "
        "pipeline to reach the TCP and specified in GPU clock cycles",
    )
    mem_resp_latency = Param.Int(
        50,
        "Latency for responses from ruby to the cu. Represents the "
        "pipeline between the TCP and cu as well as TCP data array "
        "access. Specified in GPU clock cycles",
    )
    # Resolved through the Parent proxy rather than set explicitly.
    system = Param.System(Parent.any, "system object")
    cu_id = Param.Int("CU id")
    vrf_to_coalescer_bus_width = Param.Int(
        64, "VRF->Coalescer data bus width in bytes"
    )
    coalescer_to_vrf_bus_width = Param.Int(
        64, "Coalescer->VRF data bus width in bytes"
    )

    # ----- Ports -----
    memory_port = VectorRequestPort("Port to the memory system")
    translation_port = VectorRequestPort("Port to the TLB hierarchy")
    sqc_port = RequestPort("Port to the SQC (I-cache)")
    sqc_tlb_port = RequestPort("Port to the TLB for the SQC (I-cache)")
    scalar_port = RequestPort("Port to the scalar data cache")
    scalar_tlb_port = RequestPort("Port to the TLB for the scalar data cache")
    gmTokenPort = RequestPort("Port to the GPU coalescer for sharing tokens")

    # ----- TLB and prefetching -----
    perLaneTLB = Param.Bool(False, "enable per-lane TLB")
    prefetch_depth = Param.Int(
        0,
        "Number of prefetches triggered at a time (0 turns off prefetching)",
    )
    prefetch_stride = Param.Int(1, "Fixed Prefetch Stride (1 means next-page)")
    prefetch_prev_type = Param.PrefetchType(
        "PF_PHASE",
        "Prefetch the stride from last mem req in lane of "
        "CU|Phase|Wavefront",
    )

    # ----- Scheduling and debugging switches -----
    execPolicy = Param.String("OLDEST-FIRST", "WF execution selection policy")
    debugSegFault = Param.Bool(False, "enable debugging GPU seg faults")
    functionalTLB = Param.Bool(False, "Assume TLB causes no delay")

    localMemBarrier = Param.Bool(
        False, "Assume Barriers do not wait on kernel end"
    )

    countPages = Param.Bool(
        False, "Generate per-CU file of all pages touched and how many times"
    )

    # ----- Queue sizes and request limits -----
    scalar_mem_queue_size = Param.Int(
        32, "Number of entries in scalar memory pipeline's queues"
    )
    global_mem_queue_size = Param.Int(
        256, "Number of entries in the global memory pipeline's queues"
    )
    local_mem_queue_size = Param.Int(
        256, "Number of entries in the local memory pipeline's queues"
    )
    max_wave_requests = Param.Int(
        64, "number of pending vector memory requests per wavefront"
    )
    max_cu_tokens = Param.Int(
        4,
        "Maximum number of tokens, i.e., the number of instructions "
        "that can be sent to coalescer",
    )

    # ----- Local data store (LDS) -----
    ldsBus = Bridge()  # the bridge between the CU and its LDS
    ldsPort = RequestPort("The port that goes to the LDS")
    localDataStore = Param.LdsState("the LDS for this CU")

    # ----- Register files and register management -----
    vector_register_file = VectorParam.VectorRegisterFile(
        "Vector register file"
    )

    scalar_register_file = VectorParam.ScalarRegisterFile(
        "Scalar register file"
    )
    out_of_order_data_delivery = Param.Bool(
        False, "enable OoO data delivery in the GM pipeline"
    )
    register_manager = Param.RegisterManager("Register Manager")
    fetch_depth = Param.Int(
        2, "number of i-cache lines that may be buffered in the fetch unit."
    )
|
|
|
|
|
|
class Shader(ClockedObject):
    # Top-level GPU object: holds the compute units and references the
    # command processor and dispatcher.
    type = "Shader"
    cxx_class = "gem5::Shader"
    cxx_header = "gpu-compute/shader.hh"

    # ----- Attached components -----
    CUs = VectorParam.ComputeUnit("Number of compute units")
    gpu_cmd_proc = Param.GPUCommandProcessor("Command processor for GPU")
    dispatcher = Param.GPUDispatcher("GPU workgroup dispatcher")
    system_hub = Param.AMDGPUSystemHub(NULL, "GPU System Hub (FS Mode only)")
    n_wf = Param.Int(10, "Number of wavefront slots per SIMD")

    # ----- Implicit acquire/release at kernel boundaries -----
    # Descriptions are single-line literals so that no newline or source
    # indentation ends up inside the parameter description text.
    impl_kern_launch_acq = Param.Bool(
        True, "Insert acq packet into ruby at kernel launch"
    )
    impl_kern_end_rel = Param.Bool(
        False, "Insert rel packet into ruby at kernel end"
    )

    # ----- Memory and timing -----
    globalmem = Param.MemorySize("64kB", "Memory size")
    timing = Param.Bool(False, "timing memory accesses")

    cpu_pointer = Param.BaseCPU(NULL, "pointer to base CPU")
    translation = Param.Bool(False, "address translation")
    timer_period = Param.Clock("10us", "system timer period")
    idlecu_timeout = Param.Tick(0, "Idle CU watchdog timeout threshold")
    max_valu_insts = Param.Int(0, "Maximum vALU insts before exiting")
|
|
|
|
|
|
class GPUComputeDriver(EmulatedDriver):
    # Emulated driver parameters for the GPU compute device.
    type = "GPUComputeDriver"
    cxx_class = "gem5::GPUComputeDriver"
    cxx_header = "gpu-compute/gpu_compute_driver.hh"

    device = Param.GPUCommandProcessor("GPU controlled by this driver")
    isdGPU = Param.Bool(False, "Driver is for a dGPU")
    gfxVersion = Param.GfxVersion("gfx801", "ISA of gpu to model")
    dGPUPoolID = Param.Int(0, "Pool ID for dGPU.")

    # Default Mtype for caches
    # --     1   1   1   C_RW_S  (Cached-ReadWrite-Shared)
    # --     1   1   0   C_RW_US (Cached-ReadWrite-Unshared)
    # --     1   0   1   C_RO_S  (Cached-ReadOnly-Shared)
    # --     1   0   0   C_RO_US (Cached-ReadOnly-Unshared)
    # --     0   1   x   UC_L2   (Uncached_GL2)
    # --     0   0   x   UC_All  (Uncached_All_Load)
    # Intended default: 5/C_RO_S (only allow caching in GL2 for read. Shared)
    # NOTE(review): no default is declared on the parameter below, so the
    # value 5 presumably has to be supplied by the config script -- confirm.
    m_type = Param.Int("Default MTYPE for cache. Valid values between 0-7")
|
|
|
|
|
|
class GPURenderDriver(EmulatedDriver):
    # Declares no parameters beyond those inherited from EmulatedDriver.
    type = "GPURenderDriver"
    cxx_class = "gem5::GPURenderDriver"
    cxx_header = "gpu-compute/gpu_render_driver.hh"
|
|
|
|
|
|
class GPUDispatcher(SimObject):
    # Declares no parameters of its own; referenced by Shader.dispatcher and
    # GPUCommandProcessor.dispatcher.
    type = "GPUDispatcher"
    cxx_class = "gem5::GPUDispatcher"
    cxx_header = "gpu-compute/dispatcher.hh"
|
|
|
|
|
|
class GPUCommandProcessor(DmaVirtDevice):
    # Command processor device; referenced by Shader.gpu_cmd_proc and
    # GPUComputeDriver.device.
    type = "GPUCommandProcessor"
    cxx_class = "gem5::GPUCommandProcessor"
    cxx_header = "gpu-compute/gpu_command_processor.hh"

    dispatcher = Param.GPUDispatcher("workgroup dispatcher for the GPU")
    hsapp = Param.HSAPacketProcessor("PP attached to this device")

    # The default is a VegaPagetableWalker instance constructed right here.
    walker = Param.VegaPagetableWalker(
        VegaPagetableWalker(), "Page table walker"
    )
|
|
|
|
|
|
class StorageClassType(Enum):
    # Storage-class names; the order of the entries is kept exactly as
    # originally declared.
    vals = [
        "SC_SPILL",
        "SC_GLOBAL",
        "SC_GROUP",
        "SC_PRIVATE",
        "SC_READONLY",
        "SC_KERNARG",
        "SC_ARG",
        "SC_NONE",
    ]
|