Currently uses gfx801, gfx803, gfx900 for Carrizo, Fiji, and Vega respectively Change-Id: I62758914b6a60f16dd4f2141a23c0a9141a4e1a0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42217 Maintainer: Matthew Poremba <matthew.poremba@amd.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
456 lines
22 KiB
Python
456 lines
22 KiB
Python
# Copyright (c) 2018 Advanced Micro Devices, Inc.
|
|
# All rights reserved.
|
|
#
|
|
# For use for simulation and test purposes only
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
#
|
|
# 3. Neither the name of the copyright holder nor the names of its
|
|
# contributors may be used to endorse or promote products derived from this
|
|
# software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
import m5
|
|
|
|
import operator
|
|
from os import mkdir, makedirs, getpid, listdir, fsync
|
|
from os.path import join as joinpath
|
|
from os.path import isdir
|
|
from shutil import rmtree, copyfile
|
|
from m5.util.convert import toFrequency
|
|
|
|
def file_append(path, contents):
    """Append ``str(contents)`` to a file and force it out to disk.

    Args:
        path: sequence of path components; they are joined with
            ``joinpath`` to form the file name.
        contents: value to write; it is converted with ``str`` first.
    """
    target = joinpath(*path)
    with open(target, 'a') as out:
        out.write(str(contents))
        # Flush Python's buffer, then ask the OS to commit the data.
        out.flush()
        fsync(out.fileno())
|
|
|
|
def remake_dir(path):
    """Leave ``path`` as a freshly created, empty directory.

    An existing directory at ``path`` is removed together with its
    contents before the directory (and any missing parents) is created.
    """
    stale = isdir(path)
    if stale:
        rmtree(path)
    makedirs(path)
|
|
|
|
# This fakes out a dGPU setup so the runtime operates correctly. The spoofed
|
|
# system has a single dGPU and a single socket CPU. Note that more complex
|
|
# topologies (multi-GPU, multi-socket CPUs) need to have a different setup
|
|
# here or the runtime won't be able to issue Memcpies from one node to another.
|
|
#
|
|
# TODO: There is way too much hardcoded here. It doesn't affect anything in
|
|
# our current ROCm stack (1.6), but it is highly possible that it will in the
|
|
# future. We might need to scrub through this and extract the appropriate
|
|
# fields from the simulator in the future.
|
|
def createVegaTopology(options):
    """Write a spoofed KFD topology with one CPU node and one Vega dGPU.

    The files are created below ``m5.options.outdir``: the topology tree in
    ``fs/sys/devices/virtual/kfd/kfd/topology`` and the amdgpu module
    parameters in ``fs/sys/module/amdgpu/parameters``. The directory
    ``fs/usr/share/hwdata/`` is (re)created as well, but this function
    writes nothing into it. Every directory touched here is wiped first.

    Args:
        options: parsed simulator options. Only ``num_cpus``,
            ``simds_per_cu`` and ``wfs_per_simd`` are read; all other
            values are hardcoded.
    """
    # Heap type codes written into the mem_banks properties: 0 for the
    # CPU's system memory and 1 for the GPU's local memory, the same
    # literals createFijiTopology in this file writes. The previous code
    # referenced HsaHeaptype and heap_type, neither of which is defined in
    # this file, so the function died with a NameError.
    # NOTE(review): presumably these are HSA_HEAPTYPE_SYSTEM and
    # HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC from the ROCt thunk interface
    # (hsakmttypes.h) -- confirm against the targeted ROCm release.
    system_heap_type = 0
    gpu_heap_type = 1

    topology_dir = joinpath(m5.options.outdir,
                            'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topology_dir)

    amdgpu_dir = joinpath(m5.options.outdir,
                          'fs/sys/module/amdgpu/parameters')
    remake_dir(amdgpu_dir)

    pci_ids_dir = joinpath(m5.options.outdir,
                           'fs/usr/share/hwdata/')
    remake_dir(pci_ids_dir)

    # Vega reported VM size in GB. Used to reserve an allocation from CPU
    # to implement SVM (i.e. GPUVM64 pointers and X86 pointers agree)
    file_append((amdgpu_dir, 'vm_size'), 256)

    # Ripped from real Vega platform to appease KMT version checks
    file_append((topology_dir, 'generation_id'), 2)

    # Set up system properties. Register as ast-rocm server
    sys_prop = ''.join([
        'platform_oem 35498446626881\n',
        'platform_id 71791775140929\n',
        'platform_rev 2\n',
    ])
    file_append((topology_dir, 'system_properties'), sys_prop)

    # Populate the topology tree
    # Our dGPU system is two nodes. Node 0 is a CPU and Node 1 is a dGPU
    node_dir = joinpath(topology_dir, 'nodes/0')
    remake_dir(node_dir)

    # Register as a CPU
    file_append((node_dir, 'gpu_id'), 0)
    file_append((node_dir, 'name'), '')

    # CPU links. Only thing that matters is we tell the runtime that GPU is
    # connected through PCIe to CPU socket 0.
    io_links = 1
    io_dir = joinpath(node_dir, 'io_links/0')
    remake_dir(io_dir)
    io_prop = ''.join([
        'type 2\n',
        'version_major 0\n',
        'version_minor 0\n',
        'node_from 0\n',
        'node_to 1\n',
        'weight 20\n',
        'min_latency 0\n',
        'max_latency 0\n',
        'min_bandwidth 0\n',
        'max_bandwidth 0\n',
        'recommended_transfer_size 0\n',
        'flags 13\n',
    ])
    file_append((io_dir, 'properties'), io_prop)

    # Populate CPU node properties
    node_prop = ''.join([
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count 0\n',
        'mem_banks_count 1\n',
        'caches_count 0\n',
        'io_links_count %s\n' % io_links,
        'cpu_core_id_base 0\n',
        'simd_id_base 0\n',
        'max_waves_per_simd 0\n',
        'lds_size_in_kb 0\n',
        'gds_size_in_kb 0\n',
        'wave_front_size 64\n',
        'array_count 0\n',
        'simd_arrays_per_engine 0\n',
        'cu_per_simd_array 0\n',
        'simd_per_cu 0\n',
        'max_slots_scratch_cu 0\n',
        'vendor_id 0\n',
        'device_id 0\n',
        'location_id 0\n',
        'drm_render_minor 0\n',
        'max_engine_clk_ccompute 3400\n',
    ])
    file_append((node_dir, 'properties'), node_prop)

    # CPU memory reporting
    mem_dir = joinpath(node_dir, 'mem_banks/0')
    remake_dir(mem_dir)
    mem_prop = ''.join([
        'heap_type %s\n' % system_heap_type,
        'size_in_bytes 33704329216\n',
        'flags 0\n',
        'width 72\n',
        'mem_clk_max 2400\n',
    ])
    file_append((mem_dir, 'properties'), mem_prop)

    # Build the GPU node
    node_dir = joinpath(topology_dir, 'nodes/1')
    remake_dir(node_dir)

    # Register as a Vega
    file_append((node_dir, 'gpu_id'), 22124)
    file_append((node_dir, 'name'), 'Vega\n')

    # 96 in real Vega; no cache topology is built here, so report none.
    caches = 0

    # GPU links. Only thing that matters is we tell the runtime that GPU is
    # connected through PCIe to CPU socket 0.
    io_links = 1
    io_dir = joinpath(node_dir, 'io_links/0')
    remake_dir(io_dir)
    io_prop = ''.join([
        'type 2\n',
        'version_major 0\n',
        'version_minor 0\n',
        'node_from 1\n',
        'node_to 0\n',
        'weight 20\n',
        'min_latency 0\n',
        'max_latency 0\n',
        'min_bandwidth 0\n',
        'max_bandwidth 0\n',
        'recommended_transfer_size 0\n',
        'flags 1\n',
    ])
    file_append((io_dir, 'properties'), io_prop)

    # Populate GPU node properties. Only the scratch slot count is derived
    # from the simulator options; the rest is hardcoded.
    cu_scratch = options.simds_per_cu * options.wfs_per_simd
    node_prop = ''.join([
        'cpu_cores_count 0\n',
        'simd_count 256\n',
        'mem_banks_count 1\n',
        'caches_count %s\n' % caches,
        'io_links_count %s\n' % io_links,
        'cpu_core_id_base 0\n',
        'simd_id_base 2147487744\n',
        'max_waves_per_simd 10\n',
        'lds_size_in_kb 64\n',
        'gds_size_in_kb 0\n',
        'wave_front_size 64\n',
        'array_count 4\n',
        'simd_arrays_per_engine 1\n',
        'cu_per_simd_array 16\n',
        'simd_per_cu 4\n',
        'max_slots_scratch_cu %s\n' % cu_scratch,
        'vendor_id 4098\n',
        'device_id 26720\n',
        'location_id 1024\n',
        'drm_render_minor 128\n',
        'hive_id 0\n',
        'num_sdma_engines 2\n',
        'num_sdma_xgmi_engines 0\n',
        'max_engine_clk_fcompute 1500\n',
        'local_mem_size 17163091968\n',
        'fw_version 421\n',
        'capability 238208\n',
        'debug_prop 32768\n',
        'sdma_fw_version 430\n',
        'max_engine_clk_ccompute 3400\n',
    ])
    file_append((node_dir, 'properties'), node_prop)

    # Vega local memory reporting
    # TODO: Extract size, clk, and width from sim parameters
    mem_dir = joinpath(node_dir, 'mem_banks/0')
    remake_dir(mem_dir)
    mem_prop = ''.join([
        'heap_type %s\n' % gpu_heap_type,
        'size_in_bytes 17163091968\n',
        'flags 0\n',
        'width 2048\n',
        'mem_clk_max 945\n',
    ])
    file_append((mem_dir, 'properties'), mem_prop)
|
|
|
|
# This fakes out a dGPU setup so the runtime operates correctly. The spoofed
|
|
# system has a single dGPU and a single socket CPU. Note that more complex
|
|
# topologies (multi-GPU, multi-socket CPUs) need to have a different setup
|
|
# here or the runtime won't be able to issue Memcpies from one node to another.
|
|
#
|
|
# TODO: There is way too much hardcoded here. It doesn't affect anything in
|
|
# our current ROCm stack (1.6), but it is highly possible that it will in the
|
|
# future. We might need to scrub through this and extract the appropriate
|
|
# fields from the simulator in the future.
|
|
def createFijiTopology(options):
    """Write a spoofed KFD topology with one CPU node and one Fiji dGPU.

    The topology tree is created below ``m5.options.outdir`` in
    ``fs/sys/devices/virtual/kfd/kfd/topology`` and the amdgpu module
    parameters in ``fs/sys/module/amdgpu/parameters``. Every directory
    touched here is wiped first.

    Args:
        options: parsed simulator options. Reads ``num_cpus``,
            ``num_compute_units``, ``simds_per_cu``, ``wfs_per_simd``,
            ``lds_size``, ``wf_size``, ``sa_per_complex``, ``cu_per_sa``,
            ``gpu_clock`` and ``CPUClock``.
    """
    out_root = m5.options.outdir

    topo_root = joinpath(out_root,
                         'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topo_root)

    params_root = joinpath(out_root, 'fs/sys/module/amdgpu/parameters')
    remake_dir(params_root)

    # Fiji reported VM size in GB. Used to reserve an allocation from CPU
    # to implement SVM (i.e. GPUVM64 pointers and X86 pointers agree)
    file_append((params_root, 'vm_size'), 256)

    # Taken from a real Fiji platform to satisfy KMT version checks
    file_append((topo_root, 'generation_id'), 2)

    # System properties. Register as ast-rocm server
    platform_text = ''.join([
        'platform_oem 35498446626881\n',
        'platform_id 71791775140929\n',
        'platform_rev 2\n',
    ])
    file_append((topo_root, 'system_properties'), platform_text)

    # ---- Node 0: the CPU -------------------------------------------------
    cpu_node = joinpath(topo_root, 'nodes/0')
    remake_dir(cpu_node)

    # A gpu_id of 0 and an empty name are written for the CPU node
    file_append((cpu_node, 'gpu_id'), 0)
    file_append((cpu_node, 'name'), '')

    # Single link from the CPU (node 0) to the GPU (node 1). All the
    # runtime needs to know is that the GPU hangs off CPU socket 0 via PCIe.
    cpu_link_count = 1
    cpu_link_dir = joinpath(cpu_node, 'io_links/0')
    remake_dir(cpu_link_dir)
    cpu_link_text = ''.join([
        'type 2\n',
        'version_major 0\n',
        'version_minor 0\n',
        'node_from 0\n',
        'node_to 1\n',
        'weight 20\n',
        'min_latency 0\n',
        'max_latency 0\n',
        'min_bandwidth 0\n',
        'max_bandwidth 0\n',
        'recommended_transfer_size 0\n',
        'flags 13\n',
    ])
    file_append((cpu_link_dir, 'properties'), cpu_link_text)

    # CPU node properties
    cpu_node_text = ''.join([
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count 0\n',
        'mem_banks_count 1\n',
        'caches_count 0\n',
        'io_links_count %s\n' % cpu_link_count,
        'cpu_core_id_base 0\n',
        'simd_id_base 0\n',
        'max_waves_per_simd 0\n',
        'lds_size_in_kb 0\n',
        'gds_size_in_kb 0\n',
        'wave_front_size 64\n',
        'array_count 0\n',
        'simd_arrays_per_engine 0\n',
        'cu_per_simd_array 0\n',
        'simd_per_cu 0\n',
        'max_slots_scratch_cu 0\n',
        'vendor_id 0\n',
        'device_id 0\n',
        'location_id 0\n',
        'drm_render_minor 0\n',
        'max_engine_clk_ccompute 3400\n',
    ])
    file_append((cpu_node, 'properties'), cpu_node_text)

    # CPU memory bank
    cpu_mem_dir = joinpath(cpu_node, 'mem_banks/0')
    remake_dir(cpu_mem_dir)
    cpu_mem_text = ''.join([
        'heap_type 0\n',
        'size_in_bytes 33704329216\n',
        'flags 0\n',
        'width 72\n',
        'mem_clk_max 2400\n',
    ])
    file_append((cpu_mem_dir, 'properties'), cpu_mem_text)

    # ---- Node 1: the GPU -------------------------------------------------
    gpu_node = joinpath(topo_root, 'nodes/1')
    remake_dir(gpu_node)

    # Identify the node as a Fiji
    file_append((gpu_node, 'gpu_id'), 50156)
    file_append((gpu_node, 'name'), 'Fiji\n')

    # Real Fiji shows 96, but building that topology is complex and doesn't
    # appear to be required for anything.
    cache_count = 0

    # Single link back from the GPU (node 1) to the CPU (node 0)
    gpu_link_count = 1
    gpu_link_dir = joinpath(gpu_node, 'io_links/0')
    remake_dir(gpu_link_dir)
    gpu_link_text = ''.join([
        'type 2\n',
        'version_major 0\n',
        'version_minor 0\n',
        'node_from 1\n',
        'node_to 0\n',
        'weight 20\n',
        'min_latency 0\n',
        'max_latency 0\n',
        'min_bandwidth 0\n',
        'max_bandwidth 0\n',
        'recommended_transfer_size 0\n',
        'flags 1\n',
    ])
    file_append((gpu_link_dir, 'properties'), gpu_link_text)

    # GPU node properties; clocks are reported in MHz (Hz / 1e6)
    gpu_node_text = ''.join([
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count %s\n'
        % (options.num_compute_units * options.simds_per_cu),
        'mem_banks_count 1\n',
        'caches_count %s\n' % cache_count,
        'io_links_count %s\n' % gpu_link_count,
        'cpu_core_id_base 0\n',
        'simd_id_base 2147487744\n',
        'max_waves_per_simd %s\n' % options.wfs_per_simd,
        'lds_size_in_kb %s\n' % int(options.lds_size / 1024),
        'gds_size_in_kb 0\n',
        'wave_front_size %s\n' % options.wf_size,
        'array_count 4\n',
        'simd_arrays_per_engine %s\n' % options.sa_per_complex,
        'cu_per_simd_array %s\n' % options.cu_per_sa,
        'simd_per_cu %s\n' % options.simds_per_cu,
        'max_slots_scratch_cu 32\n',
        'vendor_id 4098\n',
        'device_id 29440\n',
        'location_id 512\n',
        'max_engine_clk_fcompute %s\n'
        % int(toFrequency(options.gpu_clock) / 1e6),
        'local_mem_size 4294967296\n',
        'fw_version 730\n',
        'capability 4736\n',
        'max_engine_clk_ccompute %s\n'
        % int(toFrequency(options.CPUClock) / 1e6),
    ])
    file_append((gpu_node, 'properties'), gpu_node_text)

    # Fiji HBM reporting
    # TODO: Extract size, clk, and width from sim parameters
    gpu_mem_dir = joinpath(gpu_node, 'mem_banks/0')
    remake_dir(gpu_mem_dir)
    gpu_mem_text = ''.join([
        'heap_type 1\n',
        'size_in_bytes 4294967296\n',
        'flags 0\n',
        'width 4096\n',
        'mem_clk_max 500\n',
    ])
    file_append((gpu_mem_dir, 'properties'), gpu_mem_text)
|
|
|
|
|
|
def createCarrizoTopology(options):
    """Write a spoofed single-node KFD topology that registers a Carrizo.

    The tree is created below ``m5.options.outdir`` in
    ``fs/sys/devices/virtual/kfd/kfd/topology``; that directory is wiped
    first. A single node (``nodes/0``) carries both the CPU core count and
    the GPU properties, and no memory banks, caches or IO links are
    reported.

    Args:
        options: parsed simulator options. Reads ``num_cpus``,
            ``num_compute_units``, ``simds_per_cu``, ``wfs_per_simd``,
            ``lds_size``, ``wf_size``, ``sa_per_complex``, ``cu_per_sa``,
            ``gpu_clock`` and ``CPUClock``.
    """
    topo_root = joinpath(m5.options.outdir,
                         'fs/sys/devices/virtual/kfd/kfd/topology')
    remake_dir(topo_root)

    # Values taken from a real Kaveri platform to satisfy kmt version checks
    file_append((topo_root, 'generation_id'), 1)

    # System properties
    platform_text = ''.join([
        'platform_oem 2314885673410447169\n',
        'platform_id 35322352389441\n',
        'platform_rev 1\n',
    ])
    file_append((topo_root, 'system_properties'), platform_text)

    # Populate the topology tree
    # TODO: Just the bare minimum to pass for now
    apu_node = joinpath(topo_root, 'nodes/0')
    remake_dir(apu_node)

    # must show valid kaveri gpu id or massive meltdown
    file_append((apu_node, 'gpu_id'), 2765)

    # must have marketing name
    file_append((apu_node, 'name'), 'Carrizo\n')

    # Global node properties; clocks are reported in MHz (Hz / 1e6)
    # NOTE: SIMD count triggers a valid GPU agent creation
    apu_node_text = ''.join([
        'cpu_cores_count %s\n' % options.num_cpus,
        'simd_count %s\n'
        % (options.num_compute_units * options.simds_per_cu),
        'mem_banks_count 0\n',
        'caches_count 0\n',
        'io_links_count 0\n',
        'cpu_core_id_base 16\n',
        'simd_id_base 2147483648\n',
        'max_waves_per_simd %s\n' % options.wfs_per_simd,
        'lds_size_in_kb %s\n' % int(options.lds_size / 1024),
        'gds_size_in_kb 0\n',
        'wave_front_size %s\n' % options.wf_size,
        'array_count 1\n',
        'simd_arrays_per_engine %s\n' % options.sa_per_complex,
        'cu_per_simd_array %s\n' % options.cu_per_sa,
        'simd_per_cu %s\n' % options.simds_per_cu,
        'max_slots_scratch_cu 32\n',
        'vendor_id 4098\n',
        'device_id 39028\n',
        'location_id 8\n',
        'max_engine_clk_fcompute %s\n'
        % int(toFrequency(options.gpu_clock) / 1e6),
        'local_mem_size 0\n',
        'fw_version 699\n',
        'capability 4738\n',
        'max_engine_clk_ccompute %s\n'
        % int(toFrequency(options.CPUClock) / 1e6),
    ])
    file_append((apu_node, 'properties'), apu_node_text)
|