Merge branch 'develop' into requirements-fixer-hook

This commit is contained in:
Bobby R. Bruce
2023-10-09 22:37:25 -07:00
committed by GitHub
23 changed files with 1385 additions and 923 deletions

View File

@@ -1,3 +1,4 @@
---
# This workflow runs after a pull-request has been approved by a reviewer.
name: CI Tests
@@ -75,12 +76,12 @@ jobs:
run: apt install -y jq
- name: Get directories for testlib-quick
working-directory: "${{ github.workspace }}/tests"
working-directory: ${{ github.workspace }}/tests
id: dir-matrix
run: echo "test-dirs-matrix=$(find gem5/* -type d -maxdepth 0 | jq -ncR '[inputs]')" >>$GITHUB_OUTPUT
- name: Get the build targets for testlib-quick-gem5-builds
working-directory: "${{ github.workspace }}/tests"
working-directory: ${{ github.workspace }}/tests
id: build-matrix
run: echo "build-matrix=$(./main.py list --build-targets -q | jq -ncR '[inputs]')" >>$GITHUB_OUTPUT
@@ -130,10 +131,7 @@ jobs:
test-dir: ${{ fromJson(needs.testlib-quick-matrix.outputs.test-dirs-matrix) }}
steps:
- name: Clean runner
run:
rm -rf ./* || true
rm -rf ./.??* || true
rm -rf ~/.cache || true
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
# Checkout the repository then download the gem5.opt artifact.
- uses: actions/checkout@v3
@@ -165,7 +163,8 @@ jobs:
if: success() || failure()
uses: actions/upload-artifact@v3
with:
name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-testlib-quick-${{ steps.sanitize-test-dir.outputs.sanatized-test-dir }}-status-${{ steps.run-tests.outcome }}-output
name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-testlib-quick-${{ steps.sanitize-test-dir.outputs.sanatized-test-dir
}}-status-${{ steps.run-tests.outcome }}-output
path: tests/testing-results
retention-days: 30

View File

@@ -1,3 +1,4 @@
---
# This workflow runs all of the compiler tests
name: Compiler Tests
@@ -5,7 +6,7 @@ name: Compiler Tests
on:
# Runs every Friday from 7AM UTC
schedule:
- cron: '00 7 * * 5'
- cron: 00 7 * * 5
# Allows us to manually start workflow for testing
workflow_dispatch:
@@ -15,7 +16,9 @@ jobs:
strategy:
fail-fast: false
matrix:
image: [gcc-version-12, gcc-version-11, gcc-version-10, gcc-version-9, gcc-version-8, clang-version-16, clang-version-15, clang-version-14, clang-version-13, clang-version-12, clang-version-11, clang-version-10, clang-version-9, clang-version-8, clang-version-7, ubuntu-20.04_all-dependencies, ubuntu-22.04_all-dependencies, ubuntu-22.04_min-dependencies]
image: [gcc-version-12, gcc-version-11, gcc-version-10, gcc-version-9, gcc-version-8, clang-version-16, clang-version-15, clang-version-14,
clang-version-13, clang-version-12, clang-version-11, clang-version-10, clang-version-9, clang-version-8, clang-version-7, ubuntu-20.04_all-dependencies,
ubuntu-22.04_all-dependencies, ubuntu-22.04_min-dependencies]
opts: [.opt, .fast]
runs-on: [self-hosted, linux, x64, build]
timeout-minutes: 2880 # 48 hours
@@ -35,7 +38,9 @@ jobs:
strategy:
fail-fast: false
matrix:
gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, GCN3_X86, MIPS, 'NULL', NULL_MESI_Two_Level, NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86, GCN3_X86]
gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, GCN3_X86, MIPS, 'NULL', NULL_MESI_Two_Level,
NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86,
GCN3_X86]
image: [gcc-version-12, clang-version-16]
opts: [.opt]
runs-on: [self-hosted, linux, x64, build]

View File

@@ -1,3 +1,4 @@
---
# This workflow runs all of the long tests within main.py, extra tests in nightly.sh, and unittests
name: Daily Tests
@@ -5,7 +6,7 @@ name: Daily Tests
on:
# Runs every day from 7AM UTC
schedule:
- cron: '0 7 * * *'
- cron: 0 7 * * *
jobs:
name-artifacts:
@@ -22,7 +23,7 @@ jobs:
fail-fast: false
matrix:
# NULL is in quotes since it is considered a keyword in yaml files
image: [ALL, ALL_CHI, ARM, ALL_MSI, ALL_MESI_Two_Level, "NULL", NULL_MI_example, RISCV, VEGA_X86]
image: [ALL, ALL_CHI, ARM, ALL_MSI, ALL_MESI_Two_Level, 'NULL', NULL_MI_example, RISCV, VEGA_X86]
# this allows us to pass additional command line parameters
# the default is to add -j $(nproc), but some images
# require more specifications when built
@@ -76,17 +77,15 @@ jobs:
strategy:
fail-fast: false
matrix:
test-type: [arm_boot_tests, fs, gpu, insttest_se, learning_gem5, m5threads_test_atomic, memory, multi_isa, replacement_policies, riscv_boot_tests, stdlib, x86_boot_tests]
test-type: [arm_boot_tests, fs, gpu, insttest_se, learning_gem5, m5threads_test_atomic, memory, multi_isa, replacement_policies, riscv_boot_tests,
stdlib, x86_boot_tests]
runs-on: [self-hosted, linux, x64, run]
container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
needs: [name-artifacts, build-gem5]
timeout-minutes: 1440 # 24 hours for entire matrix to run
steps:
- name: Clean runner
run:
rm -rf ./* || true
rm -rf ./.??* || true
rm -rf ~/.cache || true
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
- uses: actions/checkout@v3
with:
# Scheduled workflows run on the default branch by default. We
@@ -168,16 +167,14 @@ jobs:
strategy:
fail-fast: false
matrix:
test-type: [gem5-library-example-x86-ubuntu-run-ALL-x86_64-opt, gem5-library-example-riscv-ubuntu-run-ALL-x86_64-opt, lupv-example-ALL-x86_64-opt, gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
test-type: [gem5-library-example-x86-ubuntu-run-ALL-x86_64-opt, gem5-library-example-riscv-ubuntu-run-ALL-x86_64-opt, lupv-example-ALL-x86_64-opt,
gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
needs: [name-artifacts, build-gem5]
timeout-minutes: 1440 # 24 hours
steps:
- name: Clean runner
run:
rm -rf ./* || true
rm -rf ./.??* || true
rm -rf ~/.cache || true
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
- uses: actions/checkout@v3
with:
# Scheduled workflows run on the default branch by default. We
@@ -190,7 +187,8 @@ jobs:
- run: chmod u+x build/ALL/gem5.opt
- name: long ${{ matrix.test-type }} gem5_library_example_tests
working-directory: ${{ github.workspace }}/tests
run: ./main.py run --uid SuiteUID:tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py:test-${{ matrix.test-type }} --length=long --skip-build -vv
run: ./main.py run --uid SuiteUID:tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py:test-${{ matrix.test-type }} --length=long
--skip-build -vv
- name: create zip of results
if: success() || failure()
run: |
@@ -281,6 +279,8 @@ jobs:
with:
args: -q http://dist.gem5.org/dist/develop/test-progs/heterosync/gcn3/allSyncPrims-1kernel # Removed -N bc it wasn't available within actions, should be okay bc workspace is clean every time
- name: Run allSyncPrims-1kernel sleepMutex test with GCN3_X86/gem5.opt (SE mode)
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16 4"
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16
4"
- name: Run allSyncPrims-1kernel lfTreeBarrUsing test with GCN3_X86/gem5.opt (SE mode)
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq 10 16 4"
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq
10 16 4"

View File

@@ -1,3 +1,4 @@
---
name: Docker images build and push
on:

View File

@@ -1,3 +1,4 @@
---
# This workflow runs all of the very-long tests within main.py
name: Weekly Tests
@@ -5,7 +6,7 @@ name: Weekly Tests
on:
# Runs every Sunday from 7AM UTC
schedule:
- cron: '00 7 * * 6'
- cron: 00 7 * * 6
# Allows us to manually start workflow for testing
workflow_dispatch:

View File

@@ -1,8 +1,9 @@
---
# This workflow file contains miscellaneous tasks to manage the repository.
name: Utils for Repository
on:
schedule:
- cron: '30 1 * * *'
- cron: 30 1 * * *
workflow_dispatch:
jobs:
@@ -13,7 +14,8 @@ jobs:
steps:
- uses: actions/stale@v8.0.0
with:
close-issue-message: 'This issue is being closed because it has been inactive waiting for response for 30 days. If this is still an issue, please open a new issue and reference this one.'
close-issue-message: This issue is being closed because it has been inactive waiting for response for 30 days. If this is still an issue,
please open a new issue and reference this one.
days-before-stale: 21
days-before-close: 7
any-of-labels: 'needs details'
any-of-labels: needs details

View File

@@ -1,3 +1,4 @@
---
# This workflow runs all of the very-long tests within main.py
name: Weekly Tests
@@ -5,7 +6,7 @@ name: Weekly Tests
on:
# Runs every Sunday from 7AM UTC
schedule:
- cron: '00 7 * * 6'
- cron: 00 7 * * 6
# Allows us to manually start workflow for testing
workflow_dispatch:
@@ -45,10 +46,7 @@ jobs:
timeout-minutes: 4320 # 3 days
steps:
- name: Clean runner
run:
rm -rf ./* || true
rm -rf ./.??* || true
rm -rf ~/.cache || true
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
- uses: actions/checkout@v3
with:
# Scheduled workflows run on the default branch by default. We

View File

@@ -1,3 +1,4 @@
---
# Copyright (c) 2022 Arm Limited
# All rights reserved.
#
@@ -33,7 +34,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
minimum_pre_commit_version: "2.18"
minimum_pre_commit_version: '2.18'
default_language_version:
python: python3
@@ -61,8 +62,12 @@ repos:
- id: check-added-large-files
- id: mixed-line-ending
args: [--fix=lf]
- id: requirements-txt-fixer
- id: check-case-conflict
- id: requirements-txt-fixer
- repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
rev: 0.2.3
hooks:
- id: yamlfmt
- repo: https://github.com/psf/black
rev: 22.6.0
hooks:
@@ -73,18 +78,18 @@ repos:
name: gem5 style checker
entry: util/git-pre-commit.py
always_run: true
exclude: ".*"
exclude: .*
language: system
description: 'The gem5 style checker hook.'
description: The gem5 style checker hook.
- id: gem5-commit-msg-checker
name: gem5 commit msg checker
entry: ext/git-commit-msg
language: system
stages: [commit-msg]
description: 'The gem5 commit message checker hook.'
description: The gem5 commit message checker hook.
- id: gerrit-commit-msg-job
name: gerrit commit message job
entry: util/gerrit-commit-msg-hook
language: system
stages: [commit-msg]
description: 'Adds Change-ID to the commit message. Needed by Gerrit.'
description: Adds Change-ID to the commit message. Needed by Gerrit.

View File

@@ -1,3 +1,4 @@
---
# See CONTRIBUTING.md for details of gem5's contribution process.
#
# This file contains a list of gem5's subsystems and their

View File

@@ -0,0 +1,58 @@
# Copyright (c) 2012, 2017-2018, 2023 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.objects import *
from .O3_ARM_v7a import O3_ARM_v7a_3
# O3_ARM_v7a_3 adapted to generate elastic traces
class O3_ARM_v7a_3_Etrace(O3_ARM_v7a_3):
    """O3_ARM_v7a_3 variant adapted to generate elastic traces.

    Resource structures (ROB, LQ, SQ) are oversized so the pipeline
    never stalls on them during capture; any such stall would otherwise
    be folded into the recorded compute delays. For replay, the real
    ROB/LQ/SQ sizes are modelled in the Trace CPU instead.
    """

    # Make the number of entries in the ROB, LQ and SQ very
    # large so that there are no stalls due to resource
    # limitation as such stalls will get captured in the trace
    # as compute delay. For replay, ROB, LQ and SQ sizes are
    # modelled in the Trace CPU.
    numROBEntries = 512
    LQEntries = 128
    SQEntries = 128

    def attach_probe_listener(self, inst_trace_file, data_trace_file):
        # Attach the elastic trace probe listener. Set the protobuf trace
        # file names. Set the dependency window size equal to the cpu it
        # is attached to.
        # NOTE(review): the `m5` name is not visibly bound by this file's
        # imports (`from m5.objects import *` does not bind `m5` itself) —
        # confirm `import m5` is in scope, or use `ElasticTrace` directly.
        self.traceListener = m5.objects.ElasticTrace(
            instFetchTraceFile=inst_trace_file,
            dataDepTraceFile=data_trace_file,
            depWindowSize=3 * self.numROBEntries,
        )

View File

@@ -338,56 +338,15 @@ class FastmodelCluster(CpuCluster):
pass
class BaseSimpleSystem(ArmSystem):
cache_line_size = 64
def __init__(self, mem_size, platform, **kwargs):
super(BaseSimpleSystem, self).__init__(**kwargs)
self.voltage_domain = VoltageDomain(voltage="1.0V")
self.clk_domain = SrcClockDomain(
clock="1GHz", voltage_domain=Parent.voltage_domain
)
if platform is None:
self.realview = VExpress_GEM5_V1()
else:
self.realview = platform
if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr
self.terminal = Terminal()
self.vncserver = VncServer()
self.iobus = IOXBar()
# Device DMA -> MEM
self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
class ClusterSystem:
"""
Base class providing cpu clusters generation/handling methods to
SE/FS systems
"""
def __init__(self, **kwargs):
self._clusters = []
def getMemRanges(self, mem_size):
"""
Define system memory ranges. This depends on the physical
memory map provided by the realview platform and by the memory
size provided by the user (mem_size argument).
The method is iterating over all platform ranges until they cover
the entire user's memory requirements.
"""
mem_ranges = []
for mem_range in self.realview._mem_regions:
size_in_range = min(mem_size, mem_range.size())
mem_ranges.append(
AddrRange(start=mem_range.start, size=size_in_range)
)
mem_size -= size_in_range
if mem_size == 0:
return mem_ranges
raise ValueError("memory size too big for platform capabilities")
def numCpuClusters(self):
return len(self._clusters)
@@ -423,6 +382,80 @@ class BaseSimpleSystem(ArmSystem):
cluster.connectMemSide(cluster_mem_bus)
class SimpleSeSystem(System, ClusterSystem):
"""
Example system class for syscall emulation mode
"""
# Use a fixed cache line size of 64 bytes
cache_line_size = 64
def __init__(self, **kwargs):
System.__init__(self, **kwargs)
ClusterSystem.__init__(self, **kwargs)
# Create a voltage and clock domain for system components
self.voltage_domain = VoltageDomain(voltage="3.3V")
self.clk_domain = SrcClockDomain(
clock="1GHz", voltage_domain=self.voltage_domain
)
# Create the off-chip memory bus.
self.membus = SystemXBar()
def connect(self):
self.system_port = self.membus.cpu_side_ports
class BaseSimpleSystem(ArmSystem, ClusterSystem):
cache_line_size = 64
def __init__(self, mem_size, platform, **kwargs):
ArmSystem.__init__(self, **kwargs)
ClusterSystem.__init__(self, **kwargs)
self.voltage_domain = VoltageDomain(voltage="1.0V")
self.clk_domain = SrcClockDomain(
clock="1GHz", voltage_domain=Parent.voltage_domain
)
if platform is None:
self.realview = VExpress_GEM5_V1()
else:
self.realview = platform
if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr
self.terminal = Terminal()
self.vncserver = VncServer()
self.iobus = IOXBar()
# Device DMA -> MEM
self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
def getMemRanges(self, mem_size):
"""
Define system memory ranges. This depends on the physical
memory map provided by the realview platform and by the memory
size provided by the user (mem_size argument).
The method is iterating over all platform ranges until they cover
the entire user's memory requirements.
"""
mem_ranges = []
for mem_range in self.realview._mem_regions:
size_in_range = min(mem_size, mem_range.size())
mem_ranges.append(
AddrRange(start=mem_range.start, size=size_in_range)
)
mem_size -= size_in_range
if mem_size == 0:
return mem_ranges
raise ValueError("memory size too big for platform capabilities")
class SimpleSystem(BaseSimpleSystem):
"""
Meant to be used with the classic memory model

View File

@@ -0,0 +1,191 @@
# Copyright (c) 2016-2017, 2022-2023 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import m5
from m5.util import addToPath
from m5.objects import *
import argparse
import shlex
m5.util.addToPath("../..")
from common import ObjectList
import devices
def get_processes(cmd):
    """Interprets commands to run and returns a list of processes"""
    working_dir = os.getcwd()
    group_id = os.getgid()

    processes = []
    for index, command in enumerate(cmd):
        # Split the command string the same way a shell would.
        argv = shlex.split(command)
        proc = Process(
            pid=100 + index, cwd=working_dir, cmd=argv, executable=argv[0]
        )
        proc.gid = group_id
        print("info: %d. command and arguments: %s" % (index + 1, proc.cmd))
        processes.append(proc)

    return processes
def create(args):
    """Create and configure the system object.

    Builds a syscall-emulation system with an elastic-trace-enabled O3
    CPU cluster, a deliberately minimal memory system (compulsory L1
    caches only, plus a 1 ns SimpleMemory) so captured compute delays
    exclude memory-access latency, and one workload process per core.

    :param args: parsed command-line namespace (uses num_cores,
        cpu_freq, inst_trace_file, data_trace_file, mem_size,
        commands_to_run)
    :returns: the configured system object
    :raises SystemExit: if the number of commands does not match the
        number of cores
    """
    system = devices.SimpleSeSystem(
        mem_mode="timing",
    )

    # Add CPUs to the system. A cluster of CPUs typically have
    # private L1 caches and a shared L2 cache.
    system.cpu_cluster = devices.ArmCpuCluster(
        system,
        args.num_cores,
        args.cpu_freq,
        "1.2V",
        ObjectList.cpu_list.get("O3_ARM_v7a_3_Etrace"),
        devices.L1I,
        devices.L1D,
        devices.L2,
    )

    # Attach the elastic trace probe listener to every CPU in the cluster
    for cpu in system.cpu_cluster:
        cpu.attach_probe_listener(args.inst_trace_file, args.data_trace_file)

    # As elastic trace generation is enabled, make sure the memory system is
    # minimal so that compute delays do not include memory access latencies.
    # Configure the compulsory L1 caches for the O3CPU, do not configure
    # any more caches.
    system.addCaches(True, last_cache_level=1)

    # For elastic trace, over-riding Simple Memory latency to 1ns.
    system.memory = SimpleMemory(
        range=AddrRange(start=0, size=args.mem_size),
        latency="1ns",
        port=system.membus.mem_side_ports,
    )

    # Parse the command line and get a list of Processes instances
    # that we can pass to gem5.
    processes = get_processes(args.commands_to_run)
    if len(processes) != args.num_cores:
        print(
            "Error: Cannot map %d command(s) onto %d CPU(s)"
            % (len(processes), args.num_cores)
        )
        # Fix: the original called sys.exit(1) but this file never
        # imports `sys`, so the error path raised NameError instead of
        # exiting. SystemExit(1) is exactly what sys.exit(1) raises and
        # requires no import.
        raise SystemExit(1)

    system.workload = SEWorkload.init_compatible(processes[0].executable)

    # Assign one workload to each CPU
    for cpu, workload in zip(system.cpu_cluster.cpus, processes):
        cpu.workload = workload

    return system
def main():
    """Parse arguments, build the system, and run the simulation."""
    arg_parser = argparse.ArgumentParser(epilog=__doc__)
    arg_parser.add_argument(
        "commands_to_run",
        metavar="command(s)",
        nargs="+",
        help="Command(s) to run",
    )
    arg_parser.add_argument(
        "--inst-trace-file",
        action="store",
        type=str,
        help="""Instruction fetch trace file input to
        Elastic Trace probe in a capture simulation and
        Trace CPU in a replay simulation""",
        default="fetchtrace.proto.gz",
    )
    arg_parser.add_argument(
        "--data-trace-file",
        action="store",
        type=str,
        help="""Data dependency trace file input to
        Elastic Trace probe in a capture simulation and
        Trace CPU in a replay simulation""",
        default="deptrace.proto.gz",
    )
    arg_parser.add_argument("--cpu-freq", type=str, default="4GHz")
    arg_parser.add_argument(
        "--num-cores", type=int, default=1, help="Number of CPU cores"
    )
    arg_parser.add_argument(
        "--mem-size",
        action="store",
        type=str,
        default="2GB",
        help="Specify the physical memory size",
    )
    options = arg_parser.parse_args()

    # Create a single root node for gem5's object hierarchy. There can
    # only exist one root node in the simulator at any given
    # time. Tell gem5 that we want to use syscall emulation mode
    # instead of full system mode.
    root_node = Root(full_system=False)

    # Populate the root node with a system. A system corresponds to a
    # single node with shared memory.
    root_node.system = create(options)

    # Instantiate the C++ object hierarchy. After this point,
    # SimObjects can't be instantiated anymore.
    m5.instantiate()

    # Start the simulator. This gives control to the C++ world and
    # starts the simulator. The returned event tells the simulation
    # script why the simulator exited.
    exit_event = m5.simulate()

    # Print the reason for the simulation exit. Some exit codes are
    # requests for service (e.g., checkpoints) from the simulation
    # script. We'll just ignore them here and exit.
    print(f"{exit_event.getCause()} ({exit_event.getCode()}) @ {m5.curTick()}")


if __name__ == "__m5_main__":
    main()

View File

@@ -64,72 +64,6 @@ cpu_types = {
}
class SimpleSeSystem(System):
"""
Example system class for syscall emulation mode
"""
# Use a fixed cache line size of 64 bytes
cache_line_size = 64
def __init__(self, args, **kwargs):
super(SimpleSeSystem, self).__init__(**kwargs)
# Setup book keeping to be able to use CpuClusters from the
# devices module.
self._clusters = []
self._num_cpus = 0
# Create a voltage and clock domain for system components
self.voltage_domain = VoltageDomain(voltage="3.3V")
self.clk_domain = SrcClockDomain(
clock="1GHz", voltage_domain=self.voltage_domain
)
# Create the off-chip memory bus.
self.membus = SystemXBar()
# Wire up the system port that gem5 uses to load the kernel
# and to perform debug accesses.
self.system_port = self.membus.cpu_side_ports
# Add CPUs to the system. A cluster of CPUs typically have
# private L1 caches and a shared L2 cache.
self.cpu_cluster = devices.ArmCpuCluster(
self,
args.num_cores,
args.cpu_freq,
"1.2V",
*cpu_types[args.cpu],
tarmac_gen=args.tarmac_gen,
tarmac_dest=args.tarmac_dest,
)
# Create a cache hierarchy (unless we are simulating a
# functional CPU in atomic memory mode) for the CPU cluster
# and connect it to the shared memory bus.
if self.cpu_cluster.memory_mode() == "timing":
self.cpu_cluster.addL1()
self.cpu_cluster.addL2(self.cpu_cluster.clk_domain)
self.cpu_cluster.connectMemSide(self.membus)
# Tell gem5 about the memory mode used by the CPUs we are
# simulating.
self.mem_mode = self.cpu_cluster.memory_mode()
def numCpuClusters(self):
return len(self._clusters)
def addCpuCluster(self, cpu_cluster):
assert cpu_cluster not in self._clusters
assert len(cpu_cluster) > 0
self._clusters.append(cpu_cluster)
self._num_cpus += len(cpu_cluster)
def numCpus(self):
return self._num_cpus
def get_processes(cmd):
"""Interprets commands to run and returns a list of processes"""
@@ -150,7 +84,31 @@ def get_processes(cmd):
def create(args):
"""Create and configure the system object."""
system = SimpleSeSystem(args)
cpu_class = cpu_types[args.cpu][0]
mem_mode = cpu_class.memory_mode()
# Only simulate caches when using a timing CPU (e.g., the HPI model)
want_caches = True if mem_mode == "timing" else False
system = devices.SimpleSeSystem(
mem_mode=mem_mode,
)
# Add CPUs to the system. A cluster of CPUs typically have
# private L1 caches and a shared L2 cache.
system.cpu_cluster = devices.ArmCpuCluster(
system,
args.num_cores,
args.cpu_freq,
"1.2V",
*cpu_types[args.cpu],
tarmac_gen=args.tarmac_gen,
tarmac_dest=args.tarmac_dest,
)
# Create a cache hierarchy for the cluster. We are assuming that
# clusters have core-private L1 caches and an L2 that's shared
# within the cluster.
system.addCaches(want_caches, last_cache_level=2)
# Tell components about the expected physical memory ranges. This
# is, for example, used by the MemConfig helper to determine where
@@ -160,6 +118,9 @@ def create(args):
# Configure the off-chip memory system.
MemConfig.config_mem(args, system)
# Wire up the system's memory system
system.connect()
# Parse the command line and get a list of Processes instances
# that we can pass to gem5.
processes = get_processes(args.commands_to_run)

View File

@@ -128,7 +128,6 @@ board.set_se_simpoint_workload(
)
dir = Path(args.checkpoint_path)
dir.mkdir(exist_ok=True)
simulator = Simulator(
board=board,

View File

@@ -39,6 +39,7 @@ SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler',
tags='x86 isa')
Source('amdgpu_device.cc', tags='x86 isa')
Source('amdgpu_gfx.cc', tags='x86 isa')
Source('amdgpu_nbio.cc', tags='x86 isa')
Source('amdgpu_vm.cc', tags='x86 isa')
Source('interrupt_handler.cc', tags='x86 isa')

View File

@@ -379,6 +379,9 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
case GRBM_BASE:
gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
break;
case GFX_BASE:
gfx.readMMIO(pkt, aperture_offset);
break;
case MMHUB_BASE:
gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
break;
@@ -507,6 +510,9 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
case NBIO_BASE:
nbio.writeMMIO(pkt, aperture_offset);
break;
case GFX_BASE:
gfx.writeMMIO(pkt, aperture_offset);
break;
default:
DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
break;

View File

@@ -36,6 +36,7 @@
#include "base/bitunion.hh"
#include "dev/amdgpu/amdgpu_defines.hh"
#include "dev/amdgpu/amdgpu_gfx.hh"
#include "dev/amdgpu/amdgpu_nbio.hh"
#include "dev/amdgpu/amdgpu_vm.hh"
#include "dev/amdgpu/memory_manager.hh"
@@ -109,6 +110,7 @@ class AMDGPUDevice : public PciDevice
* Blocks of the GPU
*/
AMDGPUNbio nbio;
AMDGPUGfx gfx;
AMDGPUMemoryManager *gpuMemMgr;
AMDGPUInterruptHandler *deviceIH;
AMDGPUVM gpuvm;

View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/amdgpu/amdgpu_gfx.hh"
#include "mem/packet_access.hh"
#include "sim/core.hh"
namespace gem5
{
/**
 * Handle an MMIO read within the GFX aperture.
 *
 * Only the two RLC GPU clock count registers are implemented: the LSB
 * register returns the low 32 bits of the captured clock count, the MSB
 * register the high 32 bits. Reads of any other offset set no data.
 */
void
AMDGPUGfx::readMMIO(PacketPtr pkt, Addr offset)
{
    if (offset == AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB) {
        // Low half of the 64-bit captured count.
        pkt->setLE<uint32_t>(captured_clock_count);
    } else if (offset == AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB) {
        // High half of the 64-bit captured count.
        pkt->setLE<uint32_t>(captured_clock_count >> 32);
    }
    // Any other offset is silently ignored.
}
/**
 * Handle an MMIO write within the GFX aperture.
 *
 * Only the clock-count capture register is implemented: writing it
 * latches the current simulated time into captured_clock_count, which
 * is later read back via the LSB/MSB read registers. Writes to any
 * other offset are ignored.
 */
void
AMDGPUGfx::writeMMIO(PacketPtr pkt, Addr offset)
{
    switch (offset) {
      case AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT:
        // Use gem5 Ticks in nanoseconds as the counter. The first capture
        // is expected to return zero, so the count starts at the sentinel
        // value 1 and is reset to 0 on the first capture request.
        if (captured_clock_count == 1) {
            captured_clock_count = 0;
        } else {
            captured_clock_count = curTick() / sim_clock::as_int::ns;
        }
        break;
      default:
        break;
    }
}
} // namespace gem5

View File

@@ -0,0 +1,75 @@
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DEV_AMDGPU_AMDGPU_GFX_HH__
#define __DEV_AMDGPU_AMDGPU_GFX_HH__
#include "base/types.hh"
#include "mem/packet.hh"
/**
* MMIO offsets for GFX. This class handles MMIO reads/writes to the GFX_BASE
* aperture which are generally read/written by the gfx driver source here:
*
* drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/master/
* drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
*
* The MMIO addresses in the file are dword addresses. Here they are converted
* to byte addresses so gem5 does not need to shift the values.
*/
// Registers used to read GPU clock count used in profiling
#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB 0x13090
#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB 0x13094
#define AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT 0x13098
namespace gem5
{
/**
 * Model of the GFX portion of the AMDGPU MMIO space. Decodes reads and
 * writes to the RLC GPU clock count registers defined above.
 */
class AMDGPUGfx
{
  public:
    AMDGPUGfx() { }

    /** Handle an MMIO read from a byte offset in the GFX aperture. */
    void readMMIO(PacketPtr pkt, Addr offset);
    /** Handle an MMIO write to a byte offset in the GFX aperture. */
    void writeMMIO(PacketPtr pkt, Addr offset);

  private:
    /*
     * GPU clock count (gem5 ticks converted to nanoseconds) at the time
     * the capture MMIO is received. Initialized to 1 so the first capture
     * request can be detected and reported as zero.
     */
    uint64_t captured_clock_count = 1;
};
} // namespace gem5
#endif // __DEV_AMDGPU_AMDGPU_GFX_HH__

View File

@@ -69,6 +69,12 @@ typedef struct amd_signal_s
uint32_t reserved3[2];
} amd_signal_t;
/*
 * Task dispatch timestamps written back alongside an HSA signal. The
 * command processor fills these with curTick() / sim_clock::as_int::ns,
 * so values are presumably in nanoseconds — confirm against the driver's
 * expectations.
 */
typedef struct
{
    uint64_t start_ts;  // time the dispatch packet was submitted
    uint64_t end_ts;    // time the completion signal was processed
} amd_event_t;
} // namespace gem5
#endif // DEV_HSA_HSA_SIGNAL_H

View File

@@ -248,6 +248,10 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
initABI(task);
++dynamic_task_id;
// The driver expects the start time to be in ns
Tick start_ts = curTick() / sim_clock::as_int::ns;
dispatchStartTime.insert({disp_pkt->completion_signal, start_ts});
}
void
@@ -280,16 +284,6 @@ GPUCommandProcessor::sendCompletionSignal(Addr signal_handle)
void
GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
{
Addr value_addr = getHsaSignalValueAddr(signal_handle);
uint64_t *signalValue = new uint64_t;
auto cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ updateHsaSignalData(value_addr, diff, signalValue); });
dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n",
value_addr);
Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
uint64_t *mailboxValue = new uint64_t;
auto cb2 = new DmaVirtCallback<uint64_t>(
@@ -300,20 +294,6 @@ GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
mailbox_addr);
}
/**
 * Apply a delta to a just-read HSA signal value and DMA the new value
 * back to the signal's value field.
 *
 * @param value_addr Virtual address of the signal's value field.
 * @param diff Amount to add to the current signal value.
 * @param prev_value Heap buffer holding the value read by the caller's
 *                   DMA; reused as the write buffer and handed to
 *                   updateHsaSignalDone once the write completes.
 */
void
GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
                                         uint64_t *prev_value)
{
    // Reuse the value allocated for the read
    DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
            *prev_value, *prev_value + diff);
    *prev_value += diff;
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &)
        { updateHsaSignalDone(prev_value); });
    dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
}
void
GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
uint64_t *mailbox_value)
@@ -331,6 +311,20 @@ GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
dmaReadVirt(event_addr, sizeof(uint64_t), cb, (void *)mailbox_value);
} else {
delete mailbox_value;
Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
amd_event_t *event_ts = new amd_event_t;
event_ts->start_ts = dispatchStartTime[signal_handle];
event_ts->end_ts = curTick() / sim_clock::as_int::ns;
auto cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ updateHsaEventTs(signal_handle, event_ts); });
dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb, (void *)event_ts);
DPRINTF(GPUCommandProc, "updateHsaMailboxData reading timestamp addr "
"%lx\n", ts_addr);
dispatchStartTime.erase(signal_handle);
}
}
@@ -346,6 +340,52 @@ GPUCommandProcessor::updateHsaEventData(Addr signal_handle,
[ = ] (const uint64_t &)
{ updateHsaSignalDone(event_value); }, *event_value);
dmaWriteVirt(mailbox_addr, sizeof(uint64_t), cb, &cb->dmaBuffer, 0);
Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
amd_event_t *event_ts = new amd_event_t;
event_ts->start_ts = dispatchStartTime[signal_handle];
event_ts->end_ts = curTick() / sim_clock::as_int::ns;
auto cb2 = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ updateHsaEventTs(signal_handle, event_ts); });
dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb2, (void *)event_ts);
DPRINTF(GPUCommandProc, "updateHsaEventData reading timestamp addr %lx\n",
ts_addr);
dispatchStartTime.erase(signal_handle);
}
/**
 * Continuation after the dispatch timestamps have been written back:
 * frees the timestamp buffer, then kicks off the final signal-value
 * decrement by reading the signal's current value and chaining into
 * updateHsaSignalData with a delta of -1.
 *
 * @param signal_handle Handle of the completion signal being updated.
 * @param ts Heap-allocated timestamp buffer from the preceding DMA
 *           write; no longer needed, freed here.
 */
void
GPUCommandProcessor::updateHsaEventTs(Addr signal_handle,
                                      amd_event_t *ts)
{
    // The timestamp DMA has completed; release its buffer.
    delete ts;

    Addr value_addr = getHsaSignalValueAddr(signal_handle);
    int64_t diff = -1;

    uint64_t *signalValue = new uint64_t;
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &)
        { updateHsaSignalData(value_addr, diff, signalValue); });
    dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
    // Bug fix: the debug message previously named updateHsaSignalAsync
    // (copy-paste from that method); report the actual caller.
    DPRINTF(GPUCommandProc, "updateHsaEventTs reading value addr %lx\n",
            value_addr);
}
/**
 * Apply a delta to a just-read HSA signal value and DMA the new value
 * back to the signal's value field.
 *
 * @param value_addr Virtual address of the signal's value field.
 * @param diff Amount to add to the current signal value.
 * @param prev_value Heap buffer holding the value read by the caller's
 *                   DMA; reused as the write buffer and handed to
 *                   updateHsaSignalDone once the write completes.
 */
void
GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
                                         uint64_t *prev_value)
{
    // Reuse the value allocated for the read
    DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
            *prev_value, *prev_value + diff);
    *prev_value += diff;
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &)
        { updateHsaSignalDone(prev_value); });
    dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
}
void

View File

@@ -117,6 +117,7 @@ class GPUCommandProcessor : public DmaVirtDevice
void updateHsaSignalDone(uint64_t *signal_value);
void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value);
void updateHsaEventData(Addr signal_handle, uint64_t *event_value);
void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value);
uint64_t functionalReadHsaSignal(Addr signal_handle);
@@ -148,6 +149,9 @@ class GPUCommandProcessor : public DmaVirtDevice
HSAPacketProcessor *hsaPP;
TranslationGenPtr translate(Addr vaddr, Addr size) override;
// Keep track of start times for task dispatches.
std::unordered_map<Addr, Tick> dispatchStartTime;
/**
* Perform a DMA read of the read_dispatch_id_field_base_byte_offset
* field, which follows directly after the read_dispatch_id (the read

View File

@@ -1,3 +1,4 @@
---
version: '2'
services: