Merge branch 'develop' into requirements-fixer-hook
This commit is contained in:
13
.github/workflows/ci-tests.yaml
vendored
13
.github/workflows/ci-tests.yaml
vendored
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# This workflow runs after a pull-request has been approved by a reviewer.
|
||||
|
||||
name: CI Tests
|
||||
@@ -75,12 +76,12 @@ jobs:
|
||||
run: apt install -y jq
|
||||
|
||||
- name: Get directories for testlib-quick
|
||||
working-directory: "${{ github.workspace }}/tests"
|
||||
working-directory: ${{ github.workspace }}/tests
|
||||
id: dir-matrix
|
||||
run: echo "test-dirs-matrix=$(find gem5/* -type d -maxdepth 0 | jq -ncR '[inputs]')" >>$GITHUB_OUTPUT
|
||||
|
||||
- name: Get the build targets for testlib-quick-gem5-builds
|
||||
working-directory: "${{ github.workspace }}/tests"
|
||||
working-directory: ${{ github.workspace }}/tests
|
||||
id: build-matrix
|
||||
run: echo "build-matrix=$(./main.py list --build-targets -q | jq -ncR '[inputs]')" >>$GITHUB_OUTPUT
|
||||
|
||||
@@ -130,10 +131,7 @@ jobs:
|
||||
test-dir: ${{ fromJson(needs.testlib-quick-matrix.outputs.test-dirs-matrix) }}
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run:
|
||||
rm -rf ./* || true
|
||||
rm -rf ./.??* || true
|
||||
rm -rf ~/.cache || true
|
||||
# Literal block scalar: each rm runs as its own command. Written as a single
# plain scalar the line becomes `rm ... || true rm ... || true rm ...`, where
# the later rm's are only arguments to `true` and never execute.
run: |
  rm -rf ./* || true
  rm -rf ./.??* || true
  rm -rf ~/.cache || true
|
||||
|
||||
# Checkout the repository then download the gem5.opt artifact.
|
||||
- uses: actions/checkout@v3
|
||||
@@ -165,7 +163,8 @@ jobs:
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-testlib-quick-${{ steps.sanitize-test-dir.outputs.sanatized-test-dir }}-status-${{ steps.run-tests.outcome }}-output
|
||||
name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-testlib-quick-${{ steps.sanitize-test-dir.outputs.sanatized-test-dir
|
||||
}}-status-${{ steps.run-tests.outcome }}-output
|
||||
path: tests/testing-results
|
||||
retention-days: 30
|
||||
|
||||
|
||||
11
.github/workflows/compiler-tests.yaml
vendored
11
.github/workflows/compiler-tests.yaml
vendored
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# This workflow runs all of the compiler tests
|
||||
|
||||
name: Compiler Tests
|
||||
@@ -5,7 +6,7 @@ name: Compiler Tests
|
||||
on:
|
||||
# Runs every Friday from 7AM UTC
|
||||
schedule:
|
||||
- cron: '00 7 * * 5'
|
||||
- cron: 00 7 * * 5
|
||||
# Allows us to manually start workflow for testing
|
||||
workflow_dispatch:
|
||||
|
||||
@@ -15,7 +16,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image: [gcc-version-12, gcc-version-11, gcc-version-10, gcc-version-9, gcc-version-8, clang-version-16, clang-version-15, clang-version-14, clang-version-13, clang-version-12, clang-version-11, clang-version-10, clang-version-9, clang-version-8, clang-version-7, ubuntu-20.04_all-dependencies, ubuntu-22.04_all-dependencies, ubuntu-22.04_min-dependencies]
|
||||
image: [gcc-version-12, gcc-version-11, gcc-version-10, gcc-version-9, gcc-version-8, clang-version-16, clang-version-15, clang-version-14,
|
||||
clang-version-13, clang-version-12, clang-version-11, clang-version-10, clang-version-9, clang-version-8, clang-version-7, ubuntu-20.04_all-dependencies,
|
||||
ubuntu-22.04_all-dependencies, ubuntu-22.04_min-dependencies]
|
||||
opts: [.opt, .fast]
|
||||
runs-on: [self-hosted, linux, x64, build]
|
||||
timeout-minutes: 2880 # 48 hours
|
||||
@@ -35,7 +38,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, GCN3_X86, MIPS, 'NULL', NULL_MESI_Two_Level, NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86, GCN3_X86]
|
||||
gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, GCN3_X86, MIPS, 'NULL', NULL_MESI_Two_Level,
|
||||
NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86,
|
||||
GCN3_X86]
|
||||
image: [gcc-version-12, clang-version-16]
|
||||
opts: [.opt]
|
||||
runs-on: [self-hosted, linux, x64, build]
|
||||
|
||||
30
.github/workflows/daily-tests.yaml
vendored
30
.github/workflows/daily-tests.yaml
vendored
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# This workflow runs all of the long tests within main.py, extra tests in nightly.sh, and unittests
|
||||
|
||||
name: Daily Tests
|
||||
@@ -5,7 +6,7 @@ name: Daily Tests
|
||||
on:
|
||||
# Runs every day from 7AM UTC
|
||||
schedule:
|
||||
- cron: '0 7 * * *'
|
||||
- cron: 0 7 * * *
|
||||
|
||||
jobs:
|
||||
name-artifacts:
|
||||
@@ -22,7 +23,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# NULL is in quotes since it is considered a keyword in yaml files
|
||||
image: [ALL, ALL_CHI, ARM, ALL_MSI, ALL_MESI_Two_Level, "NULL", NULL_MI_example, RISCV, VEGA_X86]
|
||||
image: [ALL, ALL_CHI, ARM, ALL_MSI, ALL_MESI_Two_Level, 'NULL', NULL_MI_example, RISCV, VEGA_X86]
|
||||
# this allows us to pass additional command line parameters
|
||||
# the default is to add -j $(nproc), but some images
|
||||
# require more specifications when built
|
||||
@@ -76,17 +77,15 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
test-type: [arm_boot_tests, fs, gpu, insttest_se, learning_gem5, m5threads_test_atomic, memory, multi_isa, replacement_policies, riscv_boot_tests, stdlib, x86_boot_tests]
|
||||
test-type: [arm_boot_tests, fs, gpu, insttest_se, learning_gem5, m5threads_test_atomic, memory, multi_isa, replacement_policies, riscv_boot_tests,
|
||||
stdlib, x86_boot_tests]
|
||||
runs-on: [self-hosted, linux, x64, run]
|
||||
container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
|
||||
needs: [name-artifacts, build-gem5]
|
||||
timeout-minutes: 1440 # 24 hours for entire matrix to run
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run:
|
||||
rm -rf ./* || true
|
||||
rm -rf ./.??* || true
|
||||
rm -rf ~/.cache || true
|
||||
# Literal block scalar: each rm runs as its own command. Written as a single
# plain scalar the line becomes `rm ... || true rm ... || true rm ...`, where
# the later rm's are only arguments to `true` and never execute.
run: |
  rm -rf ./* || true
  rm -rf ./.??* || true
  rm -rf ~/.cache || true
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
# Scheduled workflows run on the default branch by default. We
|
||||
@@ -168,16 +167,14 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
test-type: [gem5-library-example-x86-ubuntu-run-ALL-x86_64-opt, gem5-library-example-riscv-ubuntu-run-ALL-x86_64-opt, lupv-example-ALL-x86_64-opt, gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
|
||||
test-type: [gem5-library-example-x86-ubuntu-run-ALL-x86_64-opt, gem5-library-example-riscv-ubuntu-run-ALL-x86_64-opt, lupv-example-ALL-x86_64-opt,
|
||||
gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
|
||||
container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
|
||||
needs: [name-artifacts, build-gem5]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run:
|
||||
rm -rf ./* || true
|
||||
rm -rf ./.??* || true
|
||||
rm -rf ~/.cache || true
|
||||
# Literal block scalar: each rm runs as its own command. Written as a single
# plain scalar the line becomes `rm ... || true rm ... || true rm ...`, where
# the later rm's are only arguments to `true` and never execute.
run: |
  rm -rf ./* || true
  rm -rf ./.??* || true
  rm -rf ~/.cache || true
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
# Scheduled workflows run on the default branch by default. We
|
||||
@@ -190,7 +187,8 @@ jobs:
|
||||
- run: chmod u+x build/ALL/gem5.opt
|
||||
- name: long ${{ matrix.test-type }} gem5_library_example_tests
|
||||
working-directory: ${{ github.workspace }}/tests
|
||||
run: ./main.py run --uid SuiteUID:tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py:test-${{ matrix.test-type }} --length=long --skip-build -vv
|
||||
run: ./main.py run --uid SuiteUID:tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py:test-${{ matrix.test-type }} --length=long
|
||||
--skip-build -vv
|
||||
- name: create zip of results
|
||||
if: success() || failure()
|
||||
run: |
|
||||
@@ -281,6 +279,8 @@ jobs:
|
||||
with:
|
||||
args: -q http://dist.gem5.org/dist/develop/test-progs/heterosync/gcn3/allSyncPrims-1kernel # Removed -N bc it wasn't available within actions, should be okay bc workspace is clean every time
|
||||
- name: Run allSyncPrims-1kernel sleepMutex test with GCN3_X86/gem5.opt (SE mode)
|
||||
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16 4"
|
||||
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16
|
||||
4"
|
||||
- name: Run allSyncPrims-1kernel lfTreeBarrUsing test with GCN3_X86/gem5.opt (SE mode)
|
||||
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq 10 16 4"
|
||||
run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq
|
||||
10 16 4"
|
||||
|
||||
1
.github/workflows/docker-build.yaml
vendored
1
.github/workflows/docker-build.yaml
vendored
@@ -1,3 +1,4 @@
|
||||
---
|
||||
name: Docker images build and push
|
||||
|
||||
on:
|
||||
|
||||
3
.github/workflows/gpu-tests.yaml
vendored
3
.github/workflows/gpu-tests.yaml
vendored
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# This workflow runs all of the very-long tests within main.py
|
||||
|
||||
name: Weekly Tests
|
||||
@@ -5,7 +6,7 @@ name: Weekly Tests
|
||||
on:
|
||||
# Runs every Saturday at 7AM UTC (cron day-of-week 6 is Saturday; 0 is Sunday)
|
||||
schedule:
|
||||
- cron: '00 7 * * 6'
|
||||
- cron: 00 7 * * 6
|
||||
# Allows us to manually start workflow for testing
|
||||
workflow_dispatch:
|
||||
|
||||
|
||||
8
.github/workflows/utils.yaml
vendored
8
.github/workflows/utils.yaml
vendored
@@ -1,8 +1,9 @@
|
||||
---
|
||||
# This workflow file contains miscellaneous tasks to manage the repository.
|
||||
name: Utils for Repository
|
||||
on:
|
||||
schedule:
|
||||
- cron: '30 1 * * *'
|
||||
- cron: 30 1 * * *
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
@@ -13,7 +14,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/stale@v8.0.0
|
||||
with:
|
||||
close-issue-message: 'This issue is being closed because it has been inactive waiting for response for 30 days. If this is still an issue, please open a new issue and reference this one.'
|
||||
close-issue-message: This issue is being closed because it has been inactive waiting for response for 30 days. If this is still an issue,
|
||||
please open a new issue and reference this one.
|
||||
days-before-stale: 21
|
||||
days-before-close: 7
|
||||
any-of-labels: 'needs details'
|
||||
any-of-labels: needs details
|
||||
|
||||
8
.github/workflows/weekly-tests.yaml
vendored
8
.github/workflows/weekly-tests.yaml
vendored
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# This workflow runs all of the very-long tests within main.py
|
||||
|
||||
name: Weekly Tests
|
||||
@@ -5,7 +6,7 @@ name: Weekly Tests
|
||||
on:
|
||||
# Runs every Saturday at 7AM UTC (cron day-of-week 6 is Saturday; 0 is Sunday)
|
||||
schedule:
|
||||
- cron: '00 7 * * 6'
|
||||
- cron: 00 7 * * 6
|
||||
# Allows us to manually start workflow for testing
|
||||
workflow_dispatch:
|
||||
|
||||
@@ -45,10 +46,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 3 days
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run:
|
||||
rm -rf ./* || true
|
||||
rm -rf ./.??* || true
|
||||
rm -rf ~/.cache || true
|
||||
# Literal block scalar: each rm runs as its own command. Written as a single
# plain scalar the line becomes `rm ... || true rm ... || true rm ...`, where
# the later rm's are only arguments to `true` and never execute.
run: |
  rm -rf ./* || true
  rm -rf ./.??* || true
  rm -rf ~/.cache || true
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
# Scheduled workflows run on the default branch by default. We
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# Copyright (c) 2022 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
@@ -33,7 +34,7 @@
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
minimum_pre_commit_version: "2.18"
|
||||
minimum_pre_commit_version: '2.18'
|
||||
|
||||
default_language_version:
|
||||
python: python3
|
||||
@@ -61,8 +62,12 @@ repos:
|
||||
- id: check-added-large-files
|
||||
- id: mixed-line-ending
|
||||
args: [--fix=lf]
|
||||
- id: requirements-txt-fixer
|
||||
- id: check-case-conflict
|
||||
- id: requirements-txt-fixer
|
||||
- repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
|
||||
rev: 0.2.3
|
||||
hooks:
|
||||
- id: yamlfmt
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.6.0
|
||||
hooks:
|
||||
@@ -73,18 +78,18 @@ repos:
|
||||
name: gem5 style checker
|
||||
entry: util/git-pre-commit.py
|
||||
always_run: true
|
||||
exclude: ".*"
|
||||
exclude: .*
|
||||
language: system
|
||||
description: 'The gem5 style checker hook.'
|
||||
description: The gem5 style checker hook.
|
||||
- id: gem5-commit-msg-checker
|
||||
name: gem5 commit msg checker
|
||||
entry: ext/git-commit-msg
|
||||
language: system
|
||||
stages: [commit-msg]
|
||||
description: 'The gem5 commit message checker hook.'
|
||||
description: The gem5 commit message checker hook.
|
||||
- id: gerrit-commit-msg-job
|
||||
name: gerrit commit message job
|
||||
entry: util/gerrit-commit-msg-hook
|
||||
language: system
|
||||
stages: [commit-msg]
|
||||
description: 'Adds Change-ID to the commit message. Needed by Gerrit.'
|
||||
description: Adds Change-ID to the commit message. Needed by Gerrit.
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
---
|
||||
# See CONTRIBUTING.md for details of gem5's contribution process.
|
||||
#
|
||||
# This file contains a list of gem5's subsystems and their
|
||||
|
||||
58
configs/common/cores/arm/O3_ARM_Etrace.py
Normal file
58
configs/common/cores/arm/O3_ARM_Etrace.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# Copyright (c) 2012, 2017-2018, 2023 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
# not be construed as granting a license to any other intellectual
|
||||
# property including but not limited to intellectual property relating
|
||||
# to a hardware implementation of the functionality of the software
|
||||
# licensed hereunder. You may use the software subject to the license
|
||||
# terms below provided that you ensure that this notice is replicated
|
||||
# unmodified and in its entirety in all distributions of the software,
|
||||
# modified or unmodified, in source code or in binary form.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.objects import *
|
||||
from .O3_ARM_v7a import O3_ARM_v7a_3
|
||||
|
||||
# O3_ARM_v7a_3 adapted to generate elastic traces
|
||||
class O3_ARM_v7a_3_Etrace(O3_ARM_v7a_3):
    """O3_ARM_v7a_3 core adapted to generate elastic traces."""

    # The ROB, LQ and SQ are made very large so that the core never stalls
    # on these resources; such stalls would be captured in the trace as
    # compute delay. For replay, the ROB, LQ and SQ sizes are modelled in
    # the Trace CPU instead.
    numROBEntries = 512
    LQEntries = 128
    SQEntries = 128

    def attach_probe_listener(self, inst_trace_file, data_trace_file):
        """Attach an ElasticTrace probe listener to this CPU.

        inst_trace_file -- forwarded as the listener's instFetchTraceFile
        data_trace_file -- forwarded as the listener's dataDepTraceFile

        The listener's dependency window is sized to three times this
        CPU's numROBEntries.
        """
        # ElasticTrace is referenced through this file's
        # `from m5.objects import *` rather than as `m5.objects.ElasticTrace`:
        # the file has no explicit `import m5`, so the qualified form only
        # resolves if the star import happens to re-export the name `m5`.
        self.traceListener = ElasticTrace(
            instFetchTraceFile=inst_trace_file,
            dataDepTraceFile=data_trace_file,
            depWindowSize=3 * self.numROBEntries,
        )
|
||||
@@ -338,56 +338,15 @@ class FastmodelCluster(CpuCluster):
|
||||
pass
|
||||
|
||||
|
||||
class BaseSimpleSystem(ArmSystem):
|
||||
cache_line_size = 64
|
||||
|
||||
def __init__(self, mem_size, platform, **kwargs):
|
||||
super(BaseSimpleSystem, self).__init__(**kwargs)
|
||||
|
||||
self.voltage_domain = VoltageDomain(voltage="1.0V")
|
||||
self.clk_domain = SrcClockDomain(
|
||||
clock="1GHz", voltage_domain=Parent.voltage_domain
|
||||
)
|
||||
|
||||
if platform is None:
|
||||
self.realview = VExpress_GEM5_V1()
|
||||
else:
|
||||
self.realview = platform
|
||||
|
||||
if hasattr(self.realview.gic, "cpu_addr"):
|
||||
self.gic_cpu_addr = self.realview.gic.cpu_addr
|
||||
|
||||
self.terminal = Terminal()
|
||||
self.vncserver = VncServer()
|
||||
|
||||
self.iobus = IOXBar()
|
||||
# Device DMA -> MEM
|
||||
self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
|
||||
class ClusterSystem:
|
||||
"""
|
||||
Base class providing cpu clusters generation/handling methods to
|
||||
SE/FS systems
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self._clusters = []
|
||||
|
||||
def getMemRanges(self, mem_size):
|
||||
"""
|
||||
Define system memory ranges. This depends on the physical
|
||||
memory map provided by the realview platform and by the memory
|
||||
size provided by the user (mem_size argument).
|
||||
The method is iterating over all platform ranges until they cover
|
||||
the entire user's memory requirements.
|
||||
"""
|
||||
mem_ranges = []
|
||||
for mem_range in self.realview._mem_regions:
|
||||
size_in_range = min(mem_size, mem_range.size())
|
||||
|
||||
mem_ranges.append(
|
||||
AddrRange(start=mem_range.start, size=size_in_range)
|
||||
)
|
||||
|
||||
mem_size -= size_in_range
|
||||
if mem_size == 0:
|
||||
return mem_ranges
|
||||
|
||||
raise ValueError("memory size too big for platform capabilities")
|
||||
|
||||
def numCpuClusters(self):
|
||||
return len(self._clusters)
|
||||
|
||||
@@ -423,6 +382,80 @@ class BaseSimpleSystem(ArmSystem):
|
||||
cluster.connectMemSide(cluster_mem_bus)
|
||||
|
||||
|
||||
class SimpleSeSystem(System, ClusterSystem):
    """
    Example system class for syscall emulation mode
    """

    # Every SE example system uses 64-byte cache lines.
    cache_line_size = 64

    def __init__(self, **kwargs):
        # Both bases are initialised explicitly, each receiving the same
        # keyword arguments.
        System.__init__(self, **kwargs)
        ClusterSystem.__init__(self, **kwargs)

        # System-level voltage domain and the 1GHz clock domain built on it.
        self.voltage_domain = VoltageDomain(voltage="3.3V")
        system_clock = SrcClockDomain(
            clock="1GHz", voltage_domain=self.voltage_domain
        )
        self.clk_domain = system_clock

        # Off-chip memory bus.
        self.membus = SystemXBar()

    def connect(self):
        """Bind the system port to the CPU side of the memory bus."""
        self.system_port = self.membus.cpu_side_ports
|
||||
|
||||
|
||||
class BaseSimpleSystem(ArmSystem, ClusterSystem):
    """ArmSystem with cluster bookkeeping, a platform, and basic devices."""

    cache_line_size = 64

    def __init__(self, mem_size, platform, **kwargs):
        """Set up clocking, the platform, terminal/VNC, IO bus and memory.

        mem_size -- requested memory size; converted with int(Addr(...))
        platform -- platform object to use, or None for VExpress_GEM5_V1
        kwargs   -- forwarded to both ArmSystem and ClusterSystem
        """
        ArmSystem.__init__(self, **kwargs)
        ClusterSystem.__init__(self, **kwargs)

        self.voltage_domain = VoltageDomain(voltage="1.0V")
        self.clk_domain = SrcClockDomain(
            clock="1GHz", voltage_domain=Parent.voltage_domain
        )

        # Fall back to the default platform when the caller gave none.
        self.realview = VExpress_GEM5_V1() if platform is None else platform

        # Only some GICs expose a cpu_addr; copy it when present.
        if hasattr(self.realview.gic, "cpu_addr"):
            self.gic_cpu_addr = self.realview.gic.cpu_addr

        self.terminal = Terminal()
        self.vncserver = VncServer()

        self.iobus = IOXBar()
        # Device DMA -> MEM
        requested_bytes = int(Addr(mem_size))
        self.mem_ranges = self.getMemRanges(requested_bytes)

    def getMemRanges(self, mem_size):
        """
        Build the list of system memory ranges.

        Walks the platform's memory regions (self.realview._mem_regions)
        in order, taking as much of each region as is still needed, until
        exactly mem_size bytes have been covered. Raises ValueError when
        the regions are exhausted before the request is satisfied.
        """
        remaining = mem_size
        selected = []
        for region in self.realview._mem_regions:
            chunk = min(remaining, region.size())
            selected.append(AddrRange(start=region.start, size=chunk))

            remaining -= chunk
            if remaining == 0:
                return selected

        raise ValueError("memory size too big for platform capabilities")
|
||||
|
||||
|
||||
class SimpleSystem(BaseSimpleSystem):
|
||||
"""
|
||||
Meant to be used with the classic memory model
|
||||
|
||||
191
configs/example/arm/etrace_se.py
Normal file
191
configs/example/arm/etrace_se.py
Normal file
@@ -0,0 +1,191 @@
|
||||
# Copyright (c) 2016-2017, 2022-2023 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
# not be construed as granting a license to any other intellectual
|
||||
# property including but not limited to intellectual property relating
|
||||
# to a hardware implementation of the functionality of the software
|
||||
# licensed hereunder. You may use the software subject to the license
|
||||
# terms below provided that you ensure that this notice is replicated
|
||||
# unmodified and in its entirety in all distributions of the software,
|
||||
# modified or unmodified, in source code or in binary form.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
import os
|
||||
import m5
|
||||
from m5.util import addToPath
|
||||
from m5.objects import *
|
||||
import argparse
|
||||
import shlex
|
||||
|
||||
m5.util.addToPath("../..")
|
||||
|
||||
from common import ObjectList
|
||||
|
||||
import devices
|
||||
|
||||
|
||||
def get_processes(cmd):
    """Turn a list of command-line strings into a list of Process objects.

    Each entry of ``cmd`` is split with shlex; the first token is used as
    the executable and the full token list as the process command. PIDs
    are assigned starting at 100, every process uses the current working
    directory, and its gid is set to the caller's group id.
    """
    working_dir = os.getcwd()
    processes = []

    for index, command_line in enumerate(cmd):
        tokens = shlex.split(command_line)

        proc = Process(
            pid=100 + index, cwd=working_dir, cmd=tokens, executable=tokens[0]
        )
        proc.gid = os.getgid()

        print("info: %d. command and arguments: %s" % (index + 1, proc.cmd))
        processes.append(proc)

    return processes
|
||||
|
||||
|
||||
def create(args):
    """Create and configure the system object for elastic trace capture.

    Reads num_cores, cpu_freq, inst_trace_file, data_trace_file, mem_size
    and commands_to_run from ``args``. Returns the configured system.
    Prints an error and exits with status 1 when the number of commands
    does not match the number of cores.
    """
    # Imported locally because the error path below calls sys.exit() and
    # this script has no explicit module-level `import sys`.
    import sys

    system = devices.SimpleSeSystem(
        mem_mode="timing",
    )

    # Add CPUs to the system. A cluster of CPUs typically have
    # private L1 caches and a shared L2 cache.
    system.cpu_cluster = devices.ArmCpuCluster(
        system,
        args.num_cores,
        args.cpu_freq,
        "1.2V",
        ObjectList.cpu_list.get("O3_ARM_v7a_3_Etrace"),
        devices.L1I,
        devices.L1D,
        devices.L2,
    )

    # Attach the elastic trace probe listener to every CPU in the cluster
    for cpu in system.cpu_cluster:
        cpu.attach_probe_listener(args.inst_trace_file, args.data_trace_file)

    # As elastic trace generation is enabled, make sure the memory system is
    # minimal so that compute delays do not include memory access latencies.
    # Configure the compulsory L1 caches for the O3CPU, do not configure
    # any more caches.
    system.addCaches(True, last_cache_level=1)

    # For elastic trace, override the SimpleMemory latency to 1ns.
    system.memory = SimpleMemory(
        range=AddrRange(start=0, size=args.mem_size),
        latency="1ns",
        port=system.membus.mem_side_ports,
    )

    # NOTE(review): devices.SimpleSeSystem.connect(), which binds
    # system_port to the memory bus, is not called in this function --
    # confirm that leaving it out is intended.

    # Parse the command line and get a list of Processes instances
    # that we can pass to gem5.
    processes = get_processes(args.commands_to_run)
    if len(processes) != args.num_cores:
        print(
            "Error: Cannot map %d command(s) onto %d CPU(s)"
            % (len(processes), args.num_cores)
        )
        sys.exit(1)

    system.workload = SEWorkload.init_compatible(processes[0].executable)

    # Assign one workload to each CPU
    for cpu, workload in zip(system.cpu_cluster.cpus, processes):
        cpu.workload = workload

    return system
|
||||
|
||||
|
||||
def main():
    """Parse the command line, build the SE system, and run the simulation."""
    arg_parser = argparse.ArgumentParser(epilog=__doc__)

    arg_parser.add_argument(
        "commands_to_run",
        metavar="command(s)",
        nargs="+",
        help="Command(s) to run",
    )
    arg_parser.add_argument(
        "--inst-trace-file",
        action="store",
        type=str,
        help="""Instruction fetch trace file input to
                        Elastic Trace probe in a capture simulation and
                        Trace CPU in a replay simulation""",
        default="fetchtrace.proto.gz",
    )
    arg_parser.add_argument(
        "--data-trace-file",
        action="store",
        type=str,
        help="""Data dependency trace file input to
                        Elastic Trace probe in a capture simulation and
                        Trace CPU in a replay simulation""",
        default="deptrace.proto.gz",
    )
    arg_parser.add_argument("--cpu-freq", type=str, default="4GHz")
    arg_parser.add_argument(
        "--num-cores", type=int, default=1, help="Number of CPU cores"
    )
    arg_parser.add_argument(
        "--mem-size",
        action="store",
        type=str,
        default="2GB",
        help="Specify the physical memory size",
    )

    args = arg_parser.parse_args()

    # gem5's object hierarchy has exactly one root node at a time.
    # full_system=False selects syscall emulation mode rather than
    # full system mode.
    root = Root(full_system=False)

    # The root holds a single system, i.e. one node with shared memory.
    root.system = create(args)

    # Build the C++ object hierarchy; no further SimObjects can be
    # instantiated after this call.
    m5.instantiate()

    # Hand control to the simulator. The event it returns describes why
    # the simulation stopped.
    exit_event = m5.simulate()

    # Report the exit reason. Exit codes that are really service requests
    # from the simulation (e.g., checkpoints) are not acted on here.
    print(
        f"{exit_event.getCause()} ({exit_event.getCode()}) @ {m5.curTick()}"
    )


# gem5 runs config scripts under the module name "__m5_main__".
if __name__ == "__m5_main__":
    main()
|
||||
@@ -64,72 +64,6 @@ cpu_types = {
|
||||
}
|
||||
|
||||
|
||||
class SimpleSeSystem(System):
|
||||
"""
|
||||
Example system class for syscall emulation mode
|
||||
"""
|
||||
|
||||
# Use a fixed cache line size of 64 bytes
|
||||
cache_line_size = 64
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
super(SimpleSeSystem, self).__init__(**kwargs)
|
||||
|
||||
# Setup book keeping to be able to use CpuClusters from the
|
||||
# devices module.
|
||||
self._clusters = []
|
||||
self._num_cpus = 0
|
||||
|
||||
# Create a voltage and clock domain for system components
|
||||
self.voltage_domain = VoltageDomain(voltage="3.3V")
|
||||
self.clk_domain = SrcClockDomain(
|
||||
clock="1GHz", voltage_domain=self.voltage_domain
|
||||
)
|
||||
|
||||
# Create the off-chip memory bus.
|
||||
self.membus = SystemXBar()
|
||||
|
||||
# Wire up the system port that gem5 uses to load the kernel
|
||||
# and to perform debug accesses.
|
||||
self.system_port = self.membus.cpu_side_ports
|
||||
|
||||
# Add CPUs to the system. A cluster of CPUs typically have
|
||||
# private L1 caches and a shared L2 cache.
|
||||
self.cpu_cluster = devices.ArmCpuCluster(
|
||||
self,
|
||||
args.num_cores,
|
||||
args.cpu_freq,
|
||||
"1.2V",
|
||||
*cpu_types[args.cpu],
|
||||
tarmac_gen=args.tarmac_gen,
|
||||
tarmac_dest=args.tarmac_dest,
|
||||
)
|
||||
|
||||
# Create a cache hierarchy (unless we are simulating a
|
||||
# functional CPU in atomic memory mode) for the CPU cluster
|
||||
# and connect it to the shared memory bus.
|
||||
if self.cpu_cluster.memory_mode() == "timing":
|
||||
self.cpu_cluster.addL1()
|
||||
self.cpu_cluster.addL2(self.cpu_cluster.clk_domain)
|
||||
self.cpu_cluster.connectMemSide(self.membus)
|
||||
|
||||
# Tell gem5 about the memory mode used by the CPUs we are
|
||||
# simulating.
|
||||
self.mem_mode = self.cpu_cluster.memory_mode()
|
||||
|
||||
def numCpuClusters(self):
|
||||
return len(self._clusters)
|
||||
|
||||
def addCpuCluster(self, cpu_cluster):
|
||||
assert cpu_cluster not in self._clusters
|
||||
assert len(cpu_cluster) > 0
|
||||
self._clusters.append(cpu_cluster)
|
||||
self._num_cpus += len(cpu_cluster)
|
||||
|
||||
def numCpus(self):
|
||||
return self._num_cpus
|
||||
|
||||
|
||||
def get_processes(cmd):
|
||||
"""Interprets commands to run and returns a list of processes"""
|
||||
|
||||
@@ -150,7 +84,31 @@ def get_processes(cmd):
|
||||
def create(args):
|
||||
"""Create and configure the system object."""
|
||||
|
||||
system = SimpleSeSystem(args)
|
||||
cpu_class = cpu_types[args.cpu][0]
|
||||
mem_mode = cpu_class.memory_mode()
|
||||
# Only simulate caches when using a timing CPU (e.g., the HPI model)
|
||||
want_caches = True if mem_mode == "timing" else False
|
||||
|
||||
system = devices.SimpleSeSystem(
|
||||
mem_mode=mem_mode,
|
||||
)
|
||||
|
||||
# Add CPUs to the system. A cluster of CPUs typically has
|
||||
# private L1 caches and a shared L2 cache.
|
||||
system.cpu_cluster = devices.ArmCpuCluster(
|
||||
system,
|
||||
args.num_cores,
|
||||
args.cpu_freq,
|
||||
"1.2V",
|
||||
*cpu_types[args.cpu],
|
||||
tarmac_gen=args.tarmac_gen,
|
||||
tarmac_dest=args.tarmac_dest,
|
||||
)
|
||||
|
||||
# Create a cache hierarchy for the cluster. We are assuming that
|
||||
# clusters have core-private L1 caches and an L2 that's shared
|
||||
# within the cluster.
|
||||
system.addCaches(want_caches, last_cache_level=2)
|
||||
|
||||
# Tell components about the expected physical memory ranges. This
|
||||
# is, for example, used by the MemConfig helper to determine where
|
||||
@@ -160,6 +118,9 @@ def create(args):
|
||||
# Configure the off-chip memory system.
|
||||
MemConfig.config_mem(args, system)
|
||||
|
||||
# Wire up the system's memory system
|
||||
system.connect()
|
||||
|
||||
# Parse the command line and get a list of Processes instances
|
||||
# that we can pass to gem5.
|
||||
processes = get_processes(args.commands_to_run)
|
||||
|
||||
@@ -128,7 +128,6 @@ board.set_se_simpoint_workload(
|
||||
)
|
||||
|
||||
dir = Path(args.checkpoint_path)
|
||||
dir.mkdir(exist_ok=True)
|
||||
|
||||
simulator = Simulator(
|
||||
board=board,
|
||||
|
||||
@@ -39,6 +39,7 @@ SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler',
|
||||
tags='x86 isa')
|
||||
|
||||
Source('amdgpu_device.cc', tags='x86 isa')
|
||||
Source('amdgpu_gfx.cc', tags='x86 isa')
|
||||
Source('amdgpu_nbio.cc', tags='x86 isa')
|
||||
Source('amdgpu_vm.cc', tags='x86 isa')
|
||||
Source('interrupt_handler.cc', tags='x86 isa')
|
||||
|
||||
@@ -379,6 +379,9 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
|
||||
case GRBM_BASE:
|
||||
gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
|
||||
break;
|
||||
case GFX_BASE:
|
||||
gfx.readMMIO(pkt, aperture_offset);
|
||||
break;
|
||||
case MMHUB_BASE:
|
||||
gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
|
||||
break;
|
||||
@@ -507,6 +510,9 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
|
||||
case NBIO_BASE:
|
||||
nbio.writeMMIO(pkt, aperture_offset);
|
||||
break;
|
||||
case GFX_BASE:
|
||||
gfx.writeMMIO(pkt, aperture_offset);
|
||||
break;
|
||||
default:
|
||||
DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
|
||||
break;
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
|
||||
#include "base/bitunion.hh"
|
||||
#include "dev/amdgpu/amdgpu_defines.hh"
|
||||
#include "dev/amdgpu/amdgpu_gfx.hh"
|
||||
#include "dev/amdgpu/amdgpu_nbio.hh"
|
||||
#include "dev/amdgpu/amdgpu_vm.hh"
|
||||
#include "dev/amdgpu/memory_manager.hh"
|
||||
@@ -109,6 +110,7 @@ class AMDGPUDevice : public PciDevice
|
||||
* Blocks of the GPU
|
||||
*/
|
||||
AMDGPUNbio nbio;
|
||||
AMDGPUGfx gfx;
|
||||
AMDGPUMemoryManager *gpuMemMgr;
|
||||
AMDGPUInterruptHandler *deviceIH;
|
||||
AMDGPUVM gpuvm;
|
||||
|
||||
73
src/dev/amdgpu/amdgpu_gfx.cc
Normal file
73
src/dev/amdgpu/amdgpu_gfx.cc
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "dev/amdgpu/amdgpu_gfx.hh"
|
||||
|
||||
#include "mem/packet_access.hh"
|
||||
#include "sim/core.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
void
|
||||
AMDGPUGfx::readMMIO(PacketPtr pkt, Addr offset)
|
||||
{
|
||||
switch (offset) {
|
||||
case AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB:
|
||||
pkt->setLE<uint32_t>(captured_clock_count);
|
||||
break;
|
||||
case AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB:
|
||||
pkt->setLE<uint32_t>(captured_clock_count >> 32);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUGfx::writeMMIO(PacketPtr pkt, Addr offset)
|
||||
{
|
||||
switch (offset) {
|
||||
case AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT:
|
||||
// Use gem5 Ticks in nanoseconds are the counter. The first capture
|
||||
// is expected to return zero.
|
||||
if (captured_clock_count == 1) {
|
||||
captured_clock_count = 0;
|
||||
} else {
|
||||
captured_clock_count = curTick() / sim_clock::as_int::ns;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gem5
|
||||
75
src/dev/amdgpu/amdgpu_gfx.hh
Normal file
75
src/dev/amdgpu/amdgpu_gfx.hh
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __DEV_AMDGPU_AMDGPU_GFX_HH__
|
||||
#define __DEV_AMDGPU_AMDGPU_GFX_HH__
|
||||
|
||||
#include "base/types.hh"
|
||||
#include "mem/packet.hh"
|
||||
|
||||
/**
|
||||
* MMIO offsets for GFX. This class handles MMIO reads/writes to the GFX_BASE
|
||||
* aperture which are generally read/written by the gfx driver source here:
|
||||
*
|
||||
* drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/master/
|
||||
* drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
|
||||
*
|
||||
* The MMIO addresses in the file are dword addresses. Here they are converted
|
||||
* to byte addresses so gem5 does not need to shift the values.
|
||||
*/
|
||||
|
||||
// Registers used to read GPU clock count used in profiling
|
||||
#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB 0x13090
|
||||
#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB 0x13094
|
||||
#define AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT 0x13098
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
class AMDGPUGfx
|
||||
{
|
||||
public:
|
||||
AMDGPUGfx() { }
|
||||
|
||||
void readMMIO(PacketPtr pkt, Addr offset);
|
||||
void writeMMIO(PacketPtr pkt, Addr offset);
|
||||
|
||||
private:
|
||||
/*
|
||||
* GPU clock count at the time capture MMIO is received.
|
||||
*/
|
||||
uint64_t captured_clock_count = 1;
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __DEV_AMDGPU_AMDGPU_GFX_HH__
|
||||
@@ -69,6 +69,12 @@ typedef struct amd_signal_s
|
||||
uint32_t reserved3[2];
|
||||
} amd_signal_t;
|
||||
|
||||
/**
 * Start/end timestamp pair for a dispatch. The GPU command processor
 * fills both fields in nanoseconds and writes the whole struct with one
 * DMA at the offset of amd_signal_t::start_ts.
 * NOTE(review): this presumably relies on start_ts and end_ts being
 * adjacent in amd_signal_t -- confirm against that definition.
 */
typedef struct
{
    // Time at which the dispatch was submitted.
    uint64_t start_ts;
    // Time at which the completion was signalled.
    uint64_t end_ts;
} amd_event_t;
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // DEV_HSA_HSA_SIGNAL_H
|
||||
|
||||
@@ -248,6 +248,10 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
|
||||
|
||||
initABI(task);
|
||||
++dynamic_task_id;
|
||||
|
||||
// The driver expects the start time to be in ns
|
||||
Tick start_ts = curTick() / sim_clock::as_int::ns;
|
||||
dispatchStartTime.insert({disp_pkt->completion_signal, start_ts});
|
||||
}
|
||||
|
||||
void
|
||||
@@ -280,16 +284,6 @@ GPUCommandProcessor::sendCompletionSignal(Addr signal_handle)
|
||||
void
|
||||
GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
|
||||
{
|
||||
Addr value_addr = getHsaSignalValueAddr(signal_handle);
|
||||
|
||||
uint64_t *signalValue = new uint64_t;
|
||||
auto cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaSignalData(value_addr, diff, signalValue); });
|
||||
dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
|
||||
DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n",
|
||||
value_addr);
|
||||
|
||||
Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
|
||||
uint64_t *mailboxValue = new uint64_t;
|
||||
auto cb2 = new DmaVirtCallback<uint64_t>(
|
||||
@@ -300,20 +294,6 @@ GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
|
||||
mailbox_addr);
|
||||
}
|
||||
|
||||
void
|
||||
GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
|
||||
uint64_t *prev_value)
|
||||
{
|
||||
// Reuse the value allocated for the read
|
||||
DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
|
||||
*prev_value, *prev_value + diff);
|
||||
*prev_value += diff;
|
||||
auto cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaSignalDone(prev_value); });
|
||||
dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
|
||||
}
|
||||
|
||||
void
|
||||
GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
|
||||
uint64_t *mailbox_value)
|
||||
@@ -331,6 +311,20 @@ GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
|
||||
dmaReadVirt(event_addr, sizeof(uint64_t), cb, (void *)mailbox_value);
|
||||
} else {
|
||||
delete mailbox_value;
|
||||
|
||||
Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
|
||||
|
||||
amd_event_t *event_ts = new amd_event_t;
|
||||
event_ts->start_ts = dispatchStartTime[signal_handle];
|
||||
event_ts->end_ts = curTick() / sim_clock::as_int::ns;
|
||||
auto cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaEventTs(signal_handle, event_ts); });
|
||||
dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb, (void *)event_ts);
|
||||
DPRINTF(GPUCommandProc, "updateHsaMailboxData reading timestamp addr "
|
||||
"%lx\n", ts_addr);
|
||||
|
||||
dispatchStartTime.erase(signal_handle);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -346,6 +340,52 @@ GPUCommandProcessor::updateHsaEventData(Addr signal_handle,
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaSignalDone(event_value); }, *event_value);
|
||||
dmaWriteVirt(mailbox_addr, sizeof(uint64_t), cb, &cb->dmaBuffer, 0);
|
||||
|
||||
Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
|
||||
|
||||
amd_event_t *event_ts = new amd_event_t;
|
||||
event_ts->start_ts = dispatchStartTime[signal_handle];
|
||||
event_ts->end_ts = curTick() / sim_clock::as_int::ns;
|
||||
auto cb2 = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaEventTs(signal_handle, event_ts); });
|
||||
dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb2, (void *)event_ts);
|
||||
DPRINTF(GPUCommandProc, "updateHsaEventData reading timestamp addr %lx\n",
|
||||
ts_addr);
|
||||
|
||||
dispatchStartTime.erase(signal_handle);
|
||||
}
|
||||
|
||||
void
|
||||
GPUCommandProcessor::updateHsaEventTs(Addr signal_handle,
|
||||
amd_event_t *ts)
|
||||
{
|
||||
delete ts;
|
||||
|
||||
Addr value_addr = getHsaSignalValueAddr(signal_handle);
|
||||
int64_t diff = -1;
|
||||
|
||||
uint64_t *signalValue = new uint64_t;
|
||||
auto cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaSignalData(value_addr, diff, signalValue); });
|
||||
dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
|
||||
DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n",
|
||||
value_addr);
|
||||
}
|
||||
|
||||
void
|
||||
GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
|
||||
uint64_t *prev_value)
|
||||
{
|
||||
// Reuse the value allocated for the read
|
||||
DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
|
||||
*prev_value, *prev_value + diff);
|
||||
*prev_value += diff;
|
||||
auto cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ updateHsaSignalDone(prev_value); });
|
||||
dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -117,6 +117,7 @@ class GPUCommandProcessor : public DmaVirtDevice
|
||||
void updateHsaSignalDone(uint64_t *signal_value);
|
||||
void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value);
|
||||
void updateHsaEventData(Addr signal_handle, uint64_t *event_value);
|
||||
void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value);
|
||||
|
||||
uint64_t functionalReadHsaSignal(Addr signal_handle);
|
||||
|
||||
@@ -148,6 +149,9 @@ class GPUCommandProcessor : public DmaVirtDevice
|
||||
HSAPacketProcessor *hsaPP;
|
||||
TranslationGenPtr translate(Addr vaddr, Addr size) override;
|
||||
|
||||
// Keep track of start times for task dispatches.
|
||||
std::unordered_map<Addr, Tick> dispatchStartTime;
|
||||
|
||||
/**
|
||||
* Perform a DMA read of the read_dispatch_id_field_base_byte_offset
|
||||
* field, which follows directly after the read_dispatch_id (the read
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
---
|
||||
version: '2'
|
||||
|
||||
services:
|
||||
|
||||
Reference in New Issue
Block a user