Merge branch 'develop' into update-pannotia-tests
This commit is contained in:
65
.github/workflows/ci-tests.yaml
vendored
65
.github/workflows/ci-tests.yaml
vendored
@@ -5,7 +5,7 @@ name: CI Tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, edited, synchronize, ready_for_review]
|
||||
types: [opened, synchronize, ready_for_review]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
|
||||
@@ -21,17 +21,48 @@ jobs:
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
|
||||
get-date:
|
||||
# We use the date to label caches. A cache is a "hit" if the
|
||||
# request binary and date are the same as what is stored in the cache.
|
||||
# This essentially means the first job to run on a given day for a given
|
||||
# binary will always be a "miss" and will have to build the binary then
|
||||
# upload it as that day's binary. While this isn't the most
|
||||
# efficient way to do this, the alternative was to take a hash of the
|
||||
# `src` directory contents and use it as the cache key. We found there to be bugs
|
||||
# with the hash function where this task would timeout. This approach is
|
||||
# simple, works, and still provides some level of caching.
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
date: ${{ steps.date.outputs.date }}
|
||||
steps:
|
||||
- name: Get the current date
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
|
||||
|
||||
unittests-all-opt:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
if: github.event.pull_request.draft == false
|
||||
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
|
||||
needs: [pre-commit] # only runs if pre-commit passes.
|
||||
needs: [pre-commit, get-date] # only runs if pre-commit passes.
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
||||
# Restore the cache if available. As this just builds the unittests
|
||||
# we only obtain the cache and do not provide it if it is not
|
||||
# available.
|
||||
- name: Cache build/ALL
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: build/ALL
|
||||
key: testlib-build-all-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-all
|
||||
|
||||
- name: CI Unittests
|
||||
working-directory: ${{ github.workspace }}
|
||||
run: scons build/ALL/unittests.opt -j $(nproc)
|
||||
run: scons --no-compress-debug build/ALL/unittests.opt -j $(nproc)
|
||||
- run: echo "This job's status is ${{ job.status }}."
|
||||
|
||||
testlib-quick-matrix:
|
||||
@@ -83,14 +114,24 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
if: github.event.pull_request.draft == false
|
||||
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
|
||||
needs: [pre-commit, testlib-quick-matrix]
|
||||
needs: [pre-commit, testlib-quick-matrix, get-date]
|
||||
strategy:
|
||||
matrix:
|
||||
build-target: ${{ fromJson(needs.testlib-quick-matrix.outputs.build-matrix) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Cache build/ALL
|
||||
uses: actions/cache@v4
|
||||
if: ${{ endsWith(matrix.build-target, 'build/ALL/gem5.opt') }}
|
||||
with:
|
||||
path: build/ALL
|
||||
key: testlib-build-all-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-all
|
||||
|
||||
- name: Build gem5
|
||||
run: scons ${{ matrix.build-target }} -j $(nproc)
|
||||
run: scons --no-compress-debug ${{ matrix.build-target }} -j $(nproc)
|
||||
|
||||
# Upload the gem5 binary as an artifact.
|
||||
# Note: the "anchor.txt" file is a hack to make sure the paths are
|
||||
@@ -199,13 +240,23 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
container: ghcr.io/gem5/gcn-gpu:latest
|
||||
timeout-minutes: 180
|
||||
needs: [pre-commit]
|
||||
needs: [pre-commit, get-date]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# Obtain the cache if available. If not available this will upload
|
||||
# this job's instance of the cache.
|
||||
- name: Cache build/VEGA_X86
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/VEGA_X86
|
||||
key: testlib-build-vega-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-vega
|
||||
|
||||
# Build the VEGA_X86/gem5.opt binary.
|
||||
- name: Build VEGA_X86/gem5.opt
|
||||
run: scons build/VEGA_X86/gem5.opt -j`nproc`
|
||||
run: scons --no-compress-debug build/VEGA_X86/gem5.opt -j`nproc`
|
||||
|
||||
# Run the GPU tests.
|
||||
- name: Run Testlib GPU Tests
|
||||
|
||||
6
.github/workflows/compiler-tests.yaml
vendored
6
.github/workflows/compiler-tests.yaml
vendored
@@ -13,8 +13,8 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image: [gcc-version-13, gcc-version-12, gcc-version-11, gcc-version-10, clang-version-18, clang-version-17, clang-version-16, clang-version-15,
|
||||
clang-version-14, ubuntu-22.04_all-dependencies, ubuntu-24.04_all-dependencies, ubuntu-24.04_min-dependencies]
|
||||
image: [gcc-version-14, gcc-version-13, gcc-version-12, gcc-version-11, gcc-version-10, clang-version-18, clang-version-17, clang-version-16,
|
||||
clang-version-15, clang-version-14, ubuntu-22.04_all-dependencies, ubuntu-24.04_all-dependencies, ubuntu-24.04_min-dependencies]
|
||||
opts: [.opt, .fast]
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
timeout-minutes: 2880 # 48 hours
|
||||
@@ -32,7 +32,7 @@ jobs:
|
||||
matrix:
|
||||
gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, MIPS, 'NULL', NULL_MESI_Two_Level,
|
||||
NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86]
|
||||
image: [gcc-version-13, clang-version-18]
|
||||
image: [gcc-version-14, clang-version-18]
|
||||
opts: [.opt]
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
timeout-minutes: 2880 # 48 hours
|
||||
|
||||
22
.github/workflows/daily-tests.yaml
vendored
22
.github/workflows/daily-tests.yaml
vendored
@@ -8,6 +8,14 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
get-date:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get the current date
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
|
||||
|
||||
# this builds both unittests.fast and unittests.debug
|
||||
unittests-fast-debug:
|
||||
strategy:
|
||||
@@ -16,13 +24,14 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
|
||||
timeout-minutes: 60
|
||||
needs: get-date
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Cache build/ALL
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: build/ALL
|
||||
key: testlib-build-all-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-all-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-all
|
||||
- name: ALL/unittests.${{ matrix.type }} UnitTests
|
||||
@@ -38,6 +47,7 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
|
||||
timeout-minutes: 1440 # 24 hours for entire matrix to run
|
||||
needs: get-date
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
|
||||
@@ -47,13 +57,13 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/NULL
|
||||
key: testlib-build-null-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-null-${{ env.date }}
|
||||
|
||||
- name: Restore build/ALL cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/ALL
|
||||
key: testlib-build-all-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-all-${{ env.date }}
|
||||
|
||||
- name: long ${{ matrix.test-type }} tests
|
||||
working-directory: ${{ github.workspace }}/tests
|
||||
@@ -81,6 +91,7 @@ jobs:
|
||||
gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
|
||||
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
needs: get-date
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
|
||||
@@ -90,7 +101,7 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/ALL
|
||||
key: testlib-build-all-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-all-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-all
|
||||
|
||||
@@ -113,6 +124,7 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
container: ghcr.io/gem5/gcn-gpu:latest
|
||||
timeout-minutes: 720 # 12 hours
|
||||
needs: get-date
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -123,7 +135,7 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/VEGA_X86
|
||||
key: testlib-build-vega-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-vega-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-vega
|
||||
|
||||
|
||||
13
.github/workflows/weekly-tests.yaml
vendored
13
.github/workflows/weekly-tests.yaml
vendored
@@ -9,6 +9,13 @@ on:
|
||||
|
||||
jobs:
|
||||
|
||||
get-date:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get the current date
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
|
||||
|
||||
# start running the very-long tests
|
||||
testlib-very-long-tests:
|
||||
strategy:
|
||||
@@ -18,6 +25,7 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
|
||||
timeout-minutes: 4320 # 3 days
|
||||
needs: get-date
|
||||
steps:
|
||||
- name: Clean runner
|
||||
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
|
||||
@@ -27,7 +35,7 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/ALL
|
||||
key: testlib-build-all-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-all-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-all
|
||||
|
||||
@@ -49,6 +57,7 @@ jobs:
|
||||
runs-on: [self-hosted, linux, x64]
|
||||
container: ghcr.io/gem5/gcn-gpu:latest
|
||||
timeout-minutes: 4320 # 3 days
|
||||
needs: get-date
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -59,7 +68,7 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: build/VEGA_X86
|
||||
key: testlib-build-vega-${{ hashFiles('src/**') }}
|
||||
key: testlib-build-vega-${{ env.date }}
|
||||
restore-keys: |
|
||||
testlib-build-vega
|
||||
|
||||
|
||||
@@ -49,11 +49,11 @@ exclude: |
|
||||
tests/.*/ref/.*
|
||||
)$
|
||||
|
||||
default_stages: [commit]
|
||||
default_stages: [pre-commit]
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.5.0
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
@@ -69,7 +69,7 @@ repos:
|
||||
- id: destroyed-symlinks
|
||||
- id: requirements-txt-fixer
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.11.5
|
||||
rev: 5.13.2
|
||||
hooks:
|
||||
- id: isort
|
||||
- repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
|
||||
@@ -77,11 +77,11 @@ repos:
|
||||
hooks:
|
||||
- id: yamlfmt
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.9.1
|
||||
rev: 24.10.0
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/asottile/pyupgrade
|
||||
rev: v3.14.0
|
||||
rev: v3.17.0
|
||||
hooks:
|
||||
- id: pyupgrade
|
||||
# Python 3.8 is the earliest version supported.
|
||||
|
||||
7
.vscode/settings.json
vendored
Normal file
7
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"python.analysis.extraPaths": [
|
||||
"src/python",
|
||||
"ext",
|
||||
"tests"
|
||||
]
|
||||
}
|
||||
@@ -568,9 +568,9 @@ def config_hmc_dev(opt, system, hmc_host):
|
||||
# Attach 4 serial links to 4 crossbar/s
|
||||
for i in range(opt.num_serial_links):
|
||||
if opt.enable_link_monitor:
|
||||
system.hmc_host.seriallink[
|
||||
i
|
||||
].mem_side_port = system.hmc_dev.lmonitor[i].cpu_side_port
|
||||
system.hmc_host.seriallink[i].mem_side_port = (
|
||||
system.hmc_dev.lmonitor[i].cpu_side_port
|
||||
)
|
||||
system.hmc_dev.lmonitor[i].mem_side_port = system.hmc_dev.xbar[
|
||||
i
|
||||
].cpu_side_ports
|
||||
@@ -613,14 +613,12 @@ def config_hmc_dev(opt, system, hmc_host):
|
||||
]
|
||||
|
||||
# Connect the bridge between crossbars
|
||||
system.hmc_dev.xbar[
|
||||
i
|
||||
].mem_side_ports = system.hmc_dev.buffers[
|
||||
index
|
||||
].cpu_side_port
|
||||
system.hmc_dev.buffers[
|
||||
index
|
||||
].mem_side_port = system.hmc_dev.xbar[j].cpu_side_ports
|
||||
system.hmc_dev.xbar[i].mem_side_ports = (
|
||||
system.hmc_dev.buffers[index].cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffers[index].mem_side_port = (
|
||||
system.hmc_dev.xbar[j].cpu_side_ports
|
||||
)
|
||||
else:
|
||||
# Don't connect the xbar to itself
|
||||
pass
|
||||
@@ -629,49 +627,49 @@ def config_hmc_dev(opt, system, hmc_host):
|
||||
# can only direct traffic to its local vaults
|
||||
if opt.arch == "mixed":
|
||||
system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4])
|
||||
system.hmc_dev.xbar[
|
||||
3
|
||||
].mem_side_ports = system.hmc_dev.buffer30.cpu_side_port
|
||||
system.hmc_dev.xbar[3].mem_side_ports = (
|
||||
system.hmc_dev.buffer30.cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffer30.mem_side_port = system.hmc_dev.xbar[
|
||||
0
|
||||
].cpu_side_ports
|
||||
|
||||
system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8])
|
||||
system.hmc_dev.xbar[
|
||||
3
|
||||
].mem_side_ports = system.hmc_dev.buffer31.cpu_side_port
|
||||
system.hmc_dev.xbar[3].mem_side_ports = (
|
||||
system.hmc_dev.buffer31.cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffer31.mem_side_port = system.hmc_dev.xbar[
|
||||
1
|
||||
].cpu_side_ports
|
||||
|
||||
system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12])
|
||||
system.hmc_dev.xbar[
|
||||
3
|
||||
].mem_side_ports = system.hmc_dev.buffer32.cpu_side_port
|
||||
system.hmc_dev.xbar[3].mem_side_ports = (
|
||||
system.hmc_dev.buffer32.cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffer32.mem_side_port = system.hmc_dev.xbar[
|
||||
2
|
||||
].cpu_side_ports
|
||||
|
||||
system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4])
|
||||
system.hmc_dev.xbar[
|
||||
2
|
||||
].mem_side_ports = system.hmc_dev.buffer20.cpu_side_port
|
||||
system.hmc_dev.xbar[2].mem_side_ports = (
|
||||
system.hmc_dev.buffer20.cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffer20.mem_side_port = system.hmc_dev.xbar[
|
||||
0
|
||||
].cpu_side_ports
|
||||
|
||||
system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8])
|
||||
system.hmc_dev.xbar[
|
||||
2
|
||||
].mem_side_ports = system.hmc_dev.buffer21.cpu_side_port
|
||||
system.hmc_dev.xbar[2].mem_side_ports = (
|
||||
system.hmc_dev.buffer21.cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffer21.mem_side_port = system.hmc_dev.xbar[
|
||||
1
|
||||
].cpu_side_ports
|
||||
|
||||
system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16])
|
||||
system.hmc_dev.xbar[
|
||||
2
|
||||
].mem_side_ports = system.hmc_dev.buffer23.cpu_side_port
|
||||
system.hmc_dev.xbar[2].mem_side_ports = (
|
||||
system.hmc_dev.buffer23.cpu_side_port
|
||||
)
|
||||
system.hmc_dev.buffer23.mem_side_port = system.hmc_dev.xbar[
|
||||
3
|
||||
].cpu_side_ports
|
||||
|
||||
@@ -541,9 +541,9 @@ def run(options, root, testsys, cpu_class):
|
||||
IndirectBPClass = ObjectList.indirect_bp_list.get(
|
||||
options.indirect_bp_type
|
||||
)
|
||||
switch_cpus[
|
||||
i
|
||||
].branchPred.indirectBranchPred = IndirectBPClass()
|
||||
switch_cpus[i].branchPred.indirectBranchPred = (
|
||||
IndirectBPClass()
|
||||
)
|
||||
switch_cpus[i].createThreads()
|
||||
|
||||
# If elastic tracing is enabled attach the elastic trace probe
|
||||
|
||||
@@ -1683,6 +1683,15 @@ class HPI_MMU(ArmMMU):
|
||||
class HPI_BTB(SimpleBTB):
|
||||
numEntries = 128
|
||||
tagBits = 18
|
||||
associativity = 1
|
||||
instShiftAmt = 2
|
||||
btbReplPolicy = LRURP()
|
||||
btbIndexingPolicy = BTBSetAssociative(
|
||||
num_entries=Parent.numEntries,
|
||||
set_shift=Parent.instShiftAmt,
|
||||
assoc=Parent.associativity,
|
||||
tag_bits=Parent.tagBits,
|
||||
)
|
||||
|
||||
|
||||
class HPI_BP(TournamentBP):
|
||||
|
||||
@@ -111,6 +111,15 @@ class O3_ARM_v7a_FUP(FUPool):
|
||||
class O3_ARM_v7a_BTB(SimpleBTB):
|
||||
numEntries = 2048
|
||||
tagBits = 18
|
||||
associativity = 1
|
||||
instShiftAmt = 2
|
||||
btbReplPolicy = LRURP()
|
||||
btbIndexingPolicy = BTBSetAssociative(
|
||||
num_entries=Parent.numEntries,
|
||||
set_shift=Parent.instShiftAmt,
|
||||
assoc=Parent.associativity,
|
||||
tag_bits=Parent.tagBits,
|
||||
)
|
||||
|
||||
|
||||
# Bi-Mode Branch Predictor
|
||||
|
||||
@@ -108,6 +108,15 @@ class ex5_big_FUP(FUPool):
|
||||
class ex5_big_BTB(SimpleBTB):
|
||||
numEntries = 4096
|
||||
tagBits = 18
|
||||
associativity = 1
|
||||
instShiftAmt = 2
|
||||
btbReplPolicy = LRURP()
|
||||
btbIndexingPolicy = BTBSetAssociative(
|
||||
num_entries=Parent.numEntries,
|
||||
set_shift=Parent.instShiftAmt,
|
||||
assoc=Parent.associativity,
|
||||
tag_bits=Parent.tagBits,
|
||||
)
|
||||
|
||||
|
||||
# Bi-Mode Branch Predictor
|
||||
|
||||
@@ -213,9 +213,9 @@ def build_test_system(np, isa: ISA):
|
||||
IndirectBPClass = ObjectList.indirect_bp_list.get(
|
||||
args.indirect_bp_type
|
||||
)
|
||||
test_sys.cpu[
|
||||
i
|
||||
].branchPred.indirectBranchPred = IndirectBPClass()
|
||||
test_sys.cpu[i].branchPred.indirectBranchPred = (
|
||||
IndirectBPClass()
|
||||
)
|
||||
test_sys.cpu[i].createThreads()
|
||||
|
||||
# If elastic tracing is enabled when not restoring from checkpoint and
|
||||
|
||||
@@ -935,9 +935,9 @@ gpu_port_idx = gpu_port_idx - args.num_cp * 2
|
||||
token_port_idx = 0
|
||||
for i in range(len(system.ruby._cpu_ports)):
|
||||
if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
|
||||
system.cpu[shader_idx].CUs[
|
||||
token_port_idx
|
||||
].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort
|
||||
system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = (
|
||||
system.ruby._cpu_ports[i].gmTokenPort
|
||||
)
|
||||
token_port_idx += 1
|
||||
|
||||
wavefront_size = args.wf_size
|
||||
|
||||
92
configs/example/gem5_library/arm-demo-ubuntu-run.py
Normal file
92
configs/example/gem5_library/arm-demo-ubuntu-run.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# Copyright (c) 2024 The Regents of the University of California
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
This script further shows an example of booting an ARM based full system Ubuntu
|
||||
disk image. This simulation boots the disk image using the ArmDemoBoard.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
```bash
|
||||
scons build/ARM/gem5.opt -j $(nproc)
|
||||
./build/ARM/gem5.opt configs/example/gem5_library/arm-demo-ubuntu-run.py
|
||||
```
|
||||
"""
|
||||
import argparse
|
||||
|
||||
from gem5.isas import ISA
|
||||
from gem5.prebuilt.demo.arm_demo_board import ArmDemoBoard
|
||||
from gem5.resources.resource import obtain_resource
|
||||
from gem5.simulate.exit_event import ExitEvent
|
||||
from gem5.simulate.simulator import Simulator
|
||||
from gem5.utils.requires import requires
|
||||
|
||||
# This runs a check to ensure the gem5 binary interpreting this file is compiled to include the ARM ISA.
|
||||
requires(isa_required=ISA.ARM)
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="An example configuration script to run the ArmDemoBoard."
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--use-kvm",
|
||||
action="store_true",
|
||||
help="Use KVM cores instead of Timing.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
board = ArmDemoBoard(use_kvm=args.use_kvm)
|
||||
|
||||
board.set_workload(
|
||||
obtain_resource(
|
||||
"arm-ubuntu-24.04-boot-with-systemd", resource_version="2.0.0"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def exit_event_handler():
|
||||
print("First exit: kernel booted")
|
||||
yield False # gem5 is now executing systemd startup
|
||||
print("Second exit: Started `after_boot.sh` script")
|
||||
# The after_boot.sh script is executed after the kernel and systemd have
|
||||
# booted.
|
||||
yield False # gem5 is now executing the `after_boot.sh` script
|
||||
print("Third exit: Finished `after_boot.sh` script")
|
||||
# The after_boot.sh script will run a script if it is passed via
|
||||
# m5 readfile. This is the last exit event before the simulation exits.
|
||||
yield True
|
||||
|
||||
|
||||
# We define the simulator with the aforementioned board.
|
||||
simulator = Simulator(
|
||||
board=board,
|
||||
on_exit_event={
|
||||
ExitEvent.EXIT: exit_event_handler(),
|
||||
},
|
||||
)
|
||||
|
||||
simulator.run()
|
||||
120
configs/example/gem5_library/riscv-rvv-example.py
Executable file
120
configs/example/gem5_library/riscv-rvv-example.py
Executable file
@@ -0,0 +1,120 @@
|
||||
# Copyright (c) 2024 Barcelona Supercomputing Center
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
This script demonstrates how to run RISC-V vector-enabled binaries in SE mode
|
||||
with gem5. It accepts the number of CORES, VLEN, and ELEN as optional
|
||||
parameters, as well as the resource name to run. If no resource name is
|
||||
provided, a list of available resources will be displayed. If one is given the
|
||||
simulation will then execute the specified resource binary with the selected
|
||||
parameters until completion.
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
# Compile gem5 for RISC-V
|
||||
scons build/RISCV/gem5.opt
|
||||
|
||||
# Run the simulation
|
||||
./build/RISCV/gem5.opt configs/example/gem5_library/riscv-rvv-example.py \
|
||||
[-c CORES] [-v VLEN] [-e ELEN] <resource>
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
from m5.objects import RiscvO3CPU
|
||||
|
||||
from gem5.components.boards.simple_board import SimpleBoard
|
||||
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
|
||||
PrivateL1PrivateL2CacheHierarchy,
|
||||
)
|
||||
from gem5.components.memory import SingleChannelDDR3_1600
|
||||
from gem5.components.processors.base_cpu_core import BaseCPUCore
|
||||
from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
|
||||
from gem5.isas import ISA
|
||||
from gem5.resources.resource import obtain_resource
|
||||
from gem5.simulate.simulator import Simulator
|
||||
from gem5.utils.requires import requires
|
||||
|
||||
|
||||
class RVVCore(BaseCPUCore):
|
||||
def __init__(self, elen, vlen, cpu_id):
|
||||
super().__init__(core=RiscvO3CPU(cpu_id=cpu_id), isa=ISA.RISCV)
|
||||
self.core.isa[0].elen = elen
|
||||
self.core.isa[0].vlen = vlen
|
||||
|
||||
|
||||
requires(isa_required=ISA.RISCV)
|
||||
|
||||
resources = [
|
||||
"rvv-branch",
|
||||
"rvv-index",
|
||||
"rvv-matmul",
|
||||
"rvv-memcpy",
|
||||
"rvv-reduce",
|
||||
"rvv-saxpy",
|
||||
"rvv-sgemm",
|
||||
"rvv-strcmp",
|
||||
"rvv-strcpy",
|
||||
"rvv-strlen",
|
||||
"rvv-strlen-fault",
|
||||
"rvv-strncpy",
|
||||
]
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("resource", type=str, choices=resources)
|
||||
parser.add_argument("-c", "--cores", required=False, type=int, default=1)
|
||||
parser.add_argument("-v", "--vlen", required=False, type=int, default=256)
|
||||
parser.add_argument("-e", "--elen", required=False, type=int, default=64)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
|
||||
l1d_size="32KiB", l1i_size="32KiB", l2_size="512KiB"
|
||||
)
|
||||
|
||||
memory = SingleChannelDDR3_1600()
|
||||
|
||||
processor = BaseCPUProcessor(
|
||||
cores=[RVVCore(args.elen, args.vlen, i) for i in range(args.cores)]
|
||||
)
|
||||
|
||||
board = SimpleBoard(
|
||||
clk_freq="1GHz",
|
||||
processor=processor,
|
||||
memory=memory,
|
||||
cache_hierarchy=cache_hierarchy,
|
||||
)
|
||||
|
||||
binary = obtain_resource(args.resource)
|
||||
board.set_se_binary_workload(binary)
|
||||
|
||||
simulator = Simulator(board=board, full_system=False)
|
||||
print("Beginning simulation!")
|
||||
simulator.run()
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
# Copyright (c) 2021-2024 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
@@ -82,10 +82,6 @@ def addRunFSOptions(parser):
|
||||
help="The second disk image to mount (/dev/sdb)",
|
||||
)
|
||||
parser.add_argument("--kernel", default=None, help="Linux kernel to boot")
|
||||
parser.add_argument("--gpu-rom", default=None, help="GPU BIOS to load")
|
||||
parser.add_argument(
|
||||
"--gpu-mmio-trace", default=None, help="GPU MMIO trace to load"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--checkpoint-before-mmios",
|
||||
default=False,
|
||||
@@ -241,16 +237,6 @@ def runGpuFSSystem(args):
|
||||
math.ceil(float(n_cu) / args.cu_per_scalar_cache)
|
||||
)
|
||||
|
||||
# Verify MMIO trace is valid. This is only needed for Vega10 simulations.
|
||||
# The md5sum refers to the md5sum of the Vega10 MMIO hardware trace in
|
||||
# the gem5-resources repository. By checking it here, we avoid potential
|
||||
# errors that would cause the driver not to load and simulations to fail.
|
||||
if args.gpu_device == "Vega10":
|
||||
mmio_file = open(args.gpu_mmio_trace, "rb")
|
||||
mmio_md5 = hashlib.md5(mmio_file.read()).hexdigest()
|
||||
if mmio_md5 != "c4ff3326ae8a036e329b8b595c83bd6d":
|
||||
m5.util.panic("MMIO file does not match gem5 resources")
|
||||
|
||||
system = makeGpuFSSystem(args)
|
||||
|
||||
root = Root(
|
||||
|
||||
@@ -176,8 +176,6 @@ def createGPU(system, args):
|
||||
def connectGPU(system, args):
|
||||
system.pc.south_bridge.gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0)
|
||||
|
||||
system.pc.south_bridge.gpu.trace_file = args.gpu_mmio_trace
|
||||
system.pc.south_bridge.gpu.rom_binary = args.gpu_rom
|
||||
system.pc.south_bridge.gpu.checkpoint_before_mmios = (
|
||||
args.checkpoint_before_mmios
|
||||
)
|
||||
|
||||
@@ -336,9 +336,9 @@ def makeGpuFSSystem(args):
|
||||
token_port_idx = 0
|
||||
for i in range(len(system.ruby._cpu_ports)):
|
||||
if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
|
||||
system.cpu[shader_idx].CUs[
|
||||
token_port_idx
|
||||
].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort
|
||||
system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = (
|
||||
system.ruby._cpu_ports[i].gmTokenPort
|
||||
)
|
||||
token_port_idx += 1
|
||||
|
||||
wavefront_size = args.wf_size
|
||||
@@ -346,9 +346,9 @@ def makeGpuFSSystem(args):
|
||||
# The pipeline issues wavefront_size number of uncoalesced requests
|
||||
# in one GPU issue cycle. Hence wavefront_size mem ports.
|
||||
for j in range(wavefront_size):
|
||||
system.cpu[shader_idx].CUs[i].memory_port[
|
||||
j
|
||||
] = system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
|
||||
system.cpu[shader_idx].CUs[i].memory_port[j] = (
|
||||
system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
|
||||
)
|
||||
gpu_port_idx += 1
|
||||
|
||||
for i in range(args.num_compute_units):
|
||||
|
||||
@@ -110,8 +110,7 @@ board.set_kernel_disk_workload(
|
||||
# Begin running of the simulation.
|
||||
print("Running with ISA: " + processor.get_isa().name)
|
||||
print()
|
||||
root = Root(full_system=True, system=board)
|
||||
board._pre_instantiate()
|
||||
root = board._pre_instantiate()
|
||||
m5.instantiate()
|
||||
print("Beginning simulation!")
|
||||
|
||||
|
||||
@@ -250,9 +250,11 @@ class ConfigManager:
|
||||
obj,
|
||||
param_name,
|
||||
[
|
||||
(
|
||||
self.objects_by_name[name]
|
||||
if name != "Null"
|
||||
else m5.params.NULL
|
||||
)
|
||||
for name in param_values
|
||||
],
|
||||
)
|
||||
|
||||
@@ -371,6 +371,7 @@ for dma_idx in range(n_DMAs):
|
||||
num_lanes=1,
|
||||
clk_domain=thread_clock,
|
||||
deadlock_threshold=tester_deadlock_threshold,
|
||||
cache_line_size=system.cache_line_size,
|
||||
)
|
||||
)
|
||||
g_thread_idx += 1
|
||||
@@ -393,6 +394,7 @@ for cu_idx in range(n_CUs):
|
||||
num_lanes=args.wf_size,
|
||||
clk_domain=thread_clock,
|
||||
deadlock_threshold=tester_deadlock_threshold,
|
||||
cache_line_size=system.cache_line_size,
|
||||
)
|
||||
)
|
||||
g_thread_idx += 1
|
||||
|
||||
@@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem):
|
||||
# I/D cache is combined and grab from ctrl
|
||||
dcache=self.controllers[i].cacheMemory,
|
||||
clk_domain=self.controllers[i].clk_domain,
|
||||
ruby_system=self,
|
||||
)
|
||||
for i in range(len(cpus))
|
||||
]
|
||||
@@ -191,7 +192,9 @@ class DirController(Directory_Controller):
|
||||
self.version = self.versionCount()
|
||||
self.addr_ranges = ranges
|
||||
self.ruby_system = ruby_system
|
||||
self.directory = RubyDirectoryMemory()
|
||||
self.directory = RubyDirectoryMemory(
|
||||
block_size=ruby_system.block_size_bytes
|
||||
)
|
||||
# Connect this directory to the memory side.
|
||||
self.memory = mem_ctrls[0].port
|
||||
self.connectQueues(ruby_system)
|
||||
|
||||
@@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem):
|
||||
# I/D cache is combined and grab from ctrl
|
||||
dcache=self.controllers[i].cacheMemory,
|
||||
clk_domain=self.controllers[i].clk_domain,
|
||||
ruby_system=self,
|
||||
)
|
||||
for i in range(len(cpus))
|
||||
]
|
||||
@@ -180,7 +181,9 @@ class DirController(Directory_Controller):
|
||||
self.version = self.versionCount()
|
||||
self.addr_ranges = ranges
|
||||
self.ruby_system = ruby_system
|
||||
self.directory = RubyDirectoryMemory()
|
||||
self.directory = RubyDirectoryMemory(
|
||||
block_size=ruby_system.block_size_bytes
|
||||
)
|
||||
# Connect this directory to the memory side.
|
||||
self.memory = mem_ctrls[0].port
|
||||
self.connectQueues(ruby_system)
|
||||
|
||||
@@ -79,6 +79,7 @@ class TestCacheSystem(RubySystem):
|
||||
# I/D cache is combined and grab from ctrl
|
||||
dcache=self.controllers[i].cacheMemory,
|
||||
clk_domain=self.clk_domain,
|
||||
ruby_system=self,
|
||||
)
|
||||
for i in range(num_testers)
|
||||
]
|
||||
|
||||
@@ -84,14 +84,14 @@ class CPCntrl(AMD_Base_Controller, CntrlBase):
|
||||
self.L2cache = L2Cache()
|
||||
self.L2cache.create(options.l2_size, options.l2_assoc, options)
|
||||
|
||||
self.sequencer = RubySequencer()
|
||||
self.sequencer = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer.version = self.seqCount()
|
||||
self.sequencer.dcache = self.L1D0cache
|
||||
self.sequencer.ruby_system = ruby_system
|
||||
self.sequencer.coreid = 0
|
||||
self.sequencer.is_cpu_sequencer = True
|
||||
|
||||
self.sequencer1 = RubySequencer()
|
||||
self.sequencer1 = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer1.version = self.seqCount()
|
||||
self.sequencer1.dcache = self.L1D1cache
|
||||
self.sequencer1.ruby_system = ruby_system
|
||||
|
||||
@@ -114,14 +114,14 @@ class CPCntrl(CorePair_Controller, CntrlBase):
|
||||
self.L2cache = L2Cache()
|
||||
self.L2cache.create(options.l2_size, options.l2_assoc, options)
|
||||
|
||||
self.sequencer = RubySequencer()
|
||||
self.sequencer = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer.version = self.seqCount()
|
||||
self.sequencer.dcache = self.L1D0cache
|
||||
self.sequencer.ruby_system = ruby_system
|
||||
self.sequencer.coreid = 0
|
||||
self.sequencer.is_cpu_sequencer = True
|
||||
|
||||
self.sequencer1 = RubySequencer()
|
||||
self.sequencer1 = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer1.version = self.seqCount()
|
||||
self.sequencer1.dcache = self.L1D1cache
|
||||
self.sequencer1.ruby_system = ruby_system
|
||||
@@ -169,7 +169,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
|
||||
# TCP_Controller inherits this from RubyController
|
||||
self.mandatory_queue_latency = options.mandatory_queue_latency
|
||||
|
||||
self.coalescer = VIPERCoalescer()
|
||||
self.coalescer = VIPERCoalescer(ruby_system=ruby_system)
|
||||
self.coalescer.version = self.seqCount()
|
||||
self.coalescer.icache = self.L1cache
|
||||
self.coalescer.dcache = self.L1cache
|
||||
@@ -182,7 +182,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
|
||||
options.max_coalesces_per_cycle
|
||||
)
|
||||
|
||||
self.sequencer = RubySequencer()
|
||||
self.sequencer = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer.version = self.seqCount()
|
||||
self.sequencer.dcache = self.L1cache
|
||||
self.sequencer.ruby_system = ruby_system
|
||||
@@ -211,7 +211,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
|
||||
self.L1cache.create(options)
|
||||
self.issue_latency = 1
|
||||
|
||||
self.coalescer = VIPERCoalescer()
|
||||
self.coalescer = VIPERCoalescer(ruby_system=ruby_system)
|
||||
self.coalescer.version = self.seqCount()
|
||||
self.coalescer.icache = self.L1cache
|
||||
self.coalescer.dcache = self.L1cache
|
||||
@@ -219,7 +219,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
|
||||
self.coalescer.support_inst_reqs = False
|
||||
self.coalescer.is_cpu_sequencer = False
|
||||
|
||||
self.sequencer = RubySequencer()
|
||||
self.sequencer = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer.version = self.seqCount()
|
||||
self.sequencer.dcache = self.L1cache
|
||||
self.sequencer.ruby_system = ruby_system
|
||||
@@ -387,7 +387,9 @@ class DirCntrl(Directory_Controller, CntrlBase):
|
||||
self.response_latency = 30
|
||||
|
||||
self.addr_ranges = dir_ranges
|
||||
self.directory = RubyDirectoryMemory()
|
||||
self.directory = RubyDirectoryMemory(
|
||||
block_size=ruby_system.block_size_bytes
|
||||
)
|
||||
|
||||
self.L3CacheMemory = L3Cache()
|
||||
self.L3CacheMemory.create(options, ruby_system, system)
|
||||
@@ -686,7 +688,7 @@ def construct_gpudirs(options, system, ruby_system, network):
|
||||
dir_cntrl.addr_ranges = dram_intf.range
|
||||
|
||||
# Append
|
||||
exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i)
|
||||
exec("ruby_system.gpu_dir_cntrl%d = dir_cntrl" % i)
|
||||
dir_cntrl_nodes.append(dir_cntrl)
|
||||
mem_ctrls.append(mem_ctrl)
|
||||
|
||||
|
||||
@@ -148,6 +148,7 @@ def create_system(
|
||||
train_misses=5,
|
||||
num_startup_pfs=4,
|
||||
cross_page=True,
|
||||
block_size=options.cacheline_size,
|
||||
)
|
||||
|
||||
l0_cntrl = L0Cache_Controller(
|
||||
|
||||
@@ -148,6 +148,7 @@ def create_system(
|
||||
train_misses=5,
|
||||
num_startup_pfs=4,
|
||||
cross_page=True,
|
||||
block_size=options.cacheline_size,
|
||||
)
|
||||
|
||||
l0_cntrl = L0Cache_Controller(
|
||||
|
||||
@@ -94,7 +94,7 @@ def create_system(
|
||||
is_icache=False,
|
||||
)
|
||||
|
||||
prefetcher = RubyPrefetcher()
|
||||
prefetcher = RubyPrefetcher(block_size=options.cacheline_size)
|
||||
|
||||
clk_domain = cpus[i].clk_domain
|
||||
|
||||
|
||||
@@ -112,14 +112,14 @@ class CPCntrl(CorePair_Controller, CntrlBase):
|
||||
self.L2cache = L2Cache()
|
||||
self.L2cache.create(options)
|
||||
|
||||
self.sequencer = RubySequencer()
|
||||
self.sequencer = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer.version = self.seqCount()
|
||||
self.sequencer.dcache = self.L1D0cache
|
||||
self.sequencer.ruby_system = ruby_system
|
||||
self.sequencer.coreid = 0
|
||||
self.sequencer.is_cpu_sequencer = True
|
||||
|
||||
self.sequencer1 = RubySequencer()
|
||||
self.sequencer1 = RubySequencer(ruby_system=ruby_system)
|
||||
self.sequencer1.version = self.seqCount()
|
||||
self.sequencer1.dcache = self.L1D1cache
|
||||
self.sequencer1.ruby_system = ruby_system
|
||||
@@ -194,7 +194,9 @@ class DirCntrl(Directory_Controller, CntrlBase):
|
||||
self.response_latency = 30
|
||||
|
||||
self.addr_ranges = dir_ranges
|
||||
self.directory = RubyDirectoryMemory()
|
||||
self.directory = RubyDirectoryMemory(
|
||||
block_size=ruby_system.block_size_bytes
|
||||
)
|
||||
|
||||
self.L3CacheMemory = L3Cache()
|
||||
self.L3CacheMemory.create(options, ruby_system, system)
|
||||
|
||||
@@ -308,7 +308,9 @@ def create_directories(options, bootmem, ruby_system, system):
|
||||
for i in range(options.num_dirs):
|
||||
dir_cntrl = Directory_Controller()
|
||||
dir_cntrl.version = i
|
||||
dir_cntrl.directory = RubyDirectoryMemory()
|
||||
dir_cntrl.directory = RubyDirectoryMemory(
|
||||
block_size=ruby_system.block_size_bytes
|
||||
)
|
||||
dir_cntrl.ruby_system = ruby_system
|
||||
|
||||
exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
|
||||
@@ -316,7 +318,9 @@ def create_directories(options, bootmem, ruby_system, system):
|
||||
|
||||
if bootmem is not None:
|
||||
rom_dir_cntrl = Directory_Controller()
|
||||
rom_dir_cntrl.directory = RubyDirectoryMemory()
|
||||
rom_dir_cntrl.directory = RubyDirectoryMemory(
|
||||
block_size=ruby_system.block_size_bytes
|
||||
)
|
||||
rom_dir_cntrl.ruby_system = ruby_system
|
||||
rom_dir_cntrl.version = i + 1
|
||||
rom_dir_cntrl.memory = bootmem.port
|
||||
|
||||
@@ -960,11 +960,14 @@ class PackedReg
|
||||
uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1;
|
||||
value &= elem_mask;
|
||||
|
||||
// Clear the bits where the value goes so that operator| can be used.
|
||||
elem_mask <<= qw_lbit;
|
||||
qword &= elem_mask;
|
||||
qword &= ~elem_mask;
|
||||
|
||||
value <<= qw_lbit;
|
||||
qword |= value;
|
||||
// Promote to 64-bit to prevent shifting out of range
|
||||
uint64_t value64 = value;
|
||||
value64 <<= qw_lbit;
|
||||
qword |= value64;
|
||||
|
||||
dwords[udw] = uint32_t(qword >> 32);
|
||||
dwords[ldw] = uint32_t(qword & mask(32));
|
||||
|
||||
@@ -53,8 +53,6 @@ namespace gem5
|
||||
namespace ArmISA
|
||||
{
|
||||
|
||||
GenericISA::BasicDecodeCache<Decoder, ExtMachInst> Decoder::defaultCache;
|
||||
|
||||
Decoder::Decoder(const ArmDecoderParams ¶ms)
|
||||
: InstDecoder(params, &data),
|
||||
dvmEnabled(params.dvm_enabled),
|
||||
|
||||
@@ -94,7 +94,7 @@ class Decoder : public InstDecoder
|
||||
enums::DecoderFlavor decoderFlavor;
|
||||
|
||||
/// A cache of decoded instruction objects.
|
||||
static GenericISA::BasicDecodeCache<Decoder, ExtMachInst> defaultCache;
|
||||
GenericISA::BasicDecodeCache<Decoder, ExtMachInst> defaultCache;
|
||||
friend class GenericISA::BasicDecodeCache<Decoder, ExtMachInst>;
|
||||
|
||||
/**
|
||||
|
||||
@@ -264,7 +264,7 @@ class ArmFaultVals : public ArmFault
|
||||
static FaultVals vals;
|
||||
|
||||
public:
|
||||
ArmFaultVals<T>(ExtMachInst mach_inst = 0, uint32_t _iss = 0) :
|
||||
ArmFaultVals(ExtMachInst mach_inst = 0, uint32_t _iss = 0) :
|
||||
ArmFault(mach_inst, _iss) {}
|
||||
FaultName name() const override { return vals.name; }
|
||||
FaultOffset offset(ThreadContext *tc) override;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// -*- mode:c++ -*-
|
||||
|
||||
// Copyright (c) 2010-2011, 2016-2019 ARM Limited
|
||||
// Copyright (c) 2010-2011, 2016-2019, 2024 ARM Limited
|
||||
// All rights reserved
|
||||
//
|
||||
// The license below extends only to copyright in the software and shall
|
||||
@@ -1891,6 +1891,150 @@ let {{
|
||||
return new NVrsqrteD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
}
|
||||
} else if ((b & 0x1c) == 0x00) {
|
||||
if (bits(b, 1)) {
|
||||
switch(size) {
|
||||
case 1:
|
||||
if (q) {
|
||||
return new NVcvt2uhAQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2uhAD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 2:
|
||||
if (q) {
|
||||
return new NVcvt2usAQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2usAD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2shAQ<int16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2shAD<int16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2ssAQ<int32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2ssAD<int32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
} else if ((b & 0x1c) == 0x04) {
|
||||
if (bits(b, 1)) {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2uhNQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2uhND<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2usNQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2usND<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2shNQ<int16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2shND<int16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2ssNQ<int32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2ssND<int32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
} else if ((b & 0x1c) == 0x08) {
|
||||
if (bits(b, 1)) {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2uhPQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2uhPD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2usPQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2usPD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2shPQ<int16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2shPD<int16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2ssPQ<int32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2ssPD<int32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
} else if ((b & 0x1c) == 0x0c) {
|
||||
if (bits(b, 1)) {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2uhMQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2uhMD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2usMQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2usMD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVcvt2shMQ<int16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2shMD<int16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVcvt2ssMQ<int32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVcvt2ssMD<int32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// -*- mode:c++ -*-
|
||||
|
||||
// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
|
||||
// Copyright (c) 2010-2011, 2015, 2019, 2024 ARM Limited
|
||||
// All rights reserved
|
||||
//
|
||||
// The license below extends only to copyright in the software and shall
|
||||
@@ -3579,6 +3579,128 @@ let {{
|
||||
'''
|
||||
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
|
||||
|
||||
vcvthp2hCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrExc;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
|
||||
float mid = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, srcElem1);
|
||||
if (flushToZero(mid))
|
||||
fpscr.idc = 1;
|
||||
destElem = vfpFpToFixed<float>(mid, %s, 16, 0, true, %s);
|
||||
__asm__ __volatile__("" :: "m" (destElem));
|
||||
finishVfp(fpscr, state, true);
|
||||
FpscrExc = fpscr;
|
||||
'''
|
||||
|
||||
vcvtahp2uhCode = vcvthp2hCode % ("false", "VfpRoundAway")
|
||||
twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vcvtahp2uhCode)
|
||||
twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vcvtahp2uhCode)
|
||||
|
||||
vcvtnhp2uhCode = vcvthp2hCode % ("false", "VfpRoundNearest")
|
||||
twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhND", "SimdCvtOp",
|
||||
("uint16_t",), 2, vcvtnhp2uhCode)
|
||||
twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhNQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vcvtnhp2uhCode)
|
||||
|
||||
vcvtphp2uhCode = vcvthp2hCode % ("false", "VfpRoundUpward")
|
||||
twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vcvtphp2uhCode)
|
||||
twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vcvtphp2uhCode)
|
||||
|
||||
vcvtmhp2uhCode = vcvthp2hCode % ("false", "VfpRoundDown")
|
||||
twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vcvtmhp2uhCode)
|
||||
twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vcvtmhp2uhCode)
|
||||
|
||||
vcvtahp2shCode = vcvthp2hCode % ("true", "VfpRoundAway")
|
||||
twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAD", "SimdCvtOp",
|
||||
("int16_t",), 2, vcvtahp2shCode)
|
||||
twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAQ", "SimdCvtOp",
|
||||
("int16_t",), 4, vcvtahp2shCode)
|
||||
|
||||
vcvtnhp2shCode = vcvthp2hCode % ("true", "VfpRoundNearest")
|
||||
twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shND", "SimdCvtOp",
|
||||
("int16_t",), 2, vcvtnhp2shCode)
|
||||
twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shNQ", "SimdCvtOp",
|
||||
("int16_t",), 4, vcvtnhp2shCode)
|
||||
|
||||
vcvtphp2shCode = vcvthp2hCode % ("true", "VfpRoundUpward")
|
||||
twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPD", "SimdCvtOp",
|
||||
("int16_t",), 2, vcvtphp2shCode)
|
||||
twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPQ", "SimdCvtOp",
|
||||
("int16_t",), 4, vcvtphp2shCode)
|
||||
|
||||
vcvtmhp2shCode = vcvthp2hCode % ("true", "VfpRoundDown")
|
||||
twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMD", "SimdCvtOp",
|
||||
("int16_t",), 2, vcvtmhp2shCode)
|
||||
twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMQ", "SimdCvtOp",
|
||||
("int16_t",), 4, vcvtmhp2shCode)
|
||||
|
||||
vcvtsp2sCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrExc;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
|
||||
float mid = bitsToFp(srcElem1, (float)0.0);
|
||||
if (flushToZero(mid))
|
||||
fpscr.idc = 1;
|
||||
destElem = vfpFpToFixed<float>(mid, %s, 32, 0, true, %s);
|
||||
__asm__ __volatile__("" :: "m" (destElem));
|
||||
finishVfp(fpscr, state, true);
|
||||
FpscrExc = fpscr;
|
||||
'''
|
||||
|
||||
vcvtasp2usCode = vcvtsp2sCode % ("false", "VfpRoundAway")
|
||||
twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vcvtasp2usCode)
|
||||
twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vcvtasp2usCode)
|
||||
|
||||
vcvtnsp2usCode = vcvtsp2sCode % ("false", "VfpRoundNearest")
|
||||
twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usND", "SimdCvtOp",
|
||||
("uint32_t",), 2, vcvtnsp2usCode)
|
||||
twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usNQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vcvtnsp2usCode)
|
||||
|
||||
vcvtpsp2usCode = vcvtsp2sCode % ("false", "VfpRoundUpward")
|
||||
twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vcvtpsp2usCode)
|
||||
twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vcvtpsp2usCode)
|
||||
|
||||
vcvtmsp2usCode = vcvtsp2sCode % ("false", "VfpRoundDown")
|
||||
twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vcvtmsp2usCode)
|
||||
twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vcvtmsp2usCode)
|
||||
|
||||
vcvtasp2ssCode = vcvtsp2sCode % ("true", "VfpRoundAway")
|
||||
twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAD", "SimdCvtOp",
|
||||
("int32_t",), 2, vcvtasp2ssCode)
|
||||
twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAQ", "SimdCvtOp",
|
||||
("int32_t",), 4, vcvtasp2ssCode)
|
||||
|
||||
vcvtnsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundNearest")
|
||||
twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssND", "SimdCvtOp",
|
||||
("int32_t",), 2, vcvtnsp2ssCode)
|
||||
twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssNQ", "SimdCvtOp",
|
||||
("int32_t",), 4, vcvtnsp2ssCode)
|
||||
|
||||
vcvtpsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundUpward")
|
||||
twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPD", "SimdCvtOp",
|
||||
("int32_t",), 2, vcvtpsp2ssCode)
|
||||
twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPQ", "SimdCvtOp",
|
||||
("int32_t",), 4, vcvtpsp2ssCode)
|
||||
|
||||
vcvtmsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundDown")
|
||||
twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMD", "SimdCvtOp",
|
||||
("int32_t",), 2, vcvtmsp2ssCode)
|
||||
twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMQ", "SimdCvtOp",
|
||||
("int32_t",), 4, vcvtmsp2ssCode)
|
||||
|
||||
vrsqrteCode = '''
|
||||
destElem = unsignedRSqrtEstimate(srcElem1);
|
||||
'''
|
||||
|
||||
@@ -89,6 +89,12 @@ class BaseInterrupts : public SimObject
|
||||
{
|
||||
panic("Interrupts::clearAll unimplemented!\n");
|
||||
}
|
||||
|
||||
virtual bool
|
||||
isWakeUp() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -111,12 +111,13 @@ class Template:
|
||||
|
||||
operands = SubOperandList(self.parser, compositeCode, d.operands)
|
||||
|
||||
myDict[
|
||||
"reg_idx_arr_decl"
|
||||
] = "RegId srcRegIdxArr[%d]; RegId destRegIdxArr[%d]" % (
|
||||
myDict["reg_idx_arr_decl"] = (
|
||||
"RegId srcRegIdxArr[%d]; RegId destRegIdxArr[%d]"
|
||||
% (
|
||||
d.operands.numSrcRegs + d.srcRegIdxPadding,
|
||||
d.operands.numDestRegs + d.destRegIdxPadding,
|
||||
)
|
||||
)
|
||||
|
||||
# The reinterpret casts are largely because an array with a known
|
||||
# size cannot be passed as an argument which is an array with an
|
||||
@@ -821,7 +822,7 @@ class ISAParser(Grammar):
|
||||
"DBLCOLON",
|
||||
"ASTERISK",
|
||||
# C preprocessor directives
|
||||
"CPPDIRECTIVE"
|
||||
"CPPDIRECTIVE",
|
||||
# The following are matched but never returned. commented out to
|
||||
# suppress PLY warning
|
||||
# newfile directive
|
||||
|
||||
@@ -140,9 +140,9 @@ def handle_statement(parser, container, statement):
|
||||
if statement.is_microop:
|
||||
if statement.mnemonic not in parser.microops.keys():
|
||||
raise Exception(f"Unrecognized mnemonic: {statement.mnemonic}")
|
||||
parser.symbols[
|
||||
"__microopClassFromInsideTheAssembler"
|
||||
] = parser.microops[statement.mnemonic]
|
||||
parser.symbols["__microopClassFromInsideTheAssembler"] = (
|
||||
parser.microops[statement.mnemonic]
|
||||
)
|
||||
try:
|
||||
microop = eval(
|
||||
f"__microopClassFromInsideTheAssembler({statement.params})",
|
||||
@@ -166,9 +166,9 @@ def handle_statement(parser, container, statement):
|
||||
elif statement.is_directive:
|
||||
if statement.name not in container.directives.keys():
|
||||
raise Exception(f"Unrecognized directive: {statement.name}")
|
||||
parser.symbols[
|
||||
"__directiveFunctionFromInsideTheAssembler"
|
||||
] = container.directives[statement.name]
|
||||
parser.symbols["__directiveFunctionFromInsideTheAssembler"] = (
|
||||
container.directives[statement.name]
|
||||
)
|
||||
try:
|
||||
eval(
|
||||
f"__directiveFunctionFromInsideTheAssembler({statement.params})",
|
||||
|
||||
@@ -114,6 +114,13 @@ class RiscvISA(BaseISA):
|
||||
|
||||
enable_Zicbom_fs = Param.Bool(True, "Enable Zicbom extension in FS mode")
|
||||
enable_Zicboz_fs = Param.Bool(True, "Enable Zicboz extension in FS mode")
|
||||
# Boolean parameter, default True, controlling the Zcd extension. The
# description is assembled from adjacent string literals, so each fragment
# must end with a space to keep the sentences separated in the final text.
enable_Zcd = Param.Bool(
    True,
    "Enable Zcd extensions. "
    "Set the option to false implies the Zcmp and Zcmt is enable as "
    "c.fsdsp is overlap with them. "
    "Refs: https://github.com/riscv/riscv-isa-manual/blob/main/src/zc.adoc",
)
|
||||
|
||||
wfi_resume_on_pending = Param.Bool(
|
||||
False,
|
||||
|
||||
@@ -44,6 +44,7 @@ Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst)
|
||||
ISA *isa = dynamic_cast<ISA*>(p.isa);
|
||||
vlen = isa->getVecLenInBits();
|
||||
elen = isa->getVecElemLenInBits();
|
||||
_enableZcd = isa->enableZcd();
|
||||
reset();
|
||||
}
|
||||
|
||||
@@ -127,6 +128,7 @@ Decoder::decode(PCStateBase &_next_pc)
|
||||
emi.vtype8 = next_pc.vtype() & 0xff;
|
||||
emi.vill = next_pc.vtype().vill;
|
||||
emi.rv_type = static_cast<int>(next_pc.rvType());
|
||||
emi.enable_zcd = _enableZcd;
|
||||
|
||||
return decode(emi, next_pc.instAddr());
|
||||
}
|
||||
|
||||
@@ -62,6 +62,7 @@ class Decoder : public InstDecoder
|
||||
|
||||
uint32_t vlen;
|
||||
uint32_t elen;
|
||||
bool _enableZcd;
|
||||
|
||||
virtual StaticInstPtr decodeInst(ExtMachInst mach_inst);
|
||||
|
||||
|
||||
@@ -34,3 +34,4 @@ Source('mem.cc', tags='riscv isa')
|
||||
Source('standard.cc', tags='riscv isa')
|
||||
Source('static_inst.cc', tags='riscv isa')
|
||||
Source('vector.cc', tags='riscv isa')
|
||||
Source('zcmp.cc', tags='riscv isa')
|
||||
|
||||
130
src/arch/riscv/insts/zcmp.cc
Normal file
130
src/arch/riscv/insts/zcmp.cc
Normal file
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Google LLC
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "arch/riscv/insts/zcmp.hh"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "arch/riscv/regs/int.hh"
|
||||
#include "arch/riscv/utility.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace RiscvISA
|
||||
{
|
||||
|
||||
/**
 * Forward the mnemonic, machine instruction and op class to the
 * RiscvMacroInst base and cache the instruction's rlist field.
 */
CmMacroInst::CmMacroInst(const char* mnem, ExtMachInst machInst,
                         OpClass opClass)
    : RiscvMacroInst(mnem, machInst, opClass),
      rlist(machInst.rlist)
{}
|
||||
|
||||
// Ref: https://github.com/riscv-software-src/riscv-isa-sim/blob/f7d0dba60/
|
||||
// riscv/decode.h#L168
|
||||
uint64_t
|
||||
CmMacroInst::stackAdj() const
|
||||
{
|
||||
uint64_t stack_adj_base = 0;
|
||||
switch (machInst.rlist) {
|
||||
case 15:
|
||||
stack_adj_base += 16;
|
||||
[[fallthrough]];
|
||||
case 14:
|
||||
if (machInst.rv_type == RV64) {
|
||||
stack_adj_base += 16;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case 13:
|
||||
case 12:
|
||||
stack_adj_base += 16;
|
||||
[[fallthrough]];
|
||||
case 11:
|
||||
case 10:
|
||||
if (machInst.rv_type == RV64) {
|
||||
stack_adj_base += 16;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case 9:
|
||||
case 8:
|
||||
stack_adj_base += 16;
|
||||
[[fallthrough]];
|
||||
case 7:
|
||||
case 6:
|
||||
if (machInst.rv_type == RV64) {
|
||||
stack_adj_base += 16;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case 5:
|
||||
case 4:
|
||||
stack_adj_base += 16;
|
||||
break;
|
||||
}
|
||||
|
||||
return stack_adj_base + machInst.spimm * 16;
|
||||
}
|
||||
|
||||
std::string
|
||||
CmMacroInst::getRlistStr() const
|
||||
{
|
||||
std::string s = "";
|
||||
switch (machInst.rlist) {
|
||||
case 15:
|
||||
s = csprintf("{%s, %s-%s}", registerName(ReturnAddrReg),
|
||||
registerName(int_reg::S0),
|
||||
registerName(PushPopRegList[0]));
|
||||
break;
|
||||
case 14:
|
||||
case 13:
|
||||
case 12:
|
||||
case 11:
|
||||
case 10:
|
||||
case 9:
|
||||
case 8:
|
||||
case 7:
|
||||
case 6:
|
||||
s = csprintf("{%s, %s-%s}", registerName(ReturnAddrReg),
|
||||
registerName(int_reg::S0),
|
||||
registerName(PushPopRegList[16-machInst.rlist]));
|
||||
break;
|
||||
case 5:
|
||||
s = csprintf("{%s, %s}", registerName(ReturnAddrReg),
|
||||
registerName(int_reg::S0));
|
||||
break;
|
||||
case 4:
|
||||
s = csprintf("{%s}", registerName(ReturnAddrReg));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
} // namespace RiscvISA
|
||||
} // namespace gem5
|
||||
60
src/arch/riscv/insts/zcmp.hh
Normal file
60
src/arch/riscv/insts/zcmp.hh
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Google LLC
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_RISCV_INSTS_ZCMP_HH__
|
||||
#define __ARCH_RISCV_INSTS_ZCMP_HH__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "arch/riscv/insts/static_inst.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace RiscvISA
|
||||
{
|
||||
|
||||
class CmMacroInst : public RiscvMacroInst
|
||||
{
|
||||
public:
|
||||
CmMacroInst(const char* mnem, ExtMachInst machInst, OpClass opClass);
|
||||
|
||||
protected:
|
||||
using RiscvMacroInst::RiscvMacroInst;
|
||||
|
||||
uint64_t stackAdj() const;
|
||||
std::string getRlistStr() const;
|
||||
|
||||
uint64_t rlist;
|
||||
};
|
||||
|
||||
} // namespace RiscvISA
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __ARCH_RISCV_INSTS_ZCMP_HH__
|
||||
@@ -95,6 +95,11 @@ class Interrupts : public BaseInterrupts
|
||||
|
||||
void clearAll() override;
|
||||
|
||||
bool isWakeUp() const override
|
||||
{
|
||||
return checkNonMaskableInterrupt() || (ip & ie).any();
|
||||
}
|
||||
|
||||
uint64_t readIP() const { return (uint64_t)ip.to_ulong(); }
|
||||
uint64_t readIE() const { return (uint64_t)ie.to_ulong(); }
|
||||
void setIP(const uint64_t& val) { ip = val; }
|
||||
|
||||
@@ -260,7 +260,7 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
|
||||
ISA::ISA(const Params &p) : BaseISA(p, "riscv"),
|
||||
_rvType(p.riscv_type), enableRvv(p.enable_rvv), vlen(p.vlen), elen(p.elen),
|
||||
_privilegeModeSet(p.privilege_mode_set),
|
||||
_wfiResumeOnPending(p.wfi_resume_on_pending)
|
||||
_wfiResumeOnPending(p.wfi_resume_on_pending), _enableZcd(p.enable_Zcd)
|
||||
{
|
||||
_regClasses.push_back(&intRegClass);
|
||||
_regClasses.push_back(&floatRegClass);
|
||||
|
||||
@@ -108,6 +108,14 @@ class ISA : public BaseISA
|
||||
*/
|
||||
const bool _wfiResumeOnPending;
|
||||
|
||||
/**
 * Enable the Zcd extension.
 * Setting this option to false implies that Zcmp and Zcmt are enabled,
 * because their encodings overlap with Zcd instructions such as c.fsdsp.
 * Refs: https://github.com/riscv/riscv-isa-manual/blob/main/src/zc.adoc
 */
|
||||
bool _enableZcd;
|
||||
|
||||
public:
|
||||
using Params = RiscvISAParams;
|
||||
|
||||
@@ -184,6 +192,8 @@ class ISA : public BaseISA
|
||||
|
||||
/** Return the configured _wfiResumeOnPending setting. */
bool
resumeOnPending()
{
    return _wfiResumeOnPending;
}
|
||||
|
||||
/** Return the configured _enableZcd setting. */
bool
enableZcd()
{
    return _enableZcd;
}
|
||||
|
||||
virtual Addr getFaultHandlerAddr(
|
||||
RegIndex idx, uint64_t cause, bool intr) const;
|
||||
};
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
// Bitfield definitions.
|
||||
//
|
||||
def bitfield RVTYPE rv_type;
|
||||
def bitfield ENABLE_ZCD enable_zcd;
|
||||
|
||||
def bitfield QUADRANT <1:0>;
|
||||
def bitfield OPCODE5 <6:2>;
|
||||
@@ -103,10 +104,13 @@ def bitfield CFUNCT1 <12>;
|
||||
def bitfield CFUNCT1BIT6 <6>;
|
||||
def bitfield CFUNCT2HIGH <11:10>;
|
||||
def bitfield CFUNCT2LOW <6:5>;
|
||||
def bitfield CFUNCT2MID <9:8>;
|
||||
def bitfield RC1 <11:7>;
|
||||
def bitfield RC2 <6:2>;
|
||||
def bitfield RP1 <9:7>;
|
||||
def bitfield RP2 <4:2>;
|
||||
def bitfield R1S <9:7>;
|
||||
def bitfield R2S <4:2>;
|
||||
def bitfield FC1 <11:7>;
|
||||
def bitfield FC2 <6:2>;
|
||||
def bitfield FP2 <4:2>;
|
||||
|
||||
@@ -54,6 +54,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
Rp2 = rvSext(sp + imm);
|
||||
}}, uint64_t);
|
||||
format CompressedLoad {
|
||||
0x1: decode ENABLE_ZCD {
|
||||
0x1: c_fld({{
|
||||
offset = CIMM3 << 3 | CIMM2 << 6;
|
||||
}}, {{
|
||||
@@ -71,6 +72,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}}, {{
|
||||
EA = rvSext(Rp1 + offset);
|
||||
}});
|
||||
}
|
||||
0x2: c_lw({{
|
||||
offset = CIMM2<1:1> << 2 |
|
||||
CIMM3 << 3 |
|
||||
@@ -152,7 +154,8 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}
|
||||
}
|
||||
format CompressedStore {
|
||||
0x5: c_fsd({{
|
||||
0x5: decode ENABLE_ZCD {
|
||||
0x1: c_fsd({{
|
||||
offset = CIMM3 << 3 | CIMM2 << 6;
|
||||
}}, {{
|
||||
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
||||
@@ -164,6 +167,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}}, {{
|
||||
EA = rvSext(Rp1 + offset);
|
||||
}});
|
||||
}
|
||||
0x6: c_sw({{
|
||||
offset = CIMM2<1:1> << 2 |
|
||||
CIMM3 << 3 |
|
||||
@@ -381,6 +385,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
Rc1 = rvSext(Rc1 << imm);
|
||||
}}, uint64_t);
|
||||
format CompressedLoad {
|
||||
0x1: decode ENABLE_ZCD {
|
||||
0x1: c_fldsp({{
|
||||
offset = CIMM5<4:3> << 3 |
|
||||
CIMM1 << 5 |
|
||||
@@ -398,6 +403,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}}, {{
|
||||
EA = rvSext(sp + offset);
|
||||
}});
|
||||
}
|
||||
0x2: c_lwsp({{
|
||||
offset = CIMM5<4:2> << 2 |
|
||||
CIMM1 << 5 |
|
||||
@@ -480,7 +486,22 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}
|
||||
}
|
||||
format CompressedStore {
|
||||
0x5: c_fsdsp({{
|
||||
0x5: decode ENABLE_ZCD {
|
||||
0x0: decode CFUNCT6LOW3 {
|
||||
0x3: decode CFUNCT2LOW {
|
||||
0x1: CmMvsa01::cm_mvsa01();
|
||||
0x3: CmMva01s::cm_mva01s();
|
||||
}
|
||||
0x6: decode CFUNCT2MID {
|
||||
0x0: CmPush::cm_push();
|
||||
0x2: CmPop::cm_pop();
|
||||
}
|
||||
0x7: decode CFUNCT2MID {
|
||||
0x0: CmPop::cm_popretz(is_ret=True, has_a0=True);
|
||||
0x2: CmPop::cm_popret(is_ret=True);
|
||||
}
|
||||
}
|
||||
0x1: c_fsdsp({{
|
||||
offset = CIMM6<5:3> << 3 |
|
||||
CIMM6<2:0> << 6;
|
||||
}}, {{
|
||||
@@ -493,6 +514,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}}, {{
|
||||
EA = rvSext(sp + offset);
|
||||
}});
|
||||
}
|
||||
0x6: c_swsp({{
|
||||
offset = CIMM6<5:2> << 2 |
|
||||
CIMM6<1:0> << 6;
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
##include "vector_conf.isa"
|
||||
##include "vector_arith.isa"
|
||||
##include "vector_mem.isa"
|
||||
##include "zcmp.isa"
|
||||
|
||||
// Include formats for nonstandard extensions
|
||||
##include "compressed.isa"
|
||||
|
||||
782
src/arch/riscv/isa/formats/zcmp.isa
Normal file
782
src/arch/riscv/isa/formats/zcmp.isa
Normal file
@@ -0,0 +1,782 @@
|
||||
// -*- mode:c++ -*-
|
||||
|
||||
// Copyright (c) 2015 RISC-V Foundation
|
||||
// Copyright (c) 2016 The University of Virginia
|
||||
// Copyright (c) 2024 Google LLC
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met: redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer;
|
||||
// redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution;
|
||||
// neither the name of the copyright holders nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Cmpush template.
|
||||
def template CmPushDeclare {{
    /**
     * Macro-instruction class for a Zcmp push. The micro-ops are created by
     * the constructor; only the disassembly is overridden here.
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst);

      protected:
        using %(base_class)s::%(base_class)s;

        std::string generateDisassembly(
            Addr pc, const loader::SymbolTable *symtab) const override;
    };
}};
|
||||
|
||||
|
||||
def template CmPushConstructor {{
    // Expand the push macro-op into one store micro-op per selected
    // register followed by a single stack-pointer adjustment.
    %(class_name)s::%(class_name)s(ExtMachInst machInst) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
    {
        StaticInstPtr uop = nullptr;
        if (rlist >= 4) {
            // rlist == 15 walks the whole PushPopRegList; every other value
            // skips the first (16 - rlist) entries.
            const int first_reg = (rlist == 15) ? 0 : (16 - rlist);

            // The offset is lowered before every store, so the first
            // register is written just below the incoming stack pointer.
            int offset = 0;
            for (int idx = first_reg; idx < PushPopRegList.size(); idx++) {
                offset -= rvSelect(4, 8);

                if (machInst.rv_type == RV32) {
                    uop = new %(class_name)s32MicroInst(
                        machInst, PushPopRegList[idx], offset);
                } else {
                    uop = new %(class_name)s64MicroInst(
                        machInst, PushPopRegList[idx], offset);
                }
                uop->setDelayedCommit();
                microops.emplace_back(uop);
            }

            // Finally move the stack pointer down by the full adjustment.
            uop = new %(class_name)sSpAdjMicroInst(machInst, -stackAdj());
            uop->setDelayedCommit();
            microops.emplace_back(uop);
        } else {
            // rlist values below 4 are not expanded: emit a single Unknown
            // micro-op instead.
            uop = new Unknown(machInst);
            uop->setFlag(IsMicroop);
            uop->setDelayedCommit();
            microops.emplace_back(uop);
        }

        microops.front()->setFirstMicroop();
        microops.back()->setLastMicroop();
    }
}};
|
||||
|
||||
def template CmPushExecute {{
    // Disassemble as "<mnemonic> <register list>, <negated stack adjust>".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << getRlistStr();
        out << ", " << (int64_t)-stackAdj();
        return out.str();
    }
}};
|
||||
|
||||
def template CmStoreMicroDeclare {{
    /**
     * Store micro-op of a Zcmp push: writes push_reg to memory at the
     * given offset (the effective address comes from the format's ea_code).
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst, RegId push_reg,
                       int64_t offset);

        Fault execute(ExecContext *, trace::InstRecord *) const override;
        Fault initiateAcc(
            ExecContext *, trace::InstRecord *) const override;
        Fault completeAcc(
            Packet *, ExecContext *, trace::InstRecord *) const override;
        std::string generateDisassembly(
            Addr, const loader::SymbolTable *) const override;

      protected:
        using %(base_class)s::%(base_class)s;

      private:
        %(reg_idx_arr_decl)s;

        /// Offset used by the effective-address code.
        int64_t offset;
        /// Flags passed along with the memory request.
        Request::Flags memAccessFlags;
    };
}};
|
||||
|
||||
def template CmStoreMicroConstructor {{
    // Record the offset, then run the generated register-index and
    // constructor code (which references push_reg).
    %(class_name)s::%(class_name)s(
        ExtMachInst machInst, RegId push_reg, int64_t offset) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s),
        offset(offset)
    {
        %(set_reg_idx_arr)s;
        %(constructor)s;
    }
}};
|
||||
|
||||
def template CmStoreMicroExecute {{
    // Atomic-mode execution: compute the address, build the store data and
    // write it to memory in little-endian order.
    Fault
    %(class_name)s::execute(
        ExecContext *xc, trace::InstRecord *traceData) const
    {
        Addr EA;

        %(op_decl)s;
        %(op_rd)s;
        %(ea_code)s;

        %(memacc_code)s;

        {
            const Fault write_fault = writeMemAtomicLE(
                xc, traceData, Mem, EA, memAccessFlags, nullptr);
            if (write_fault != NoFault)
                return write_fault;
        }

        %(op_wb)s;

        return NoFault;
    }

    // Disassemble as "<mnemonic> <src1>, <offset>(<src0>)".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", ";
        out << offset << '(' << registerName(srcRegIdx(0)) << ')';
        return out.str();
    }
}};
|
||||
|
||||
def template CmStoreMicroInitiateAcc {{
    // Timing-mode counterpart of execute(): issue the store and return.
    Fault
    %(class_name)s::initiateAcc(ExecContext *xc,
        trace::InstRecord *traceData) const
    {
        Addr EA;

        %(op_decl)s;
        %(op_rd)s;
        %(ea_code)s;

        %(memacc_code)s;

        {
            const Fault write_fault = writeMemTimingLE(
                xc, traceData, Mem, EA, memAccessFlags, nullptr);
            if (write_fault != NoFault)
                return write_fault;
        }

        %(op_wb)s;

        return NoFault;
    }
}};
|
||||
|
||||
def template CmStoreMicroCompleteAcc {{
    // Nothing is done when the store response arrives.
    Fault
    %(class_name)s::completeAcc(
        PacketPtr pkt, ExecContext *xc, trace::InstRecord *traceData) const
    {
        return NoFault;
    }
}};
|
||||
|
||||
def template SpAdjMicroDeclare {{
    /**
     * Micro-op that adds a fixed signed amount to the stack pointer.
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst, int64_t adj);

      protected:
        using %(base_class)s::%(base_class)s;

        Fault execute(ExecContext *, trace::InstRecord *) const override;
        std::string generateDisassembly(
            Addr, const loader::SymbolTable *) const override;

      private:
        %(reg_idx_arr_decl)s;

        /// Amount added to the stack pointer by the generated code.
        int64_t adj;
    };
}};
|
||||
|
||||
def template SpAdjMicroConstructor {{
    // Store the adjustment and run the generated constructor code.
    %(class_name)s::%(class_name)s(ExtMachInst machInst, int64_t adj) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s),
        adj(adj)
    {
        %(set_reg_idx_arr)s;
        %(constructor)s;
    }
}};
|
||||
|
||||
def template SpAdjMicroExecute {{
    // Read the operands, run the format's code and write the result back.
    Fault
    %(class_name)s::execute(
        ExecContext *xc, trace::InstRecord *traceData) const
    {
        %(op_decl)s;
        %(op_rd)s;
        %(code)s;
        %(op_wb)s;
        return NoFault;
    }

    // Disassemble as "<mnemonic> <dest> <src> <adj>".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << registerName(destRegIdx(0));
        out << ' ' << registerName(srcRegIdx(0)) << ' ' << adj;
        return out.str();
    }
}};
|
||||
|
||||
// Cmpop decode template.
|
||||
def template CmPopDeclare {{
    /**
     * Macro-instruction class for the Zcmp pop family. The micro-ops are
     * created by the constructor; only the disassembly is overridden here.
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst);

        std::string generateDisassembly(
            Addr pc, const loader::SymbolTable *symtab) const override;

      protected:
        using %(base_class)s::%(base_class)s;
    };
}};
|
||||
|
||||
|
||||
def template CmPopConstructor {{
    // Expand the pop macro-op into one load micro-op per selected register,
    // a stack-pointer adjustment, and whatever extra micro-ops the format
    // substitutes through move_a0_desc / return_desc.
    %(class_name)s::%(class_name)s(ExtMachInst machInst) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
    {
        // The substituted snippets below refer to cur_inst by name.
        StaticInstPtr cur_inst = nullptr;
        if (rlist >= 4) {
            // rlist == 15 walks the whole PushPopRegList; every other value
            // skips the first (16 - rlist) entries.
            const int first_reg = (rlist == 15) ? 0 : (16 - rlist);

            // The offset starts at the stack adjustment and is lowered
            // before every load.
            int offset = stackAdj();
            for (int idx = first_reg; idx < PushPopRegList.size(); idx++) {
                offset -= rvSelect(4, 8);

                if (machInst.rv_type == RV32) {
                    cur_inst = new %(class_name)s32MicroInst(
                        machInst, PushPopRegList[idx], offset);
                } else {
                    cur_inst = new %(class_name)s64MicroInst(
                        machInst, PushPopRegList[idx], offset);
                }
                cur_inst->setDelayedCommit();
                microops.emplace_back(cur_inst);
            }

            // Release the frame by moving the stack pointer back up.
            cur_inst = new %(class_name)sSpAdjMicroInst(
                machInst, stackAdj());
            cur_inst->setDelayedCommit();
            microops.emplace_back(cur_inst);

            %(move_a0_desc)s;
            %(return_desc)s;
        } else {
            // rlist values below 4 are not expanded: emit a single Unknown
            // micro-op instead.
            cur_inst = new Unknown(machInst);
            cur_inst->setFlag(IsMicroop);
            cur_inst->setDelayedCommit();
            microops.emplace_back(cur_inst);
        }

        microops.front()->setFirstMicroop();
        microops.back()->setLastMicroop();
    }
}};
|
||||
|
||||
def template CmPopExecute {{
    // Disassemble as "<mnemonic> <register list>, <stack adjust>".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << getRlistStr();
        out << ", " << stackAdj();
        return out.str();
    }
}};
|
||||
|
||||
def template CmLoadMicroDeclare {{
    /**
     * Load micro-op of a Zcmp pop: reads memory at the given offset into
     * pop_reg (the effective address comes from the format's ea_code).
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst, RegId pop_reg,
                       int64_t offset);

        Fault execute(ExecContext *, trace::InstRecord *) const override;
        Fault initiateAcc(
            ExecContext *, trace::InstRecord *) const override;
        Fault completeAcc(
            Packet *, ExecContext *, trace::InstRecord *) const override;
        std::string generateDisassembly(
            Addr, const loader::SymbolTable *) const override;

      protected:
        using %(base_class)s::%(base_class)s;

      private:
        %(reg_idx_arr_decl)s;

        /// Offset used by the effective-address code.
        int64_t offset;
        /// Flags passed along with the memory request.
        Request::Flags memAccessFlags;
    };
}};
|
||||
|
||||
def template CmLoadMicroConstructor {{
    // Record the offset, then run the generated register-index and
    // constructor code (which references pop_reg).
    %(class_name)s::%(class_name)s(
        ExtMachInst machInst, RegId pop_reg, int64_t offset) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s),
        offset(offset)
    {
        %(set_reg_idx_arr)s;
        %(constructor)s;
    }
}};
|
||||
|
||||
def template CmLoadMicroExecute {{
    // Atomic-mode execution: compute the address, read memory in
    // little-endian order, then run the access code and write back.
    Fault
    %(class_name)s::execute(
        ExecContext *xc, trace::InstRecord *traceData) const
    {
        Addr EA;

        %(op_decl)s;
        %(op_rd)s;
        %(ea_code)s;

        {
            const Fault read_fault = readMemAtomicLE(
                xc, traceData, EA, Mem, memAccessFlags);
            if (read_fault != NoFault)
                return read_fault;
        }

        %(memacc_code)s;

        %(op_wb)s;

        return NoFault;
    }

    // Disassemble as "<mnemonic> <dest>, <offset>(<src0>)".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
        out << offset << '(' << registerName(srcRegIdx(0)) << ')';
        return out.str();
    }
}};
|
||||
|
||||
def template CmLoadMicroInitiateAcc {{
    // Timing-mode first half: compute the address and start the read.
    Fault
    %(class_name)s::initiateAcc(
        ExecContext *xc, trace::InstRecord *traceData) const
    {
        Addr EA;

        %(op_src_decl)s;
        %(op_rd)s;
        %(ea_code)s;

        return initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
    }
}};
|
||||
|
||||
def template CmLoadMicroCompleteAcc {{
    // Timing-mode second half: extract the data from the response packet,
    // run the access code and write the destination back.
    Fault
    %(class_name)s::completeAcc(
        PacketPtr pkt, ExecContext *xc, trace::InstRecord *traceData) const
    {
        %(op_decl)s;
        %(op_rd)s;

        getMemLE(pkt, Mem, traceData);

        %(memacc_code)s;
        %(op_wb)s;

        return NoFault;
    }
}};
|
||||
|
||||
def template CmRetMicroDeclare {{
    /**
     * Return micro-op appended to the pop variants that also return.
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        /// Constructor.
        %(class_name)s(ExtMachInst machInst);

      protected:
        using %(base_class)s::%(base_class)s;

        Fault execute(ExecContext *, trace::InstRecord *) const override;

        std::string generateDisassembly(
            Addr pc, const loader::SymbolTable *symtab) const override;

        std::unique_ptr<PCStateBase> branchTarget(
            ThreadContext *tc) const override;

        using StaticInst::branchTarget;

      private:
        %(reg_idx_arr_decl)s;
    };
}};
|
||||
|
||||
def template CmRetMicroConstructor {{
    // Only the generated register-index and constructor code runs here.
    %(class_name)s::%(class_name)s(ExtMachInst machInst) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
    {
        %(set_reg_idx_arr)s;
        %(constructor)s;
    }
}};
|
||||
|
||||
def template CmRetMicroExecute {{
    // Read the operands, run the format's code and write the result back.
    Fault
    %(class_name)s::execute(
        ExecContext *xc, trace::InstRecord *traceData) const
    {
        %(op_decl)s;
        %(op_rd)s;
        %(code)s;
        %(op_wb)s;
        return NoFault;
    }

    // The target is the first source register with bit 0 cleared.
    std::unique_ptr<PCStateBase>
    %(class_name)s::branchTarget(ThreadContext *tc) const
    {
        std::unique_ptr<PCStateBase> target{tc->pcState().clone()};
        target->as<PCState>().set(
            rvSext(tc->getReg(srcRegIdx(0)) & ~0x1));
        return target;
    }

    // Disassemble as "<mnemonic> <src0>".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << registerName(srcRegIdx(0));
        return out.str();
    }
}};
|
||||
|
||||
// Cmmvsa01 decode template
|
||||
def template CmMvDeclare {{
    /**
     * Macro-instruction class shared by cm.mvsa01 and cm.mva01s. The
     * constructor builds the two move micro-ops.
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst);

      protected:
        using %(base_class)s::%(base_class)s;

        std::string generateDisassembly(
            Addr pc, const loader::SymbolTable *symtab) const override;
    };
}};
|
||||
|
||||
def template CmMvsa01Constructor {{
    // Build two move micro-ops: a0 into StackRegs[r1s] and a1 into
    // StackRegs[r2s]. The micro-op constructor takes (source, destination).
    %(class_name)s::%(class_name)s(ExtMachInst machInst) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
    {
        StaticInstPtr mv_a0 = new %(class_name)sMvMicroInst(
            machInst, int_reg::A0, StackRegs[machInst.r1s]);
        microops.emplace_back(mv_a0);

        StaticInstPtr mv_a1 = new %(class_name)sMvMicroInst(
            machInst, int_reg::A1, StackRegs[machInst.r2s]);
        microops.emplace_back(mv_a1);

        microops.front()->setFirstMicroop();
        microops.back()->setLastMicroop();
    }
}};
|
||||
|
||||
def template CmMva01sConstructor {{
    // Build two move micro-ops: StackRegs[r1s] into a0 and StackRegs[r2s]
    // into a1. The micro-op constructor takes (source, destination).
    %(class_name)s::%(class_name)s(ExtMachInst machInst) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
    {
        StaticInstPtr mv_a0 = new %(class_name)sMvMicroInst(
            machInst, StackRegs[machInst.r1s], int_reg::A0);
        mv_a0->setDelayedCommit();
        microops.emplace_back(mv_a0);

        StaticInstPtr mv_a1 = new %(class_name)sMvMicroInst(
            machInst, StackRegs[machInst.r2s], int_reg::A1);
        mv_a1->setDelayedCommit();
        microops.emplace_back(mv_a1);

        microops.front()->setFirstMicroop();
        microops.back()->setLastMicroop();
    }
}};
|
||||
|
||||
def template CmMvExecute {{
    // Disassemble as "<mnemonic> <StackRegs[r1s]>, <StackRegs[r2s]>".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ';
        out << registerName(StackRegs[machInst.r1s]) << ", ";
        out << registerName(StackRegs[machInst.r2s]);
        return out.str();
    }
}};
|
||||
|
||||
def template CmMvMicroDeclare {{
    /**
     * Register-to-register move micro-op. With the formats' code
     * "CmPopReg = CmPushReg", push_reg is the source and pop_reg is the
     * destination.
     */
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst machInst, RegId push_reg,
                       RegId pop_reg);

      protected:
        using %(base_class)s::%(base_class)s;

        Fault execute(ExecContext *, trace::InstRecord *) const override;
        std::string generateDisassembly(
            Addr, const loader::SymbolTable *) const override;

      private:
        %(reg_idx_arr_decl)s;
    };
}};
|
||||
|
||||
def template CmMvMicroConstructor {{
    // The generated register-index code consumes push_reg and pop_reg.
    %(class_name)s::%(class_name)s(
        ExtMachInst machInst, RegId push_reg, RegId pop_reg) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
    {
        %(set_reg_idx_arr)s;
        %(constructor)s;
    }
}};
|
||||
|
||||
def template CmMvMicroExecute {{
    // Read the operands, run the format's code and write the result back.
    Fault
    %(class_name)s::execute(
        ExecContext *xc, trace::InstRecord *traceData) const
    {
        %(op_decl)s;
        %(op_rd)s;
        %(code)s;
        %(op_wb)s;
        return NoFault;
    }

    // Disassemble as "<mnemonic> <dest> <src>".
    std::string
    %(class_name)s::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
    {
        std::stringstream out;
        out << mnemonic << ' ' << registerName(destRegIdx(0));
        out << ' ' << registerName(srcRegIdx(0));
        return out.str();
    }
}};
|
||||
|
||||
def format CmPush(*flags) {{
|
||||
code = ''
|
||||
macro_iop = InstObjParams(name, Name, 'CmMacroInst', code, flags)
|
||||
header_output = CmPushDeclare.subst(macro_iop)
|
||||
decoder_output = CmPushConstructor.subst(macro_iop)
|
||||
exec_output = CmPushExecute.subst(macro_iop)
|
||||
decode_block = BasicDecode.subst(macro_iop)
|
||||
|
||||
memacc_code = 'Mem_sw = CmPushReg_sw;'
|
||||
ea_code = 'EA = rvSext(sp + offset);'
|
||||
micro32_iop = InstObjParams('lw', f'{Name}32MicroInst', 'RiscvMicroInst',
|
||||
{'ea_code': ea_code, 'memacc_code': memacc_code},
|
||||
flags)
|
||||
|
||||
mem_flags = [getAlignFlag(micro32_iop)]
|
||||
s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
|
||||
micro32_iop.constructor += s
|
||||
|
||||
header_output += CmStoreMicroDeclare.subst(micro32_iop)
|
||||
decoder_output += CmStoreMicroConstructor.subst(micro32_iop)
|
||||
exec_output += CmStoreMicroExecute.subst(micro32_iop) \
|
||||
+ CmStoreMicroInitiateAcc.subst(micro32_iop) \
|
||||
+ CmStoreMicroCompleteAcc.subst(micro32_iop)
|
||||
|
||||
memacc_code = 'Mem = CmPushReg;'
|
||||
ea_code = 'EA = rvSext(sp + offset);'
|
||||
micro64_iop = InstObjParams('ld', f'{Name}64MicroInst', 'RiscvMicroInst',
|
||||
{'ea_code': ea_code, 'memacc_code': memacc_code},
|
||||
flags)
|
||||
|
||||
mem_flags = [getAlignFlag(micro64_iop)]
|
||||
s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
|
||||
micro64_iop.constructor += s
|
||||
|
||||
header_output += CmStoreMicroDeclare.subst(micro64_iop)
|
||||
decoder_output += CmStoreMicroConstructor.subst(micro64_iop)
|
||||
exec_output += CmStoreMicroExecute.subst(micro64_iop) \
|
||||
+ CmStoreMicroInitiateAcc.subst(micro64_iop) \
|
||||
+ CmStoreMicroCompleteAcc.subst(micro64_iop)
|
||||
|
||||
code = 'spd = rvSext(sp + adj);'
|
||||
sp_adj_iop = InstObjParams('addi', f'{Name}SpAdjMicroInst',
|
||||
'RiscvMicroInst', code, flags)
|
||||
|
||||
header_output += SpAdjMicroDeclare.subst(sp_adj_iop)
|
||||
decoder_output += SpAdjMicroConstructor.subst(sp_adj_iop)
|
||||
exec_output += SpAdjMicroExecute.subst(sp_adj_iop)
|
||||
}};
|
||||
|
||||
def format CmPop(is_ret=False, has_a0=False, *flags) {{
|
||||
code = ''
|
||||
flags = []
|
||||
has_a0 = eval(has_a0)
|
||||
is_ret = eval(is_ret)
|
||||
move_a0_desc = ''
|
||||
return_desc = ''
|
||||
|
||||
if has_a0:
|
||||
move_a0_desc = rf'''
|
||||
cur_inst = new {Name}MvMicroInst(
|
||||
machInst, ReturnValueReg, int_reg::Zero);
|
||||
microops.emplace_back(cur_inst);
|
||||
'''
|
||||
|
||||
if is_ret:
|
||||
return_desc = rf'''
|
||||
cur_inst = new {Name}RetMicroInst(machInst);
|
||||
microops.emplace_back(cur_inst);
|
||||
'''
|
||||
|
||||
macro_iop = InstObjParams(name, Name, 'CmMacroInst',
|
||||
{'code': code, 'move_a0_desc': move_a0_desc,
|
||||
'return_desc': return_desc},
|
||||
flags)
|
||||
header_output = CmPopDeclare.subst(macro_iop)
|
||||
decoder_output = CmPopConstructor.subst(macro_iop)
|
||||
exec_output = CmPopExecute.subst(macro_iop)
|
||||
decode_block = BasicDecode.subst(macro_iop)
|
||||
|
||||
memacc_code = 'CmPopReg_sw = Mem_sw;'
|
||||
ea_code = 'EA = rvSext(sp + offset);'
|
||||
micro32_iop = InstObjParams('lw', f'{Name}32MicroInst', 'RiscvMicroInst',
|
||||
{'ea_code': ea_code, 'memacc_code': memacc_code},
|
||||
flags)
|
||||
|
||||
mem_flags = [getAlignFlag(micro32_iop)]
|
||||
s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
|
||||
micro32_iop.constructor += s
|
||||
|
||||
header_output += CmLoadMicroDeclare.subst(micro32_iop)
|
||||
decoder_output += CmLoadMicroConstructor.subst(micro32_iop)
|
||||
exec_output += CmLoadMicroExecute.subst(micro32_iop) \
|
||||
+ CmLoadMicroInitiateAcc.subst(micro32_iop) \
|
||||
+ CmLoadMicroCompleteAcc.subst(micro32_iop)
|
||||
|
||||
memacc_code = 'CmPopReg = Mem;'
|
||||
ea_code = 'EA = rvSext(sp + offset);'
|
||||
micro64_iop = InstObjParams('ld', f'{Name}64MicroInst', 'RiscvMicroInst',
|
||||
{'ea_code': ea_code, 'memacc_code': memacc_code},
|
||||
flags)
|
||||
|
||||
mem_flags = [getAlignFlag(micro64_iop)]
|
||||
s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
|
||||
micro64_iop.constructor += s
|
||||
|
||||
header_output += CmLoadMicroDeclare.subst(micro64_iop)
|
||||
decoder_output += CmLoadMicroConstructor.subst(micro64_iop)
|
||||
exec_output += CmLoadMicroExecute.subst(micro64_iop) \
|
||||
+ CmLoadMicroInitiateAcc.subst(micro64_iop) \
|
||||
+ CmLoadMicroCompleteAcc.subst(micro64_iop)
|
||||
|
||||
code = 'spd = rvSext(sp + adj);'
|
||||
sp_adj_iop = InstObjParams('addi', f'{Name}SpAdjMicroInst',
|
||||
'RiscvMicroInst', code, flags)
|
||||
|
||||
header_output += SpAdjMicroDeclare.subst(sp_adj_iop)
|
||||
decoder_output += SpAdjMicroConstructor.subst(sp_adj_iop)
|
||||
exec_output += SpAdjMicroExecute.subst(sp_adj_iop)
|
||||
|
||||
if has_a0:
|
||||
code = 'CmPopReg = CmPushReg;'
|
||||
has_a0_iop = InstObjParams('mv', f'{Name}MvMicroInst',
|
||||
'RiscvMicroInst', code, flags)
|
||||
|
||||
header_output += CmMvMicroDeclare.subst(has_a0_iop)
|
||||
decoder_output += CmMvMicroConstructor.subst(has_a0_iop)
|
||||
exec_output += CmMvMicroExecute.subst(has_a0_iop)
|
||||
|
||||
if is_ret:
|
||||
code = 'NPC = rvSext(ra & (~0x1));'
|
||||
ret_flags = ['IsIndirectControl', 'IsUncondControl', 'IsReturn']
|
||||
is_ret_iop = InstObjParams('jr', f'{Name}RetMicroInst',
|
||||
'RiscvMicroInst', code, ret_flags)
|
||||
|
||||
header_output += CmRetMicroDeclare.subst(is_ret_iop)
|
||||
decoder_output += CmRetMicroConstructor.subst(is_ret_iop)
|
||||
exec_output += CmRetMicroExecute.subst(is_ret_iop)
|
||||
}};
|
||||
|
||||
def format CmMvsa01() {{
|
||||
code = ''
|
||||
flags = []
|
||||
iop = InstObjParams(name, Name, 'RiscvMacroInst', code, flags)
|
||||
header_output = CmMvDeclare.subst(iop)
|
||||
decoder_output = CmMvsa01Constructor.subst(iop)
|
||||
exec_output = CmMvExecute.subst(iop)
|
||||
decode_block = BasicDecode.subst(iop)
|
||||
|
||||
code = 'CmPopReg = CmPushReg;'
|
||||
micro_iop = InstObjParams('mv', f'{Name}MvMicroInst', 'RiscvMicroInst',
|
||||
code, flags)
|
||||
|
||||
header_output += CmMvMicroDeclare.subst(micro_iop)
|
||||
decoder_output += CmMvMicroConstructor.subst(micro_iop)
|
||||
exec_output += CmMvMicroExecute.subst(micro_iop)
|
||||
}};
|
||||
|
||||
def format CmMva01s() {{
|
||||
code = ''
|
||||
flags = []
|
||||
iop = InstObjParams(name, Name, 'RiscvMacroInst', code, flags)
|
||||
header_output = CmMvDeclare.subst(iop)
|
||||
decoder_output = CmMva01sConstructor.subst(iop)
|
||||
exec_output = CmMvExecute.subst(iop)
|
||||
decode_block = BasicDecode.subst(iop)
|
||||
|
||||
code = 'CmPopReg = CmPushReg;'
|
||||
micro_iop = InstObjParams('mv', f'{Name}MvMicroInst', 'RiscvMicroInst',
|
||||
code, flags)
|
||||
|
||||
header_output += CmMvMicroDeclare.subst(micro_iop)
|
||||
decoder_output += CmMvMicroConstructor.subst(micro_iop)
|
||||
exec_output += CmMvMicroExecute.subst(micro_iop)
|
||||
}};
|
||||
@@ -55,6 +55,7 @@ output header {{
|
||||
#include "arch/riscv/insts/static_inst.hh"
|
||||
#include "arch/riscv/insts/unknown.hh"
|
||||
#include "arch/riscv/insts/vector.hh"
|
||||
#include "arch/riscv/insts/zcmp.hh"
|
||||
#include "arch/riscv/interrupts.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
#include "mem/packet.hh"
|
||||
|
||||
@@ -70,10 +70,14 @@ def operands {{
|
||||
'Rp2': IntReg('ud', 'RP2 + 8', 'IsInteger', 3),
|
||||
'ra': IntReg('ud', 'ReturnAddrReg', 'IsInteger', 1),
|
||||
'sp': IntReg('ud', 'StackPointerReg', 'IsInteger', 2),
|
||||
'spd': IntReg('ud', 'StackPointerReg', 'IsInteger', 1),
|
||||
|
||||
'a0': IntReg('ud', '10', 'IsInteger', 1),
|
||||
'a1': IntReg('ud', '11', 'IsInteger', 2),
|
||||
|
||||
'CmPushReg': IntReg('ud', 'push_reg', 'IsInteger', 3),
|
||||
'CmPopReg': IntReg('ud', 'pop_reg', 'IsInteger', 1),
|
||||
|
||||
'Fd': FloatRegOp('df', 'FD', 'IsFloating', 1),
|
||||
'Fd_bits': FloatRegOp('ud', 'FD', 'IsFloating', 1),
|
||||
'Fs1': FloatRegOp('df', 'FS1', 'IsFloating', 2),
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "arch/riscv/utility.hh"
|
||||
#include "kern/linux/flag_tables.hh"
|
||||
#include "kern/linux/linux.hh"
|
||||
#include "base/bitfield.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
@@ -42,6 +43,101 @@ class RiscvLinux : public Linux
|
||||
{
|
||||
public:
|
||||
static const ByteOrder byteOrder = ByteOrder::little;
|
||||
|
||||
/** Keys of the hwprobe key/value pairs, numbered consecutively from 0. */
enum RiscvHwprobeKey
{
    Mvendorid = 0,
    Marchid = 1,
    Mimpid = 2,
    BaseBehavior = 3,
    IMAExt0 = 4,
    Cpuperf0 = 5,
    ZicbozBlockSize = 6,
    HighestVirtAddress = 7,
    TimeCsrFreq = 8,
    MisalignedScalarPerf = 9
};
|
||||
|
||||
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
|
||||
#define RISCV_HWPROBE_MAX_KEY 9
|
||||
|
||||
/** Value layout for the BaseBehavior key: a single flag in bit 0. */
BitUnion64(key_base_behavior_t)
    Bitfield<0> ima;
EndBitUnion(key_base_behavior_t)
|
||||
|
||||
BitUnion64(key_ima_ext_0_t)
|
||||
Bitfield<49> ZAWRS;
|
||||
Bitfield<48> ZCMOP;
|
||||
Bitfield<47> ZCF;
|
||||
Bitfield<46> ZCD;
|
||||
Bitfield<45> ZCB;
|
||||
Bitfield<44> ZCA;
|
||||
Bitfield<43> ZIMOP;
|
||||
Bitfield<42> ZVE64D;
|
||||
Bitfield<41> ZVE64F;
|
||||
Bitfield<40> ZVE64X;
|
||||
Bitfield<39> ZVE32F;
|
||||
Bitfield<38> ZVE32X;
|
||||
Bitfield<37> ZIHINTPAUSE;
|
||||
Bitfield<36> ZICOND;
|
||||
Bitfield<35> ZACAS;
|
||||
Bitfield<34> ZTSO;
|
||||
Bitfield<33> ZFA;
|
||||
Bitfield<32> ZVFHMIN;
|
||||
Bitfield<31> ZVFH;
|
||||
Bitfield<30> ZIHINTNTL;
|
||||
Bitfield<29> ZFHMIN;
|
||||
Bitfield<28> ZFH;
|
||||
Bitfield<27> ZVKT;
|
||||
Bitfield<26> ZVKSH;
|
||||
Bitfield<25> ZVKSED;
|
||||
Bitfield<24> ZVKNHB;
|
||||
Bitfield<22> ZVKNHA;
|
||||
Bitfield<21> ZVKNED;
|
||||
Bitfield<20> ZVKG;
|
||||
Bitfield<19> ZVKB;
|
||||
Bitfield<18> ZVBC;
|
||||
Bitfield<17> ZVBB;
|
||||
Bitfield<16> ZKT;
|
||||
Bitfield<15> ZKSH;
|
||||
Bitfield<14> ZKSED;
|
||||
Bitfield<13> ZKNH;
|
||||
Bitfield<12> ZKNE;
|
||||
Bitfield<11> ZKND;
|
||||
Bitfield<10> ZBKX;
|
||||
Bitfield<9> ZBKC;
|
||||
Bitfield<8> ZBKB;
|
||||
Bitfield<7> ZBC;
|
||||
Bitfield<6> ZICBOZ;
|
||||
Bitfield<5> ZBS;
|
||||
Bitfield<4> ZBB;
|
||||
Bitfield<3> ZBA;
|
||||
Bitfield<2> V;
|
||||
Bitfield<1> C;
|
||||
Bitfield<0> FD;
|
||||
EndBitUnion(key_ima_ext_0_t)
|
||||
|
||||
/**
 * Values reported for the misaligned-access performance hwprobe keys.
 * Written with explicit numbers because they are handed to the guest.
 */
enum MisalignedScalarPerf
{
    Unknown = 0,
    Emulated = 1,
    Slow = 2,
    Fast = 3,
    Unsupported = 4
};
|
||||
|
||||
/* Flags */
|
||||
#define RISCV_HWPROBE_WHICH_CPUS (1 << 0)
|
||||
|
||||
/**
 * One key/value pair exchanged with the guest by riscv_hwprobe.
 * The key is signed so that an unrecognised key can be reported as -1.
 */
struct riscv_hwprobe
{
    int64_t key;
    uint64_t value;
};
|
||||
|
||||
/**
 * Variable-length CPU bitmap: `size` is the length of `bits` in bytes
 * (the helpers memset/memcpy `size` bytes), and `bits` is a trailing
 * flexible array that the allocator sizes at run time.
 */
struct cpumask
{
    size_t size;
    uint64_t bits[];
};
using cpumask_t = cpumask;
|
||||
};
|
||||
|
||||
class RiscvLinux64 : public RiscvLinux, public OpenFlagTable<RiscvLinux64>
|
||||
@@ -195,6 +291,21 @@ class RiscvLinux64 : public RiscvLinux, public OpenFlagTable<RiscvLinux64>
|
||||
uint32_t mem_unit;
|
||||
};
|
||||
|
||||
/**
 * Argument block for the clone3 syscall as seen by the 64-bit target.
 * Eleven consecutive 64-bit fields; the order is the in-memory layout
 * read from the guest, so it must not be rearranged.
 * NOTE(review): presumably mirrors Linux `struct clone_args` - confirm.
 */
struct tgt_clone_args
{
    uint64_t flags;
    uint64_t pidfd;
    uint64_t child_tid;
    uint64_t parent_tid;
    uint64_t exit_signal;
    uint64_t stack;
    uint64_t stack_size;
    uint64_t tls;
    uint64_t set_tid;
    uint64_t set_tid_size;
    uint64_t cgroup;
};
|
||||
|
||||
static void
|
||||
archClone(uint64_t flags,
|
||||
Process *pp, Process *cp,
|
||||
|
||||
@@ -44,6 +44,8 @@
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "arch/riscv/process.hh"
|
||||
#include "arch/riscv/insts/static_inst.hh"
|
||||
#include "arch/riscv/regs/misc.hh"
|
||||
#include "base/loader/object_file.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
@@ -134,6 +136,388 @@ unameFunc32(SyscallDesc *desc, ThreadContext *tc, VPtr<Linux::utsname> name)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
cpumask_set_cpu(unsigned int cpu, RiscvLinux::cpumask_t *dstp)
|
||||
{
|
||||
assert(cpu < dstp->size * 8);
|
||||
auto &bits = dstp->bits[cpu / sizeof(uint64_t)];
|
||||
bits = insertBits(bits, cpu % sizeof(uint64_t), 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cpumask_clear_cpu(unsigned int cpu, RiscvLinux::cpumask_t *dstp)
|
||||
{
|
||||
assert(cpu < dstp->size * 8);
|
||||
auto &bits = dstp->bits[cpu / sizeof(uint64_t)];
|
||||
bits = insertBits(bits, cpu % sizeof(uint64_t), 0);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
cpumask_test_cpu(unsigned int cpu, const RiscvLinux::cpumask_t *cpumask)
|
||||
{
|
||||
assert(cpu < cpumask->size * 8);
|
||||
return bits(cpumask->bits[cpu / sizeof(uint64_t)], cpu % sizeof(uint64_t)) != 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
cpumask_and(RiscvLinux::cpumask_t *dstp, const RiscvLinux::cpumask_t *src1p,
|
||||
const RiscvLinux::cpumask_t *src2p)
|
||||
{
|
||||
assert(dstp->size == src1p->size);
|
||||
assert(dstp->size == src2p->size);
|
||||
for (size_t i = 0; i < dstp->size / sizeof(dstp->bits[0]); i++) {
|
||||
dstp->bits[i] = src1p->bits[i] & src2p->bits[i];
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
cpumask_empty(const RiscvLinux::cpumask_t *dstp)
|
||||
{
|
||||
for (size_t i = 0; i < dstp->size / sizeof(dstp->bits[0]); i++) {
|
||||
if (dstp->bits[i] != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void
|
||||
cpumask_copy(RiscvLinux::cpumask_t *dstp, const RiscvLinux::cpumask_t *srcp)
|
||||
{
|
||||
assert(dstp->size == srcp->size);
|
||||
memcpy(dstp->bits, srcp->bits, srcp->size);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cpumask_clear(RiscvLinux::cpumask_t *dstp)
|
||||
{
|
||||
memset(dstp->bits, 0, dstp->size);
|
||||
}
|
||||
|
||||
/**
 * Allocate a zeroed CPU mask for the system that @p tc belongs to.
 *
 * The bit storage is sized to the thread count rounded up to a multiple
 * of 8 bytes (i.e. one byte per thread, which is more than the one bit
 * per thread strictly needed). The header part of the allocation uses
 * sizeof(cpumask_t) rather than sizeof(size): the flexible `bits` array
 * starts at the end of the struct including any alignment padding, which
 * can be larger than the `size` field alone.
 *
 * @return the new mask, or nullptr if malloc fails. Release it with
 *         cpumask_free().
 */
static inline RiscvLinux::cpumask_t *
cpumask_malloc(ThreadContext *tc)
{
    constexpr size_t word_bytes = sizeof(uint64_t);

    /* Round the byte count up to a whole number of 64-bit words. */
    const size_t nthreads = tc->getSystemPtr()->threads.size();
    const size_t size = (nthreads + word_bytes - 1) / word_bytes * word_bytes;

    auto *cpumask = static_cast<RiscvLinux::cpumask_t *>(
        malloc(sizeof(RiscvLinux::cpumask_t) + size));
    if (cpumask != nullptr) {
        cpumask->size = size;
        cpumask_clear(cpumask);
    }

    return cpumask;
}
|
||||
|
||||
static inline void
|
||||
cpumask_free(RiscvLinux::cpumask_t *cpu_online_mask)
|
||||
{
|
||||
free(cpu_online_mask);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
riscv_hwprobe_key_is_valid(int64_t key)
|
||||
{
|
||||
return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
hwprobe_key_is_bitmask(int64_t key)
|
||||
{
|
||||
switch (key) {
|
||||
case RiscvLinux::BaseBehavior:
|
||||
case RiscvLinux::IMAExt0:
|
||||
case RiscvLinux::Cpuperf0:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
riscv_hwprobe_pair_cmp(RiscvLinux::riscv_hwprobe *pair,
|
||||
RiscvLinux::riscv_hwprobe *other_pair)
|
||||
{
|
||||
if (pair->key != other_pair->key) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (hwprobe_key_is_bitmask(pair->key)) {
|
||||
return (pair->value & other_pair->value) == other_pair->value;
|
||||
}
|
||||
|
||||
return pair->value == other_pair->value;
|
||||
}
|
||||
|
||||
/**
 * Build a freshly allocated mask with one bit set for every thread
 * context of the simulated system (indices 0 .. threads.size() - 1).
 *
 * @return the mask, or nullptr if the allocation failed. The caller
 *         owns it and must release it with cpumask_free().
 */
static inline RiscvLinux::cpumask_t *
get_cpu_online_mask(ThreadContext *tc)
{
    RiscvLinux::cpumask_t *online = cpumask_malloc(tc);
    if (online == nullptr) {
        return nullptr;
    }

    const auto num_threads = tc->getSystemPtr()->threads.size();
    for (int cpu = 0; cpu < num_threads; cpu++) {
        CPU_SET(cpu, (cpu_set_t *)&online->bits);
    }

    return online;
}
|
||||
|
||||
static void
|
||||
hwprobe_one_pair(ThreadContext *tc, RiscvLinux::riscv_hwprobe *pair,
|
||||
RiscvLinux::cpumask_t *cpus)
|
||||
{
|
||||
switch (pair->key) {
|
||||
case RiscvLinux::Mvendorid:
|
||||
pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MVENDORID).physIndex);
|
||||
break;
|
||||
case RiscvLinux::Marchid:
|
||||
pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MARCHID).physIndex);
|
||||
break;
|
||||
case RiscvLinux::Mimpid:
|
||||
pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MIMPID).physIndex);
|
||||
break;
|
||||
case RiscvLinux::BaseBehavior:
|
||||
{
|
||||
MISA misa = tc->readMiscRegNoEffect(MISCREG_ISA);
|
||||
RiscvLinux::key_base_behavior_t *base_behavior =
|
||||
(RiscvLinux::key_base_behavior_t *)&pair->value;
|
||||
if (misa.rvi && misa.rvm && misa.rva) {
|
||||
base_behavior->ima = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case RiscvLinux::IMAExt0:
|
||||
{
|
||||
MISA misa = tc->readMiscRegNoEffect(MISCREG_ISA);
|
||||
RiscvLinux::key_ima_ext_0_t *ext = (RiscvLinux::key_ima_ext_0_t *)&pair->value;
|
||||
if (misa.rvf && misa.rvd) ext->FD = 1;
|
||||
if (misa.rvc) ext->C = 1;
|
||||
if (misa.rvv) ext->V = 1;
|
||||
ext->ZBA = 1;
|
||||
ext->ZBB = 1;
|
||||
ext->ZBS = 1;
|
||||
ext->ZICBOZ = 1;
|
||||
ext->ZBC = 1;
|
||||
ext->ZBKB = 1;
|
||||
ext->ZBKC = 1;
|
||||
ext->ZBKX = 1;
|
||||
ext->ZKND = 1;
|
||||
ext->ZKNE = 1;
|
||||
ext->ZKNH = 1;
|
||||
ext->ZKSED = 1;
|
||||
ext->ZKSH = 1;
|
||||
ext->ZKT = 1;
|
||||
ext->ZFH = 1;
|
||||
ext->ZFHMIN = 1;
|
||||
ext->ZVFH = 1;
|
||||
ext->ZVFHMIN = 1;
|
||||
ext->ZICOND = 1;
|
||||
ext->ZVE64D = 1;
|
||||
ext->ZCB = 1;
|
||||
ext->ZCD = 1;
|
||||
ext->ZCF = 1;
|
||||
}
|
||||
break;
|
||||
case RiscvLinux::Cpuperf0:
|
||||
case RiscvLinux::MisalignedScalarPerf:
|
||||
pair->value = RiscvLinux::Slow;
|
||||
break;
|
||||
case RiscvLinux::ZicbozBlockSize:
|
||||
pair->value = tc->getSystemPtr()->cacheLineSize();
|
||||
break;
|
||||
case RiscvLinux::HighestVirtAddress:
|
||||
pair->value = tc->getProcessPtr()->memState->getMmapEnd();
|
||||
break;
|
||||
|
||||
/*
|
||||
* For forward compatibility, unknown keys don't fail the whole
|
||||
* call, but get their element key set to -1 and value set to 0
|
||||
* indicating they're unrecognized.
|
||||
*/
|
||||
default:
|
||||
pair->key = -1;
|
||||
pair->value = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <class OS>
|
||||
static int
|
||||
hwprobe_get_values(ThreadContext *tc, VPtr<> pairs, typename OS::size_t pair_count,
|
||||
typename OS::size_t cpusetsize, VPtr<> cpus_user, unsigned int flags)
|
||||
{
|
||||
/* Check the reserved flags. */
|
||||
if (flags != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
RiscvLinux::cpumask_t *cpu_online_mask = get_cpu_online_mask(tc);
|
||||
if (cpu_online_mask == nullptr) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
RiscvLinux::cpumask_t *cpus = cpumask_malloc(tc);
|
||||
if (cpus == nullptr) {
|
||||
cpumask_free(cpu_online_mask);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (cpusetsize > cpu_online_mask->size) {
|
||||
cpusetsize = cpu_online_mask->size;
|
||||
}
|
||||
|
||||
RiscvLinux::riscv_hwprobe *pair;
|
||||
BufferArg pairs_buf(pairs, sizeof(RiscvLinux::riscv_hwprobe) * pair_count);
|
||||
|
||||
/*
|
||||
* The interface supports taking in a CPU mask, and returns values that
|
||||
* are consistent across that mask. Allow userspace to specify NULL and
|
||||
* 0 as a shortcut to all online CPUs.
|
||||
*/
|
||||
if (cpusetsize == 0 && !cpus_user) {
|
||||
cpumask_copy(cpus, cpu_online_mask);
|
||||
cpusetsize = cpu_online_mask->size;
|
||||
} else {
|
||||
BufferArg cpus_user_buf(cpus_user, cpusetsize);
|
||||
cpus_user_buf.copyIn(SETranslatingPortProxy(tc));
|
||||
|
||||
cpu_online_mask->size = cpusetsize;
|
||||
cpus->size = cpusetsize;
|
||||
memcpy(cpus->bits, cpus_user_buf.bufferPtr(), cpusetsize);
|
||||
|
||||
/*
|
||||
* Userspace must provide at least one online CPU, without that
|
||||
* there's no way to define what is supported.
|
||||
*/
|
||||
cpumask_and(cpus, cpus, cpu_online_mask);
|
||||
if (cpumask_empty(cpus)) {
|
||||
cpumask_free(cpu_online_mask);
|
||||
cpumask_free(cpus);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
pairs_buf.copyIn(SETranslatingPortProxy(tc));
|
||||
pair = (RiscvLinux::riscv_hwprobe *)pairs_buf.bufferPtr();
|
||||
|
||||
for (size_t i = 0; i < pair_count; i++, pair++) {
|
||||
pair->value = 0;
|
||||
hwprobe_one_pair(tc, pair, cpus);
|
||||
}
|
||||
|
||||
pairs_buf.copyOut(SETranslatingPortProxy(tc));
|
||||
|
||||
cpumask_free(cpu_online_mask);
|
||||
cpumask_free(cpus);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class OS>
|
||||
static int
|
||||
hwprobe_get_cpus(ThreadContext *tc, VPtr<> pairs, typename OS::size_t pair_count,
|
||||
typename OS::size_t cpusetsize, VPtr<> cpus_user, unsigned int flags)
|
||||
{
|
||||
if (flags != RISCV_HWPROBE_WHICH_CPUS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (cpusetsize == 0 || !cpus_user) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
RiscvLinux::cpumask_t *cpu_online_mask = get_cpu_online_mask(tc);
|
||||
if (cpu_online_mask == nullptr) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
RiscvLinux::cpumask_t *cpus = cpumask_malloc(tc);
|
||||
if (cpus == nullptr) {
|
||||
cpumask_free(cpu_online_mask);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
RiscvLinux::cpumask_t *one_cpu = cpumask_malloc(tc);
|
||||
if (one_cpu == nullptr) {
|
||||
cpumask_free(cpu_online_mask);
|
||||
cpumask_free(cpus);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (cpusetsize > cpu_online_mask->size) {
|
||||
cpusetsize = cpu_online_mask->size;
|
||||
}
|
||||
|
||||
RiscvLinux::riscv_hwprobe *pair;
|
||||
BufferArg cpus_user_buf(cpus_user, cpusetsize);
|
||||
cpus_user_buf.copyIn(SETranslatingPortProxy(tc));
|
||||
memcpy(cpus->bits, cpus_user_buf.bufferPtr(), cpusetsize);
|
||||
|
||||
if (cpumask_empty(cpus)) {
|
||||
cpumask_copy(cpus, cpu_online_mask);
|
||||
cpusetsize = cpu_online_mask->size;
|
||||
}
|
||||
|
||||
cpumask_and(cpus, cpus, cpu_online_mask);
|
||||
|
||||
BufferArg pairs_buf(pairs, sizeof(RiscvLinux::riscv_hwprobe) * pair_count);
|
||||
pairs_buf.copyIn(SETranslatingPortProxy(tc));
|
||||
pair = (RiscvLinux::riscv_hwprobe *)pairs_buf.bufferPtr();
|
||||
|
||||
for (size_t i = 0; i < pair_count; i++, pair++) {
|
||||
if (!riscv_hwprobe_key_is_valid(pair->key)) {
|
||||
*pair = (RiscvLinux::riscv_hwprobe){ .key = -1, .value = 0 };
|
||||
memset(cpus_user_buf.bufferPtr(), 0, cpusetsize);
|
||||
break;
|
||||
}
|
||||
|
||||
RiscvLinux::riscv_hwprobe tmp =
|
||||
(RiscvLinux::riscv_hwprobe){ .key = pair->key, .value = 0 };
|
||||
|
||||
for (int cpu = 0; cpu < cpusetsize * 8; cpu++) {
|
||||
if (!cpumask_test_cpu(cpu, cpus)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cpumask_set_cpu(cpu, one_cpu);
|
||||
|
||||
hwprobe_one_pair(tc, &tmp, one_cpu);
|
||||
|
||||
if (!riscv_hwprobe_pair_cmp(&tmp, pair)) {
|
||||
cpumask_clear_cpu(cpu, cpus);
|
||||
}
|
||||
|
||||
cpumask_clear_cpu(cpu, one_cpu);
|
||||
}
|
||||
}
|
||||
|
||||
pairs_buf.copyOut(SETranslatingPortProxy(tc));
|
||||
cpus_user_buf.copyOut(SETranslatingPortProxy(tc));
|
||||
|
||||
cpumask_free(cpu_online_mask);
|
||||
cpumask_free(cpus);
|
||||
cpumask_free(one_cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class OS>
|
||||
static SyscallReturn
|
||||
riscvHWProbeFunc(SyscallDesc *desc, ThreadContext *tc, VPtr<> pairs,
|
||||
typename OS::size_t pair_count, typename OS::size_t cpusetsize,
|
||||
VPtr<> cpus_user, unsigned int flags)
|
||||
{
|
||||
if (flags & RISCV_HWPROBE_WHICH_CPUS) {
|
||||
return hwprobe_get_cpus<OS>(tc, pairs, pair_count, cpusetsize,
|
||||
cpus_user, flags);
|
||||
}
|
||||
|
||||
return hwprobe_get_values<OS>(tc, pairs, pair_count, cpusetsize,
|
||||
cpus_user, flags);
|
||||
}
|
||||
|
||||
SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
|
||||
{ 0, "io_setup" },
|
||||
{ 1, "io_destroy" },
|
||||
@@ -382,6 +766,7 @@ SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
|
||||
{ 241, "perf_event_open" },
|
||||
{ 242, "accept4" },
|
||||
{ 243, "recvmmsg" },
|
||||
{ 258, "riscv_hwprobe", riscvHWProbeFunc<RiscvLinux64> },
|
||||
{ 260, "wait4", wait4Func<RiscvLinux64> },
|
||||
{ 261, "prlimit64", prlimitFunc<RiscvLinux64> },
|
||||
{ 262, "fanotify_init" },
|
||||
@@ -410,6 +795,33 @@ SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
|
||||
{ 285, "copy_file_range" },
|
||||
{ 286, "preadv2" },
|
||||
{ 287, "pwritev2" },
|
||||
{ 424, "pidfd_send_signal" },
|
||||
{ 425, "io_uring_setup" },
|
||||
{ 426, "io_uring_enter" },
|
||||
{ 427, "io_uring_register" },
|
||||
{ 428, "open_tree" },
|
||||
{ 429, "move_mount" },
|
||||
{ 430, "fsopen" },
|
||||
{ 431, "fsconfig" },
|
||||
{ 432, "fsmount" },
|
||||
{ 433, "fspick" },
|
||||
{ 434, "pidfd_open" },
|
||||
{ 435, "clone3", clone3Func<RiscvLinux64> },
|
||||
{ 436, "close_range" },
|
||||
{ 437, "openat2" },
|
||||
{ 438, "pidfd_getfd" },
|
||||
{ 439, "faccessat2" },
|
||||
{ 440, "process_madvise" },
|
||||
{ 441, "epoll_pwait2" },
|
||||
{ 442, "mount_setattr" },
|
||||
{ 443, "quotactl_fd" },
|
||||
{ 444, "landlock_create_ruleset" },
|
||||
{ 445, "landlock_add_rule" },
|
||||
{ 446, "landlock_restrict_self" },
|
||||
{ 447, "memfd_secret" },
|
||||
{ 448, "process_mrelease" },
|
||||
{ 449, "futex_waitv" },
|
||||
{ 450, "set_mempolicy_home_node" },
|
||||
{ 1024, "open", openFunc<RiscvLinux64> },
|
||||
{ 1025, "link", linkFunc },
|
||||
{ 1026, "unlink", unlinkFunc },
|
||||
@@ -721,6 +1133,7 @@ SyscallDescTable<SEWorkload::SyscallABI32> EmuLinux::syscallDescs32 = {
|
||||
{ 241, "perf_event_open" },
|
||||
{ 242, "accept4" },
|
||||
{ 243, "recvmmsg" },
|
||||
{ 258, "riscv_hwprobe", riscvHWProbeFunc<RiscvLinux32> },
|
||||
{ 260, "wait4", wait4Func<RiscvLinux32> },
|
||||
{ 261, "prlimit64", prlimitFunc<RiscvLinux32> },
|
||||
{ 262, "fanotify_init" },
|
||||
|
||||
@@ -149,6 +149,18 @@ inline constexpr RegId ArgumentRegs[] = {
|
||||
int_reg::A4, int_reg::A5, int_reg::A6, int_reg::A7
|
||||
};
|
||||
|
||||
/**
 * Registers listed from S11 down to S0, followed by Ra.
 * NOTE(review): presumably the order used by the push/pop register-list
 * instructions - confirm against the users of this table.
 *
 * Declared `inline` like the other register tables in this header so the
 * program holds a single vector instead of one dynamically initialised
 * copy per translation unit that includes the header.
 */
inline const std::vector<RegId> PushPopRegList = {
    int_reg::S11, int_reg::S10, int_reg::S9, int_reg::S8,
    int_reg::S7, int_reg::S6, int_reg::S5, int_reg::S4,
    int_reg::S3, int_reg::S2, int_reg::S1, int_reg::S0,
    int_reg::Ra
};
|
||||
|
||||
inline constexpr RegId StackRegs[] = {
|
||||
int_reg::S0, int_reg::S1, int_reg::S2, int_reg::S3,
|
||||
int_reg::S4, int_reg::S5, int_reg::S6, int_reg::S7,
|
||||
};
|
||||
|
||||
} // namespace RiscvISA
|
||||
} // namespace gem5
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ BitUnion64(ExtMachInst)
|
||||
// Decoder state
|
||||
Bitfield<63, 62> rv_type;
|
||||
Bitfield<61> compressed;
|
||||
Bitfield<60> enable_zcd;
|
||||
// More bits for vector extension
|
||||
Bitfield<57, 41> vl; // [0, 2**16]
|
||||
Bitfield<40> vill;
|
||||
@@ -126,6 +127,8 @@ BitUnion64(ExtMachInst)
|
||||
Bitfield< 6, 2> rc2;
|
||||
Bitfield< 9, 7> rp1;
|
||||
Bitfield< 4, 2> rp2;
|
||||
Bitfield< 9, 7> r1s;
|
||||
Bitfield< 4, 2> r2s;
|
||||
Bitfield<11, 7> fc1;
|
||||
Bitfield< 6, 2> fc2;
|
||||
Bitfield< 4, 2> fp2;
|
||||
@@ -144,6 +147,8 @@ BitUnion64(ExtMachInst)
|
||||
Bitfield<12, 10> cimm3;
|
||||
Bitfield< 6, 5> cimm2;
|
||||
Bitfield<12> cimm1;
|
||||
Bitfield< 7, 4> rlist;
|
||||
Bitfield< 3, 2> spimm;
|
||||
// Pseudo instructions
|
||||
Bitfield<31, 25> m5func;
|
||||
// vector
|
||||
|
||||
@@ -41,8 +41,6 @@ namespace gem5
|
||||
namespace X86ISA
|
||||
{
|
||||
|
||||
X86ISAInst::MicrocodeRom Decoder::microcodeRom;
|
||||
|
||||
Decoder::State
|
||||
Decoder::doResetState()
|
||||
{
|
||||
@@ -671,9 +669,6 @@ Decoder::doImmediateState()
|
||||
return nextState;
|
||||
}
|
||||
|
||||
Decoder::InstBytes Decoder::dummy;
|
||||
Decoder::InstCacheMap Decoder::instCacheMap;
|
||||
|
||||
StaticInstPtr
|
||||
Decoder::decode(ExtMachInst mach_inst, Addr addr)
|
||||
{
|
||||
|
||||
@@ -60,19 +60,19 @@ class Decoder : public InstDecoder
|
||||
// These are defined and documented in decoder_tables.cc
|
||||
static const uint8_t SizeTypeToSize[3][10];
|
||||
typedef const uint8_t ByteTable[256];
|
||||
static ByteTable Prefixes[2];
|
||||
static const ByteTable Prefixes[2];
|
||||
|
||||
static ByteTable UsesModRMOneByte;
|
||||
static ByteTable UsesModRMTwoByte;
|
||||
static ByteTable UsesModRMThreeByte0F38;
|
||||
static ByteTable UsesModRMThreeByte0F3A;
|
||||
static const ByteTable UsesModRMOneByte;
|
||||
static const ByteTable UsesModRMTwoByte;
|
||||
static const ByteTable UsesModRMThreeByte0F38;
|
||||
static const ByteTable UsesModRMThreeByte0F3A;
|
||||
|
||||
static ByteTable ImmediateTypeOneByte;
|
||||
static ByteTable ImmediateTypeTwoByte;
|
||||
static ByteTable ImmediateTypeThreeByte0F38;
|
||||
static ByteTable ImmediateTypeThreeByte0F3A;
|
||||
static const ByteTable ImmediateTypeOneByte;
|
||||
static const ByteTable ImmediateTypeTwoByte;
|
||||
static const ByteTable ImmediateTypeThreeByte0F38;
|
||||
static const ByteTable ImmediateTypeThreeByte0F3A;
|
||||
|
||||
static X86ISAInst::MicrocodeRom microcodeRom;
|
||||
X86ISAInst::MicrocodeRom microcodeRom;
|
||||
|
||||
protected:
|
||||
using MachInst = uint64_t;
|
||||
@@ -88,7 +88,7 @@ class Decoder : public InstDecoder
|
||||
{}
|
||||
};
|
||||
|
||||
static InstBytes dummy;
|
||||
InstBytes dummy;
|
||||
|
||||
// The bytes to be predecoded.
|
||||
MachInst fetchChunk;
|
||||
@@ -244,7 +244,7 @@ class Decoder : public InstDecoder
|
||||
decode_cache::InstMap<ExtMachInst> *instMap = nullptr;
|
||||
typedef std::unordered_map<
|
||||
CacheKey, decode_cache::InstMap<ExtMachInst> *> InstCacheMap;
|
||||
static InstCacheMap instCacheMap;
|
||||
InstCacheMap instCacheMap;
|
||||
|
||||
StaticInstPtr decodeInst(ExtMachInst mach_inst);
|
||||
|
||||
|
||||
@@ -350,9 +350,9 @@ class Rate : public Base
|
||||
"otherwise, it would be a Ratio");
|
||||
|
||||
private:
|
||||
Rate<T1,T2>() {}
|
||||
Rate() {}
|
||||
public:
|
||||
Rate<T1,T2>(Rate<T1,T2> const&) = delete;
|
||||
Rate(Rate const&) = delete;
|
||||
void operator=(Rate<T1,T2> const&) = delete;
|
||||
static Rate<T1,T2>*
|
||||
get()
|
||||
|
||||
@@ -240,7 +240,11 @@ BaseCPU::postInterrupt(ThreadID tid, int int_num, int index)
|
||||
// Only wake up syscall emulation if it is not waiting on a futex.
|
||||
// This is to model the fact that instructions such as ARM SEV
|
||||
// should wake up a WFE sleep, but not a futex syscall WAIT.
|
||||
if (FullSystem || !system->futexMap.is_waiting(threadContexts[tid]))
|
||||
//
|
||||
// For RISC-V, the WFI sleep wake up is implementation defined.
|
||||
// On SiFive cores, WFI wakes up the hart only if (mip & mie) != 0.
|
||||
if ((FullSystem && interrupts[tid]->isWakeUp()) ||
|
||||
!system->futexMap.is_waiting(threadContexts[tid]))
|
||||
wakeup(tid);
|
||||
}
|
||||
|
||||
@@ -855,13 +859,13 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
|
||||
"Simulator op (including micro ops) rate (op/s)")
|
||||
{
|
||||
simInsts
|
||||
.functor(BaseCPU::numSimulatedInsts)
|
||||
.functor(BaseCPU::GlobalStats::numSimulatedInsts)
|
||||
.precision(0)
|
||||
.prereq(simInsts)
|
||||
;
|
||||
|
||||
simOps
|
||||
.functor(BaseCPU::numSimulatedOps)
|
||||
.functor(BaseCPU::GlobalStats::numSimulatedOps)
|
||||
.precision(0)
|
||||
.prereq(simOps)
|
||||
;
|
||||
|
||||
@@ -156,6 +156,30 @@ class BaseCPU : public ClockedObject
|
||||
|
||||
statistics::Formula hostInstRate;
|
||||
statistics::Formula hostOpRate;
|
||||
|
||||
Counter previousInsts = 0;
|
||||
Counter previousOps = 0;
|
||||
|
||||
/**
 * Instructions simulated since the last stats reset: the running total
 * minus the total recorded in globalStats->previousInsts.
 */
static Counter
numSimulatedInsts()
{
    const Counter at_last_reset = globalStats->previousInsts;
    return totalNumSimulatedInsts() - at_last_reset;
}
|
||||
|
||||
/**
 * Ops simulated since the last stats reset: the running total minus the
 * total recorded in globalStats->previousOps.
 */
static Counter
numSimulatedOps()
{
    const Counter at_last_reset = globalStats->previousOps;
    return totalNumSimulatedOps() - at_last_reset;
}
|
||||
|
||||
void
|
||||
resetStats() override
|
||||
{
|
||||
previousInsts = totalNumSimulatedInsts();
|
||||
previousOps = totalNumSimulatedOps();
|
||||
|
||||
statistics::Group::resetStats();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -609,7 +633,7 @@ class BaseCPU : public ClockedObject
|
||||
|
||||
static int numSimulatedCPUs() { return cpuList.size(); }
|
||||
static Counter
|
||||
numSimulatedInsts()
|
||||
totalNumSimulatedInsts()
|
||||
{
|
||||
Counter total = 0;
|
||||
|
||||
@@ -621,7 +645,7 @@ class BaseCPU : public ClockedObject
|
||||
}
|
||||
|
||||
static Counter
|
||||
numSimulatedOps()
|
||||
totalNumSimulatedOps()
|
||||
{
|
||||
Counter total = 0;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2017 ARM Limited
|
||||
# Copyright (c) 2017, 2024 Arm Limited
|
||||
# All rights reserved
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
@@ -57,6 +57,7 @@ class DefaultFUPool(FUPool):
|
||||
FP_MultDiv(),
|
||||
ReadPort(),
|
||||
SIMD_Unit(),
|
||||
Matrix_Unit(),
|
||||
PredALU(),
|
||||
WritePort(),
|
||||
RdWrPort(),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2010, 2017, 2020 ARM Limited
|
||||
# Copyright (c) 2010, 2017, 2020, 2024 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
@@ -109,10 +109,27 @@ class SIMD_Unit(FUDesc):
|
||||
OpDesc(opClass="SimdExt"),
|
||||
OpDesc(opClass="SimdFloatExt"),
|
||||
OpDesc(opClass="SimdConfig"),
|
||||
OpDesc(opClass="SimdAes"),
|
||||
OpDesc(opClass="SimdAesMix"),
|
||||
OpDesc(opClass="SimdSha1Hash"),
|
||||
OpDesc(opClass="SimdSha1Hash2"),
|
||||
OpDesc(opClass="SimdSha256Hash"),
|
||||
OpDesc(opClass="SimdSha256Hash2"),
|
||||
OpDesc(opClass="SimdShaSigma2"),
|
||||
OpDesc(opClass="SimdShaSigma3"),
|
||||
]
|
||||
count = 4
|
||||
|
||||
|
||||
class Matrix_Unit(FUDesc):
    # Single functional unit that handles the three matrix op classes.
    opList = [
        OpDesc(opClass=op_class)
        for op_class in ("Matrix", "MatrixMov", "MatrixOP")
    ]
    count = 1
|
||||
|
||||
|
||||
class PredALU(FUDesc):
    # One unit, accepting only the SimdPredAlu op class.
    count = 1
    opList = [
        OpDesc(opClass="SimdPredAlu"),
    ]
|
||||
|
||||
@@ -122,7 +122,7 @@ ElasticTrace::regEtraceListeners()
|
||||
{
|
||||
assert(!allProbesReg);
|
||||
inform("@%llu: No. of instructions committed = %llu, registering elastic"
|
||||
" probe listeners", curTick(), cpu->numSimulatedInsts());
|
||||
" probe listeners", curTick(), cpu->totalNumSimulatedInsts());
|
||||
// Create new listeners: provide method to be called upon a notify() for
|
||||
// each probe point.
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
|
||||
|
||||
@@ -38,6 +38,8 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.objects.ClockedObject import ClockedObject
|
||||
from m5.objects.IndexingPolicies import *
|
||||
from m5.objects.ReplacementPolicies import *
|
||||
from m5.params import *
|
||||
from m5.proxy import *
|
||||
from m5.SimObject import *
|
||||
@@ -83,6 +85,38 @@ class BranchTargetBuffer(ClockedObject):
|
||||
numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
|
||||
|
||||
|
||||
class BTBIndexingPolicy(SimObject):
    # Abstract base for BTB indexing policies; bound to the C++
    # IndexingPolicyTemplate instantiated with BTBTagType.
    type = "BTBIndexingPolicy"
    abstract = True
    cxx_class = "gem5::IndexingPolicyTemplate<gem5::BTBTagType>"
    cxx_header = "cpu/pred/btb_entry.hh"
    cxx_template_params = ["class Types"]

    # Associativity, taken from the parent object by default.
    assoc = Param.Int(Parent.assoc, "associativity")
|
||||
|
||||
|
||||
class BTBSetAssociative(BTBIndexingPolicy):
    # Set-associative indexing policy for the BTB.
    type = "BTBSetAssociative"
    cxx_class = "gem5::BTBSetAssociative"
    cxx_header = "cpu/pred/btb_entry.hh"

    # Number of BTB entries; defaults to the parent's numEntries.
    num_entries = Param.Unsigned(
        Parent.numEntries, "Number of entries in the BTB"
    )

    # PC bits dropped before indexing. The default of 2 ignores the low
    # two bits, which are constant for 4-byte instructions.
    set_shift = Param.Unsigned(2, "Number of bits to shift PC to get index")

    # Width of the tag, which sits above the index and offset bits.
    tag_bits = Param.Unsigned(64, "number of bits in the tag")

    # Threads sharing the BTB; defaults to the parent's numThreads.
    numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
|
||||
|
||||
|
||||
class SimpleBTB(BranchTargetBuffer):
|
||||
type = "SimpleBTB"
|
||||
cxx_class = "gem5::branch_prediction::SimpleBTB"
|
||||
@@ -93,6 +127,19 @@ class SimpleBTB(BranchTargetBuffer):
|
||||
instShiftAmt = Param.Unsigned(
|
||||
Parent.instShiftAmt, "Number of bits to shift instructions by"
|
||||
)
|
||||
associativity = Param.Unsigned(1, "BTB associativity")
|
||||
btbReplPolicy = Param.BaseReplacementPolicy(
|
||||
LRURP(), "BTB replacement policy"
|
||||
)
|
||||
btbIndexingPolicy = Param.BTBIndexingPolicy(
|
||||
BTBSetAssociative(
|
||||
assoc=Parent.associativity,
|
||||
num_entries=Parent.numEntries,
|
||||
set_shift=Parent.instShiftAmt,
|
||||
numThreads=1,
|
||||
),
|
||||
"BTB indexing policy",
|
||||
)
|
||||
|
||||
|
||||
class IndirectPredictor(SimObject):
|
||||
|
||||
@@ -45,7 +45,7 @@ SimObject('BranchPredictor.py',
|
||||
sim_objects=[
|
||||
'BranchPredictor',
|
||||
'IndirectPredictor', 'SimpleIndirectPredictor',
|
||||
'BranchTargetBuffer', 'SimpleBTB',
|
||||
'BranchTargetBuffer', 'SimpleBTB', 'BTBIndexingPolicy', 'BTBSetAssociative',
|
||||
'ReturnAddrStack',
|
||||
'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor',
|
||||
'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB',
|
||||
|
||||
288
src/cpu/pred/btb_entry.hh
Normal file
288
src/cpu/pred/btb_entry.hh
Normal file
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Pranith Kumar
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Declaration of a BTB entry and BTB indexing policy.
|
||||
*/
|
||||
|
||||
#ifndef __CPU_PRED_BTB_ENTRY_HH__
|
||||
#define __CPU_PRED_BTB_ENTRY_HH__
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "arch/generic/pcstate.hh"
|
||||
#include "base/intmath.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
#include "mem/cache/replacement_policies/replaceable_entry.hh"
|
||||
#include "mem/cache/tags/indexing_policies/base.hh"
|
||||
#include "params/BTBIndexingPolicy.hh"
|
||||
#include "params/BTBSetAssociative.hh"
|
||||
|
||||
namespace gem5 {
|
||||
|
||||
class BTBTagType
|
||||
{
|
||||
public:
|
||||
struct KeyType
|
||||
{
|
||||
Addr address;
|
||||
ThreadID tid;
|
||||
};
|
||||
using Params = BTBIndexingPolicyParams;
|
||||
};
|
||||
|
||||
using BTBIndexingPolicy = IndexingPolicyTemplate<BTBTagType>;
|
||||
template class IndexingPolicyTemplate<BTBTagType>;
|
||||
|
||||
class BTBSetAssociative : public BTBIndexingPolicy
|
||||
{
|
||||
public:
|
||||
PARAMS(BTBSetAssociative);
|
||||
using KeyType = BTBTagType::KeyType;
|
||||
|
||||
BTBSetAssociative(const Params &p)
|
||||
: BTBIndexingPolicy(p, p.num_entries, p.set_shift),
|
||||
tagMask(mask(p.tag_bits))
|
||||
{
|
||||
setNumThreads(p.numThreads);
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Extract the set index for the instruction PC based on tid.
|
||||
*/
|
||||
uint32_t
|
||||
extractSet(const KeyType &key) const
|
||||
{
|
||||
return ((key.address >> setShift)
|
||||
^ (key.tid << (tagShift - setShift - log2NumThreads)))
|
||||
& setMask;
|
||||
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* Find all possible entries for insertion and replacement of an address.
|
||||
*/
|
||||
std::vector<ReplaceableEntry*>
|
||||
getPossibleEntries(const KeyType &key) const override
|
||||
{
|
||||
auto set_idx = extractSet(key);
|
||||
|
||||
assert(set_idx < sets.size());
|
||||
|
||||
return sets[set_idx];
|
||||
}
|
||||
|
||||
/**
|
||||
* Set number of threads sharing the BTB
|
||||
*/
|
||||
void
|
||||
setNumThreads(unsigned num_threads)
|
||||
{
|
||||
log2NumThreads = log2i(num_threads);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the tag from the given address.
|
||||
*/
|
||||
Addr
|
||||
extractTag(const Addr addr) const override
|
||||
{
|
||||
return (addr >> tagShift) & tagMask;
|
||||
}
|
||||
|
||||
Addr regenerateAddr(const KeyType &key,
|
||||
const ReplaceableEntry* entry) const override
|
||||
{
|
||||
panic("Not implemented!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
const uint64_t tagMask;
|
||||
unsigned log2NumThreads;
|
||||
};
|
||||
|
||||
namespace branch_prediction
|
||||
{
|
||||
|
||||
class BTBEntry : public ReplaceableEntry
|
||||
{
|
||||
public:
|
||||
using IndexingPolicy = gem5::BTBIndexingPolicy;
|
||||
using KeyType = gem5::BTBTagType::KeyType;
|
||||
using TagExtractor = std::function<Addr(Addr)>;
|
||||
|
||||
/** Default constructor */
|
||||
BTBEntry(TagExtractor ext)
|
||||
: inst(nullptr), extractTag(ext), valid(false), tag({MaxAddr, -1})
|
||||
{}
|
||||
|
||||
/** Update the target and instruction in the BTB entry.
|
||||
* During insertion, only the tag (key) is updated.
|
||||
*/
|
||||
void
|
||||
update(const PCStateBase &_target,
|
||||
StaticInstPtr _inst)
|
||||
{
|
||||
set(target, _target);
|
||||
inst = _inst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the given tag information corresponds to this entry's.
|
||||
*/
|
||||
bool
|
||||
match(const KeyType &key) const
|
||||
{
|
||||
return isValid() && (tag.address == extractTag(key.address))
|
||||
&& (tag.tid == key.tid);
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert the block by assigning it a tag and marking it valid. Touches
|
||||
* block if it hadn't been touched previously.
|
||||
*/
|
||||
void
|
||||
insert(const KeyType &key)
|
||||
{
|
||||
setValid();
|
||||
setTag({extractTag(key.address), key.tid});
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
BTBEntry(const BTBEntry &other)
|
||||
{
|
||||
valid = other.valid;
|
||||
tag = other.tag;
|
||||
inst = other.inst;
|
||||
extractTag = other.extractTag;
|
||||
set(target, other.target);
|
||||
}
|
||||
|
||||
/** Assignment operator */
|
||||
BTBEntry& operator=(const BTBEntry &other)
|
||||
{
|
||||
valid = other.valid;
|
||||
tag = other.tag;
|
||||
inst = other.inst;
|
||||
extractTag = other.extractTag;
|
||||
set(target, other.target);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the entry is valid.
|
||||
*/
|
||||
bool isValid() const { return valid; }
|
||||
|
||||
/**
|
||||
* Get tag associated to this block.
|
||||
*/
|
||||
KeyType getTag() const { return tag; }
|
||||
|
||||
/** Invalidate the block. Its contents are no longer valid. */
|
||||
void
|
||||
invalidate()
|
||||
{
|
||||
valid = false;
|
||||
setTag({MaxAddr, -1});
|
||||
}
|
||||
|
||||
/** The entry's target. */
|
||||
std::unique_ptr<PCStateBase> target;
|
||||
|
||||
/** Pointer to the static branch inst at this address */
|
||||
StaticInstPtr inst;
|
||||
|
||||
std::string
|
||||
print() const override
|
||||
{
|
||||
return csprintf("tag: %#x tid: %d valid: %d | %s", tag.address, tag.tid,
|
||||
isValid(), ReplaceableEntry::print());
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Set tag associated to this block.
|
||||
*/
|
||||
void setTag(KeyType _tag) { tag = _tag; }
|
||||
|
||||
/** Set valid bit. The block must be invalid beforehand. */
|
||||
void
|
||||
setValid()
|
||||
{
|
||||
assert(!isValid());
|
||||
valid = true;
|
||||
}
|
||||
|
||||
private:
|
||||
/** Callback used to extract the tag from the entry */
|
||||
TagExtractor extractTag;
|
||||
|
||||
/**
|
||||
* Valid bit. The contents of this entry are only valid if this bit is set.
|
||||
* @sa invalidate()
|
||||
* @sa insert()
|
||||
*/
|
||||
bool valid;
|
||||
|
||||
/** The entry's tag. */
|
||||
KeyType tag;
|
||||
};
|
||||
|
||||
} // namespace gem5::branch_prediction
|
||||
/**
|
||||
* This helper generates a tag extractor function object
|
||||
* which will be typically used by Replaceable entries indexed
|
||||
* with the BaseIndexingPolicy.
|
||||
* It allows to "decouple" indexing from tagging. Those entries
|
||||
* would call the functor without directly holding a pointer
|
||||
* to the indexing policy which should reside in the cache.
|
||||
*/
|
||||
static constexpr auto
|
||||
genTagExtractor(BTBIndexingPolicy *ip)
|
||||
{
|
||||
return [ip] (Addr addr) { return ip->extractTag(addr); };
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif //__CPU_PRED_BTB_ENTRY_HH__
|
||||
@@ -44,84 +44,38 @@
|
||||
#include "base/trace.hh"
|
||||
#include "debug/BTB.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace branch_prediction
|
||||
namespace gem5::branch_prediction
|
||||
{
|
||||
|
||||
SimpleBTB::SimpleBTB(const SimpleBTBParams &p)
|
||||
: BranchTargetBuffer(p),
|
||||
numEntries(p.numEntries),
|
||||
tagBits(p.tagBits),
|
||||
instShiftAmt(p.instShiftAmt),
|
||||
log2NumThreads(floorLog2(p.numThreads))
|
||||
btb("simpleBTB", p.numEntries, p.associativity,
|
||||
p.btbReplPolicy, p.btbIndexingPolicy,
|
||||
BTBEntry(genTagExtractor(p.btbIndexingPolicy)))
|
||||
{
|
||||
DPRINTF(BTB, "BTB: Creating BTB object.\n");
|
||||
|
||||
if (!isPowerOf2(numEntries)) {
|
||||
if (!isPowerOf2(p.numEntries)) {
|
||||
fatal("BTB entries is not a power of 2!");
|
||||
}
|
||||
|
||||
btb.resize(numEntries);
|
||||
|
||||
for (unsigned i = 0; i < numEntries; ++i) {
|
||||
btb[i].valid = false;
|
||||
}
|
||||
|
||||
idxMask = numEntries - 1;
|
||||
|
||||
tagMask = (1 << tagBits) - 1;
|
||||
|
||||
tagShiftAmt = instShiftAmt + floorLog2(numEntries);
|
||||
}
|
||||
|
||||
void
|
||||
SimpleBTB::memInvalidate()
|
||||
{
|
||||
for (unsigned i = 0; i < numEntries; ++i) {
|
||||
btb[i].valid = false;
|
||||
}
|
||||
btb.clear();
|
||||
}
|
||||
|
||||
inline
|
||||
unsigned
|
||||
SimpleBTB::getIndex(Addr instPC, ThreadID tid)
|
||||
{
|
||||
// Need to shift PC over by the word offset.
|
||||
return ((instPC >> instShiftAmt)
|
||||
^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads)))
|
||||
& idxMask;
|
||||
}
|
||||
|
||||
inline
|
||||
Addr
|
||||
SimpleBTB::getTag(Addr instPC)
|
||||
{
|
||||
return (instPC >> tagShiftAmt) & tagMask;
|
||||
}
|
||||
|
||||
SimpleBTB::BTBEntry *
|
||||
BTBEntry *
|
||||
SimpleBTB::findEntry(Addr instPC, ThreadID tid)
|
||||
{
|
||||
unsigned btb_idx = getIndex(instPC, tid);
|
||||
Addr inst_tag = getTag(instPC);
|
||||
|
||||
assert(btb_idx < numEntries);
|
||||
|
||||
if (btb[btb_idx].valid
|
||||
&& inst_tag == btb[btb_idx].tag
|
||||
&& btb[btb_idx].tid == tid) {
|
||||
return &btb[btb_idx];
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
return btb.findEntry({instPC, tid});
|
||||
}
|
||||
|
||||
bool
|
||||
SimpleBTB::valid(ThreadID tid, Addr instPC)
|
||||
{
|
||||
BTBEntry *entry = findEntry(instPC, tid);
|
||||
BTBEntry *entry = btb.findEntry({instPC, tid});
|
||||
|
||||
return entry != nullptr;
|
||||
}
|
||||
@@ -134,11 +88,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
|
||||
{
|
||||
stats.lookups[type]++;
|
||||
|
||||
BTBEntry *entry = findEntry(instPC, tid);
|
||||
BTBEntry *entry = btb.accessEntry({instPC, tid});
|
||||
|
||||
if (entry) {
|
||||
return entry->target.get();
|
||||
}
|
||||
|
||||
stats.misses[type]++;
|
||||
return nullptr;
|
||||
}
|
||||
@@ -146,11 +101,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
|
||||
const StaticInstPtr
|
||||
SimpleBTB::getInst(ThreadID tid, Addr instPC)
|
||||
{
|
||||
BTBEntry *entry = findEntry(instPC, tid);
|
||||
BTBEntry *entry = btb.findEntry({instPC, tid});
|
||||
|
||||
if (entry) {
|
||||
return entry->inst;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -159,18 +115,13 @@ SimpleBTB::update(ThreadID tid, Addr instPC,
|
||||
const PCStateBase &target,
|
||||
BranchType type, StaticInstPtr inst)
|
||||
{
|
||||
unsigned btb_idx = getIndex(instPC, tid);
|
||||
|
||||
assert(btb_idx < numEntries);
|
||||
|
||||
stats.updates[type]++;
|
||||
|
||||
btb[btb_idx].tid = tid;
|
||||
btb[btb_idx].valid = true;
|
||||
set(btb[btb_idx].target, target);
|
||||
btb[btb_idx].tag = getTag(instPC);
|
||||
btb[btb_idx].inst = inst;
|
||||
BTBEntry *victim = btb.findVictim({instPC, tid});
|
||||
|
||||
btb.insertEntry({instPC, tid}, victim);
|
||||
victim->update(target, inst);
|
||||
}
|
||||
|
||||
} // namespace branch_prediction
|
||||
} // namespace gem5
|
||||
|
||||
} // namespace gem5::branch_prediction
|
||||
|
||||
@@ -41,15 +41,16 @@
|
||||
#ifndef __CPU_PRED_SIMPLE_BTB_HH__
|
||||
#define __CPU_PRED_SIMPLE_BTB_HH__
|
||||
|
||||
#include "base/cache/associative_cache.hh"
|
||||
#include "base/logging.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/pred/btb.hh"
|
||||
#include "cpu/pred/btb_entry.hh"
|
||||
#include "mem/cache/replacement_policies/replaceable_entry.hh"
|
||||
#include "mem/cache/tags/indexing_policies/base.hh"
|
||||
#include "params/SimpleBTB.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace branch_prediction
|
||||
namespace gem5::branch_prediction
|
||||
{
|
||||
|
||||
class SimpleBTB : public BranchTargetBuffer
|
||||
@@ -66,38 +67,7 @@ class SimpleBTB : public BranchTargetBuffer
|
||||
StaticInstPtr inst = nullptr) override;
|
||||
const StaticInstPtr getInst(ThreadID tid, Addr instPC) override;
|
||||
|
||||
|
||||
private:
|
||||
struct BTBEntry
|
||||
{
|
||||
/** The entry's tag. */
|
||||
Addr tag = 0;
|
||||
|
||||
/** The entry's target. */
|
||||
std::unique_ptr<PCStateBase> target;
|
||||
|
||||
/** The entry's thread id. */
|
||||
ThreadID tid;
|
||||
|
||||
/** Whether or not the entry is valid. */
|
||||
bool valid = false;
|
||||
|
||||
/** Pointer to the static branch instruction at this address */
|
||||
StaticInstPtr inst = nullptr;
|
||||
};
|
||||
|
||||
|
||||
/** Returns the index into the BTB, based on the branch's PC.
|
||||
* @param inst_PC The branch to look up.
|
||||
* @return Returns the index into the BTB.
|
||||
*/
|
||||
inline unsigned getIndex(Addr instPC, ThreadID tid);
|
||||
|
||||
/** Returns the tag bits of a given address.
|
||||
* @param inst_PC The branch's address.
|
||||
* @return Returns the tag bits.
|
||||
*/
|
||||
inline Addr getTag(Addr instPC);
|
||||
|
||||
/** Internal call to find an address in the BTB
|
||||
* @param instPC The branch's address.
|
||||
@@ -106,31 +76,9 @@ class SimpleBTB : public BranchTargetBuffer
|
||||
BTBEntry *findEntry(Addr instPC, ThreadID tid);
|
||||
|
||||
/** The actual BTB. */
|
||||
std::vector<BTBEntry> btb;
|
||||
|
||||
/** The number of entries in the BTB. */
|
||||
unsigned numEntries;
|
||||
|
||||
/** The index mask. */
|
||||
unsigned idxMask;
|
||||
|
||||
/** The number of tag bits per entry. */
|
||||
unsigned tagBits;
|
||||
|
||||
/** The tag mask. */
|
||||
unsigned tagMask;
|
||||
|
||||
/** Number of bits to shift PC when calculating index. */
|
||||
unsigned instShiftAmt;
|
||||
|
||||
/** Number of bits to shift PC when calculating tag. */
|
||||
unsigned tagShiftAmt;
|
||||
|
||||
/** Log2 NumThreads used for hashing threadid */
|
||||
unsigned log2NumThreads;
|
||||
AssociativeCache<BTBEntry> btb;
|
||||
};
|
||||
|
||||
} // namespace branch_prediction
|
||||
} // namespace gem5
|
||||
} // namespace gem5::branch_prediction
|
||||
|
||||
#endif // __CPU_PRED_SIMPLE_BTB_HH__
|
||||
|
||||
@@ -41,3 +41,4 @@ class TesterThread(ClockedObject):
|
||||
thread_id = Param.Int("Unique TesterThread ID")
|
||||
num_lanes = Param.Int("Number of lanes this thread has")
|
||||
deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")
|
||||
cache_line_size = Param.UInt32("Size of cache line in cache")
|
||||
|
||||
@@ -64,7 +64,9 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
|
||||
std::shuffle(
|
||||
randAddressMap.begin(),
|
||||
randAddressMap.end(),
|
||||
std::default_random_engine(random_mt.random<unsigned>(0,UINT_MAX))
|
||||
// TODO: This is a bug unrelated to this draft PR but the GPU tester is
|
||||
// useful for testing this PR.
|
||||
std::default_random_engine(random_mt.random<unsigned>(0,UINT_MAX-1))
|
||||
);
|
||||
|
||||
// initialize atomic locations
|
||||
|
||||
@@ -70,7 +70,7 @@ DmaThread::issueLoadOps()
|
||||
Addr address = addrManager->getAddress(location);
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
|
||||
this->getName(), curEpisode->getEpisodeId(),
|
||||
ruby::printAddress(address));
|
||||
printAddress(address));
|
||||
|
||||
int load_size = sizeof(Value);
|
||||
|
||||
@@ -127,7 +127,7 @@ DmaThread::issueStoreOps()
|
||||
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
|
||||
"Value %d\n", this->getName(),
|
||||
curEpisode->getEpisodeId(), ruby::printAddress(address),
|
||||
curEpisode->getEpisodeId(), printAddress(address),
|
||||
new_value);
|
||||
|
||||
auto req = std::make_shared<Request>(address, sizeof(Value),
|
||||
@@ -211,7 +211,7 @@ DmaThread::hitCallback(PacketPtr pkt)
|
||||
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -"
|
||||
" Addr %s\n", this->getName(), curEpisode->getEpisodeId(),
|
||||
resp_cmd.toString(), ruby::printAddress(addr));
|
||||
resp_cmd.toString(), printAddress(addr));
|
||||
|
||||
if (resp_cmd == MemCmd::SwapResp) {
|
||||
// response to a pending atomic
|
||||
|
||||
@@ -67,7 +67,7 @@ GpuWavefront::issueLoadOps()
|
||||
Addr address = addrManager->getAddress(location);
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
|
||||
this->getName(), curEpisode->getEpisodeId(),
|
||||
ruby::printAddress(address));
|
||||
printAddress(address));
|
||||
|
||||
int load_size = sizeof(Value);
|
||||
|
||||
@@ -124,7 +124,7 @@ GpuWavefront::issueStoreOps()
|
||||
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
|
||||
"Value %d\n", this->getName(),
|
||||
curEpisode->getEpisodeId(), ruby::printAddress(address),
|
||||
curEpisode->getEpisodeId(), printAddress(address),
|
||||
new_value);
|
||||
|
||||
auto req = std::make_shared<Request>(address, sizeof(Value),
|
||||
@@ -178,7 +178,7 @@ GpuWavefront::issueAtomicOps()
|
||||
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
|
||||
this->getName(), curEpisode->getEpisodeId(),
|
||||
ruby::printAddress(address));
|
||||
printAddress(address));
|
||||
|
||||
// must be aligned with store size
|
||||
assert(address % sizeof(Value) == 0);
|
||||
@@ -268,7 +268,7 @@ GpuWavefront::hitCallback(PacketPtr pkt)
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
|
||||
"Addr %s\n", this->getName(),
|
||||
curEpisode->getEpisodeId(), resp_cmd.toString(),
|
||||
ruby::printAddress(addr));
|
||||
printAddress(addr));
|
||||
|
||||
// whether the transaction is done after this hitCallback
|
||||
bool isTransactionDone = true;
|
||||
|
||||
@@ -43,6 +43,7 @@ TesterThread::TesterThread(const Params &p)
|
||||
: ClockedObject(p),
|
||||
threadEvent(this, "TesterThread tick"),
|
||||
deadlockCheckEvent(this),
|
||||
cacheLineSize(p.cache_line_size),
|
||||
threadId(p.thread_id),
|
||||
numLanes(p.num_lanes),
|
||||
tester(nullptr), addrManager(nullptr), port(nullptr),
|
||||
@@ -383,7 +384,7 @@ TesterThread::validateAtomicResp(Location loc, int lane, Value ret_val)
|
||||
ss << threadName << ": Atomic Op returned unexpected value\n"
|
||||
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
|
||||
<< "\tLane ID " << lane << "\n"
|
||||
<< "\tAddress " << ruby::printAddress(addr) << "\n"
|
||||
<< "\tAddress " << printAddress(addr) << "\n"
|
||||
<< "\tAtomic Op's return value " << ret_val << "\n";
|
||||
|
||||
// print out basic info
|
||||
@@ -409,7 +410,7 @@ TesterThread::validateLoadResp(Location loc, int lane, Value ret_val)
|
||||
<< "\tTesterThread " << threadId << "\n"
|
||||
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
|
||||
<< "\tLane ID " << lane << "\n"
|
||||
<< "\tAddress " << ruby::printAddress(addr) << "\n"
|
||||
<< "\tAddress " << printAddress(addr) << "\n"
|
||||
<< "\tLoaded value " << ret_val << "\n"
|
||||
<< "\tLast writer " << addrManager->printLastWriter(loc) << "\n";
|
||||
|
||||
@@ -467,7 +468,7 @@ TesterThread::printOutstandingReqs(const OutstandingReqTable& table,
|
||||
|
||||
for (const auto& m : table) {
|
||||
for (const auto& req : m.second) {
|
||||
ss << "\t\t\tAddr " << ruby::printAddress(m.first)
|
||||
ss << "\t\t\tAddr " << printAddress(m.first)
|
||||
<< ": delta (curCycle - issueCycle) = "
|
||||
<< (cur_cycle - req.issueCycle) << std::endl;
|
||||
}
|
||||
@@ -488,4 +489,10 @@ TesterThread::printAllOutstandingReqs(std::stringstream& ss) const
|
||||
<< pendingFenceCount << std::endl;
|
||||
}
|
||||
|
||||
std::string
|
||||
TesterThread::printAddress(Addr addr) const
|
||||
{
|
||||
return ruby::printAddress(addr, cacheLineSize * 8);
|
||||
}
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -132,6 +132,7 @@ class TesterThread : public ClockedObject
|
||||
{}
|
||||
};
|
||||
|
||||
int cacheLineSize;
|
||||
// the unique global id of this thread
|
||||
int threadId;
|
||||
// width of this thread (1 for cpu thread & wf size for gpu wavefront)
|
||||
@@ -204,6 +205,7 @@ class TesterThread : public ClockedObject
|
||||
|
||||
void printOutstandingReqs(const OutstandingReqTable& table,
|
||||
std::stringstream& ss) const;
|
||||
std::string printAddress(Addr addr) const;
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -124,7 +124,8 @@ Check::initiatePrefetch()
|
||||
|
||||
// push the subblock onto the sender state. The sequencer will
|
||||
// update the subblock on the return
|
||||
pkt->senderState = new SenderState(m_address, req->getSize());
|
||||
pkt->senderState = new SenderState(m_address, req->getSize(),
|
||||
CACHE_LINE_BITS);
|
||||
|
||||
if (port->sendTimingReq(pkt)) {
|
||||
DPRINTF(RubyTest, "successfully initiated prefetch.\n");
|
||||
@@ -161,7 +162,8 @@ Check::initiateFlush()
|
||||
|
||||
// push the subblock onto the sender state. The sequencer will
|
||||
// update the subblock on the return
|
||||
pkt->senderState = new SenderState(m_address, req->getSize());
|
||||
pkt->senderState = new SenderState(m_address, req->getSize(),
|
||||
CACHE_LINE_BITS);
|
||||
|
||||
if (port->sendTimingReq(pkt)) {
|
||||
DPRINTF(RubyTest, "initiating Flush - successful\n");
|
||||
@@ -207,7 +209,8 @@ Check::initiateAction()
|
||||
|
||||
// push the subblock onto the sender state. The sequencer will
|
||||
// update the subblock on the return
|
||||
pkt->senderState = new SenderState(writeAddr, req->getSize());
|
||||
pkt->senderState = new SenderState(m_address, req->getSize(),
|
||||
CACHE_LINE_BITS);
|
||||
|
||||
if (port->sendTimingReq(pkt)) {
|
||||
DPRINTF(RubyTest, "initiating action - successful\n");
|
||||
@@ -261,7 +264,8 @@ Check::initiateCheck()
|
||||
|
||||
// push the subblock onto the sender state. The sequencer will
|
||||
// update the subblock on the return
|
||||
pkt->senderState = new SenderState(m_address, req->getSize());
|
||||
pkt->senderState = new SenderState(m_address, req->getSize(),
|
||||
CACHE_LINE_BITS);
|
||||
|
||||
if (port->sendTimingReq(pkt)) {
|
||||
DPRINTF(RubyTest, "initiating check - successful\n");
|
||||
@@ -291,7 +295,9 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime)
|
||||
// This isn't exactly right since we now have multi-byte checks
|
||||
// assert(getAddress() == address);
|
||||
|
||||
assert(ruby::makeLineAddress(m_address) == ruby::makeLineAddress(address));
|
||||
int block_size_bits = CACHE_LINE_BITS;
|
||||
assert(ruby::makeLineAddress(m_address, block_size_bits) ==
|
||||
ruby::makeLineAddress(address, block_size_bits));
|
||||
assert(data != NULL);
|
||||
|
||||
DPRINTF(RubyTest, "RubyTester Callback\n");
|
||||
@@ -342,7 +348,7 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime)
|
||||
}
|
||||
|
||||
DPRINTF(RubyTest, "proc: %d, Address: 0x%x\n", proc,
|
||||
ruby::makeLineAddress(m_address));
|
||||
ruby::makeLineAddress(m_address, block_size_bits));
|
||||
DPRINTF(RubyTest, "Callback done\n");
|
||||
debugPrint();
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ class SubBlock;
|
||||
|
||||
const int CHECK_SIZE_BITS = 2;
|
||||
const int CHECK_SIZE = (1 << CHECK_SIZE_BITS);
|
||||
const int CACHE_LINE_BITS = 6;
|
||||
|
||||
class Check
|
||||
{
|
||||
|
||||
@@ -90,7 +90,9 @@ class RubyTester : public ClockedObject
|
||||
{
|
||||
ruby::SubBlock subBlock;
|
||||
|
||||
SenderState(Addr addr, int size) : subBlock(addr, size) {}
|
||||
SenderState(Addr addr, int size, int cl_size)
|
||||
: subBlock(addr, size, cl_size)
|
||||
{}
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -81,8 +81,6 @@ class AMDGPUDevice(PciDevice):
|
||||
InterruptPin = 2
|
||||
ExpansionROM = 0
|
||||
|
||||
rom_binary = Param.String("ROM binary dumped from hardware")
|
||||
trace_file = Param.String("MMIO trace collected on hardware")
|
||||
checkpoint_before_mmios = Param.Bool(
|
||||
False, "Take a checkpoint before the device begins sending MMIOs"
|
||||
)
|
||||
|
||||
@@ -58,12 +58,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
init_interrupt_count(0), _lastVMID(0),
|
||||
deviceMem(name() + ".deviceMem", p.memories, false, "", false)
|
||||
{
|
||||
// Loading the rom binary dumped from hardware.
|
||||
std::ifstream romBin;
|
||||
romBin.open(p.rom_binary, std::ios::binary);
|
||||
romBin.read((char *)rom.data(), ROM_SIZE);
|
||||
romBin.close();
|
||||
|
||||
// System pointer needs to be explicitly set for device memory since
|
||||
// DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
|
||||
// Note this means the cache line size is system wide.
|
||||
@@ -92,10 +86,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
panic("Unknown GPU device %s\n", p.device_name);
|
||||
}
|
||||
|
||||
if (p.trace_file != "") {
|
||||
mmioReader.readMMIOTrace(p.trace_file);
|
||||
}
|
||||
|
||||
int sdma_id = 0;
|
||||
for (auto& s : p.sdmas) {
|
||||
s->setGPUDevice(this);
|
||||
|
||||
@@ -168,10 +168,15 @@ GenericPciHost::write(PacketPtr pkt)
|
||||
pkt->getSize());
|
||||
|
||||
PciDevice *const pci_dev(getDevice(dev_addr.first));
|
||||
panic_if(!pci_dev,
|
||||
warn_if(!pci_dev,
|
||||
"%02x:%02x.%i: Write to config space on non-existent PCI device\n",
|
||||
dev_addr.first.bus, dev_addr.first.dev, dev_addr.first.func);
|
||||
|
||||
if (!pci_dev) {
|
||||
pkt->makeAtomicResponse();
|
||||
return 20000; // 20ns default from PciDevice.py
|
||||
}
|
||||
|
||||
// @todo Remove this after testing
|
||||
pkt->headerDelay = pkt->payloadDelay = 0;
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ Clint::Clint(const Params ¶ms) :
|
||||
BasicPioDevice(params, params.pio_size),
|
||||
system(params.system),
|
||||
nThread(params.num_threads),
|
||||
signal(params.name + ".signal", 0, this),
|
||||
signal(params.name + ".signal", 0, this, INT_RTC),
|
||||
reset(params.name + ".reset"),
|
||||
resetMtimecmp(params.reset_mtimecmp),
|
||||
registers(params.name + ".registers", params.pio_addr, this,
|
||||
@@ -69,9 +69,11 @@ Clint::Clint(const Params ¶ms) :
|
||||
void
|
||||
Clint::raiseInterruptPin(int id)
|
||||
{
|
||||
// Increment mtime
|
||||
// Increment mtime when received RTC signal
|
||||
uint64_t& mtime = registers.mtime.get();
|
||||
if (id == INT_RTC) {
|
||||
mtime++;
|
||||
}
|
||||
|
||||
for (int context_id = 0; context_id < nThread; context_id++) {
|
||||
|
||||
@@ -261,7 +263,7 @@ Clint::doReset() {
|
||||
registers.msip[i].reset();
|
||||
}
|
||||
// We need to update the mtip interrupt bits when reset
|
||||
raiseInterruptPin(0);
|
||||
raiseInterruptPin(INT_RESET);
|
||||
}
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -91,6 +91,13 @@ class Clint : public BasicPioDevice
|
||||
void raiseInterruptPin(int id);
|
||||
void lowerInterruptPin(int id) {}
|
||||
|
||||
// Interrupt ID
|
||||
enum InterruptId
|
||||
{
|
||||
INT_RTC = 0, // received from RTC(signal port)
|
||||
INT_RESET, // received from reset port
|
||||
};
|
||||
|
||||
// Register bank
|
||||
public:
|
||||
|
||||
|
||||
@@ -477,7 +477,7 @@ class VirtQueue : public Serializable
|
||||
Index index;
|
||||
};
|
||||
|
||||
VirtRing<T>(PortProxy &proxy, ByteOrder bo, uint16_t size) :
|
||||
VirtRing(PortProxy &proxy, ByteOrder bo, uint16_t size) :
|
||||
header{0, 0}, ring(size), _proxy(proxy), _base(0), byteOrder(bo)
|
||||
{}
|
||||
|
||||
@@ -550,7 +550,7 @@ class VirtQueue : public Serializable
|
||||
|
||||
private:
|
||||
// Remove default constructor
|
||||
VirtRing<T>();
|
||||
VirtRing();
|
||||
|
||||
/** Guest physical memory proxy */
|
||||
PortProxy &_proxy;
|
||||
|
||||
2
src/mem/cache/cache_blk.hh
vendored
2
src/mem/cache/cache_blk.hh
vendored
@@ -461,7 +461,7 @@ class CacheBlk : public TaggedEntry
|
||||
|
||||
protected:
|
||||
/** The current coherence status of this block. @sa CoherenceBits */
|
||||
unsigned coherence;
|
||||
unsigned coherence = 0;
|
||||
|
||||
// The following setters have been marked as protected because their
|
||||
// respective variables should only be modified at 2 moments:
|
||||
|
||||
16
src/mem/cache/prefetch/Prefetcher.py
vendored
16
src/mem/cache/prefetch/Prefetcher.py
vendored
@@ -599,6 +599,22 @@ class BOPPrefetcher(QueuedPrefetcher):
|
||||
on_inst = False
|
||||
|
||||
|
||||
class SmsPrefetcher(QueuedPrefetcher):
    # Paper: https://web.eecs.umich.edu/~twenisch/papers/isca06.pdf

    # Binding to the C++ implementation.
    type = "SmsPrefetcher"
    cxx_class = "gem5::prefetch::Sms"
    cxx_header = "mem/cache/prefetch/sms.hh"

    # Table sizing and spatial region size.
    ft_size = Param.Unsigned(64, "Size of Filter and Active generation table")
    pht_size = Param.Unsigned(16384, "Size of pattern history table")
    region_size = Param.Unsigned(4096, "Spatial region size")

    # Values assigned to attributes of the same names for this prefetcher.
    queue_squash = True
    queue_filter = True
    cache_snoop = True
    prefetch_on_access = True
    on_inst = False
|
||||
|
||||
|
||||
class SBOOEPrefetcher(QueuedPrefetcher):
|
||||
type = "SBOOEPrefetcher"
|
||||
cxx_class = "gem5::prefetch::SBOOE"
|
||||
|
||||
6
src/mem/cache/prefetch/SConscript
vendored
6
src/mem/cache/prefetch/SConscript
vendored
@@ -31,8 +31,9 @@ Import('*')
|
||||
SimObject('Prefetcher.py', sim_objects=[
|
||||
'BasePrefetcher', 'MultiPrefetcher', 'QueuedPrefetcher',
|
||||
'StridePrefetcherHashedSetAssociative', 'StridePrefetcher',
|
||||
'TaggedPrefetcher', 'IndirectMemoryPrefetcher', 'SignaturePathPrefetcher',
|
||||
'SignaturePathPrefetcherV2', 'AccessMapPatternMatching', 'AMPMPrefetcher',
|
||||
'SmsPrefetcher', 'TaggedPrefetcher', 'IndirectMemoryPrefetcher',
|
||||
'SignaturePathPrefetcher', 'SignaturePathPrefetcherV2',
|
||||
'AccessMapPatternMatching', 'AMPMPrefetcher',
|
||||
'DeltaCorrelatingPredictionTables', 'DCPTPrefetcher',
|
||||
'IrregularStreamBufferPrefetcher', 'SlimAMPMPrefetcher',
|
||||
'BOPPrefetcher', 'SBOOEPrefetcher', 'STeMSPrefetcher', 'PIFPrefetcher'])
|
||||
@@ -47,6 +48,7 @@ Source('indirect_memory.cc')
|
||||
Source('pif.cc')
|
||||
Source('queued.cc')
|
||||
Source('sbooe.cc')
|
||||
Source('sms.cc')
|
||||
Source('signature_path.cc')
|
||||
Source('signature_path_v2.cc')
|
||||
Source('slim_ampm.cc')
|
||||
|
||||
161
src/mem/cache/prefetch/sms.cc
vendored
Normal file
161
src/mem/cache/prefetch/sms.cc
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Samsung Electronics
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
 * Describes an SMS prefetcher based on template policies.
|
||||
*/
|
||||
|
||||
#include "mem/cache/prefetch/sms.hh"
|
||||
|
||||
#include "debug/HWPrefetch.hh"
|
||||
#include "params/SmsPrefetcher.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace prefetch
|
||||
{
|
||||
|
||||
/**
 * Construct the prefetcher: take the table sizes and the region size from
 * the parameters and start with empty tables and ordering lists.
 */
Sms::Sms(const SmsPrefetcherParams &p)
    : Queued(p),
      Max_Contexts(p.ft_size),
      MAX_PHTSize(p.pht_size),
      Region_Size(p.region_size)
{
    // Tables.
    FT.clear();
    AGT.clear();
    AGTPC.clear();
    PHT.clear();

    // Replacement-order bookkeeping for the tables above.
    fifoFT.clear();
    lruAGT.clear();
    lruPHT.clear();
}
|
||||
void
|
||||
Sms::notifyEvict(const EvictionInfo &info)
|
||||
{
|
||||
//Check if any active generation has ended
|
||||
Addr region_base = roundDown(info.addr, Region_Size);
|
||||
std::pair <Addr,Addr> pc_offset = AGTPC[region_base];
|
||||
if (AGT.find(region_base) != AGT.end()) {
|
||||
//remove old recording
|
||||
if (PHT.find(pc_offset) != PHT.end()) {
|
||||
PHT[pc_offset].clear();
|
||||
}
|
||||
//Move from AGT to PHT
|
||||
for (std::set<Addr>::iterator it = AGT[region_base].begin();
|
||||
it != AGT[region_base].end(); it ++) {
|
||||
PHT[pc_offset].insert(*it);
|
||||
}
|
||||
lruPHT.push_front(pc_offset);
|
||||
}
|
||||
|
||||
while (PHT.size() > MAX_PHTSize) {
|
||||
PHT.erase(lruPHT.back());
|
||||
lruPHT.pop_back();
|
||||
}
|
||||
|
||||
AGTPC.erase(region_base);
|
||||
AGT.erase(region_base);
|
||||
}
|
||||
void
|
||||
Sms::calculatePrefetch(const PrefetchInfo &pfi,
|
||||
std::vector<AddrPriority> &addresses,
|
||||
const CacheAccessor &cache)
|
||||
{
|
||||
|
||||
if (!pfi.hasPC()) {
|
||||
DPRINTF(HWPrefetch, "Ignoring request with no PC.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
Addr blk_addr = blockAddress(pfi.getAddr());
|
||||
Addr pc = pfi.getPC();
|
||||
Addr region_base = roundDown(blk_addr, Region_Size);
|
||||
Addr offset = blk_addr - region_base;
|
||||
|
||||
//Training
|
||||
if (AGT.find(region_base) != AGT.end()) {
|
||||
assert (FT.find(region_base) == FT.end());
|
||||
// Record Pattern
|
||||
AGT[region_base].insert(offset);
|
||||
//update LRU
|
||||
for (std::deque <Addr>::iterator lit = lruAGT.begin();
|
||||
lit != lruAGT.end(); lit ++) {
|
||||
if ((*lit) == region_base) {
|
||||
lruAGT.erase(lit);
|
||||
lruAGT.push_front(region_base);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (FT.find(region_base) != FT.end()) {
|
||||
//move entry from FT to AGT
|
||||
AGT[region_base].insert(FT[region_base].second);
|
||||
AGTPC[region_base] = FT[region_base];
|
||||
lruAGT.push_front(region_base);
|
||||
//Record latest offset
|
||||
AGT[region_base].insert(offset);
|
||||
//Recycle FT entry
|
||||
FT.erase(region_base);
|
||||
//Make space for next entry
|
||||
while (AGT.size() > Max_Contexts) {
|
||||
AGT.erase(lruAGT.back());
|
||||
AGTPC.erase(lruAGT.back());
|
||||
lruAGT.pop_back();
|
||||
}
|
||||
} else {
|
||||
// Trigger Access
|
||||
FT[region_base] = std::make_pair (pc,offset);
|
||||
fifoFT.push_front(region_base);
|
||||
while (FT.size() > Max_Contexts) {
|
||||
FT.erase(fifoFT.back());
|
||||
fifoFT.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
//Prediction
|
||||
std::pair <Addr, Addr> pc_offset = std::make_pair(pc,offset);
|
||||
if (PHT.find(pc_offset) != PHT.end()) {
|
||||
for (std::set<Addr>::iterator it = PHT[pc_offset].begin();
|
||||
it != PHT[pc_offset].end(); it ++) {
|
||||
Addr pref_addr = blockAddress(region_base + (*it));
|
||||
addresses.push_back(AddrPriority(pref_addr,0));
|
||||
}
|
||||
for (std::deque < std::pair <Addr,Addr> >::iterator lit
|
||||
= lruPHT.begin(); lit != lruPHT.end(); lit ++) {
|
||||
if ((*lit) == pc_offset) {
|
||||
lruPHT.erase(lit);
|
||||
lruPHT.push_front(pc_offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // namespace prefetch
|
||||
} // namespace gem5
|
||||
82
src/mem/cache/prefetch/sms.hh
vendored
Normal file
82
src/mem/cache/prefetch/sms.hh
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Samsung Electronics
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Describes a SMS prefetcher.
|
||||
*/
|
||||
|
||||
#ifndef __MEM_CACHE_PREFETCH_SMS_HH__
#define __MEM_CACHE_PREFETCH_SMS_HH__

#include <deque>
#include <map>
#include <set>
#include <utility>
#include <vector>

#include "mem/cache/prefetch/queued.hh"
#include "mem/packet.hh"

namespace gem5
{

struct SmsPrefetcherParams;

namespace prefetch
{

/**
 * SMS prefetcher built on top of the Queued prefetcher.
 *
 * Accesses are grouped into regions of Region_Size bytes. The offsets
 * touched inside a region are collected while the region is tracked and
 * are later replayed as prefetches when the same (PC, offset) pair
 * triggers a new region.
 * NOTE(review): FT/AGT/PHT presumably stand for the filter table, active
 * generation table and pattern history table of the Spatial Memory
 * Streaming design -- confirm against the referenced paper.
 */
class Sms : public Queued
{

  private:
    /** Maximum number of regions kept in FT and in AGT (each). */
    const int Max_Contexts; //= 64;
    /** Maximum number of (PC, offset) entries kept in PHT. */
    const uint64_t MAX_PHTSize; //= 512;
    /** Size in bytes of a region; block addresses are rounded down to it. */
    const Addr Region_Size; //= 4096;

    /** Region base -> offsets of the blocks accessed in that region. */
    std::map< Addr, std::set<Addr> > AGT;
    /** Region base -> (PC, offset) of the access that triggered it. */
    std::map< Addr, std::pair<Addr,Addr> > AGTPC;
    /** Region base -> (PC, offset) of the first access seen for it. */
    std::map< Addr, std::pair<Addr,Addr> > FT;
    /** (PC, offset) -> offsets to prefetch relative to the region base. */
    std::map< std::pair <Addr,Addr> , std::set<Addr> > PHT;
    /** Region bases in the order they entered FT, newest at the front. */
    std::deque<Addr> fifoFT;
    /** Region bases of AGT by recency, most recently used at the front. */
    std::deque<Addr> lruAGT;
    /** PHT keys by recency, most recently used at the front. */
    std::deque< std::pair <Addr,Addr> > lruPHT;

    using EvictionInfo = CacheDataUpdateProbeArg;

    /**
     * Callback invoked with information about an evicted cache block.
     * NOTE(review): the implementation lives in sms.cc; it appears to
     * transfer the pattern recorded for the affected region into PHT and
     * to drop the region from AGT/AGTPC -- confirm there.
     */
    void notifyEvict(const EvictionInfo &info) override;

  public:
    /** Build the prefetcher from its generated parameter struct. */
    Sms(const SmsPrefetcherParams &p);
    ~Sms() = default;

    /**
     * Train the tables with the access described by pfi and append the
     * predicted prefetch addresses to addresses.
     *
     * @param pfi Information about the observed access.
     * @param addresses Output list of (address, priority) candidates.
     * @param cache Accessor for the cache this prefetcher is attached to.
     */
    void calculatePrefetch(const PrefetchInfo &pfi,
                           std::vector<AddrPriority> &addresses,
                           const CacheAccessor &cache) override;
};

} // namespace prefetch
} // namespace gem5

#endif // __MEM_CACHE_PREFETCH_SMS_HH__
|
||||
@@ -51,37 +51,33 @@ maskLowOrderBits(Addr addr, unsigned int number)
|
||||
}
|
||||
|
||||
Addr
|
||||
getOffset(Addr addr)
|
||||
getOffset(Addr addr, int cacheLineBits)
|
||||
{
|
||||
return bitSelect(addr, 0, RubySystem::getBlockSizeBits() - 1);
|
||||
}
|
||||
|
||||
Addr
|
||||
makeLineAddress(Addr addr)
|
||||
{
|
||||
return mbits<Addr>(addr, 63, RubySystem::getBlockSizeBits());
|
||||
assert(cacheLineBits < 64);
|
||||
return bitSelect(addr, 0, cacheLineBits - 1);
|
||||
}
|
||||
|
||||
Addr
|
||||
makeLineAddress(Addr addr, int cacheLineBits)
|
||||
{
|
||||
assert(cacheLineBits < 64);
|
||||
return maskLowOrderBits(addr, cacheLineBits);
|
||||
}
|
||||
|
||||
// returns the next stride address based on line address
|
||||
Addr
|
||||
makeNextStrideAddress(Addr addr, int stride)
|
||||
makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes)
|
||||
{
|
||||
return makeLineAddress(addr) +
|
||||
static_cast<int>(RubySystem::getBlockSizeBytes()) * stride;
|
||||
return makeLineAddress(addr, floorLog2(cacheLineBytes))
|
||||
+ cacheLineBytes * stride;
|
||||
}
|
||||
|
||||
std::string
|
||||
printAddress(Addr addr)
|
||||
printAddress(Addr addr, int cacheLineBits)
|
||||
{
|
||||
std::stringstream out;
|
||||
out << "[" << std::hex << "0x" << addr << "," << " line 0x"
|
||||
<< makeLineAddress(addr) << std::dec << "]";
|
||||
<< makeLineAddress(addr, cacheLineBits) << std::dec << "]";
|
||||
return out.str();
|
||||
}
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "base/types.hh"
|
||||
|
||||
namespace gem5
|
||||
@@ -44,11 +45,10 @@ namespace ruby
|
||||
// selects bits inclusive
|
||||
Addr bitSelect(Addr addr, unsigned int small, unsigned int big);
|
||||
Addr maskLowOrderBits(Addr addr, unsigned int number);
|
||||
Addr getOffset(Addr addr);
|
||||
Addr makeLineAddress(Addr addr);
|
||||
Addr getOffset(Addr addr, int cacheLineBits);
|
||||
Addr makeLineAddress(Addr addr, int cacheLineBits);
|
||||
Addr makeNextStrideAddress(Addr addr, int stride);
|
||||
std::string printAddress(Addr addr);
|
||||
Addr makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes);
|
||||
std::string printAddress(Addr addr, int cacheLineBits);
|
||||
|
||||
} // namespace ruby
|
||||
} // namespace gem5
|
||||
|
||||
@@ -40,8 +40,8 @@
|
||||
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/WriteMask.hh"
|
||||
#include "mem/ruby/system/RubySystem.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
@@ -51,17 +51,22 @@ namespace ruby
|
||||
|
||||
DataBlock::DataBlock(const DataBlock &cp)
|
||||
{
|
||||
assert(cp.isAlloc());
|
||||
assert(cp.getBlockSize() > 0);
|
||||
assert(!m_alloc);
|
||||
|
||||
uint8_t *block_update;
|
||||
size_t block_bytes = RubySystem::getBlockSizeBytes();
|
||||
m_data = new uint8_t[block_bytes];
|
||||
memcpy(m_data, cp.m_data, block_bytes);
|
||||
m_block_size = cp.getBlockSize();
|
||||
m_data = new uint8_t[m_block_size];
|
||||
memcpy(m_data, cp.m_data, m_block_size);
|
||||
m_alloc = true;
|
||||
m_block_size = m_block_size;
|
||||
// If this data block is involved in an atomic operation, the effect
|
||||
// of applying the atomic operations on the data block are recorded in
|
||||
// m_atomicLog. If so, we must copy over every entry in the change log
|
||||
for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
|
||||
block_update = new uint8_t[block_bytes];
|
||||
memcpy(block_update, cp.m_atomicLog[i], block_bytes);
|
||||
block_update = new uint8_t[m_block_size];
|
||||
memcpy(block_update, cp.m_atomicLog[i], m_block_size);
|
||||
m_atomicLog.push_back(block_update);
|
||||
}
|
||||
}
|
||||
@@ -69,21 +74,44 @@ DataBlock::DataBlock(const DataBlock &cp)
|
||||
void
|
||||
DataBlock::alloc()
|
||||
{
|
||||
m_data = new uint8_t[RubySystem::getBlockSizeBytes()];
|
||||
assert(!m_alloc);
|
||||
|
||||
if (!m_block_size) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_data = new uint8_t[m_block_size];
|
||||
m_alloc = true;
|
||||
clear();
|
||||
}
|
||||
|
||||
void
|
||||
DataBlock::realloc(int blk_size)
|
||||
{
|
||||
m_block_size = blk_size;
|
||||
assert(m_block_size > 0);
|
||||
|
||||
if (m_alloc) {
|
||||
delete [] m_data;
|
||||
m_alloc = false;
|
||||
}
|
||||
alloc();
|
||||
}
|
||||
|
||||
void
|
||||
DataBlock::clear()
|
||||
{
|
||||
memset(m_data, 0, RubySystem::getBlockSizeBytes());
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
memset(m_data, 0, m_block_size);
|
||||
}
|
||||
|
||||
bool
|
||||
DataBlock::equal(const DataBlock& obj) const
|
||||
{
|
||||
size_t block_bytes = RubySystem::getBlockSizeBytes();
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
size_t block_bytes = m_block_size;
|
||||
// Check that the block contents match
|
||||
if (memcmp(m_data, obj.m_data, block_bytes)) {
|
||||
return false;
|
||||
@@ -102,7 +130,9 @@ DataBlock::equal(const DataBlock& obj) const
|
||||
void
|
||||
DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask)
|
||||
{
|
||||
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
for (int i = 0; i < m_block_size; i++) {
|
||||
if (mask.getMask(i, 1)) {
|
||||
m_data[i] = dblk.m_data[i];
|
||||
}
|
||||
@@ -113,7 +143,9 @@ void
|
||||
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask,
|
||||
bool isAtomicNoReturn)
|
||||
{
|
||||
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
for (int i = 0; i < m_block_size; i++) {
|
||||
m_data[i] = dblk.m_data[i];
|
||||
}
|
||||
mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn);
|
||||
@@ -122,7 +154,9 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask,
|
||||
void
|
||||
DataBlock::print(std::ostream& out) const
|
||||
{
|
||||
int size = RubySystem::getBlockSizeBytes();
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
int size = m_block_size;
|
||||
out << "[ ";
|
||||
for (int i = 0; i < size; i++) {
|
||||
out << std::setw(2) << std::setfill('0') << std::hex
|
||||
@@ -147,6 +181,7 @@ DataBlock::popAtomicLogEntryFront()
|
||||
void
|
||||
DataBlock::clearAtomicLogEntries()
|
||||
{
|
||||
assert(m_alloc);
|
||||
for (auto log : m_atomicLog) {
|
||||
delete [] log;
|
||||
}
|
||||
@@ -156,35 +191,59 @@ DataBlock::clearAtomicLogEntries()
|
||||
const uint8_t*
|
||||
DataBlock::getData(int offset, int len) const
|
||||
{
|
||||
assert(offset + len <= RubySystem::getBlockSizeBytes());
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
assert(offset + len <= m_block_size);
|
||||
return &m_data[offset];
|
||||
}
|
||||
|
||||
uint8_t*
|
||||
DataBlock::getDataMod(int offset)
|
||||
{
|
||||
assert(m_alloc);
|
||||
return &m_data[offset];
|
||||
}
|
||||
|
||||
void
|
||||
DataBlock::setData(const uint8_t *data, int offset, int len)
|
||||
{
|
||||
assert(m_alloc);
|
||||
memcpy(&m_data[offset], data, len);
|
||||
}
|
||||
|
||||
void
|
||||
DataBlock::setData(PacketPtr pkt)
|
||||
{
|
||||
int offset = getOffset(pkt->getAddr());
|
||||
assert(offset + pkt->getSize() <= RubySystem::getBlockSizeBytes());
|
||||
assert(m_alloc);
|
||||
assert(m_block_size > 0);
|
||||
int offset = getOffset(pkt->getAddr(), floorLog2(m_block_size));
|
||||
assert(offset + pkt->getSize() <= m_block_size);
|
||||
pkt->writeData(&m_data[offset]);
|
||||
}
|
||||
|
||||
DataBlock &
|
||||
DataBlock::operator=(const DataBlock & obj)
|
||||
{
|
||||
// Reallocate if needed
|
||||
if (m_alloc && m_block_size != obj.getBlockSize()) {
|
||||
delete [] m_data;
|
||||
m_block_size = obj.getBlockSize();
|
||||
alloc();
|
||||
} else if (!m_alloc) {
|
||||
m_block_size = obj.getBlockSize();
|
||||
alloc();
|
||||
|
||||
// Assume this will be realloc'd later if zero.
|
||||
if (m_block_size == 0) {
|
||||
return *this;
|
||||
}
|
||||
} else {
|
||||
assert(m_alloc && m_block_size == obj.getBlockSize());
|
||||
}
|
||||
assert(m_block_size > 0);
|
||||
|
||||
uint8_t *block_update;
|
||||
size_t block_bytes = RubySystem::getBlockSizeBytes();
|
||||
size_t block_bytes = m_block_size;
|
||||
// Copy entire block contents from obj to current block
|
||||
memcpy(m_data, obj.m_data, block_bytes);
|
||||
// If this data block is involved in an atomic operation, the effect
|
||||
|
||||
@@ -61,8 +61,14 @@ class WriteMask;
|
||||
class DataBlock
|
||||
{
|
||||
public:
|
||||
DataBlock()
|
||||
// Ideally this should not be called. We allow default so that protocols
|
||||
// do not need to be changed.
|
||||
DataBlock() = default;
|
||||
|
||||
DataBlock(int blk_size)
|
||||
{
|
||||
assert(!m_alloc);
|
||||
m_block_size = blk_size;
|
||||
alloc();
|
||||
}
|
||||
|
||||
@@ -101,10 +107,16 @@ class DataBlock
|
||||
bool equal(const DataBlock& obj) const;
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
int getBlockSize() const { return m_block_size; }
|
||||
void setBlockSize(int block_size) { realloc(block_size); }
|
||||
bool isAlloc() const { return m_alloc; }
|
||||
void realloc(int blk_size);
|
||||
|
||||
private:
|
||||
void alloc();
|
||||
uint8_t *m_data;
|
||||
bool m_alloc;
|
||||
uint8_t *m_data = nullptr;
|
||||
bool m_alloc = false;
|
||||
int m_block_size = 0;
|
||||
|
||||
// Tracks block changes when atomic ops are applied
|
||||
std::deque<uint8_t*> m_atomicLog;
|
||||
@@ -124,18 +136,21 @@ DataBlock::assign(uint8_t *data)
|
||||
inline uint8_t
|
||||
DataBlock::getByte(int whichByte) const
|
||||
{
|
||||
assert(m_alloc);
|
||||
return m_data[whichByte];
|
||||
}
|
||||
|
||||
inline void
|
||||
DataBlock::setByte(int whichByte, uint8_t data)
|
||||
{
|
||||
assert(m_alloc);
|
||||
m_data[whichByte] = data;
|
||||
}
|
||||
|
||||
inline void
|
||||
DataBlock::copyPartial(const DataBlock & dblk, int offset, int len)
|
||||
{
|
||||
assert(m_alloc);
|
||||
setData(&dblk.m_data[offset], offset, len);
|
||||
}
|
||||
|
||||
|
||||
@@ -30,6 +30,8 @@
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "mem/ruby/system/RubySystem.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
@@ -37,6 +39,11 @@ namespace ruby
|
||||
{
|
||||
|
||||
NetDest::NetDest()
|
||||
{
|
||||
}
|
||||
|
||||
NetDest::NetDest(RubySystem *ruby_system)
|
||||
: m_ruby_system(ruby_system)
|
||||
{
|
||||
resize();
|
||||
}
|
||||
@@ -44,6 +51,7 @@ NetDest::NetDest()
|
||||
void
|
||||
NetDest::add(MachineID newElement)
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize());
|
||||
m_bits[vecIndex(newElement)].add(bitIndex(newElement.num));
|
||||
}
|
||||
@@ -51,6 +59,7 @@ NetDest::add(MachineID newElement)
|
||||
void
|
||||
NetDest::addNetDest(const NetDest& netDest)
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == netDest.getSize());
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
m_bits[i].addSet(netDest.m_bits[i]);
|
||||
@@ -60,6 +69,8 @@ NetDest::addNetDest(const NetDest& netDest)
|
||||
void
|
||||
NetDest::setNetDest(MachineType machine, const Set& set)
|
||||
{
|
||||
assert(m_ruby_system != nullptr);
|
||||
|
||||
// assure that there is only one set of destinations for this machine
|
||||
assert(MachineType_base_level((MachineType)(machine + 1)) -
|
||||
MachineType_base_level(machine) == 1);
|
||||
@@ -69,12 +80,14 @@ NetDest::setNetDest(MachineType machine, const Set& set)
|
||||
void
|
||||
NetDest::remove(MachineID oldElement)
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num));
|
||||
}
|
||||
|
||||
void
|
||||
NetDest::removeNetDest(const NetDest& netDest)
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == netDest.getSize());
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
m_bits[i].removeSet(netDest.m_bits[i]);
|
||||
@@ -84,6 +97,7 @@ NetDest::removeNetDest(const NetDest& netDest)
|
||||
void
|
||||
NetDest::clear()
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
m_bits[i].clear();
|
||||
}
|
||||
@@ -101,6 +115,8 @@ NetDest::broadcast()
|
||||
void
|
||||
NetDest::broadcast(MachineType machineType)
|
||||
{
|
||||
assert(m_ruby_system != nullptr);
|
||||
|
||||
for (NodeID i = 0; i < MachineType_base_count(machineType); i++) {
|
||||
MachineID mach = {machineType, i};
|
||||
add(mach);
|
||||
@@ -111,6 +127,9 @@ NetDest::broadcast(MachineType machineType)
|
||||
std::vector<NodeID>
|
||||
NetDest::getAllDest()
|
||||
{
|
||||
assert(m_ruby_system != nullptr);
|
||||
assert(m_bits.size() > 0);
|
||||
|
||||
std::vector<NodeID> dest;
|
||||
dest.clear();
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
@@ -127,6 +146,8 @@ NetDest::getAllDest()
|
||||
int
|
||||
NetDest::count() const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
|
||||
int counter = 0;
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
counter += m_bits[i].count();
|
||||
@@ -137,12 +158,14 @@ NetDest::count() const
|
||||
NodeID
|
||||
NetDest::elementAt(MachineID index)
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
return m_bits[vecIndex(index)].elementAt(bitIndex(index.num));
|
||||
}
|
||||
|
||||
MachineID
|
||||
NetDest::smallestElement() const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(count() > 0);
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
for (NodeID j = 0; j < m_bits[i].getSize(); j++) {
|
||||
@@ -158,6 +181,9 @@ NetDest::smallestElement() const
|
||||
MachineID
|
||||
NetDest::smallestElement(MachineType machine) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_ruby_system != nullptr);
|
||||
|
||||
int size = m_bits[MachineType_base_level(machine)].getSize();
|
||||
for (NodeID j = 0; j < size; j++) {
|
||||
if (m_bits[MachineType_base_level(machine)].isElement(j)) {
|
||||
@@ -173,6 +199,7 @@ NetDest::smallestElement(MachineType machine) const
|
||||
bool
|
||||
NetDest::isBroadcast() const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
if (!m_bits[i].isBroadcast()) {
|
||||
return false;
|
||||
@@ -185,6 +212,7 @@ NetDest::isBroadcast() const
|
||||
bool
|
||||
NetDest::isEmpty() const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
if (!m_bits[i].isEmpty()) {
|
||||
return false;
|
||||
@@ -197,8 +225,9 @@ NetDest::isEmpty() const
|
||||
NetDest
|
||||
NetDest::OR(const NetDest& orNetDest) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == orNetDest.getSize());
|
||||
NetDest result;
|
||||
NetDest result(m_ruby_system);
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
result.m_bits[i] = m_bits[i].OR(orNetDest.m_bits[i]);
|
||||
}
|
||||
@@ -209,8 +238,9 @@ NetDest::OR(const NetDest& orNetDest) const
|
||||
NetDest
|
||||
NetDest::AND(const NetDest& andNetDest) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == andNetDest.getSize());
|
||||
NetDest result;
|
||||
NetDest result(m_ruby_system);
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
result.m_bits[i] = m_bits[i].AND(andNetDest.m_bits[i]);
|
||||
}
|
||||
@@ -221,6 +251,7 @@ NetDest::AND(const NetDest& andNetDest) const
|
||||
bool
|
||||
NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == other_netDest.getSize());
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
if (!m_bits[i].intersectionIsEmpty(other_netDest.m_bits[i])) {
|
||||
@@ -233,6 +264,7 @@ NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const
|
||||
bool
|
||||
NetDest::isSuperset(const NetDest& test) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == test.getSize());
|
||||
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
@@ -246,12 +278,15 @@ NetDest::isSuperset(const NetDest& test) const
|
||||
bool
|
||||
NetDest::isElement(MachineID element) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num));
|
||||
}
|
||||
|
||||
void
|
||||
NetDest::resize()
|
||||
{
|
||||
assert(m_ruby_system != nullptr);
|
||||
|
||||
m_bits.resize(MachineType_base_level(MachineType_NUM));
|
||||
assert(m_bits.size() == MachineType_NUM);
|
||||
|
||||
@@ -263,6 +298,7 @@ NetDest::resize()
|
||||
void
|
||||
NetDest::print(std::ostream& out) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
out << "[NetDest (" << m_bits.size() << ") ";
|
||||
|
||||
for (int i = 0; i < m_bits.size(); i++) {
|
||||
@@ -277,6 +313,7 @@ NetDest::print(std::ostream& out) const
|
||||
bool
|
||||
NetDest::isEqual(const NetDest& n) const
|
||||
{
|
||||
assert(m_bits.size() > 0);
|
||||
assert(m_bits.size() == n.m_bits.size());
|
||||
for (unsigned int i = 0; i < m_bits.size(); ++i) {
|
||||
if (!m_bits[i].isEqual(n.m_bits[i]))
|
||||
@@ -285,5 +322,19 @@ NetDest::isEqual(const NetDest& n) const
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
NetDest::MachineType_base_count(const MachineType& obj)
|
||||
{
|
||||
assert(m_ruby_system != nullptr);
|
||||
return m_ruby_system->MachineType_base_count(obj);
|
||||
}
|
||||
|
||||
int
|
||||
NetDest::MachineType_base_number(const MachineType& obj)
|
||||
{
|
||||
assert(m_ruby_system != nullptr);
|
||||
return m_ruby_system->MachineType_base_number(obj);
|
||||
}
|
||||
|
||||
} // namespace ruby
|
||||
} // namespace gem5
|
||||
|
||||
@@ -41,6 +41,8 @@ namespace gem5
|
||||
namespace ruby
|
||||
{
|
||||
|
||||
class RubySystem;
|
||||
|
||||
// NetDest specifies the network destination of a Message
|
||||
class NetDest
|
||||
{
|
||||
@@ -48,6 +50,7 @@ class NetDest
|
||||
// Constructors
|
||||
// creates an empty set
|
||||
NetDest();
|
||||
NetDest(RubySystem *ruby_system);
|
||||
explicit NetDest(int bit_size);
|
||||
|
||||
NetDest& operator=(const Set& obj);
|
||||
@@ -98,6 +101,8 @@ class NetDest
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
void setRubySystem(RubySystem *rs) { m_ruby_system = rs; resize(); }
|
||||
|
||||
private:
|
||||
// returns a value >= MachineType_base_level("this machine")
|
||||
// and < MachineType_base_level("next highest machine")
|
||||
@@ -112,6 +117,12 @@ class NetDest
|
||||
NodeID bitIndex(NodeID index) const { return index; }
|
||||
|
||||
std::vector<Set> m_bits; // a vector of bit vectors - i.e. Sets
|
||||
|
||||
// Needed to call MachineType_base_count/level
|
||||
RubySystem *m_ruby_system = nullptr;
|
||||
|
||||
int MachineType_base_count(const MachineType& obj);
|
||||
int MachineType_base_number(const MachineType& obj);
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
|
||||
@@ -38,13 +38,14 @@ namespace ruby
|
||||
|
||||
using stl_helpers::operator<<;
|
||||
|
||||
SubBlock::SubBlock(Addr addr, int size)
|
||||
SubBlock::SubBlock(Addr addr, int size, int cl_bits)
|
||||
{
|
||||
m_address = addr;
|
||||
resize(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
setByte(i, 0);
|
||||
}
|
||||
m_cache_line_bits = cl_bits;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -52,7 +53,7 @@ SubBlock::internalMergeFrom(const DataBlock& data)
|
||||
{
|
||||
int size = getSize();
|
||||
assert(size > 0);
|
||||
int offset = getOffset(m_address);
|
||||
int offset = getOffset(m_address, m_cache_line_bits);
|
||||
for (int i = 0; i < size; i++) {
|
||||
this->setByte(i, data.getByte(offset + i));
|
||||
}
|
||||
@@ -63,7 +64,7 @@ SubBlock::internalMergeTo(DataBlock& data) const
|
||||
{
|
||||
int size = getSize();
|
||||
assert(size > 0);
|
||||
int offset = getOffset(m_address);
|
||||
int offset = getOffset(m_address, m_cache_line_bits);
|
||||
for (int i = 0; i < size; i++) {
|
||||
// This will detect crossing a cache line boundary
|
||||
data.setByte(offset + i, this->getByte(i));
|
||||
|
||||
@@ -45,7 +45,7 @@ class SubBlock
|
||||
{
|
||||
public:
|
||||
SubBlock() { }
|
||||
SubBlock(Addr addr, int size);
|
||||
SubBlock(Addr addr, int size, int cl_bits);
|
||||
~SubBlock() { }
|
||||
|
||||
Addr getAddress() const { return m_address; }
|
||||
@@ -74,6 +74,7 @@ class SubBlock
|
||||
// Data Members (m_ prefix)
|
||||
Addr m_address;
|
||||
std::vector<uint8_t> m_data;
|
||||
int m_cache_line_bits;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user