Merge branch 'develop' into update-pannotia-tests

This commit is contained in:
Bobby R. Bruce
2024-10-18 13:40:59 -07:00
committed by GitHub
232 changed files with 4652 additions and 883 deletions

View File

@@ -5,7 +5,7 @@ name: CI Tests
on:
pull_request:
types: [opened, edited, synchronize, ready_for_review]
types: [opened, synchronize, ready_for_review]
concurrency:
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
@@ -21,17 +21,48 @@ jobs:
- uses: actions/setup-python@v5
- uses: pre-commit/action@v3.0.1
get-date:
# We use the date to label caches. A cache is a "hit" if the requested
# binary and date are the same as what is stored in the cache.
# This essentially means the first job to run on a given day for a given
# binary will always be a "miss" and will have to build the binary then
# upload it as that day's binary to upload. While this isn't the most
# efficient way to do this, the alternative was to take a hash of the
# `src` directory contents and use it as a hash. We found there to be bugs
# with the hash function where this task would timeout. This approach is
# simple, works, and still provides some level of caching.
runs-on: ubuntu-latest
outputs:
date: ${{ steps.date.outputs.date }}
steps:
- name: Get the current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
unittests-all-opt:
runs-on: [self-hosted, linux, x64]
if: github.event.pull_request.draft == false
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
needs: [pre-commit] # only runs if pre-commit passes.
needs: [pre-commit, get-date] # only runs if pre-commit passes.
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
# Restore the cache if available. As this just builds the unittests
# we only obtain the cache and do not provide it if it is not
# available.
- name: Cache build/ALL
uses: actions/cache/restore@v4
with:
path: build/ALL
key: testlib-build-all-${{ env.date }}
restore-keys: |
testlib-build-all
- name: CI Unittests
working-directory: ${{ github.workspace }}
run: scons build/ALL/unittests.opt -j $(nproc)
run: scons --no-compress-debug build/ALL/unittests.opt -j $(nproc)
- run: echo "This job's status is ${{ job.status }}."
testlib-quick-matrix:
@@ -83,14 +114,24 @@ jobs:
runs-on: [self-hosted, linux, x64]
if: github.event.pull_request.draft == false
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
needs: [pre-commit, testlib-quick-matrix]
needs: [pre-commit, testlib-quick-matrix, get-date]
strategy:
matrix:
build-target: ${{ fromJson(needs.testlib-quick-matrix.outputs.build-matrix) }}
steps:
- uses: actions/checkout@v4
- name: Cache build/ALL
uses: actions/cache@v4
if: ${{ endsWith(matrix.build-target, 'build/ALL/gem5.opt') }}
with:
path: build/ALL
key: testlib-build-all-${{ env.date }}
restore-keys: |
testlib-build-all
- name: Build gem5
run: scons ${{ matrix.build-target }} -j $(nproc)
run: scons --no-compress-debug ${{ matrix.build-target }} -j $(nproc)
# Upload the gem5 binary as an artifact.
# Note: the "achor.txt" file is a hack to make sure the paths are
@@ -199,13 +240,23 @@ jobs:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/gcn-gpu:latest
timeout-minutes: 180
needs: [pre-commit]
needs: [pre-commit, get-date]
steps:
- uses: actions/checkout@v4
# Obtain the cache if available. If not available this will upload
# this job's instance of the cache.
- name: Cache build/VEGA_X86
uses: actions/cache@v4
with:
path: build/VEGA_X86
key: testlib-build-vega-${{ env.date }}
restore-keys: |
testlib-build-vega
# Build the VEGA_X86/gem5.opt binary.
- name: Build VEGA_X86/gem5.opt
run: scons build/VEGA_X86/gem5.opt -j`nproc`
run: scons --no-compress-debug build/VEGA_X86/gem5.opt -j`nproc`
# Run the GPU tests.
- name: Run Testlib GPU Tests

View File

@@ -13,8 +13,8 @@ jobs:
strategy:
fail-fast: false
matrix:
image: [gcc-version-13, gcc-version-12, gcc-version-11, gcc-version-10, clang-version-18, clang-version-17, clang-version-16, clang-version-15,
clang-version-14, ubuntu-22.04_all-dependencies, ubuntu-24.04_all-dependencies, ubuntu-24.04_min-dependencies]
image: [gcc-version-14, gcc-version-13, gcc-version-12, gcc-version-11, gcc-version-10, clang-version-18, clang-version-17, clang-version-16,
clang-version-15, clang-version-14, ubuntu-22.04_all-dependencies, ubuntu-24.04_all-dependencies, ubuntu-24.04_min-dependencies]
opts: [.opt, .fast]
runs-on: [self-hosted, linux, x64]
timeout-minutes: 2880 # 48 hours
@@ -32,7 +32,7 @@ jobs:
matrix:
gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, MIPS, 'NULL', NULL_MESI_Two_Level,
NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86]
image: [gcc-version-13, clang-version-18]
image: [gcc-version-14, clang-version-18]
opts: [.opt]
runs-on: [self-hosted, linux, x64]
timeout-minutes: 2880 # 48 hours

View File

@@ -8,6 +8,14 @@ on:
workflow_dispatch:
jobs:
get-date:
runs-on: ubuntu-latest
steps:
- name: Get the current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
# this builds both unittests.fast and unittests.debug
unittests-fast-debug:
strategy:
@@ -16,13 +24,14 @@ jobs:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
timeout-minutes: 60
needs: get-date
steps:
- uses: actions/checkout@v4
- name: Cache build/ALL
uses: actions/cache/restore@v4
with:
path: build/ALL
key: testlib-build-all-${{ hashFiles('src/**') }}
key: testlib-build-all-${{ env.date }}
restore-keys: |
testlib-build-all
- name: ALL/unittests.${{ matrix.type }} UnitTests
@@ -38,6 +47,7 @@ jobs:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
timeout-minutes: 1440 # 24 hours for entire matrix to run
needs: get-date
steps:
- name: Clean runner
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
@@ -47,13 +57,13 @@ jobs:
uses: actions/cache@v4
with:
path: build/NULL
key: testlib-build-null-${{ hashFiles('src/**') }}
key: testlib-build-null-${{ env.date }}
- name: Restore build/ALL cache
uses: actions/cache@v4
with:
path: build/ALL
key: testlib-build-all-${{ hashFiles('src/**') }}
key: testlib-build-all-${{ env.date }}
- name: long ${{ matrix.test-type }} tests
working-directory: ${{ github.workspace }}/tests
@@ -81,6 +91,7 @@ jobs:
gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
timeout-minutes: 1440 # 24 hours
needs: get-date
steps:
- name: Clean runner
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
@@ -90,7 +101,7 @@ jobs:
uses: actions/cache@v4
with:
path: build/ALL
key: testlib-build-all-${{ hashFiles('src/**') }}
key: testlib-build-all-${{ env.date }}
restore-keys: |
testlib-build-all
@@ -113,6 +124,7 @@ jobs:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/gcn-gpu:latest
timeout-minutes: 720 # 12 hours
needs: get-date
steps:
- uses: actions/checkout@v4
@@ -123,7 +135,7 @@ jobs:
uses: actions/cache@v4
with:
path: build/VEGA_X86
key: testlib-build-vega-${{ hashFiles('src/**') }}
key: testlib-build-vega-${{ env.date }}
restore-keys: |
testlib-build-vega

View File

@@ -9,6 +9,13 @@ on:
jobs:
get-date:
runs-on: ubuntu-latest
steps:
- name: Get the current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
# start running the very-long tests
testlib-very-long-tests:
strategy:
@@ -18,6 +25,7 @@ jobs:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
timeout-minutes: 4320 # 3 days
needs: get-date
steps:
- name: Clean runner
run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
@@ -27,7 +35,7 @@ jobs:
uses: actions/cache@v4
with:
path: build/ALL
key: testlib-build-all-${{ hashFiles('src/**') }}
key: testlib-build-all-${{ env.date }}
restore-keys: |
testlib-build-all
@@ -49,6 +57,7 @@ jobs:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/gcn-gpu:latest
timeout-minutes: 4320 # 3 days
needs: get-date
steps:
- uses: actions/checkout@v4
@@ -59,7 +68,7 @@ jobs:
uses: actions/cache@v4
with:
path: build/VEGA_X86
key: testlib-build-vega-${{ hashFiles('src/**') }}
key: testlib-build-vega-${{ env.date }}
restore-keys: |
testlib-build-vega

View File

@@ -49,11 +49,11 @@ exclude: |
tests/.*/ref/.*
)$
default_stages: [commit]
default_stages: [pre-commit]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
@@ -69,7 +69,7 @@ repos:
- id: destroyed-symlinks
- id: requirements-txt-fixer
- repo: https://github.com/PyCQA/isort
rev: 5.11.5
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
@@ -77,11 +77,11 @@ repos:
hooks:
- id: yamlfmt
- repo: https://github.com/psf/black
rev: 23.9.1
rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/asottile/pyupgrade
rev: v3.14.0
rev: v3.17.0
hooks:
- id: pyupgrade
# Python 3.8 is the earliest version supported.

7
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,7 @@
{
"python.analysis.extraPaths": [
"src/python",
"ext",
"tests"
]
}

View File

@@ -568,9 +568,9 @@ def config_hmc_dev(opt, system, hmc_host):
# Attach 4 serial link to 4 crossbar/s
for i in range(opt.num_serial_links):
if opt.enable_link_monitor:
system.hmc_host.seriallink[
i
].mem_side_port = system.hmc_dev.lmonitor[i].cpu_side_port
system.hmc_host.seriallink[i].mem_side_port = (
system.hmc_dev.lmonitor[i].cpu_side_port
)
system.hmc_dev.lmonitor[i].mem_side_port = system.hmc_dev.xbar[
i
].cpu_side_ports
@@ -613,14 +613,12 @@ def config_hmc_dev(opt, system, hmc_host):
]
# Connect the bridge between crossbars
system.hmc_dev.xbar[
i
].mem_side_ports = system.hmc_dev.buffers[
index
].cpu_side_port
system.hmc_dev.buffers[
index
].mem_side_port = system.hmc_dev.xbar[j].cpu_side_ports
system.hmc_dev.xbar[i].mem_side_ports = (
system.hmc_dev.buffers[index].cpu_side_port
)
system.hmc_dev.buffers[index].mem_side_port = (
system.hmc_dev.xbar[j].cpu_side_ports
)
else:
# Don't connect the xbar to itself
pass
@@ -629,49 +627,49 @@ def config_hmc_dev(opt, system, hmc_host):
# can only direct traffic to it local vaults
if opt.arch == "mixed":
system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4])
system.hmc_dev.xbar[
3
].mem_side_ports = system.hmc_dev.buffer30.cpu_side_port
system.hmc_dev.xbar[3].mem_side_ports = (
system.hmc_dev.buffer30.cpu_side_port
)
system.hmc_dev.buffer30.mem_side_port = system.hmc_dev.xbar[
0
].cpu_side_ports
system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8])
system.hmc_dev.xbar[
3
].mem_side_ports = system.hmc_dev.buffer31.cpu_side_port
system.hmc_dev.xbar[3].mem_side_ports = (
system.hmc_dev.buffer31.cpu_side_port
)
system.hmc_dev.buffer31.mem_side_port = system.hmc_dev.xbar[
1
].cpu_side_ports
system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12])
system.hmc_dev.xbar[
3
].mem_side_ports = system.hmc_dev.buffer32.cpu_side_port
system.hmc_dev.xbar[3].mem_side_ports = (
system.hmc_dev.buffer32.cpu_side_port
)
system.hmc_dev.buffer32.mem_side_port = system.hmc_dev.xbar[
2
].cpu_side_ports
system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4])
system.hmc_dev.xbar[
2
].mem_side_ports = system.hmc_dev.buffer20.cpu_side_port
system.hmc_dev.xbar[2].mem_side_ports = (
system.hmc_dev.buffer20.cpu_side_port
)
system.hmc_dev.buffer20.mem_side_port = system.hmc_dev.xbar[
0
].cpu_side_ports
system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8])
system.hmc_dev.xbar[
2
].mem_side_ports = system.hmc_dev.buffer21.cpu_side_port
system.hmc_dev.xbar[2].mem_side_ports = (
system.hmc_dev.buffer21.cpu_side_port
)
system.hmc_dev.buffer21.mem_side_port = system.hmc_dev.xbar[
1
].cpu_side_ports
system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16])
system.hmc_dev.xbar[
2
].mem_side_ports = system.hmc_dev.buffer23.cpu_side_port
system.hmc_dev.xbar[2].mem_side_ports = (
system.hmc_dev.buffer23.cpu_side_port
)
system.hmc_dev.buffer23.mem_side_port = system.hmc_dev.xbar[
3
].cpu_side_ports

View File

@@ -541,9 +541,9 @@ def run(options, root, testsys, cpu_class):
IndirectBPClass = ObjectList.indirect_bp_list.get(
options.indirect_bp_type
)
switch_cpus[
i
].branchPred.indirectBranchPred = IndirectBPClass()
switch_cpus[i].branchPred.indirectBranchPred = (
IndirectBPClass()
)
switch_cpus[i].createThreads()
# If elastic tracing is enabled attach the elastic trace probe

View File

@@ -1683,6 +1683,15 @@ class HPI_MMU(ArmMMU):
class HPI_BTB(SimpleBTB):
numEntries = 128
tagBits = 18
associativity = 1
instShiftAmt = 2
btbReplPolicy = LRURP()
btbIndexingPolicy = BTBSetAssociative(
num_entries=Parent.numEntries,
set_shift=Parent.instShiftAmt,
assoc=Parent.associativity,
tag_bits=Parent.tagBits,
)
class HPI_BP(TournamentBP):

View File

@@ -111,6 +111,15 @@ class O3_ARM_v7a_FUP(FUPool):
class O3_ARM_v7a_BTB(SimpleBTB):
numEntries = 2048
tagBits = 18
associativity = 1
instShiftAmt = 2
btbReplPolicy = LRURP()
btbIndexingPolicy = BTBSetAssociative(
num_entries=Parent.numEntries,
set_shift=Parent.instShiftAmt,
assoc=Parent.associativity,
tag_bits=Parent.tagBits,
)
# Bi-Mode Branch Predictor

View File

@@ -108,6 +108,15 @@ class ex5_big_FUP(FUPool):
class ex5_big_BTB(SimpleBTB):
numEntries = 4096
tagBits = 18
associativity = 1
instShiftAmt = 2
btbReplPolicy = LRURP()
btbIndexingPolicy = BTBSetAssociative(
num_entries=Parent.numEntries,
set_shift=Parent.instShiftAmt,
assoc=Parent.associativity,
tag_bits=Parent.tagBits,
)
# Bi-Mode Branch Predictor

View File

@@ -213,9 +213,9 @@ def build_test_system(np, isa: ISA):
IndirectBPClass = ObjectList.indirect_bp_list.get(
args.indirect_bp_type
)
test_sys.cpu[
i
].branchPred.indirectBranchPred = IndirectBPClass()
test_sys.cpu[i].branchPred.indirectBranchPred = (
IndirectBPClass()
)
test_sys.cpu[i].createThreads()
# If elastic tracing is enabled when not restoring from checkpoint and

View File

@@ -935,9 +935,9 @@ gpu_port_idx = gpu_port_idx - args.num_cp * 2
token_port_idx = 0
for i in range(len(system.ruby._cpu_ports)):
if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
system.cpu[shader_idx].CUs[
token_port_idx
].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort
system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = (
system.ruby._cpu_ports[i].gmTokenPort
)
token_port_idx += 1
wavefront_size = args.wf_size

View File

@@ -0,0 +1,92 @@
# Copyright (c) 2024 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script further shows an example of booting an ARM based full system Ubuntu
disk image. This simulation boots the disk image using the ArmDemoBoard.
Usage
-----
```bash
scons build/ARM/gem5.opt -j $(nproc)
./build/ARM/gem5.opt configs/example/gem5_library/arm-demo-ubuntu-run.py
```
"""
import argparse
from gem5.isas import ISA
from gem5.prebuilt.demo.arm_demo_board import ArmDemoBoard
from gem5.resources.resource import obtain_resource
from gem5.simulate.exit_event import ExitEvent
from gem5.simulate.simulator import Simulator
from gem5.utils.requires import requires
# This runs a check to ensure the gem5 binary interpreting this file is compiled to include the ARM ISA.
requires(isa_required=ISA.ARM)
parser = argparse.ArgumentParser(
description="An example configuration script to run the ArmDemoBoard."
)
parser.add_argument(
"--use-kvm",
action="store_true",
help="Use KVM cores instead of Timing.",
)
args = parser.parse_args()
board = ArmDemoBoard(use_kvm=args.use_kvm)
board.set_workload(
obtain_resource(
"arm-ubuntu-24.04-boot-with-systemd", resource_version="2.0.0"
)
)
def exit_event_handler():
print("First exit: kernel booted")
yield False # gem5 is now executing systemd startup
print("Second exit: Started `after_boot.sh` script")
# The after_boot.sh script is executed after the kernel and systemd have
# booted.
yield False # gem5 is now executing the `after_boot.sh` script
print("Third exit: Finished `after_boot.sh` script")
# The after_boot.sh script will run a script if it is passed via
# m5 readfile. This is the last exit event before the simulation exits.
yield True
# We define the system with the aforementioned system defined.
simulator = Simulator(
board=board,
on_exit_event={
ExitEvent.EXIT: exit_event_handler(),
},
)
simulator.run()

View File

@@ -0,0 +1,120 @@
# Copyright (c) 2024 Barcelona Supercomputing Center
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script demonstrates how to run RISC-V vector-enabled binaries in SE mode
with gem5. It accepts the number of CORES, VLEN, and ELEN as optional
parameters, as well as the resource name to run. If no resource name is
provided, a list of available resources will be displayed. If one is given the
simulation will then execute the specified resource binary with the selected
parameters until completion.
Usage
-----
# Compile gem5 for RISC-V
scons build/RISCV/gem5.opt
# Run the simulation
./build/RISCV/gem5.opt configs/example/gem5_library/riscv-rvv-example.py \
[-c CORES] [-v VLEN] [-e ELEN] <resource>
"""
import argparse
from m5.objects import RiscvO3CPU
from gem5.components.boards.simple_board import SimpleBoard
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.memory import SingleChannelDDR3_1600
from gem5.components.processors.base_cpu_core import BaseCPUCore
from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
from gem5.isas import ISA
from gem5.resources.resource import obtain_resource
from gem5.simulate.simulator import Simulator
from gem5.utils.requires import requires
class RVVCore(BaseCPUCore):
def __init__(self, elen, vlen, cpu_id):
super().__init__(core=RiscvO3CPU(cpu_id=cpu_id), isa=ISA.RISCV)
self.core.isa[0].elen = elen
self.core.isa[0].vlen = vlen
requires(isa_required=ISA.RISCV)
resources = [
"rvv-branch",
"rvv-index",
"rvv-matmul",
"rvv-memcpy",
"rvv-reduce",
"rvv-saxpy",
"rvv-sgemm",
"rvv-strcmp",
"rvv-strcpy",
"rvv-strlen",
"rvv-strlen-fault",
"rvv-strncpy",
]
parser = argparse.ArgumentParser()
parser.add_argument("resource", type=str, choices=resources)
parser.add_argument("-c", "--cores", required=False, type=int, default=1)
parser.add_argument("-v", "--vlen", required=False, type=int, default=256)
parser.add_argument("-e", "--elen", required=False, type=int, default=64)
args = parser.parse_args()
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
l1d_size="32KiB", l1i_size="32KiB", l2_size="512KiB"
)
memory = SingleChannelDDR3_1600()
processor = BaseCPUProcessor(
cores=[RVVCore(args.elen, args.vlen, i) for i in range(args.cores)]
)
board = SimpleBoard(
clk_freq="1GHz",
processor=processor,
memory=memory,
cache_hierarchy=cache_hierarchy,
)
binary = obtain_resource(args.resource)
board.set_se_binary_workload(binary)
simulator = Simulator(board=board, full_system=False)
print("Beginning simulation!")
simulator.run()

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2021 Advanced Micro Devices, Inc.
# Copyright (c) 2021-2024 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -82,10 +82,6 @@ def addRunFSOptions(parser):
help="The second disk image to mount (/dev/sdb)",
)
parser.add_argument("--kernel", default=None, help="Linux kernel to boot")
parser.add_argument("--gpu-rom", default=None, help="GPU BIOS to load")
parser.add_argument(
"--gpu-mmio-trace", default=None, help="GPU MMIO trace to load"
)
parser.add_argument(
"--checkpoint-before-mmios",
default=False,
@@ -241,16 +237,6 @@ def runGpuFSSystem(args):
math.ceil(float(n_cu) / args.cu_per_scalar_cache)
)
# Verify MMIO trace is valid. This is only needed for Vega10 simulations.
# The md5sum refers to the md5sum of the Vega10 MMIO hardware trace in
# the gem5-resources repository. By checking it here, we avoid potential
# errors that would cause the driver not to load and simulations to fail.
if args.gpu_device == "Vega10":
mmio_file = open(args.gpu_mmio_trace, "rb")
mmio_md5 = hashlib.md5(mmio_file.read()).hexdigest()
if mmio_md5 != "c4ff3326ae8a036e329b8b595c83bd6d":
m5.util.panic("MMIO file does not match gem5 resources")
system = makeGpuFSSystem(args)
root = Root(

View File

@@ -176,8 +176,6 @@ def createGPU(system, args):
def connectGPU(system, args):
system.pc.south_bridge.gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0)
system.pc.south_bridge.gpu.trace_file = args.gpu_mmio_trace
system.pc.south_bridge.gpu.rom_binary = args.gpu_rom
system.pc.south_bridge.gpu.checkpoint_before_mmios = (
args.checkpoint_before_mmios
)

View File

@@ -336,9 +336,9 @@ def makeGpuFSSystem(args):
token_port_idx = 0
for i in range(len(system.ruby._cpu_ports)):
if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
system.cpu[shader_idx].CUs[
token_port_idx
].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort
system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = (
system.ruby._cpu_ports[i].gmTokenPort
)
token_port_idx += 1
wavefront_size = args.wf_size
@@ -346,9 +346,9 @@ def makeGpuFSSystem(args):
# The pipeline issues wavefront_size number of uncoalesced requests
# in one GPU issue cycle. Hence wavefront_size mem ports.
for j in range(wavefront_size):
system.cpu[shader_idx].CUs[i].memory_port[
j
] = system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
system.cpu[shader_idx].CUs[i].memory_port[j] = (
system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
)
gpu_port_idx += 1
for i in range(args.num_compute_units):

View File

@@ -110,8 +110,7 @@ board.set_kernel_disk_workload(
# Begin running of the simulation.
print("Running with ISA: " + processor.get_isa().name)
print()
root = Root(full_system=True, system=board)
board._pre_instantiate()
root = board._pre_instantiate()
m5.instantiate()
print("Beginning simulation!")

View File

@@ -250,9 +250,11 @@ class ConfigManager:
obj,
param_name,
[
(
self.objects_by_name[name]
if name != "Null"
else m5.params.NULL
)
for name in param_values
],
)

View File

@@ -371,6 +371,7 @@ for dma_idx in range(n_DMAs):
num_lanes=1,
clk_domain=thread_clock,
deadlock_threshold=tester_deadlock_threshold,
cache_line_size=system.cache_line_size,
)
)
g_thread_idx += 1
@@ -393,6 +394,7 @@ for cu_idx in range(n_CUs):
num_lanes=args.wf_size,
clk_domain=thread_clock,
deadlock_threshold=tester_deadlock_threshold,
cache_line_size=system.cache_line_size,
)
)
g_thread_idx += 1

View File

@@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem):
# I/D cache is combined and grab from ctrl
dcache=self.controllers[i].cacheMemory,
clk_domain=self.controllers[i].clk_domain,
ruby_system=self,
)
for i in range(len(cpus))
]
@@ -191,7 +192,9 @@ class DirController(Directory_Controller):
self.version = self.versionCount()
self.addr_ranges = ranges
self.ruby_system = ruby_system
self.directory = RubyDirectoryMemory()
self.directory = RubyDirectoryMemory(
block_size=ruby_system.block_size_bytes
)
# Connect this directory to the memory side.
self.memory = mem_ctrls[0].port
self.connectQueues(ruby_system)

View File

@@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem):
# I/D cache is combined and grab from ctrl
dcache=self.controllers[i].cacheMemory,
clk_domain=self.controllers[i].clk_domain,
ruby_system=self,
)
for i in range(len(cpus))
]
@@ -180,7 +181,9 @@ class DirController(Directory_Controller):
self.version = self.versionCount()
self.addr_ranges = ranges
self.ruby_system = ruby_system
self.directory = RubyDirectoryMemory()
self.directory = RubyDirectoryMemory(
block_size=ruby_system.block_size_bytes
)
# Connect this directory to the memory side.
self.memory = mem_ctrls[0].port
self.connectQueues(ruby_system)

View File

@@ -79,6 +79,7 @@ class TestCacheSystem(RubySystem):
# I/D cache is combined and grab from ctrl
dcache=self.controllers[i].cacheMemory,
clk_domain=self.clk_domain,
ruby_system=self,
)
for i in range(num_testers)
]

View File

@@ -84,14 +84,14 @@ class CPCntrl(AMD_Base_Controller, CntrlBase):
self.L2cache = L2Cache()
self.L2cache.create(options.l2_size, options.l2_assoc, options)
self.sequencer = RubySequencer()
self.sequencer = RubySequencer(ruby_system=ruby_system)
self.sequencer.version = self.seqCount()
self.sequencer.dcache = self.L1D0cache
self.sequencer.ruby_system = ruby_system
self.sequencer.coreid = 0
self.sequencer.is_cpu_sequencer = True
self.sequencer1 = RubySequencer()
self.sequencer1 = RubySequencer(ruby_system=ruby_system)
self.sequencer1.version = self.seqCount()
self.sequencer1.dcache = self.L1D1cache
self.sequencer1.ruby_system = ruby_system

View File

@@ -114,14 +114,14 @@ class CPCntrl(CorePair_Controller, CntrlBase):
self.L2cache = L2Cache()
self.L2cache.create(options.l2_size, options.l2_assoc, options)
self.sequencer = RubySequencer()
self.sequencer = RubySequencer(ruby_system=ruby_system)
self.sequencer.version = self.seqCount()
self.sequencer.dcache = self.L1D0cache
self.sequencer.ruby_system = ruby_system
self.sequencer.coreid = 0
self.sequencer.is_cpu_sequencer = True
self.sequencer1 = RubySequencer()
self.sequencer1 = RubySequencer(ruby_system=ruby_system)
self.sequencer1.version = self.seqCount()
self.sequencer1.dcache = self.L1D1cache
self.sequencer1.ruby_system = ruby_system
@@ -169,7 +169,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
# TCP_Controller inherits this from RubyController
self.mandatory_queue_latency = options.mandatory_queue_latency
self.coalescer = VIPERCoalescer()
self.coalescer = VIPERCoalescer(ruby_system=ruby_system)
self.coalescer.version = self.seqCount()
self.coalescer.icache = self.L1cache
self.coalescer.dcache = self.L1cache
@@ -182,7 +182,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
options.max_coalesces_per_cycle
)
self.sequencer = RubySequencer()
self.sequencer = RubySequencer(ruby_system=ruby_system)
self.sequencer.version = self.seqCount()
self.sequencer.dcache = self.L1cache
self.sequencer.ruby_system = ruby_system
@@ -211,7 +211,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
self.L1cache.create(options)
self.issue_latency = 1
self.coalescer = VIPERCoalescer()
self.coalescer = VIPERCoalescer(ruby_system=ruby_system)
self.coalescer.version = self.seqCount()
self.coalescer.icache = self.L1cache
self.coalescer.dcache = self.L1cache
@@ -219,7 +219,7 @@ class TCPCntrl(TCP_Controller, CntrlBase):
self.coalescer.support_inst_reqs = False
self.coalescer.is_cpu_sequencer = False
self.sequencer = RubySequencer()
self.sequencer = RubySequencer(ruby_system=ruby_system)
self.sequencer.version = self.seqCount()
self.sequencer.dcache = self.L1cache
self.sequencer.ruby_system = ruby_system
@@ -387,7 +387,9 @@ class DirCntrl(Directory_Controller, CntrlBase):
self.response_latency = 30
self.addr_ranges = dir_ranges
self.directory = RubyDirectoryMemory()
self.directory = RubyDirectoryMemory(
block_size=ruby_system.block_size_bytes
)
self.L3CacheMemory = L3Cache()
self.L3CacheMemory.create(options, ruby_system, system)
@@ -686,7 +688,7 @@ def construct_gpudirs(options, system, ruby_system, network):
dir_cntrl.addr_ranges = dram_intf.range
# Append
exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i)
exec("ruby_system.gpu_dir_cntrl%d = dir_cntrl" % i)
dir_cntrl_nodes.append(dir_cntrl)
mem_ctrls.append(mem_ctrl)

View File

@@ -148,6 +148,7 @@ def create_system(
train_misses=5,
num_startup_pfs=4,
cross_page=True,
block_size=options.cacheline_size,
)
l0_cntrl = L0Cache_Controller(

View File

@@ -148,6 +148,7 @@ def create_system(
train_misses=5,
num_startup_pfs=4,
cross_page=True,
block_size=options.cacheline_size,
)
l0_cntrl = L0Cache_Controller(

View File

@@ -94,7 +94,7 @@ def create_system(
is_icache=False,
)
prefetcher = RubyPrefetcher()
prefetcher = RubyPrefetcher(block_size=options.cacheline_size)
clk_domain = cpus[i].clk_domain

View File

@@ -112,14 +112,14 @@ class CPCntrl(CorePair_Controller, CntrlBase):
self.L2cache = L2Cache()
self.L2cache.create(options)
self.sequencer = RubySequencer()
self.sequencer = RubySequencer(ruby_system=ruby_system)
self.sequencer.version = self.seqCount()
self.sequencer.dcache = self.L1D0cache
self.sequencer.ruby_system = ruby_system
self.sequencer.coreid = 0
self.sequencer.is_cpu_sequencer = True
self.sequencer1 = RubySequencer()
self.sequencer1 = RubySequencer(ruby_system=ruby_system)
self.sequencer1.version = self.seqCount()
self.sequencer1.dcache = self.L1D1cache
self.sequencer1.ruby_system = ruby_system
@@ -194,7 +194,9 @@ class DirCntrl(Directory_Controller, CntrlBase):
self.response_latency = 30
self.addr_ranges = dir_ranges
self.directory = RubyDirectoryMemory()
self.directory = RubyDirectoryMemory(
block_size=ruby_system.block_size_bytes
)
self.L3CacheMemory = L3Cache()
self.L3CacheMemory.create(options, ruby_system, system)

View File

@@ -308,7 +308,9 @@ def create_directories(options, bootmem, ruby_system, system):
for i in range(options.num_dirs):
dir_cntrl = Directory_Controller()
dir_cntrl.version = i
dir_cntrl.directory = RubyDirectoryMemory()
dir_cntrl.directory = RubyDirectoryMemory(
block_size=ruby_system.block_size_bytes
)
dir_cntrl.ruby_system = ruby_system
exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
@@ -316,7 +318,9 @@ def create_directories(options, bootmem, ruby_system, system):
if bootmem is not None:
rom_dir_cntrl = Directory_Controller()
rom_dir_cntrl.directory = RubyDirectoryMemory()
rom_dir_cntrl.directory = RubyDirectoryMemory(
block_size=ruby_system.block_size_bytes
)
rom_dir_cntrl.ruby_system = ruby_system
rom_dir_cntrl.version = i + 1
rom_dir_cntrl.memory = bootmem.port

View File

@@ -960,11 +960,14 @@ class PackedReg
uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1;
value &= elem_mask;
// Clear the bits where the value goes so that operator| can be used.
elem_mask <<= qw_lbit;
qword &= elem_mask;
qword &= ~elem_mask;
value <<= qw_lbit;
qword |= value;
// Promote to 64-bit to prevent shifting out of range
uint64_t value64 = value;
value64 <<= qw_lbit;
qword |= value64;
dwords[udw] = uint32_t(qword >> 32);
dwords[ldw] = uint32_t(qword & mask(32));

View File

@@ -53,8 +53,6 @@ namespace gem5
namespace ArmISA
{
GenericISA::BasicDecodeCache<Decoder, ExtMachInst> Decoder::defaultCache;
Decoder::Decoder(const ArmDecoderParams &params)
: InstDecoder(params, &data),
dvmEnabled(params.dvm_enabled),

View File

@@ -94,7 +94,7 @@ class Decoder : public InstDecoder
enums::DecoderFlavor decoderFlavor;
/// A cache of decoded instruction objects.
static GenericISA::BasicDecodeCache<Decoder, ExtMachInst> defaultCache;
GenericISA::BasicDecodeCache<Decoder, ExtMachInst> defaultCache;
friend class GenericISA::BasicDecodeCache<Decoder, ExtMachInst>;
/**

View File

@@ -264,7 +264,7 @@ class ArmFaultVals : public ArmFault
static FaultVals vals;
public:
ArmFaultVals<T>(ExtMachInst mach_inst = 0, uint32_t _iss = 0) :
ArmFaultVals(ExtMachInst mach_inst = 0, uint32_t _iss = 0) :
ArmFault(mach_inst, _iss) {}
FaultName name() const override { return vals.name; }
FaultOffset offset(ThreadContext *tc) override;

View File

@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
// Copyright (c) 2010-2011, 2016-2019 ARM Limited
// Copyright (c) 2010-2011, 2016-2019, 2024 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -1891,6 +1891,150 @@ let {{
return new NVrsqrteD<uint32_t>(machInst, vd, vm);
}
}
} else if ((b & 0x1c) == 0x00) {
if (bits(b, 1)) {
switch(size) {
case 1:
if (q) {
return new NVcvt2uhAQ<uint16_t>(machInst, vd, vm);
} else {
return new NVcvt2uhAD<uint16_t>(machInst, vd, vm);
}
case 2:
if (q) {
return new NVcvt2usAQ<uint32_t>(machInst, vd, vm);
} else {
return new NVcvt2usAD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
} else {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2shAQ<int16_t>(machInst, vd, vm);
} else {
return new NVcvt2shAD<int16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2ssAQ<int32_t>(machInst, vd, vm);
} else {
return new NVcvt2ssAD<int32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
}
} else if ((b & 0x1c) == 0x04) {
if (bits(b, 1)) {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2uhNQ<uint16_t>(machInst, vd, vm);
} else {
return new NVcvt2uhND<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2usNQ<uint32_t>(machInst, vd, vm);
} else {
return new NVcvt2usND<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
} else {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2shNQ<int16_t>(machInst, vd, vm);
} else {
return new NVcvt2shND<int16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2ssNQ<int32_t>(machInst, vd, vm);
} else {
return new NVcvt2ssND<int32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
}
} else if ((b & 0x1c) == 0x08) {
if (bits(b, 1)) {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2uhPQ<uint16_t>(machInst, vd, vm);
} else {
return new NVcvt2uhPD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2usPQ<uint32_t>(machInst, vd, vm);
} else {
return new NVcvt2usPD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
} else {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2shPQ<int16_t>(machInst, vd, vm);
} else {
return new NVcvt2shPD<int16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2ssPQ<int32_t>(machInst, vd, vm);
} else {
return new NVcvt2ssPD<int32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
}
} else if ((b & 0x1c) == 0x0c) {
if (bits(b, 1)) {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2uhMQ<uint16_t>(machInst, vd, vm);
} else {
return new NVcvt2uhMD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2usMQ<uint32_t>(machInst, vd, vm);
} else {
return new NVcvt2usMD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
} else {
switch (size) {
case 0b01:
if (q) {
return new NVcvt2shMQ<int16_t>(machInst, vd, vm);
} else {
return new NVcvt2shMD<int16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVcvt2ssMQ<int32_t>(machInst, vd, vm);
} else {
return new NVcvt2ssMD<int32_t>(machInst, vd, vm);
}
default:
return new Unknown(machInst);
}
}
} else {
return new Unknown(machInst);
}

View File

@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
// Copyright (c) 2010-2011, 2015, 2019, 2024 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -3579,6 +3579,128 @@ let {{
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
vcvthp2hCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
float mid = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, srcElem1);
if (flushToZero(mid))
fpscr.idc = 1;
destElem = vfpFpToFixed<float>(mid, %s, 16, 0, true, %s);
__asm__ __volatile__("" :: "m" (destElem));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
'''
vcvtahp2uhCode = vcvthp2hCode % ("false", "VfpRoundAway")
twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAD", "SimdCvtOp",
("uint16_t",), 2, vcvtahp2uhCode)
twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAQ", "SimdCvtOp",
("uint16_t",), 4, vcvtahp2uhCode)
vcvtnhp2uhCode = vcvthp2hCode % ("false", "VfpRoundNearest")
twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhND", "SimdCvtOp",
("uint16_t",), 2, vcvtnhp2uhCode)
twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhNQ", "SimdCvtOp",
("uint16_t",), 4, vcvtnhp2uhCode)
vcvtphp2uhCode = vcvthp2hCode % ("false", "VfpRoundUpward")
twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPD", "SimdCvtOp",
("uint16_t",), 2, vcvtphp2uhCode)
twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPQ", "SimdCvtOp",
("uint16_t",), 4, vcvtphp2uhCode)
vcvtmhp2uhCode = vcvthp2hCode % ("false", "VfpRoundDown")
twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMD", "SimdCvtOp",
("uint16_t",), 2, vcvtmhp2uhCode)
twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMQ", "SimdCvtOp",
("uint16_t",), 4, vcvtmhp2uhCode)
vcvtahp2shCode = vcvthp2hCode % ("true", "VfpRoundAway")
twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAD", "SimdCvtOp",
("int16_t",), 2, vcvtahp2shCode)
twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAQ", "SimdCvtOp",
("int16_t",), 4, vcvtahp2shCode)
vcvtnhp2shCode = vcvthp2hCode % ("true", "VfpRoundNearest")
twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shND", "SimdCvtOp",
("int16_t",), 2, vcvtnhp2shCode)
twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shNQ", "SimdCvtOp",
("int16_t",), 4, vcvtnhp2shCode)
vcvtphp2shCode = vcvthp2hCode % ("true", "VfpRoundUpward")
twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPD", "SimdCvtOp",
("int16_t",), 2, vcvtphp2shCode)
twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPQ", "SimdCvtOp",
("int16_t",), 4, vcvtphp2shCode)
vcvtmhp2shCode = vcvthp2hCode % ("true", "VfpRoundDown")
twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMD", "SimdCvtOp",
("int16_t",), 2, vcvtmhp2shCode)
twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMQ", "SimdCvtOp",
("int16_t",), 4, vcvtmhp2shCode)
vcvtsp2sCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
float mid = bitsToFp(srcElem1, (float)0.0);
if (flushToZero(mid))
fpscr.idc = 1;
destElem = vfpFpToFixed<float>(mid, %s, 32, 0, true, %s);
__asm__ __volatile__("" :: "m" (destElem));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
'''
vcvtasp2usCode = vcvtsp2sCode % ("false", "VfpRoundAway")
twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAD", "SimdCvtOp",
("uint32_t",), 2, vcvtasp2usCode)
twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAQ", "SimdCvtOp",
("uint32_t",), 4, vcvtasp2usCode)
vcvtnsp2usCode = vcvtsp2sCode % ("false", "VfpRoundNearest")
twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usND", "SimdCvtOp",
("uint32_t",), 2, vcvtnsp2usCode)
twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usNQ", "SimdCvtOp",
("uint32_t",), 4, vcvtnsp2usCode)
vcvtpsp2usCode = vcvtsp2sCode % ("false", "VfpRoundUpward")
twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPD", "SimdCvtOp",
("uint32_t",), 2, vcvtpsp2usCode)
twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPQ", "SimdCvtOp",
("uint32_t",), 4, vcvtpsp2usCode)
vcvtmsp2usCode = vcvtsp2sCode % ("false", "VfpRoundDown")
twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMD", "SimdCvtOp",
("uint32_t",), 2, vcvtmsp2usCode)
twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMQ", "SimdCvtOp",
("uint32_t",), 4, vcvtmsp2usCode)
vcvtasp2ssCode = vcvtsp2sCode % ("true", "VfpRoundAway")
twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAD", "SimdCvtOp",
("int32_t",), 2, vcvtasp2ssCode)
twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAQ", "SimdCvtOp",
("int32_t",), 4, vcvtasp2ssCode)
vcvtnsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundNearest")
twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssND", "SimdCvtOp",
("int32_t",), 2, vcvtnsp2ssCode)
twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssNQ", "SimdCvtOp",
("int32_t",), 4, vcvtnsp2ssCode)
vcvtpsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundUpward")
twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPD", "SimdCvtOp",
("int32_t",), 2, vcvtpsp2ssCode)
twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPQ", "SimdCvtOp",
("int32_t",), 4, vcvtpsp2ssCode)
vcvtmsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundDown")
twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMD", "SimdCvtOp",
("int32_t",), 2, vcvtmsp2ssCode)
twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMQ", "SimdCvtOp",
("int32_t",), 4, vcvtmsp2ssCode)
vrsqrteCode = '''
destElem = unsignedRSqrtEstimate(srcElem1);
'''

View File

@@ -89,6 +89,12 @@ class BaseInterrupts : public SimObject
{
panic("Interrupts::clearAll unimplemented!\n");
}
virtual bool
isWakeUp() const
{
return true;
}
};
} // namespace gem5

View File

@@ -111,12 +111,13 @@ class Template:
operands = SubOperandList(self.parser, compositeCode, d.operands)
myDict[
"reg_idx_arr_decl"
] = "RegId srcRegIdxArr[%d]; RegId destRegIdxArr[%d]" % (
myDict["reg_idx_arr_decl"] = (
"RegId srcRegIdxArr[%d]; RegId destRegIdxArr[%d]"
% (
d.operands.numSrcRegs + d.srcRegIdxPadding,
d.operands.numDestRegs + d.destRegIdxPadding,
)
)
# The reinterpret casts are largely because an array with a known
# size cannot be passed as an argument which is an array with an
@@ -821,7 +822,7 @@ class ISAParser(Grammar):
"DBLCOLON",
"ASTERISK",
# C preprocessor directives
"CPPDIRECTIVE"
"CPPDIRECTIVE",
# The following are matched but never returned. commented out to
# suppress PLY warning
# newfile directive

View File

@@ -140,9 +140,9 @@ def handle_statement(parser, container, statement):
if statement.is_microop:
if statement.mnemonic not in parser.microops.keys():
raise Exception(f"Unrecognized mnemonic: {statement.mnemonic}")
parser.symbols[
"__microopClassFromInsideTheAssembler"
] = parser.microops[statement.mnemonic]
parser.symbols["__microopClassFromInsideTheAssembler"] = (
parser.microops[statement.mnemonic]
)
try:
microop = eval(
f"__microopClassFromInsideTheAssembler({statement.params})",
@@ -166,9 +166,9 @@ def handle_statement(parser, container, statement):
elif statement.is_directive:
if statement.name not in container.directives.keys():
raise Exception(f"Unrecognized directive: {statement.name}")
parser.symbols[
"__directiveFunctionFromInsideTheAssembler"
] = container.directives[statement.name]
parser.symbols["__directiveFunctionFromInsideTheAssembler"] = (
container.directives[statement.name]
)
try:
eval(
f"__directiveFunctionFromInsideTheAssembler({statement.params})",

View File

@@ -114,6 +114,13 @@ class RiscvISA(BaseISA):
enable_Zicbom_fs = Param.Bool(True, "Enable Zicbom extension in FS mode")
enable_Zicboz_fs = Param.Bool(True, "Enable Zicboz extension in FS mode")
enable_Zcd = Param.Bool(
True,
"Enable Zcd extensions. "
"Set the option to false implies the Zcmp and Zcmt is enable as "
"c.fsdsp is overlap with them."
"Refs: https://github.com/riscv/riscv-isa-manual/blob/main/src/zc.adoc",
)
wfi_resume_on_pending = Param.Bool(
False,

View File

@@ -44,6 +44,7 @@ Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst)
ISA *isa = dynamic_cast<ISA*>(p.isa);
vlen = isa->getVecLenInBits();
elen = isa->getVecElemLenInBits();
_enableZcd = isa->enableZcd();
reset();
}
@@ -127,6 +128,7 @@ Decoder::decode(PCStateBase &_next_pc)
emi.vtype8 = next_pc.vtype() & 0xff;
emi.vill = next_pc.vtype().vill;
emi.rv_type = static_cast<int>(next_pc.rvType());
emi.enable_zcd = _enableZcd;
return decode(emi, next_pc.instAddr());
}

View File

@@ -62,6 +62,7 @@ class Decoder : public InstDecoder
uint32_t vlen;
uint32_t elen;
bool _enableZcd;
virtual StaticInstPtr decodeInst(ExtMachInst mach_inst);

View File

@@ -34,3 +34,4 @@ Source('mem.cc', tags='riscv isa')
Source('standard.cc', tags='riscv isa')
Source('static_inst.cc', tags='riscv isa')
Source('vector.cc', tags='riscv isa')
Source('zcmp.cc', tags='riscv isa')

View File

@@ -0,0 +1,130 @@
/*
* Copyright (c) 2024 Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/riscv/insts/zcmp.hh"
#include <string>
#include "arch/riscv/regs/int.hh"
#include "arch/riscv/utility.hh"
namespace gem5
{
namespace RiscvISA
{
CmMacroInst::CmMacroInst(
const char* mnem, ExtMachInst machInst, OpClass opClass)
: RiscvMacroInst(mnem, machInst, opClass), rlist(machInst.rlist)
{
}
// Ref: https://github.com/riscv-software-src/riscv-isa-sim/blob/f7d0dba60/
// riscv/decode.h#L168
uint64_t
CmMacroInst::stackAdj() const
{
uint64_t stack_adj_base = 0;
switch (machInst.rlist) {
case 15:
stack_adj_base += 16;
[[fallthrough]];
case 14:
if (machInst.rv_type == RV64) {
stack_adj_base += 16;
}
[[fallthrough]];
case 13:
case 12:
stack_adj_base += 16;
[[fallthrough]];
case 11:
case 10:
if (machInst.rv_type == RV64) {
stack_adj_base += 16;
}
[[fallthrough]];
case 9:
case 8:
stack_adj_base += 16;
[[fallthrough]];
case 7:
case 6:
if (machInst.rv_type == RV64) {
stack_adj_base += 16;
}
[[fallthrough]];
case 5:
case 4:
stack_adj_base += 16;
break;
}
return stack_adj_base + machInst.spimm * 16;
}
std::string
CmMacroInst::getRlistStr() const
{
std::string s = "";
switch (machInst.rlist) {
case 15:
s = csprintf("{%s, %s-%s}", registerName(ReturnAddrReg),
registerName(int_reg::S0),
registerName(PushPopRegList[0]));
break;
case 14:
case 13:
case 12:
case 11:
case 10:
case 9:
case 8:
case 7:
case 6:
s = csprintf("{%s, %s-%s}", registerName(ReturnAddrReg),
registerName(int_reg::S0),
registerName(PushPopRegList[16-machInst.rlist]));
break;
case 5:
s = csprintf("{%s, %s}", registerName(ReturnAddrReg),
registerName(int_reg::S0));
break;
case 4:
s = csprintf("{%s}", registerName(ReturnAddrReg));
break;
default:
break;
}
return s;
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -0,0 +1,60 @@
/*
* Copyright (c) 2024 Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_RISCV_INSTS_ZCMP_HH__
#define __ARCH_RISCV_INSTS_ZCMP_HH__
#include <string>
#include "arch/riscv/insts/static_inst.hh"
#include "cpu/static_inst.hh"
namespace gem5
{
namespace RiscvISA
{
class CmMacroInst : public RiscvMacroInst
{
public:
CmMacroInst(const char* mnem, ExtMachInst machInst, OpClass opClass);
protected:
using RiscvMacroInst::RiscvMacroInst;
uint64_t stackAdj() const;
std::string getRlistStr() const;
uint64_t rlist;
};
} // namespace RiscvISA
} // namespace gem5
#endif // __ARCH_RISCV_INSTS_ZCMP_HH__

View File

@@ -95,6 +95,11 @@ class Interrupts : public BaseInterrupts
void clearAll() override;
bool isWakeUp() const override
{
return checkNonMaskableInterrupt() || (ip & ie).any();
}
uint64_t readIP() const { return (uint64_t)ip.to_ulong(); }
uint64_t readIE() const { return (uint64_t)ie.to_ulong(); }
void setIP(const uint64_t& val) { ip = val; }

View File

@@ -260,7 +260,7 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
ISA::ISA(const Params &p) : BaseISA(p, "riscv"),
_rvType(p.riscv_type), enableRvv(p.enable_rvv), vlen(p.vlen), elen(p.elen),
_privilegeModeSet(p.privilege_mode_set),
_wfiResumeOnPending(p.wfi_resume_on_pending)
_wfiResumeOnPending(p.wfi_resume_on_pending), _enableZcd(p.enable_Zcd)
{
_regClasses.push_back(&intRegClass);
_regClasses.push_back(&floatRegClass);

View File

@@ -108,6 +108,14 @@ class ISA : public BaseISA
*/
const bool _wfiResumeOnPending;
/**
* Enable Zcd extensions.
* Set the option to false implies the Zcmp and Zcmt is enable as c.fsdsp
* is overlap with them.
* Refs: https://github.com/riscv/riscv-isa-manual/blob/main/src/zc.adoc
*/
bool _enableZcd;
public:
using Params = RiscvISAParams;
@@ -184,6 +192,8 @@ class ISA : public BaseISA
bool resumeOnPending() { return _wfiResumeOnPending; }
bool enableZcd() { return _enableZcd; }
virtual Addr getFaultHandlerAddr(
RegIndex idx, uint64_t cause, bool intr) const;
};

View File

@@ -34,6 +34,7 @@
// Bitfield definitions.
//
def bitfield RVTYPE rv_type;
def bitfield ENABLE_ZCD enable_zcd;
def bitfield QUADRANT <1:0>;
def bitfield OPCODE5 <6:2>;
@@ -103,10 +104,13 @@ def bitfield CFUNCT1 <12>;
def bitfield CFUNCT1BIT6 <6>;
def bitfield CFUNCT2HIGH <11:10>;
def bitfield CFUNCT2LOW <6:5>;
def bitfield CFUNCT2MID <9:8>;
def bitfield RC1 <11:7>;
def bitfield RC2 <6:2>;
def bitfield RP1 <9:7>;
def bitfield RP2 <4:2>;
def bitfield R1S <9:7>;
def bitfield R2S <4:2>;
def bitfield FC1 <11:7>;
def bitfield FC2 <6:2>;
def bitfield FP2 <4:2>;

View File

@@ -54,6 +54,7 @@ decode QUADRANT default Unknown::unknown() {
Rp2 = rvSext(sp + imm);
}}, uint64_t);
format CompressedLoad {
0x1: decode ENABLE_ZCD {
0x1: c_fld({{
offset = CIMM3 << 3 | CIMM2 << 6;
}}, {{
@@ -71,6 +72,7 @@ decode QUADRANT default Unknown::unknown() {
}}, {{
EA = rvSext(Rp1 + offset);
}});
}
0x2: c_lw({{
offset = CIMM2<1:1> << 2 |
CIMM3 << 3 |
@@ -152,7 +154,8 @@ decode QUADRANT default Unknown::unknown() {
}
}
format CompressedStore {
0x5: c_fsd({{
0x5: decode ENABLE_ZCD {
0x1: c_fsd({{
offset = CIMM3 << 3 | CIMM2 << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
@@ -164,6 +167,7 @@ decode QUADRANT default Unknown::unknown() {
}}, {{
EA = rvSext(Rp1 + offset);
}});
}
0x6: c_sw({{
offset = CIMM2<1:1> << 2 |
CIMM3 << 3 |
@@ -381,6 +385,7 @@ decode QUADRANT default Unknown::unknown() {
Rc1 = rvSext(Rc1 << imm);
}}, uint64_t);
format CompressedLoad {
0x1: decode ENABLE_ZCD {
0x1: c_fldsp({{
offset = CIMM5<4:3> << 3 |
CIMM1 << 5 |
@@ -398,6 +403,7 @@ decode QUADRANT default Unknown::unknown() {
}}, {{
EA = rvSext(sp + offset);
}});
}
0x2: c_lwsp({{
offset = CIMM5<4:2> << 2 |
CIMM1 << 5 |
@@ -480,7 +486,22 @@ decode QUADRANT default Unknown::unknown() {
}
}
format CompressedStore {
0x5: c_fsdsp({{
0x5: decode ENABLE_ZCD {
0x0: decode CFUNCT6LOW3 {
0x3: decode CFUNCT2LOW {
0x1: CmMvsa01::cm_mvsa01();
0x3: CmMva01s::cm_mva01s();
}
0x6: decode CFUNCT2MID {
0x0: CmPush::cm_push();
0x2: CmPop::cm_pop();
}
0x7: decode CFUNCT2MID {
0x0: CmPop::cm_popretz(is_ret=True, has_a0=True);
0x2: CmPop::cm_popret(is_ret=True);
}
}
0x1: c_fsdsp({{
offset = CIMM6<5:3> << 3 |
CIMM6<2:0> << 6;
}}, {{
@@ -493,6 +514,7 @@ decode QUADRANT default Unknown::unknown() {
}}, {{
EA = rvSext(sp + offset);
}});
}
0x6: c_swsp({{
offset = CIMM6<5:2> << 2 |
CIMM6<1:0> << 6;

View File

@@ -40,6 +40,7 @@
##include "vector_conf.isa"
##include "vector_arith.isa"
##include "vector_mem.isa"
##include "zcmp.isa"
// Include formats for nonstandard extensions
##include "compressed.isa"

View File

@@ -0,0 +1,782 @@
// -*- mode:c++ -*-
// Copyright (c) 2015 RISC-V Foundation
// Copyright (c) 2016 The University of Virginia
// Copyright (c) 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Cmpush template.
def template CmPushDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst);
protected:
using %(base_class)s::%(base_class)s;
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
}};
def template CmPushConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
StaticInstPtr cur_inst = nullptr;
if (rlist < 4) {
cur_inst = new Unknown(machInst);
cur_inst->setFlag(IsMicroop);
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
} else {
int start_reg = 0;
if (rlist != 15) {
start_reg = (16-rlist);
}
int offset = 0;
for (int i = start_reg; i < PushPopRegList.size(); i++) {
offset -= rvSelect(4, 8);
if (machInst.rv_type == RV32) {
cur_inst = new %(class_name)s32MicroInst(
machInst, PushPopRegList[i], offset);
} else {
cur_inst = new %(class_name)s64MicroInst(
machInst, PushPopRegList[i], offset);
}
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
}
cur_inst = new %(class_name)sSpAdjMicroInst(machInst, -stackAdj());
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
}
microops.front()->setFirstMicroop();
microops.back()->setLastMicroop();
}
}};
def template CmPushExecute {{
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << getRlistStr() << ", " << (int64_t)-stackAdj();
return ss.str();
}
}};
def template CmStoreMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, RegId push_reg, int64_t offset);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
Fault completeAcc(
Packet *, ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(
Addr, const loader::SymbolTable *) const override;
protected:
using %(base_class)s::%(base_class)s;
private:
%(reg_idx_arr_decl)s;
int64_t offset;
Request::Flags memAccessFlags;
};
}};
def template CmStoreMicroConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, RegId push_reg, int64_t offset)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s),
offset(offset)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
}};
def template CmStoreMicroExecute {{
Fault
%(class_name)s::execute(
ExecContext *xc, trace::InstRecord *traceData) const
{
Addr EA;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
%(memacc_code)s;
{
Fault fault =
writeMemAtomicLE(xc, traceData, Mem, EA, memAccessFlags,
nullptr);
if (fault != NoFault)
return fault;
}
%(op_wb)s;
return NoFault;
}
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " <<
offset << '(' << registerName(srcRegIdx(0)) << ')';
return ss.str();
}
}};
def template CmStoreMicroInitiateAcc {{
Fault
%(class_name)s::initiateAcc(ExecContext *xc,
trace::InstRecord *traceData) const
{
Addr EA;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
%(memacc_code)s;
{
Fault fault = writeMemTimingLE(xc, traceData, Mem, EA,
memAccessFlags, nullptr);
if (fault != NoFault)
return fault;
}
%(op_wb)s;
return NoFault;
}
}};
def template CmStoreMicroCompleteAcc {{
Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
trace::InstRecord *traceData) const
{
return NoFault;
}
}};
def template SpAdjMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, int64_t adj);
protected:
using %(base_class)s::%(base_class)s;
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(
Addr, const loader::SymbolTable *) const override;
private:
%(reg_idx_arr_decl)s;
int64_t adj;
};
}};
def template SpAdjMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst, int64_t adj)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s), adj(adj)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
}};
def template SpAdjMicroExecute {{
Fault
%(class_name)s::execute(
ExecContext *xc, trace::InstRecord *traceData) const
{
%(op_decl)s;
%(op_rd)s;
%(code)s;
%(op_wb)s;
return NoFault;
}
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ' '
<< registerName(srcRegIdx(0)) << ' ' << adj;
return ss.str();
}
}};
// Cmpop decode template.
def template CmPopDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst);
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
protected:
using %(base_class)s::%(base_class)s;
};
}};
def template CmPopConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
StaticInstPtr cur_inst = nullptr;
if (rlist < 4) {
cur_inst = new Unknown(machInst);
cur_inst->setFlag(IsMicroop);
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
} else {
int start_reg = 0;
if (rlist != 15) {
start_reg = (16-rlist);
}
int offset = stackAdj();
for (int i = start_reg; i < PushPopRegList.size(); i++) {
offset -= rvSelect(4, 8);
if (machInst.rv_type == RV32) {
cur_inst = new %(class_name)s32MicroInst(
machInst, PushPopRegList[i], offset);
} else {
cur_inst = new %(class_name)s64MicroInst(
machInst, PushPopRegList[i], offset);
}
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
}
cur_inst = new %(class_name)sSpAdjMicroInst(machInst, stackAdj());
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
%(move_a0_desc)s;
%(return_desc)s;
}
microops.front()->setFirstMicroop();
microops.back()->setLastMicroop();
}
}};
def template CmPopExecute {{
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << getRlistStr() << ", " << stackAdj();
return ss.str();
}
}};
// Declaration of the per-register load microop used by cm.pop*: loads one
// saved register back from its stack slot at sp + offset.
def template CmLoadMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, RegId pop_reg, int64_t offset);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
Fault completeAcc(
Packet *, ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(
Addr, const loader::SymbolTable *) const override;
protected:
using %(base_class)s::%(base_class)s;
private:
%(reg_idx_arr_decl)s;
// Byte offset of this register's save slot relative to sp.
int64_t offset;
Request::Flags memAccessFlags;
};
}};
// Constructor: records the stack-slot offset; register indices are wired
// up by the generated %(set_reg_idx_arr)s / %(constructor)s snippets.
def template CmLoadMicroConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, RegId pop_reg, int64_t offset)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s),
offset(offset)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
}};
// Atomic-mode execute: compute the effective address, read memory
// little-endian, then write the loaded value back.  Also provides the
// microop disassembly ("<mnemonic> rd, offset(rs1)").
def template CmLoadMicroExecute {{
Fault
%(class_name)s::execute(
ExecContext *xc, trace::InstRecord *traceData) const
{
Addr EA;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
{
Fault fault =
readMemAtomicLE(xc, traceData, EA, Mem, memAccessFlags);
if (fault != NoFault)
return fault;
}
%(memacc_code)s;
%(op_wb)s;
return NoFault;
}
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " <<
offset << '(' << registerName(srcRegIdx(0)) << ')';
return ss.str();
}
}};
// Timing-mode: initiateAcc issues the memory read request...
def template CmLoadMicroInitiateAcc {{
Fault
%(class_name)s::initiateAcc(ExecContext *xc,
trace::InstRecord *traceData) const
{
Addr EA;
%(op_src_decl)s;
%(op_rd)s;
%(ea_code)s;
return initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
}
}};
// ...and completeAcc consumes the returned packet and writes back.
def template CmLoadMicroCompleteAcc {{
Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
trace::InstRecord *traceData) const
{
%(op_decl)s;
%(op_rd)s;
getMemLE(pkt, Mem, traceData);
%(memacc_code)s;
%(op_wb)s;
return NoFault;
}
}};
// Declaration of the return microop used by cm.popret/cm.popretz: an
// indirect jump through ra.
def template CmRetMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
/// Constructor.
%(class_name)s(ExtMachInst machInst);
protected:
using %(base_class)s::%(base_class)s;
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::string
generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
std::unique_ptr<PCStateBase> branchTarget(
ThreadContext *tc) const override;
using StaticInst::branchTarget;
private:
%(reg_idx_arr_decl)s;
};
}};
def template CmRetMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
}};
// Execute plus branch-target and disassembly support for the return
// microop; %(code)s carries the NPC update.
def template CmRetMicroExecute {{
Fault
%(class_name)s::execute(
ExecContext *xc, trace::InstRecord *traceData) const
{
%(op_decl)s;
%(op_rd)s;
%(code)s;
%(op_wb)s;
return NoFault;
}
std::unique_ptr<PCStateBase>
%(class_name)s::branchTarget(ThreadContext *tc) const
{
PCStateBase *pc_ptr = tc->pcState().clone();
// Bit 0 of the target is cleared (JALR-style) before sign extension.
pc_ptr->as<PCState>().set(rvSext(tc->getReg(srcRegIdx(0)) & ~0x1));
return std::unique_ptr<PCStateBase>{pc_ptr};
}
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(srcRegIdx(0));
return ss.str();
}
}};
// Shared declaration template for the cm.mvsa01 / cm.mva01s macroops.
def template CmMvDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst);
protected:
using %(base_class)s::%(base_class)s;
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
}};
// cm.mvsa01: copies a0/a1 into the two s-registers selected by r1s/r2s.
// The mv microop constructor argument order is (push_reg, pop_reg) =
// (source, destination), per the 'CmPopReg = CmPushReg;' microop code.
def template CmMvsa01Constructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
StaticInstPtr cur_inst;
cur_inst = new %(class_name)sMvMicroInst(
machInst, int_reg::A0, StackRegs[machInst.r1s]);
microops.emplace_back(cur_inst);
cur_inst = new %(class_name)sMvMicroInst(
machInst, int_reg::A1, StackRegs[machInst.r2s]);
microops.emplace_back(cur_inst);
microops.front()->setFirstMicroop();
microops.back()->setLastMicroop();
}
}};
// cm.mva01s: the reverse move, s-registers into a0/a1.
def template CmMva01sConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
StaticInstPtr cur_inst;
cur_inst = new %(class_name)sMvMicroInst(
machInst, StackRegs[machInst.r1s], int_reg::A0);
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
cur_inst = new %(class_name)sMvMicroInst(
machInst, StackRegs[machInst.r2s], int_reg::A1);
cur_inst->setDelayedCommit();
microops.emplace_back(cur_inst);
microops.front()->setFirstMicroop();
microops.back()->setLastMicroop();
}
}};
// Macroop disassembly: prints only the s-register pair from the encoding;
// a0/a1 and the move direction are not shown.
def template CmMvExecute {{
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(StackRegs[machInst.r1s])
<< ", " << registerName(StackRegs[machInst.r2s]);
return ss.str();
}
}};
// Register-to-register move microop shared by the Zcmp macroops.
def template CmMvMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, RegId push_reg, RegId pop_reg);
protected:
using %(base_class)s::%(base_class)s;
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(
Addr, const loader::SymbolTable *) const override;
private:
%(reg_idx_arr_decl)s;
};
}};
def template CmMvMicroConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, RegId push_reg, RegId pop_reg)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
}};
def template CmMvMicroExecute {{
Fault
%(class_name)s::execute(
ExecContext *xc, trace::InstRecord *traceData) const
{
%(op_decl)s;
%(op_rd)s;
%(code)s;
%(op_wb)s;
return NoFault;
}
std::string
%(class_name)s::generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const
{
std::stringstream ss;
// NOTE(review): operands are separated by a bare space here, while the
// other Zcmp disassembly uses ", " -- possibly unintended.
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ' '
<< registerName(srcRegIdx(0));
return ss.str();
}
}};
def format CmPush(*flags) {{
    # cm.push: macroop that stores the rlist registers to the stack and then
    # decrements sp.  It expands into one store microop per saved register
    # (RV32: sw, RV64: sd) followed by an sp-adjust microop.
    code = ''
    macro_iop = InstObjParams(name, Name, 'CmMacroInst', code, flags)
    header_output = CmPushDeclare.subst(macro_iop)
    decoder_output = CmPushConstructor.subst(macro_iop)
    exec_output = CmPushExecute.subst(macro_iop)
    decode_block = BasicDecode.subst(macro_iop)
    # RV32 store microop.  Bug fix: the mnemonic was 'lw', which made this
    # store microop ('Mem_sw = CmPushReg_sw', CmStoreMicro* templates)
    # disassemble as a load; cm.push saves registers with sw.
    memacc_code = 'Mem_sw = CmPushReg_sw;'
    ea_code = 'EA = rvSext(sp + offset);'
    micro32_iop = InstObjParams('sw', f'{Name}32MicroInst', 'RiscvMicroInst',
        {'ea_code': ea_code, 'memacc_code': memacc_code},
        flags)
    mem_flags = [getAlignFlag(micro32_iop)]
    s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
    micro32_iop.constructor += s
    header_output += CmStoreMicroDeclare.subst(micro32_iop)
    decoder_output += CmStoreMicroConstructor.subst(micro32_iop)
    exec_output += CmStoreMicroExecute.subst(micro32_iop) \
        + CmStoreMicroInitiateAcc.subst(micro32_iop) \
        + CmStoreMicroCompleteAcc.subst(micro32_iop)
    # RV64 store microop (was 'ld'; this microop stores, so use 'sd').
    memacc_code = 'Mem = CmPushReg;'
    ea_code = 'EA = rvSext(sp + offset);'
    micro64_iop = InstObjParams('sd', f'{Name}64MicroInst', 'RiscvMicroInst',
        {'ea_code': ea_code, 'memacc_code': memacc_code},
        flags)
    mem_flags = [getAlignFlag(micro64_iop)]
    s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
    micro64_iop.constructor += s
    header_output += CmStoreMicroDeclare.subst(micro64_iop)
    decoder_output += CmStoreMicroConstructor.subst(micro64_iop)
    exec_output += CmStoreMicroExecute.subst(micro64_iop) \
        + CmStoreMicroInitiateAcc.subst(micro64_iop) \
        + CmStoreMicroCompleteAcc.subst(micro64_iop)
    # Final microop: the sp adjustment (addi sp, sp, adj).
    code = 'spd = rvSext(sp + adj);'
    sp_adj_iop = InstObjParams('addi', f'{Name}SpAdjMicroInst',
        'RiscvMicroInst', code, flags)
    header_output += SpAdjMicroDeclare.subst(sp_adj_iop)
    decoder_output += SpAdjMicroConstructor.subst(sp_adj_iop)
    exec_output += SpAdjMicroExecute.subst(sp_adj_iop)
}};
def format CmPop(is_ret='False', has_a0='False', *flags) {{
    # cm.pop / cm.popret / cm.popretz: load the rlist registers from the
    # stack, adjust sp, optionally zero a0 (popretz) and return (popret*).
    # The defaults are the strings 'False' (not the bools) because format
    # arguments arrive as strings and are passed through eval(); a bool
    # default would make eval() raise TypeError when an argument is omitted.
    code = ''
    # NOTE(review): this discards any flags passed into the format (unlike
    # CmPush, which forwards them); kept as-is to preserve behavior.
    flags = []
    has_a0 = eval(has_a0)
    is_ret = eval(is_ret)
    move_a0_desc = ''
    return_desc = ''
    if has_a0:
        # cm.popretz must zero a0.  The mv microop constructor takes
        # (machInst, push_reg /* source */, pop_reg /* destination */),
        # matching the 'CmPopReg = CmPushReg;' microop code below, so x0
        # must be the source and a0 the destination.  Bug fix: the previous
        # argument order produced x0 = a0, a no-op that left a0 unchanged.
        move_a0_desc = rf'''
        cur_inst = new {Name}MvMicroInst(
            machInst, int_reg::Zero, ReturnValueReg);
        microops.emplace_back(cur_inst);
        '''
    if is_ret:
        return_desc = rf'''
        cur_inst = new {Name}RetMicroInst(machInst);
        microops.emplace_back(cur_inst);
        '''
    macro_iop = InstObjParams(name, Name, 'CmMacroInst',
        {'code': code, 'move_a0_desc': move_a0_desc,
         'return_desc': return_desc},
        flags)
    header_output = CmPopDeclare.subst(macro_iop)
    decoder_output = CmPopConstructor.subst(macro_iop)
    exec_output = CmPopExecute.subst(macro_iop)
    decode_block = BasicDecode.subst(macro_iop)
    # RV32 load microop (lw) restoring one register from its stack slot.
    memacc_code = 'CmPopReg_sw = Mem_sw;'
    ea_code = 'EA = rvSext(sp + offset);'
    micro32_iop = InstObjParams('lw', f'{Name}32MicroInst', 'RiscvMicroInst',
        {'ea_code': ea_code, 'memacc_code': memacc_code},
        flags)
    mem_flags = [getAlignFlag(micro32_iop)]
    s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
    micro32_iop.constructor += s
    header_output += CmLoadMicroDeclare.subst(micro32_iop)
    decoder_output += CmLoadMicroConstructor.subst(micro32_iop)
    exec_output += CmLoadMicroExecute.subst(micro32_iop) \
        + CmLoadMicroInitiateAcc.subst(micro32_iop) \
        + CmLoadMicroCompleteAcc.subst(micro32_iop)
    # RV64 load microop (ld).
    memacc_code = 'CmPopReg = Mem;'
    ea_code = 'EA = rvSext(sp + offset);'
    micro64_iop = InstObjParams('ld', f'{Name}64MicroInst', 'RiscvMicroInst',
        {'ea_code': ea_code, 'memacc_code': memacc_code},
        flags)
    mem_flags = [getAlignFlag(micro64_iop)]
    s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
    micro64_iop.constructor += s
    header_output += CmLoadMicroDeclare.subst(micro64_iop)
    decoder_output += CmLoadMicroConstructor.subst(micro64_iop)
    exec_output += CmLoadMicroExecute.subst(micro64_iop) \
        + CmLoadMicroInitiateAcc.subst(micro64_iop) \
        + CmLoadMicroCompleteAcc.subst(micro64_iop)
    # sp adjustment microop.
    code = 'spd = rvSext(sp + adj);'
    sp_adj_iop = InstObjParams('addi', f'{Name}SpAdjMicroInst',
        'RiscvMicroInst', code, flags)
    header_output += SpAdjMicroDeclare.subst(sp_adj_iop)
    decoder_output += SpAdjMicroConstructor.subst(sp_adj_iop)
    exec_output += SpAdjMicroExecute.subst(sp_adj_iop)
    if has_a0:
        # mv microop used to zero a0 for cm.popretz.
        code = 'CmPopReg = CmPushReg;'
        has_a0_iop = InstObjParams('mv', f'{Name}MvMicroInst',
            'RiscvMicroInst', code, flags)
        header_output += CmMvMicroDeclare.subst(has_a0_iop)
        decoder_output += CmMvMicroConstructor.subst(has_a0_iop)
        exec_output += CmMvMicroExecute.subst(has_a0_iop)
    if is_ret:
        # Indirect-return microop: jr ra with bit 0 of the target cleared.
        code = 'NPC = rvSext(ra & (~0x1));'
        ret_flags = ['IsIndirectControl', 'IsUncondControl', 'IsReturn']
        is_ret_iop = InstObjParams('jr', f'{Name}RetMicroInst',
            'RiscvMicroInst', code, ret_flags)
        header_output += CmRetMicroDeclare.subst(is_ret_iop)
        decoder_output += CmRetMicroConstructor.subst(is_ret_iop)
        exec_output += CmRetMicroExecute.subst(is_ret_iop)
}};
// cm.mvsa01: macroop of two mv microops copying a0/a1 into the selected
// s-registers.
def format CmMvsa01() {{
code = ''
flags = []
iop = InstObjParams(name, Name, 'RiscvMacroInst', code, flags)
header_output = CmMvDeclare.subst(iop)
decoder_output = CmMvsa01Constructor.subst(iop)
exec_output = CmMvExecute.subst(iop)
decode_block = BasicDecode.subst(iop)
# Microop semantics: destination (pop_reg) = source (push_reg).
code = 'CmPopReg = CmPushReg;'
micro_iop = InstObjParams('mv', f'{Name}MvMicroInst', 'RiscvMicroInst',
code, flags)
header_output += CmMvMicroDeclare.subst(micro_iop)
decoder_output += CmMvMicroConstructor.subst(micro_iop)
exec_output += CmMvMicroExecute.subst(micro_iop)
}};
// cm.mva01s: same structure, moving the selected s-registers into a0/a1.
def format CmMva01s() {{
code = ''
flags = []
iop = InstObjParams(name, Name, 'RiscvMacroInst', code, flags)
header_output = CmMvDeclare.subst(iop)
decoder_output = CmMva01sConstructor.subst(iop)
exec_output = CmMvExecute.subst(iop)
decode_block = BasicDecode.subst(iop)
code = 'CmPopReg = CmPushReg;'
micro_iop = InstObjParams('mv', f'{Name}MvMicroInst', 'RiscvMicroInst',
code, flags)
header_output += CmMvMicroDeclare.subst(micro_iop)
decoder_output += CmMvMicroConstructor.subst(micro_iop)
exec_output += CmMvMicroExecute.subst(micro_iop)
}};

View File

@@ -55,6 +55,7 @@ output header {{
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/insts/unknown.hh"
#include "arch/riscv/insts/vector.hh"
#include "arch/riscv/insts/zcmp.hh"
#include "arch/riscv/interrupts.hh"
#include "cpu/static_inst.hh"
#include "mem/packet.hh"

View File

@@ -70,10 +70,14 @@ def operands {{
'Rp2': IntReg('ud', 'RP2 + 8', 'IsInteger', 3),
'ra': IntReg('ud', 'ReturnAddrReg', 'IsInteger', 1),
'sp': IntReg('ud', 'StackPointerReg', 'IsInteger', 2),
'spd': IntReg('ud', 'StackPointerReg', 'IsInteger', 1),
'a0': IntReg('ud', '10', 'IsInteger', 1),
'a1': IntReg('ud', '11', 'IsInteger', 2),
'CmPushReg': IntReg('ud', 'push_reg', 'IsInteger', 3),
'CmPopReg': IntReg('ud', 'pop_reg', 'IsInteger', 1),
'Fd': FloatRegOp('df', 'FD', 'IsFloating', 1),
'Fd_bits': FloatRegOp('ud', 'FD', 'IsFloating', 1),
'Fs1': FloatRegOp('df', 'FS1', 'IsFloating', 2),

View File

@@ -34,6 +34,7 @@
#include "arch/riscv/utility.hh"
#include "kern/linux/flag_tables.hh"
#include "kern/linux/linux.hh"
#include "base/bitfield.hh"
namespace gem5
{
@@ -42,6 +43,101 @@ class RiscvLinux : public Linux
{
public:
static const ByteOrder byteOrder = ByteOrder::little;
enum RiscvHwprobeKey
{
Mvendorid,
Marchid,
Mimpid,
BaseBehavior,
IMAExt0,
Cpuperf0,
ZicbozBlockSize,
HighestVirtAddress,
TimeCsrFreq,
MisalignedScalarPerf
};
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
#define RISCV_HWPROBE_MAX_KEY 9
BitUnion64(key_base_behavior_t)
Bitfield<0> ima;
EndBitUnion(key_base_behavior_t)
BitUnion64(key_ima_ext_0_t)
Bitfield<49> ZAWRS;
Bitfield<48> ZCMOP;
Bitfield<47> ZCF;
Bitfield<46> ZCD;
Bitfield<45> ZCB;
Bitfield<44> ZCA;
Bitfield<43> ZIMOP;
Bitfield<42> ZVE64D;
Bitfield<41> ZVE64F;
Bitfield<40> ZVE64X;
Bitfield<39> ZVE32F;
Bitfield<38> ZVE32X;
Bitfield<37> ZIHINTPAUSE;
Bitfield<36> ZICOND;
Bitfield<35> ZACAS;
Bitfield<34> ZTSO;
Bitfield<33> ZFA;
Bitfield<32> ZVFHMIN;
Bitfield<31> ZVFH;
Bitfield<30> ZIHINTNTL;
Bitfield<29> ZFHMIN;
Bitfield<28> ZFH;
Bitfield<27> ZVKT;
Bitfield<26> ZVKSH;
Bitfield<25> ZVKSED;
Bitfield<24> ZVKNHB;
Bitfield<22> ZVKNHA;
Bitfield<21> ZVKNED;
Bitfield<20> ZVKG;
Bitfield<19> ZVKB;
Bitfield<18> ZVBC;
Bitfield<17> ZVBB;
Bitfield<16> ZKT;
Bitfield<15> ZKSH;
Bitfield<14> ZKSED;
Bitfield<13> ZKNH;
Bitfield<12> ZKNE;
Bitfield<11> ZKND;
Bitfield<10> ZBKX;
Bitfield<9> ZBKC;
Bitfield<8> ZBKB;
Bitfield<7> ZBC;
Bitfield<6> ZICBOZ;
Bitfield<5> ZBS;
Bitfield<4> ZBB;
Bitfield<3> ZBA;
Bitfield<2> V;
Bitfield<1> C;
Bitfield<0> FD;
EndBitUnion(key_ima_ext_0_t)
enum MisalignedScalarPerf
{
Unknown,
Emulated,
Slow,
Fast,
Unsupported
};
/* Flags */
#define RISCV_HWPROBE_WHICH_CPUS (1 << 0)
struct riscv_hwprobe {
int64_t key;
uint64_t value;
};
typedef struct cpumask {
size_t size;
uint64_t bits[];
} cpumask_t;
};
class RiscvLinux64 : public RiscvLinux, public OpenFlagTable<RiscvLinux64>
@@ -195,6 +291,21 @@ class RiscvLinux64 : public RiscvLinux, public OpenFlagTable<RiscvLinux64>
uint32_t mem_unit;
};
struct tgt_clone_args
{
uint64_t flags;
uint64_t pidfd;
uint64_t child_tid;
uint64_t parent_tid;
uint64_t exit_signal;
uint64_t stack;
uint64_t stack_size;
uint64_t tls;
uint64_t set_tid;
uint64_t set_tid_size;
uint64_t cgroup;
};
static void
archClone(uint64_t flags,
Process *pp, Process *cp,

View File

@@ -44,6 +44,8 @@
#include <sys/syscall.h>
#include "arch/riscv/process.hh"
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/regs/misc.hh"
#include "base/loader/object_file.hh"
#include "base/trace.hh"
#include "cpu/thread_context.hh"
@@ -134,6 +136,388 @@ unameFunc32(SyscallDesc *desc, ThreadContext *tc, VPtr<Linux::utsname> name)
return 0;
}
// Dense Linux-style cpumask bit helpers: one bit per CPU, 64 CPUs packed
// per uint64_t word, matching the layout produced by CPU_SET() in
// get_cpu_online_mask() and by userspace cpu masks copied in/out.
//
// Bug fix: the word/bit index was previously computed with
// sizeof(uint64_t), which is 8 (bytes), not 64 (bits).  That packed only
// 8 CPUs per word and disagreed with the CPU_SET()/userspace layout for
// any CPU id >= 8.
static inline void
cpumask_set_cpu(unsigned int cpu, RiscvLinux::cpumask_t *dstp)
{
    // `size` is in bytes, so the mask holds size * 8 CPU bits.
    assert(cpu < dstp->size * 8);
    constexpr unsigned bits_per_word = sizeof(uint64_t) * 8;
    auto &word = dstp->bits[cpu / bits_per_word];
    word = insertBits(word, cpu % bits_per_word, 1);
}

static inline void
cpumask_clear_cpu(unsigned int cpu, RiscvLinux::cpumask_t *dstp)
{
    assert(cpu < dstp->size * 8);
    constexpr unsigned bits_per_word = sizeof(uint64_t) * 8;
    auto &word = dstp->bits[cpu / bits_per_word];
    word = insertBits(word, cpu % bits_per_word, 0);
}

static inline bool
cpumask_test_cpu(unsigned int cpu, const RiscvLinux::cpumask_t *cpumask)
{
    assert(cpu < cpumask->size * 8);
    constexpr unsigned bits_per_word = sizeof(uint64_t) * 8;
    return bits(cpumask->bits[cpu / bits_per_word],
                cpu % bits_per_word) != 0;
}
// Intersect src1p and src2p into dstp.  All three masks must share the
// same byte size; bits[] is processed one uint64_t word at a time.
static inline void
cpumask_and(RiscvLinux::cpumask_t *dstp, const RiscvLinux::cpumask_t *src1p,
const RiscvLinux::cpumask_t *src2p)
{
    assert(dstp->size == src1p->size);
    assert(dstp->size == src2p->size);
    const size_t nwords = dstp->size / sizeof(dstp->bits[0]);
    for (size_t w = 0; w < nwords; w++)
        dstp->bits[w] = src1p->bits[w] & src2p->bits[w];
}
// Return true when every word of the mask is zero (no CPU selected).
static inline bool
cpumask_empty(const RiscvLinux::cpumask_t *dstp)
{
    const size_t nwords = dstp->size / sizeof(dstp->bits[0]);
    for (size_t w = 0; w < nwords; w++) {
        if (dstp->bits[w])
            return false;
    }
    return true;
}
// Copy srcp into dstp; both masks must have the same byte size.
static inline void
cpumask_copy(RiscvLinux::cpumask_t *dstp, const RiscvLinux::cpumask_t *srcp)
{
assert(dstp->size == srcp->size);
memcpy(dstp->bits, srcp->bits, srcp->size);
}
// Zero every byte of the mask payload.
static inline void
cpumask_clear(RiscvLinux::cpumask_t *dstp)
{
memset(dstp->bits, 0, dstp->size);
}
// Allocate a zeroed mask sized for the simulated thread count.  Returns
// nullptr on allocation failure; release with cpumask_free().
// NOTE(review): size works out to ceil(threads / 8) * 8 bytes, i.e. one
// byte per CPU rounded to an 8-byte boundary -- larger than the one bit
// per CPU a dense mask needs, which is wasteful but harmless.
static inline RiscvLinux::cpumask_t *
cpumask_malloc(ThreadContext *tc)
{
RiscvLinux::cpumask_t *cpumask;
/* 8-bytes up-boundary alignment */
size_t size = (tc->getSystemPtr()->threads.size() + sizeof(cpumask->bits[0]) - 1) /
sizeof(cpumask->bits[0]) * sizeof(cpumask->bits[0]);
// bits[] is a flexible array member: allocate header plus payload.
cpumask = (RiscvLinux::cpumask_t *)malloc(sizeof(cpumask->size) + size);
if (cpumask != nullptr) {
cpumask->size = size;
cpumask_clear(cpumask);
}
return cpumask;
}
// Release a mask from cpumask_malloc(); free(nullptr) is a no-op.
static inline void
cpumask_free(RiscvLinux::cpumask_t *cpu_online_mask)
{
free(cpu_online_mask);
}
// A key is valid when it lies in the contiguous range
// [0, RISCV_HWPROBE_MAX_KEY].
static inline bool
riscv_hwprobe_key_is_valid(int64_t key)
{
    return !(key < 0 || key > RISCV_HWPROBE_MAX_KEY);
}
// Keys whose values are bitmasks (per-bit feature flags) rather than
// plain scalars; comparison semantics differ for these in
// riscv_hwprobe_pair_cmp().
static inline bool
hwprobe_key_is_bitmask(int64_t key)
{
    return key == RiscvLinux::BaseBehavior ||
           key == RiscvLinux::IMAExt0 ||
           key == RiscvLinux::Cpuperf0;
}
// Compare a probed pair against a user-requested pair.  Keys must match;
// for bitmask keys `pair` satisfies `other_pair` when it contains at least
// all of other_pair's bits, otherwise the values must be exactly equal.
static inline bool
riscv_hwprobe_pair_cmp(RiscvLinux::riscv_hwprobe *pair,
RiscvLinux::riscv_hwprobe *other_pair)
{
    if (pair->key != other_pair->key)
        return false;
    return hwprobe_key_is_bitmask(pair->key)
        ? (pair->value & other_pair->value) == other_pair->value
        : pair->value == other_pair->value;
}
// Build a mask with every simulated thread context marked online.
// Returns nullptr if allocation fails; caller frees with cpumask_free().
static inline RiscvLinux::cpumask_t *
get_cpu_online_mask(ThreadContext *tc)
{
RiscvLinux::cpumask_t *cpu_online_mask = cpumask_malloc(tc);
if (cpu_online_mask != nullptr) {
// CPU_SET writes the dense glibc cpu_set_t layout (bit i of the
// underlying byte array) directly into bits[].
for (int i = 0; i < tc->getSystemPtr()->threads.size(); i++) {
CPU_SET(i, (cpu_set_t *)&cpu_online_mask->bits);
}
}
return cpu_online_mask;
}
// Resolve one hwprobe (key, value) query against the simulated hart's
// CSRs and system configuration.  Unknown keys do not fail the syscall;
// they are reported back with key = -1 and value = 0.
// NOTE(review): the `cpus` argument is accepted but never read here, so
// every CPU in the mask is assumed to share a single configuration.
static void
hwprobe_one_pair(ThreadContext *tc, RiscvLinux::riscv_hwprobe *pair,
RiscvLinux::cpumask_t *cpus)
{
switch (pair->key) {
case RiscvLinux::Mvendorid:
pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MVENDORID).physIndex);
break;
case RiscvLinux::Marchid:
pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MARCHID).physIndex);
break;
case RiscvLinux::Mimpid:
pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MIMPID).physIndex);
break;
case RiscvLinux::BaseBehavior:
{
// IMA base behavior is reported only when I, M and A are all in misa.
MISA misa = tc->readMiscRegNoEffect(MISCREG_ISA);
RiscvLinux::key_base_behavior_t *base_behavior =
(RiscvLinux::key_base_behavior_t *)&pair->value;
if (misa.rvi && misa.rvm && misa.rva) {
base_behavior->ima = 1;
}
}
break;
case RiscvLinux::IMAExt0:
{
// F/D, C and V come from misa; the remaining extension bits below
// are advertised unconditionally.
MISA misa = tc->readMiscRegNoEffect(MISCREG_ISA);
RiscvLinux::key_ima_ext_0_t *ext = (RiscvLinux::key_ima_ext_0_t *)&pair->value;
if (misa.rvf && misa.rvd) ext->FD = 1;
if (misa.rvc) ext->C = 1;
if (misa.rvv) ext->V = 1;
ext->ZBA = 1;
ext->ZBB = 1;
ext->ZBS = 1;
ext->ZICBOZ = 1;
ext->ZBC = 1;
ext->ZBKB = 1;
ext->ZBKC = 1;
ext->ZBKX = 1;
ext->ZKND = 1;
ext->ZKNE = 1;
ext->ZKNH = 1;
ext->ZKSED = 1;
ext->ZKSH = 1;
ext->ZKT = 1;
ext->ZFH = 1;
ext->ZFHMIN = 1;
ext->ZVFH = 1;
ext->ZVFHMIN = 1;
ext->ZICOND = 1;
ext->ZVE64D = 1;
ext->ZCB = 1;
ext->ZCD = 1;
ext->ZCF = 1;
}
break;
case RiscvLinux::Cpuperf0:
case RiscvLinux::MisalignedScalarPerf:
// Misaligned scalar accesses are reported as "slow" for both keys.
pair->value = RiscvLinux::Slow;
break;
case RiscvLinux::ZicbozBlockSize:
pair->value = tc->getSystemPtr()->cacheLineSize();
break;
case RiscvLinux::HighestVirtAddress:
pair->value = tc->getProcessPtr()->memState->getMmapEnd();
break;
/*
* For forward compatibility, unknown keys don't fail the whole
* call, but get their element key set to -1 and value set to 0
* indicating they're unrecognized.
*/
default:
pair->key = -1;
pair->value = 0;
break;
}
}
// riscv_hwprobe "get values" mode: fill each requested (key, value) pair
// with the configuration shared by the CPUs in cpus_user (or all online
// CPUs when cpusetsize == 0 and cpus_user is null).  Returns 0 on
// success, or a negated errno (-EINVAL / -ENOMEM).
template <class OS>
static int
hwprobe_get_values(ThreadContext *tc, VPtr<> pairs, typename OS::size_t pair_count,
typename OS::size_t cpusetsize, VPtr<> cpus_user, unsigned int flags)
{
/* Check the reserved flags. */
if (flags != 0) {
return -EINVAL;
}
RiscvLinux::cpumask_t *cpu_online_mask = get_cpu_online_mask(tc);
if (cpu_online_mask == nullptr) {
return -ENOMEM;
}
RiscvLinux::cpumask_t *cpus = cpumask_malloc(tc);
if (cpus == nullptr) {
cpumask_free(cpu_online_mask);
return -ENOMEM;
}
// Clamp the user-supplied mask size to the simulated CPU count.
if (cpusetsize > cpu_online_mask->size) {
cpusetsize = cpu_online_mask->size;
}
RiscvLinux::riscv_hwprobe *pair;
BufferArg pairs_buf(pairs, sizeof(RiscvLinux::riscv_hwprobe) * pair_count);
/*
* The interface supports taking in a CPU mask, and returns values that
* are consistent across that mask. Allow userspace to specify NULL and
* 0 as a shortcut to all online CPUs.
*/
if (cpusetsize == 0 && !cpus_user) {
cpumask_copy(cpus, cpu_online_mask);
cpusetsize = cpu_online_mask->size;
} else {
// Pull the user mask into simulator memory and shrink both masks to
// the (clamped) user-provided size before intersecting.
BufferArg cpus_user_buf(cpus_user, cpusetsize);
cpus_user_buf.copyIn(SETranslatingPortProxy(tc));
cpu_online_mask->size = cpusetsize;
cpus->size = cpusetsize;
memcpy(cpus->bits, cpus_user_buf.bufferPtr(), cpusetsize);
/*
* Userspace must provide at least one online CPU, without that
* there's no way to define what is supported.
*/
cpumask_and(cpus, cpus, cpu_online_mask);
if (cpumask_empty(cpus)) {
cpumask_free(cpu_online_mask);
cpumask_free(cpus);
return -EINVAL;
}
}
// Resolve every pair in place in the guest buffer and write it back.
pairs_buf.copyIn(SETranslatingPortProxy(tc));
pair = (RiscvLinux::riscv_hwprobe *)pairs_buf.bufferPtr();
for (size_t i = 0; i < pair_count; i++, pair++) {
pair->value = 0;
hwprobe_one_pair(tc, pair, cpus);
}
pairs_buf.copyOut(SETranslatingPortProxy(tc));
cpumask_free(cpu_online_mask);
cpumask_free(cpus);
return 0;
}
// riscv_hwprobe "which cpus" mode: given fully-filled (key, value) pairs,
// narrow the user's CPU mask down to the CPUs that satisfy every pair.
// An invalid key aborts the scan, reports key = -1 and clears the user
// mask.  Returns 0 on success or a negated errno.
template <class OS>
static int
hwprobe_get_cpus(ThreadContext *tc, VPtr<> pairs, typename OS::size_t pair_count,
typename OS::size_t cpusetsize, VPtr<> cpus_user, unsigned int flags)
{
// This mode requires exactly the WHICH_CPUS flag and a non-empty mask.
if (flags != RISCV_HWPROBE_WHICH_CPUS) {
return -EINVAL;
}
if (cpusetsize == 0 || !cpus_user) {
return -EINVAL;
}
RiscvLinux::cpumask_t *cpu_online_mask = get_cpu_online_mask(tc);
if (cpu_online_mask == nullptr) {
return -ENOMEM;
}
RiscvLinux::cpumask_t *cpus = cpumask_malloc(tc);
if (cpus == nullptr) {
cpumask_free(cpu_online_mask);
return -ENOMEM;
}
// Scratch mask holding a single CPU at a time for the per-CPU probe.
RiscvLinux::cpumask_t *one_cpu = cpumask_malloc(tc);
if (one_cpu == nullptr) {
cpumask_free(cpu_online_mask);
cpumask_free(cpus);
return -ENOMEM;
}
if (cpusetsize > cpu_online_mask->size) {
cpusetsize = cpu_online_mask->size;
}
RiscvLinux::riscv_hwprobe *pair;
BufferArg cpus_user_buf(cpus_user, cpusetsize);
cpus_user_buf.copyIn(SETranslatingPortProxy(tc));
memcpy(cpus->bits, cpus_user_buf.bufferPtr(), cpusetsize);
// An all-zero user mask means "consider every online CPU".
if (cpumask_empty(cpus)) {
cpumask_copy(cpus, cpu_online_mask);
cpusetsize = cpu_online_mask->size;
}
cpumask_and(cpus, cpus, cpu_online_mask);
BufferArg pairs_buf(pairs, sizeof(RiscvLinux::riscv_hwprobe) * pair_count);
pairs_buf.copyIn(SETranslatingPortProxy(tc));
pair = (RiscvLinux::riscv_hwprobe *)pairs_buf.bufferPtr();
for (size_t i = 0; i < pair_count; i++, pair++) {
if (!riscv_hwprobe_key_is_valid(pair->key)) {
*pair = (RiscvLinux::riscv_hwprobe){ .key = -1, .value = 0 };
memset(cpus_user_buf.bufferPtr(), 0, cpusetsize);
break;
}
// Probe each candidate CPU individually; drop CPUs whose probed
// value does not satisfy the requested pair.
RiscvLinux::riscv_hwprobe tmp =
(RiscvLinux::riscv_hwprobe){ .key = pair->key, .value = 0 };
for (int cpu = 0; cpu < cpusetsize * 8; cpu++) {
if (!cpumask_test_cpu(cpu, cpus)) {
continue;
}
cpumask_set_cpu(cpu, one_cpu);
hwprobe_one_pair(tc, &tmp, one_cpu);
if (!riscv_hwprobe_pair_cmp(&tmp, pair)) {
cpumask_clear_cpu(cpu, cpus);
}
cpumask_clear_cpu(cpu, one_cpu);
}
}
// Write both the (possibly rewritten) pairs and the narrowed mask back.
pairs_buf.copyOut(SETranslatingPortProxy(tc));
cpus_user_buf.copyOut(SETranslatingPortProxy(tc));
cpumask_free(cpu_online_mask);
cpumask_free(cpus);
cpumask_free(one_cpu);
return 0;
}
// Entry point for the riscv_hwprobe syscall: dispatch on the WHICH_CPUS
// flag to either the "find matching CPUs" or the "get values" handler.
// `desc` is unused; the other arguments are forwarded unchanged.
template <class OS>
static SyscallReturn
riscvHWProbeFunc(SyscallDesc *desc, ThreadContext *tc, VPtr<> pairs,
typename OS::size_t pair_count, typename OS::size_t cpusetsize,
VPtr<> cpus_user, unsigned int flags)
{
    const bool which_cpus = (flags & RISCV_HWPROBE_WHICH_CPUS) != 0;
    return which_cpus
        ? hwprobe_get_cpus<OS>(tc, pairs, pair_count, cpusetsize,
                               cpus_user, flags)
        : hwprobe_get_values<OS>(tc, pairs, pair_count, cpusetsize,
                                 cpus_user, flags);
}
SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
{ 0, "io_setup" },
{ 1, "io_destroy" },
@@ -382,6 +766,7 @@ SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
{ 241, "perf_event_open" },
{ 242, "accept4" },
{ 243, "recvmmsg" },
{ 258, "riscv_hwprobe", riscvHWProbeFunc<RiscvLinux64> },
{ 260, "wait4", wait4Func<RiscvLinux64> },
{ 261, "prlimit64", prlimitFunc<RiscvLinux64> },
{ 262, "fanotify_init" },
@@ -410,6 +795,33 @@ SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
{ 285, "copy_file_range" },
{ 286, "preadv2" },
{ 287, "pwritev2" },
{ 424, "pidfd_send_signal" },
{ 425, "io_uring_setup" },
{ 426, "io_uring_enter" },
{ 427, "io_uring_register" },
{ 428, "open_tree" },
{ 429, "move_mount" },
{ 430, "fsopen" },
{ 431, "fsconfig" },
{ 432, "fsmount" },
{ 433, "fspick" },
{ 434, "pidfd_open" },
{ 435, "clone3", clone3Func<RiscvLinux64> },
{ 436, "close_range" },
{ 437, "openat2" },
{ 438, "pidfd_getfd" },
{ 439, "faccessat2" },
{ 440, "process_madvise" },
{ 441, "epoll_pwait2" },
{ 442, "mount_setattr" },
{ 443, "quotactl_fd" },
{ 444, "landlock_create_ruleset" },
{ 445, "landlock_add_rule" },
{ 446, "landlock_restrict_self" },
{ 447, "memfd_secret" },
{ 448, "process_mrelease" },
{ 449, "futex_waitv" },
{ 450, "set_mempolicy_home_node" },
{ 1024, "open", openFunc<RiscvLinux64> },
{ 1025, "link", linkFunc },
{ 1026, "unlink", unlinkFunc },
@@ -721,6 +1133,7 @@ SyscallDescTable<SEWorkload::SyscallABI32> EmuLinux::syscallDescs32 = {
{ 241, "perf_event_open" },
{ 242, "accept4" },
{ 243, "recvmmsg" },
{ 258, "riscv_hwprobe", riscvHWProbeFunc<RiscvLinux32> },
{ 260, "wait4", wait4Func<RiscvLinux32> },
{ 261, "prlimit64", prlimitFunc<RiscvLinux32> },
{ 262, "fanotify_init" },

View File

@@ -149,6 +149,18 @@ inline constexpr RegId ArgumentRegs[] = {
int_reg::A4, int_reg::A5, int_reg::A6, int_reg::A7
};
const std::vector<RegId> PushPopRegList = {
int_reg::S11, int_reg::S10, int_reg::S9, int_reg::S8,
int_reg::S7, int_reg::S6, int_reg::S5, int_reg::S4,
int_reg::S3, int_reg::S2, int_reg::S1, int_reg::S0,
int_reg::Ra
};
inline constexpr RegId StackRegs[] = {
int_reg::S0, int_reg::S1, int_reg::S2, int_reg::S3,
int_reg::S4, int_reg::S5, int_reg::S6, int_reg::S7,
};
} // namespace RiscvISA
} // namespace gem5

View File

@@ -58,6 +58,7 @@ BitUnion64(ExtMachInst)
// Decoder state
Bitfield<63, 62> rv_type;
Bitfield<61> compressed;
Bitfield<60> enable_zcd;
// More bits for vector extension
Bitfield<57, 41> vl; // [0, 2**16]
Bitfield<40> vill;
@@ -126,6 +127,8 @@ BitUnion64(ExtMachInst)
Bitfield< 6, 2> rc2;
Bitfield< 9, 7> rp1;
Bitfield< 4, 2> rp2;
Bitfield< 9, 7> r1s;
Bitfield< 4, 2> r2s;
Bitfield<11, 7> fc1;
Bitfield< 6, 2> fc2;
Bitfield< 4, 2> fp2;
@@ -144,6 +147,8 @@ BitUnion64(ExtMachInst)
Bitfield<12, 10> cimm3;
Bitfield< 6, 5> cimm2;
Bitfield<12> cimm1;
Bitfield< 7, 4> rlist;
Bitfield< 3, 2> spimm;
// Pseudo instructions
Bitfield<31, 25> m5func;
// vector

View File

@@ -41,8 +41,6 @@ namespace gem5
namespace X86ISA
{
X86ISAInst::MicrocodeRom Decoder::microcodeRom;
Decoder::State
Decoder::doResetState()
{
@@ -671,9 +669,6 @@ Decoder::doImmediateState()
return nextState;
}
Decoder::InstBytes Decoder::dummy;
Decoder::InstCacheMap Decoder::instCacheMap;
StaticInstPtr
Decoder::decode(ExtMachInst mach_inst, Addr addr)
{

View File

@@ -60,19 +60,19 @@ class Decoder : public InstDecoder
// These are defined and documented in decoder_tables.cc
static const uint8_t SizeTypeToSize[3][10];
typedef const uint8_t ByteTable[256];
static ByteTable Prefixes[2];
static const ByteTable Prefixes[2];
static ByteTable UsesModRMOneByte;
static ByteTable UsesModRMTwoByte;
static ByteTable UsesModRMThreeByte0F38;
static ByteTable UsesModRMThreeByte0F3A;
static const ByteTable UsesModRMOneByte;
static const ByteTable UsesModRMTwoByte;
static const ByteTable UsesModRMThreeByte0F38;
static const ByteTable UsesModRMThreeByte0F3A;
static ByteTable ImmediateTypeOneByte;
static ByteTable ImmediateTypeTwoByte;
static ByteTable ImmediateTypeThreeByte0F38;
static ByteTable ImmediateTypeThreeByte0F3A;
static const ByteTable ImmediateTypeOneByte;
static const ByteTable ImmediateTypeTwoByte;
static const ByteTable ImmediateTypeThreeByte0F38;
static const ByteTable ImmediateTypeThreeByte0F3A;
static X86ISAInst::MicrocodeRom microcodeRom;
X86ISAInst::MicrocodeRom microcodeRom;
protected:
using MachInst = uint64_t;
@@ -88,7 +88,7 @@ class Decoder : public InstDecoder
{}
};
static InstBytes dummy;
InstBytes dummy;
// The bytes to be predecoded.
MachInst fetchChunk;
@@ -244,7 +244,7 @@ class Decoder : public InstDecoder
decode_cache::InstMap<ExtMachInst> *instMap = nullptr;
typedef std::unordered_map<
CacheKey, decode_cache::InstMap<ExtMachInst> *> InstCacheMap;
static InstCacheMap instCacheMap;
InstCacheMap instCacheMap;
StaticInstPtr decodeInst(ExtMachInst mach_inst);

View File

@@ -350,9 +350,9 @@ class Rate : public Base
"otherwise, it would be a Ratio");
private:
Rate<T1,T2>() {}
Rate() {}
public:
Rate<T1,T2>(Rate<T1,T2> const&) = delete;
Rate(Rate const&) = delete;
void operator=(Rate<T1,T2> const&) = delete;
static Rate<T1,T2>*
get()

View File

@@ -240,7 +240,11 @@ BaseCPU::postInterrupt(ThreadID tid, int int_num, int index)
// Only wake up syscall emulation if it is not waiting on a futex.
// This is to model the fact that instructions such as ARM SEV
// should wake up a WFE sleep, but not a futex syscall WAIT.
if (FullSystem || !system->futexMap.is_waiting(threadContexts[tid]))
//
// For RISC-V, the WFI sleep wake up is implementation defined.
// The SiFive WFI wake up the hart only if mip & mie != 0
if ((FullSystem && interrupts[tid]->isWakeUp()) ||
!system->futexMap.is_waiting(threadContexts[tid]))
wakeup(tid);
}
@@ -855,13 +859,13 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
"Simulator op (including micro ops) rate (op/s)")
{
simInsts
.functor(BaseCPU::numSimulatedInsts)
.functor(BaseCPU::GlobalStats::numSimulatedInsts)
.precision(0)
.prereq(simInsts)
;
simOps
.functor(BaseCPU::numSimulatedOps)
.functor(BaseCPU::GlobalStats::numSimulatedOps)
.precision(0)
.prereq(simOps)
;

View File

@@ -156,6 +156,30 @@ class BaseCPU : public ClockedObject
statistics::Formula hostInstRate;
statistics::Formula hostOpRate;
Counter previousInsts = 0;
Counter previousOps = 0;
// Instructions committed since the last stats reset (running total minus
// the snapshot taken by resetStats()).
static Counter
numSimulatedInsts()
{
return totalNumSimulatedInsts() - (globalStats->previousInsts);
}
// Ops (including micro-ops) committed since the last stats reset.
static Counter
numSimulatedOps()
{
return totalNumSimulatedOps() - (globalStats->previousOps);
}
// Snapshot the running totals so the per-interval deltas restart at
// zero, then perform the normal group reset.
void
resetStats() override
{
previousInsts = totalNumSimulatedInsts();
previousOps = totalNumSimulatedOps();
statistics::Group::resetStats();
}
};
/**
@@ -609,7 +633,7 @@ class BaseCPU : public ClockedObject
static int numSimulatedCPUs() { return cpuList.size(); }
static Counter
numSimulatedInsts()
totalNumSimulatedInsts()
{
Counter total = 0;
@@ -621,7 +645,7 @@ class BaseCPU : public ClockedObject
}
static Counter
numSimulatedOps()
totalNumSimulatedOps()
{
Counter total = 0;

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2017 ARM Limited
# Copyright (c) 2017, 2024 Arm Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@@ -57,6 +57,7 @@ class DefaultFUPool(FUPool):
FP_MultDiv(),
ReadPort(),
SIMD_Unit(),
Matrix_Unit(),
PredALU(),
WritePort(),
RdWrPort(),

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2010, 2017, 2020 ARM Limited
# Copyright (c) 2010, 2017, 2020, 2024 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -109,10 +109,27 @@ class SIMD_Unit(FUDesc):
OpDesc(opClass="SimdExt"),
OpDesc(opClass="SimdFloatExt"),
OpDesc(opClass="SimdConfig"),
OpDesc(opClass="SimdAes"),
OpDesc(opClass="SimdAesMix"),
OpDesc(opClass="SimdSha1Hash"),
OpDesc(opClass="SimdSha1Hash2"),
OpDesc(opClass="SimdSha256Hash"),
OpDesc(opClass="SimdSha256Hash2"),
OpDesc(opClass="SimdShaSigma2"),
OpDesc(opClass="SimdShaSigma3"),
]
count = 4
# Functional unit handling the matrix-extension op classes; a single unit
# serves all three matrix op classes.
class Matrix_Unit(FUDesc):
opList = [
OpDesc(opClass="Matrix"),
OpDesc(opClass="MatrixMov"),
OpDesc(opClass="MatrixOP"),
]
count = 1
# Functional unit for SIMD predicate ALU operations.
class PredALU(FUDesc):
opList = [OpDesc(opClass="SimdPredAlu")]
count = 1

View File

@@ -122,7 +122,7 @@ ElasticTrace::regEtraceListeners()
{
assert(!allProbesReg);
inform("@%llu: No. of instructions committed = %llu, registering elastic"
" probe listeners", curTick(), cpu->numSimulatedInsts());
" probe listeners", curTick(), cpu->totalNumSimulatedInsts());
// Create new listeners: provide method to be called upon a notify() for
// each probe point.
listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,

View File

@@ -38,6 +38,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.objects.ClockedObject import ClockedObject
from m5.objects.IndexingPolicies import *
from m5.objects.ReplacementPolicies import *
from m5.params import *
from m5.proxy import *
from m5.SimObject import *
@@ -83,6 +85,38 @@ class BranchTargetBuffer(ClockedObject):
numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
# Abstract base for BTB indexing policies, wrapping the C++
# IndexingPolicyTemplate specialized for BTB tags.
class BTBIndexingPolicy(SimObject):
type = "BTBIndexingPolicy"
abstract = True
cxx_class = "gem5::IndexingPolicyTemplate<gem5::BTBTagType>"
cxx_header = "cpu/pred/btb_entry.hh"
cxx_template_params = ["class Types"]
# Get the associativity
assoc = Param.Int(Parent.assoc, "associativity")
# Set-associative BTB indexing: the set index is taken from the PC after
# dropping `set_shift` low-order bits.
class BTBSetAssociative(BTBIndexingPolicy):
type = "BTBSetAssociative"
cxx_class = "gem5::BTBSetAssociative"
cxx_header = "cpu/pred/btb_entry.hh"
# Get the number of entries in the BTB from the parent
num_entries = Param.Unsigned(
Parent.numEntries, "Number of entries in the BTB"
)
# Set shift for the index. Ignore lower 2 bits for a 4 byte instruction.
set_shift = Param.Unsigned(2, "Number of bits to shift PC to get index")
# Total number of bits in the tag.
# This is above the index and offset bit
tag_bits = Param.Unsigned(64, "number of bits in the tag")
# Number of threads sharing the BTB
numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
class SimpleBTB(BranchTargetBuffer):
type = "SimpleBTB"
cxx_class = "gem5::branch_prediction::SimpleBTB"
@@ -93,6 +127,19 @@ class SimpleBTB(BranchTargetBuffer):
instShiftAmt = Param.Unsigned(
Parent.instShiftAmt, "Number of bits to shift instructions by"
)
associativity = Param.Unsigned(1, "BTB associativity")
btbReplPolicy = Param.BaseReplacementPolicy(
LRURP(), "BTB replacement policy"
)
btbIndexingPolicy = Param.BTBIndexingPolicy(
BTBSetAssociative(
assoc=Parent.associativity,
num_entries=Parent.numEntries,
set_shift=Parent.instShiftAmt,
numThreads=1,
),
"BTB indexing policy",
)
class IndirectPredictor(SimObject):

View File

@@ -45,7 +45,7 @@ SimObject('BranchPredictor.py',
sim_objects=[
'BranchPredictor',
'IndirectPredictor', 'SimpleIndirectPredictor',
'BranchTargetBuffer', 'SimpleBTB',
'BranchTargetBuffer', 'SimpleBTB', 'BTBIndexingPolicy', 'BTBSetAssociative',
'ReturnAddrStack',
'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor',
'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB',

288
src/cpu/pred/btb_entry.hh Normal file
View File

@@ -0,0 +1,288 @@
/*
* Copyright (c) 2024 Pranith Kumar
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Declaration of a BTB entry and BTB indexing policy.
*/
#ifndef __CPU_PRED_BTB_ENTRY_HH__
#define __CPU_PRED_BTB_ENTRY_HH__
#include <vector>
#include "arch/generic/pcstate.hh"
#include "base/intmath.hh"
#include "base/types.hh"
#include "cpu/static_inst.hh"
#include "mem/cache/replacement_policies/replaceable_entry.hh"
#include "mem/cache/tags/indexing_policies/base.hh"
#include "params/BTBIndexingPolicy.hh"
#include "params/BTBSetAssociative.hh"
namespace gem5 {
// Tag/key type descriptor consumed by IndexingPolicyTemplate to produce a
// BTB-specific indexing policy (see the BTBIndexingPolicy alias below).
class BTBTagType
{
public:
// Lookup key for a BTB entry: an instruction address plus the owning
// thread's id.
struct KeyType
{
Addr address;
ThreadID tid;
};
// Parameter struct the templated indexing policy is constructed from.
using Params = BTBIndexingPolicyParams;
};
// Concrete indexing-policy type used by the BTB.
// NOTE(review): this is an explicit instantiation in a header — confirm it
// is emitted in exactly one translation unit as intended.
using BTBIndexingPolicy = IndexingPolicyTemplate<BTBTagType>;
template class IndexingPolicyTemplate<BTBTagType>;
/**
 * Set-associative indexing policy for the BTB. The thread id is hashed
 * into the set index so threads sharing the BTB spread across sets.
 */
class BTBSetAssociative : public BTBIndexingPolicy
{
public:
PARAMS(BTBSetAssociative);
using KeyType = BTBTagType::KeyType;
/**
 * Construct from params: num_entries total entries, set_shift low PC
 * bits ignored when indexing (e.g. 2 for 4-byte instructions), and
 * tag_bits bits of tag kept above the index.
 */
BTBSetAssociative(const Params &p)
: BTBIndexingPolicy(p, p.num_entries, p.set_shift),
tagMask(mask(p.tag_bits))
{
setNumThreads(p.numThreads);
}
protected:
/**
 * Extract the set index for the instruction PC based on tid.
 * The tid is XORed into the upper index bits so different threads map
 * the same PC to different sets.
 * (setShift, tagShift and setMask are inherited from the base policy.)
 */
uint32_t
extractSet(const KeyType &key) const
{
return ((key.address >> setShift)
^ (key.tid << (tagShift - setShift - log2NumThreads)))
& setMask;
}
public:
/**
 * Find all possible entries for insertion and replacement of an address:
 * every way of the set selected by extractSet(key).
 */
std::vector<ReplaceableEntry*>
getPossibleEntries(const KeyType &key) const override
{
auto set_idx = extractSet(key);
assert(set_idx < sets.size());
return sets[set_idx];
}
/**
 * Set number of threads sharing the BTB (stored as log2 for use in the
 * set hash above).
 */
void
setNumThreads(unsigned num_threads)
{
log2NumThreads = log2i(num_threads);
}
/**
 * Generate the tag from the given address: the bits above tagShift,
 * masked down to tag_bits.
 */
Addr
extractTag(const Addr addr) const override
{
return (addr >> tagShift) & tagMask;
}
// Rebuilding a full address from (tag, set) is not needed by the BTB;
// deliberately left unimplemented.
Addr regenerateAddr(const KeyType &key,
const ReplaceableEntry* entry) const override
{
panic("Not implemented!");
return 0;
}
private:
/** Mask selecting tag_bits bits of the shifted address. */
const uint64_t tagMask;
/** log2 of the number of threads sharing the BTB. */
unsigned log2NumThreads;
};
namespace branch_prediction
{
/**
 * A BTB entry: holds the branch target PC and the static branch
 * instruction, and is located through the BTB indexing policy via a
 * tag-extractor callback.
 */
class BTBEntry : public ReplaceableEntry
{
public:
using IndexingPolicy = gem5::BTBIndexingPolicy;
using KeyType = gem5::BTBTagType::KeyType;
using TagExtractor = std::function<Addr(Addr)>;
/** Construct an invalid entry holding the given tag extractor.
 * The sentinel tag {MaxAddr, -1} marks "no tag assigned".
 */
BTBEntry(TagExtractor ext)
: inst(nullptr), extractTag(ext), valid(false), tag({MaxAddr, -1})
{}
/** Update the target and instruction in the BTB entry.
* During insertion, only the tag (key) is updated; the payload
* (target + inst) is filled in afterwards through this method.
*/
void
update(const PCStateBase &_target,
StaticInstPtr _inst)
{
set(target, _target);
inst = _inst;
}
/**
* Checks if the given tag information corresponds to this entry's:
* the entry is valid, the tag bits of the address match, and the
* thread id matches.
*/
bool
match(const KeyType &key) const
{
return isValid() && (tag.address == extractTag(key.address))
&& (tag.tid == key.tid);
}
/**
* Insert the block by assigning it a tag and marking it valid. Touches
* block if it hadn't been touched previously.
*/
void
insert(const KeyType &key)
{
setValid();
setTag({extractTag(key.address), key.tid});
}
/** Copy constructor.
* NOTE(review): the ReplaceableEntry base (set/way bookkeeping) is
* default-initialized rather than copied — confirm this is intended.
*/
BTBEntry(const BTBEntry &other)
{
valid = other.valid;
tag = other.tag;
inst = other.inst;
extractTag = other.extractTag;
set(target, other.target);
}
/** Assignment operator; copies the target via set() and, like the copy
* constructor, leaves the ReplaceableEntry base untouched.
*/
BTBEntry& operator=(const BTBEntry &other)
{
valid = other.valid;
tag = other.tag;
inst = other.inst;
extractTag = other.extractTag;
set(target, other.target);
return *this;
}
/**
* Checks if the entry is valid.
*/
bool isValid() const { return valid; }
/**
* Get tag associated to this block.
*/
KeyType getTag() const { return tag; }
/** Invalidate the block and restore the sentinel tag. Its contents are
* no longer valid. */
void
invalidate()
{
valid = false;
setTag({MaxAddr, -1});
}
/** The entry's target. */
std::unique_ptr<PCStateBase> target;
/** Pointer to the static branch inst at this address */
StaticInstPtr inst;
/** Debug string: tag, tid, valid bit plus base-class position info. */
std::string
print() const override
{
return csprintf("tag: %#x tid: %d valid: %d | %s", tag.address, tag.tid,
isValid(), ReplaceableEntry::print());
}
protected:
/**
* Set tag associated to this block.
*/
void setTag(KeyType _tag) { tag = _tag; }
/** Set valid bit. The block must be invalid beforehand. */
void
setValid()
{
assert(!isValid());
valid = true;
}
private:
/** Callback used to extract the tag from the entry */
TagExtractor extractTag;
/**
* Valid bit. The contents of this entry are only valid if this bit is set.
* @sa invalidate()
* @sa insert()
*/
bool valid;
/** The entry's tag. */
KeyType tag;
};
} // namespace gem5::branch_prediction
/**
 * This helper generates a tag extractor function object which is
 * typically used by Replaceable entries indexed with the
 * BaseIndexingPolicy.
 * It decouples indexing from tagging: entries call the functor without
 * directly holding a pointer to the indexing policy, which resides in
 * the cache.
 */
static constexpr auto
genTagExtractor(BTBIndexingPolicy *ip)
{
return [ip] (Addr addr) { return ip->extractTag(addr); };
}
}
#endif //__CPU_PRED_BTB_ENTRY_HH__

View File

@@ -44,84 +44,38 @@
#include "base/trace.hh"
#include "debug/BTB.hh"
namespace gem5
{
namespace branch_prediction
namespace gem5::branch_prediction
{
SimpleBTB::SimpleBTB(const SimpleBTBParams &p)
: BranchTargetBuffer(p),
numEntries(p.numEntries),
tagBits(p.tagBits),
instShiftAmt(p.instShiftAmt),
log2NumThreads(floorLog2(p.numThreads))
btb("simpleBTB", p.numEntries, p.associativity,
p.btbReplPolicy, p.btbIndexingPolicy,
BTBEntry(genTagExtractor(p.btbIndexingPolicy)))
{
DPRINTF(BTB, "BTB: Creating BTB object.\n");
if (!isPowerOf2(numEntries)) {
if (!isPowerOf2(p.numEntries)) {
fatal("BTB entries is not a power of 2!");
}
btb.resize(numEntries);
for (unsigned i = 0; i < numEntries; ++i) {
btb[i].valid = false;
}
idxMask = numEntries - 1;
tagMask = (1 << tagBits) - 1;
tagShiftAmt = instShiftAmt + floorLog2(numEntries);
}
void
SimpleBTB::memInvalidate()
{
for (unsigned i = 0; i < numEntries; ++i) {
btb[i].valid = false;
}
btb.clear();
}
inline
unsigned
SimpleBTB::getIndex(Addr instPC, ThreadID tid)
{
// Need to shift PC over by the word offset.
return ((instPC >> instShiftAmt)
^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads)))
& idxMask;
}
inline
Addr
SimpleBTB::getTag(Addr instPC)
{
return (instPC >> tagShiftAmt) & tagMask;
}
SimpleBTB::BTBEntry *
BTBEntry *
SimpleBTB::findEntry(Addr instPC, ThreadID tid)
{
unsigned btb_idx = getIndex(instPC, tid);
Addr inst_tag = getTag(instPC);
assert(btb_idx < numEntries);
if (btb[btb_idx].valid
&& inst_tag == btb[btb_idx].tag
&& btb[btb_idx].tid == tid) {
return &btb[btb_idx];
}
return nullptr;
return btb.findEntry({instPC, tid});
}
bool
SimpleBTB::valid(ThreadID tid, Addr instPC)
{
BTBEntry *entry = findEntry(instPC, tid);
BTBEntry *entry = btb.findEntry({instPC, tid});
return entry != nullptr;
}
@@ -134,11 +88,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
{
stats.lookups[type]++;
BTBEntry *entry = findEntry(instPC, tid);
BTBEntry *entry = btb.accessEntry({instPC, tid});
if (entry) {
return entry->target.get();
}
stats.misses[type]++;
return nullptr;
}
@@ -146,11 +101,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
const StaticInstPtr
SimpleBTB::getInst(ThreadID tid, Addr instPC)
{
BTBEntry *entry = findEntry(instPC, tid);
BTBEntry *entry = btb.findEntry({instPC, tid});
if (entry) {
return entry->inst;
}
return nullptr;
}
@@ -159,18 +115,13 @@ SimpleBTB::update(ThreadID tid, Addr instPC,
const PCStateBase &target,
BranchType type, StaticInstPtr inst)
{
unsigned btb_idx = getIndex(instPC, tid);
assert(btb_idx < numEntries);
stats.updates[type]++;
btb[btb_idx].tid = tid;
btb[btb_idx].valid = true;
set(btb[btb_idx].target, target);
btb[btb_idx].tag = getTag(instPC);
btb[btb_idx].inst = inst;
BTBEntry *victim = btb.findVictim({instPC, tid});
btb.insertEntry({instPC, tid}, victim);
victim->update(target, inst);
}
} // namespace branch_prediction
} // namespace gem5
} // namespace gem5::branch_prediction

View File

@@ -41,15 +41,16 @@
#ifndef __CPU_PRED_SIMPLE_BTB_HH__
#define __CPU_PRED_SIMPLE_BTB_HH__
#include "base/cache/associative_cache.hh"
#include "base/logging.hh"
#include "base/types.hh"
#include "cpu/pred/btb.hh"
#include "cpu/pred/btb_entry.hh"
#include "mem/cache/replacement_policies/replaceable_entry.hh"
#include "mem/cache/tags/indexing_policies/base.hh"
#include "params/SimpleBTB.hh"
namespace gem5
{
namespace branch_prediction
namespace gem5::branch_prediction
{
class SimpleBTB : public BranchTargetBuffer
@@ -66,38 +67,7 @@ class SimpleBTB : public BranchTargetBuffer
StaticInstPtr inst = nullptr) override;
const StaticInstPtr getInst(ThreadID tid, Addr instPC) override;
private:
struct BTBEntry
{
/** The entry's tag. */
Addr tag = 0;
/** The entry's target. */
std::unique_ptr<PCStateBase> target;
/** The entry's thread id. */
ThreadID tid;
/** Whether or not the entry is valid. */
bool valid = false;
/** Pointer to the static branch instruction at this address */
StaticInstPtr inst = nullptr;
};
/** Returns the index into the BTB, based on the branch's PC.
* @param inst_PC The branch to look up.
* @return Returns the index into the BTB.
*/
inline unsigned getIndex(Addr instPC, ThreadID tid);
/** Returns the tag bits of a given address.
* @param inst_PC The branch's address.
* @return Returns the tag bits.
*/
inline Addr getTag(Addr instPC);
/** Internal call to find an address in the BTB
* @param instPC The branch's address.
@@ -106,31 +76,9 @@ class SimpleBTB : public BranchTargetBuffer
BTBEntry *findEntry(Addr instPC, ThreadID tid);
/** The actual BTB. */
std::vector<BTBEntry> btb;
/** The number of entries in the BTB. */
unsigned numEntries;
/** The index mask. */
unsigned idxMask;
/** The number of tag bits per entry. */
unsigned tagBits;
/** The tag mask. */
unsigned tagMask;
/** Number of bits to shift PC when calculating index. */
unsigned instShiftAmt;
/** Number of bits to shift PC when calculating tag. */
unsigned tagShiftAmt;
/** Log2 NumThreads used for hashing threadid */
unsigned log2NumThreads;
AssociativeCache<BTBEntry> btb;
};
} // namespace branch_prediction
} // namespace gem5
} // namespace gem5::branch_prediction
#endif // __CPU_PRED_SIMPLE_BTB_HH__

View File

@@ -41,3 +41,4 @@ class TesterThread(ClockedObject):
thread_id = Param.Int("Unique TesterThread ID")
num_lanes = Param.Int("Number of lanes this thread has")
deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")
cache_line_size = Param.UInt32("Size of cache line in cache")

View File

@@ -64,7 +64,9 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
std::shuffle(
randAddressMap.begin(),
randAddressMap.end(),
std::default_random_engine(random_mt.random<unsigned>(0,UINT_MAX))
// TODO: This is a bug unrelated to this draft PR but the GPU tester is
// useful for testing this PR.
std::default_random_engine(random_mt.random<unsigned>(0,UINT_MAX-1))
);
// initialize atomic locations

View File

@@ -70,7 +70,7 @@ DmaThread::issueLoadOps()
Addr address = addrManager->getAddress(location);
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
this->getName(), curEpisode->getEpisodeId(),
ruby::printAddress(address));
printAddress(address));
int load_size = sizeof(Value);
@@ -127,7 +127,7 @@ DmaThread::issueStoreOps()
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
"Value %d\n", this->getName(),
curEpisode->getEpisodeId(), ruby::printAddress(address),
curEpisode->getEpisodeId(), printAddress(address),
new_value);
auto req = std::make_shared<Request>(address, sizeof(Value),
@@ -211,7 +211,7 @@ DmaThread::hitCallback(PacketPtr pkt)
DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -"
" Addr %s\n", this->getName(), curEpisode->getEpisodeId(),
resp_cmd.toString(), ruby::printAddress(addr));
resp_cmd.toString(), printAddress(addr));
if (resp_cmd == MemCmd::SwapResp) {
// response to a pending atomic

View File

@@ -67,7 +67,7 @@ GpuWavefront::issueLoadOps()
Addr address = addrManager->getAddress(location);
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
this->getName(), curEpisode->getEpisodeId(),
ruby::printAddress(address));
printAddress(address));
int load_size = sizeof(Value);
@@ -124,7 +124,7 @@ GpuWavefront::issueStoreOps()
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
"Value %d\n", this->getName(),
curEpisode->getEpisodeId(), ruby::printAddress(address),
curEpisode->getEpisodeId(), printAddress(address),
new_value);
auto req = std::make_shared<Request>(address, sizeof(Value),
@@ -178,7 +178,7 @@ GpuWavefront::issueAtomicOps()
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
this->getName(), curEpisode->getEpisodeId(),
ruby::printAddress(address));
printAddress(address));
// must be aligned with store size
assert(address % sizeof(Value) == 0);
@@ -268,7 +268,7 @@ GpuWavefront::hitCallback(PacketPtr pkt)
DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
"Addr %s\n", this->getName(),
curEpisode->getEpisodeId(), resp_cmd.toString(),
ruby::printAddress(addr));
printAddress(addr));
// whether the transaction is done after this hitCallback
bool isTransactionDone = true;

View File

@@ -43,6 +43,7 @@ TesterThread::TesterThread(const Params &p)
: ClockedObject(p),
threadEvent(this, "TesterThread tick"),
deadlockCheckEvent(this),
cacheLineSize(p.cache_line_size),
threadId(p.thread_id),
numLanes(p.num_lanes),
tester(nullptr), addrManager(nullptr), port(nullptr),
@@ -383,7 +384,7 @@ TesterThread::validateAtomicResp(Location loc, int lane, Value ret_val)
ss << threadName << ": Atomic Op returned unexpected value\n"
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
<< "\tLane ID " << lane << "\n"
<< "\tAddress " << ruby::printAddress(addr) << "\n"
<< "\tAddress " << printAddress(addr) << "\n"
<< "\tAtomic Op's return value " << ret_val << "\n";
// print out basic info
@@ -409,7 +410,7 @@ TesterThread::validateLoadResp(Location loc, int lane, Value ret_val)
<< "\tTesterThread " << threadId << "\n"
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
<< "\tLane ID " << lane << "\n"
<< "\tAddress " << ruby::printAddress(addr) << "\n"
<< "\tAddress " << printAddress(addr) << "\n"
<< "\tLoaded value " << ret_val << "\n"
<< "\tLast writer " << addrManager->printLastWriter(loc) << "\n";
@@ -467,7 +468,7 @@ TesterThread::printOutstandingReqs(const OutstandingReqTable& table,
for (const auto& m : table) {
for (const auto& req : m.second) {
ss << "\t\t\tAddr " << ruby::printAddress(m.first)
ss << "\t\t\tAddr " << printAddress(m.first)
<< ": delta (curCycle - issueCycle) = "
<< (cur_cycle - req.issueCycle) << std::endl;
}
@@ -488,4 +489,10 @@ TesterThread::printAllOutstandingReqs(std::stringstream& ss) const
<< pendingFenceCount << std::endl;
}
// Format an address for debug/trace output using this thread's configured
// cache line size.
// NOTE(review): the second argument is presumably the line size in bits,
// given the byte-to-bit conversion (cacheLineSize * 8) — confirm against
// ruby::printAddress.
std::string
TesterThread::printAddress(Addr addr) const
{
return ruby::printAddress(addr, cacheLineSize * 8);
}
} // namespace gem5

View File

@@ -132,6 +132,7 @@ class TesterThread : public ClockedObject
{}
};
int cacheLineSize;
// the unique global id of this thread
int threadId;
// width of this thread (1 for cpu thread & wf size for gpu wavefront)
@@ -204,6 +205,7 @@ class TesterThread : public ClockedObject
void printOutstandingReqs(const OutstandingReqTable& table,
std::stringstream& ss) const;
std::string printAddress(Addr addr) const;
};
} // namespace gem5

View File

@@ -124,7 +124,8 @@ Check::initiatePrefetch()
// push the subblock onto the sender state. The sequencer will
// update the subblock on the return
pkt->senderState = new SenderState(m_address, req->getSize());
pkt->senderState = new SenderState(m_address, req->getSize(),
CACHE_LINE_BITS);
if (port->sendTimingReq(pkt)) {
DPRINTF(RubyTest, "successfully initiated prefetch.\n");
@@ -161,7 +162,8 @@ Check::initiateFlush()
// push the subblock onto the sender state. The sequencer will
// update the subblock on the return
pkt->senderState = new SenderState(m_address, req->getSize());
pkt->senderState = new SenderState(m_address, req->getSize(),
CACHE_LINE_BITS);
if (port->sendTimingReq(pkt)) {
DPRINTF(RubyTest, "initiating Flush - successful\n");
@@ -207,7 +209,8 @@ Check::initiateAction()
// push the subblock onto the sender state. The sequencer will
// update the subblock on the return
pkt->senderState = new SenderState(writeAddr, req->getSize());
pkt->senderState = new SenderState(m_address, req->getSize(),
CACHE_LINE_BITS);
if (port->sendTimingReq(pkt)) {
DPRINTF(RubyTest, "initiating action - successful\n");
@@ -261,7 +264,8 @@ Check::initiateCheck()
// push the subblock onto the sender state. The sequencer will
// update the subblock on the return
pkt->senderState = new SenderState(m_address, req->getSize());
pkt->senderState = new SenderState(m_address, req->getSize(),
CACHE_LINE_BITS);
if (port->sendTimingReq(pkt)) {
DPRINTF(RubyTest, "initiating check - successful\n");
@@ -291,7 +295,9 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime)
// This isn't exactly right since we now have multi-byte checks
// assert(getAddress() == address);
assert(ruby::makeLineAddress(m_address) == ruby::makeLineAddress(address));
int block_size_bits = CACHE_LINE_BITS;
assert(ruby::makeLineAddress(m_address, block_size_bits) ==
ruby::makeLineAddress(address, block_size_bits));
assert(data != NULL);
DPRINTF(RubyTest, "RubyTester Callback\n");
@@ -342,7 +348,7 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime)
}
DPRINTF(RubyTest, "proc: %d, Address: 0x%x\n", proc,
ruby::makeLineAddress(m_address));
ruby::makeLineAddress(m_address, block_size_bits));
DPRINTF(RubyTest, "Callback done\n");
debugPrint();
}

View File

@@ -47,6 +47,7 @@ class SubBlock;
const int CHECK_SIZE_BITS = 2;
const int CHECK_SIZE = (1 << CHECK_SIZE_BITS);
const int CACHE_LINE_BITS = 6;
class Check
{

View File

@@ -90,7 +90,9 @@ class RubyTester : public ClockedObject
{
ruby::SubBlock subBlock;
SenderState(Addr addr, int size) : subBlock(addr, size) {}
SenderState(Addr addr, int size, int cl_size)
: subBlock(addr, size, cl_size)
{}
};

View File

@@ -81,8 +81,6 @@ class AMDGPUDevice(PciDevice):
InterruptPin = 2
ExpansionROM = 0
rom_binary = Param.String("ROM binary dumped from hardware")
trace_file = Param.String("MMIO trace collected on hardware")
checkpoint_before_mmios = Param.Bool(
False, "Take a checkpoint before the device begins sending MMIOs"
)

View File

@@ -58,12 +58,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
init_interrupt_count(0), _lastVMID(0),
deviceMem(name() + ".deviceMem", p.memories, false, "", false)
{
// Loading the rom binary dumped from hardware.
std::ifstream romBin;
romBin.open(p.rom_binary, std::ios::binary);
romBin.read((char *)rom.data(), ROM_SIZE);
romBin.close();
// System pointer needs to be explicitly set for device memory since
// DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
// Note this means the cache line size is system wide.
@@ -92,10 +86,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
panic("Unknown GPU device %s\n", p.device_name);
}
if (p.trace_file != "") {
mmioReader.readMMIOTrace(p.trace_file);
}
int sdma_id = 0;
for (auto& s : p.sdmas) {
s->setGPUDevice(this);

View File

@@ -168,10 +168,15 @@ GenericPciHost::write(PacketPtr pkt)
pkt->getSize());
PciDevice *const pci_dev(getDevice(dev_addr.first));
panic_if(!pci_dev,
warn_if(!pci_dev,
"%02x:%02x.%i: Write to config space on non-existent PCI device\n",
dev_addr.first.bus, dev_addr.first.dev, dev_addr.first.func);
if (!pci_dev) {
pkt->makeAtomicResponse();
return 20000; // 20ns default from PciDevice.py
}
// @todo Remove this after testing
pkt->headerDelay = pkt->payloadDelay = 0;

View File

@@ -53,7 +53,7 @@ Clint::Clint(const Params &params) :
BasicPioDevice(params, params.pio_size),
system(params.system),
nThread(params.num_threads),
signal(params.name + ".signal", 0, this),
signal(params.name + ".signal", 0, this, INT_RTC),
reset(params.name + ".reset"),
resetMtimecmp(params.reset_mtimecmp),
registers(params.name + ".registers", params.pio_addr, this,
@@ -69,9 +69,11 @@ Clint::Clint(const Params &params) :
void
Clint::raiseInterruptPin(int id)
{
// Increment mtime
// Increment mtime when received RTC signal
uint64_t& mtime = registers.mtime.get();
if (id == INT_RTC) {
mtime++;
}
for (int context_id = 0; context_id < nThread; context_id++) {
@@ -261,7 +263,7 @@ Clint::doReset() {
registers.msip[i].reset();
}
// We need to update the mtip interrupt bits when reset
raiseInterruptPin(0);
raiseInterruptPin(INT_RESET);
}
} // namespace gem5

View File

@@ -91,6 +91,13 @@ class Clint : public BasicPioDevice
void raiseInterruptPin(int id);
void lowerInterruptPin(int id) {}
// Interrupt ID
enum InterruptId
{
INT_RTC = 0, // received from RTC(signal port)
INT_RESET, // received from reset port
};
// Register bank
public:

View File

@@ -477,7 +477,7 @@ class VirtQueue : public Serializable
Index index;
};
VirtRing<T>(PortProxy &proxy, ByteOrder bo, uint16_t size) :
VirtRing(PortProxy &proxy, ByteOrder bo, uint16_t size) :
header{0, 0}, ring(size), _proxy(proxy), _base(0), byteOrder(bo)
{}
@@ -550,7 +550,7 @@ class VirtQueue : public Serializable
private:
// Remove default constructor
VirtRing<T>();
VirtRing();
/** Guest physical memory proxy */
PortProxy &_proxy;

View File

@@ -461,7 +461,7 @@ class CacheBlk : public TaggedEntry
protected:
/** The current coherence status of this block. @sa CoherenceBits */
unsigned coherence;
unsigned coherence = 0;
// The following setters have been marked as protected because their
// respective variables should only be modified at 2 moments:

View File

@@ -599,6 +599,22 @@ class BOPPrefetcher(QueuedPrefetcher):
on_inst = False
class SmsPrefetcher(QueuedPrefetcher):
# Paper: https://web.eecs.umich.edu/~twenisch/papers/isca06.pdf
type = "SmsPrefetcher"
cxx_class = "gem5::prefetch::Sms"
cxx_header = "mem/cache/prefetch/sms.hh"
ft_size = Param.Unsigned(64, "Size of Filter and Active generation table")
pht_size = Param.Unsigned(16384, "Size of pattern history table")
region_size = Param.Unsigned(4096, "Spatial region size")
queue_squash = True
queue_filter = True
cache_snoop = True
prefetch_on_access = True
on_inst = False
class SBOOEPrefetcher(QueuedPrefetcher):
type = "SBOOEPrefetcher"
cxx_class = "gem5::prefetch::SBOOE"

View File

@@ -31,8 +31,9 @@ Import('*')
SimObject('Prefetcher.py', sim_objects=[
'BasePrefetcher', 'MultiPrefetcher', 'QueuedPrefetcher',
'StridePrefetcherHashedSetAssociative', 'StridePrefetcher',
'TaggedPrefetcher', 'IndirectMemoryPrefetcher', 'SignaturePathPrefetcher',
'SignaturePathPrefetcherV2', 'AccessMapPatternMatching', 'AMPMPrefetcher',
'SmsPrefetcher', 'TaggedPrefetcher', 'IndirectMemoryPrefetcher',
'SignaturePathPrefetcher', 'SignaturePathPrefetcherV2',
'AccessMapPatternMatching', 'AMPMPrefetcher',
'DeltaCorrelatingPredictionTables', 'DCPTPrefetcher',
'IrregularStreamBufferPrefetcher', 'SlimAMPMPrefetcher',
'BOPPrefetcher', 'SBOOEPrefetcher', 'STeMSPrefetcher', 'PIFPrefetcher'])
@@ -47,6 +48,7 @@ Source('indirect_memory.cc')
Source('pif.cc')
Source('queued.cc')
Source('sbooe.cc')
Source('sms.cc')
Source('signature_path.cc')
Source('signature_path_v2.cc')
Source('slim_ampm.cc')

161
src/mem/cache/prefetch/sms.cc vendored Normal file
View File

@@ -0,0 +1,161 @@
/*
* Copyright (c) 2024 Samsung Electronics
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Describes a SMS prefetcher based on template policies.
*/
#include "mem/cache/prefetch/sms.hh"
#include "debug/HWPrefetch.hh"
#include "params/SmsPrefetcher.hh"
namespace gem5
{
namespace prefetch
{
/**
 * SMS (Spatial Memory Streaming) prefetcher constructor.
 *
 * @param p Tuning parameters: ft_size bounds both the filter table (FT)
 *          and the active generation table (AGT); pht_size bounds the
 *          pattern history table (PHT); region_size is the spatial
 *          region granularity in bytes.
 *
 * No explicit clear() calls are needed here: the table members (AGT,
 * AGTPC, FT, PHT, fifoFT, lruAGT, lruPHT) are default-constructed empty
 * before the constructor body runs, so clearing them was a no-op.
 */
Sms::Sms(const SmsPrefetcherParams &p)
    : Queued(p), Max_Contexts(p.ft_size), MAX_PHTSize(p.pht_size),
      Region_Size(p.region_size)
{
}
/**
 * React to a cache eviction: the eviction ends the active generation for
 * the evicted block's spatial region, so the offsets recorded in the AGT
 * are committed to the PHT, keyed by the (PC, offset) pair that triggered
 * the generation. PHT capacity is then enforced by LRU eviction and the
 * region's tracking state is dropped.
 *
 * @param info Eviction notification; only info.addr is used.
 */
void
Sms::notifyEvict(const EvictionInfo &info)
{
    Addr region_base = roundDown(info.addr, Region_Size);

    auto agt_it = AGT.find(region_base);
    if (agt_it != AGT.end()) {
        // Look up the trigger key with find() instead of operator[] so
        // an untracked region can never insert a spurious default entry
        // into AGTPC. Falls back to a zero-valued key if the AGTPC entry
        // is missing, matching the previous operator[] behavior.
        std::pair<Addr, Addr> pc_offset;
        auto pc_it = AGTPC.find(region_base);
        if (pc_it != AGTPC.end()) {
            pc_offset = pc_it->second;
        }

        // Replace any stale pattern recorded for this trigger with the
        // freshly observed offset set (equivalent to clear + re-insert).
        PHT[pc_offset] = agt_it->second;
        lruPHT.push_front(pc_offset);
    }

    // Enforce the PHT capacity bound, evicting least-recently-used
    // patterns.
    while (PHT.size() > MAX_PHTSize) {
        PHT.erase(lruPHT.back());
        lruPHT.pop_back();
    }

    // The generation is over either way; drop the region's tracking
    // state.
    AGTPC.erase(region_base);
    AGT.erase(region_base);
}
/**
 * Train the SMS tables with this access and, on a PHT hit, queue
 * prefetches for every offset previously recorded under the same
 * (PC, offset) trigger.
 *
 * Per-region training state machine (as implemented below):
 *   - untracked region      -> FT records the (PC, offset) trigger
 *   - second access (in FT) -> promoted to the AGT (active generation)
 *   - in AGT                -> offsets accumulate until the region is
 *                              evicted (notifyEvict commits them to PHT)
 *
 * @param pfi       Access being observed (address + PC required).
 * @param addresses Output list of prefetch candidates.
 * @param cache     Unused by this prefetcher.
 */
void
Sms::calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses,
const CacheAccessor &cache)
{
if (!pfi.hasPC()) {
DPRINTF(HWPrefetch, "Ignoring request with no PC.\n");
return;
}
Addr blk_addr = blockAddress(pfi.getAddr());
Addr pc = pfi.getPC();
// Base of the spatial region and the block offset within it.
Addr region_base = roundDown(blk_addr, Region_Size);
Addr offset = blk_addr - region_base;
//Training
if (AGT.find(region_base) != AGT.end()) {
// A region must never be tracked by FT and AGT simultaneously.
assert (FT.find(region_base) == FT.end());
// Record Pattern
AGT[region_base].insert(offset);
//update LRU: move this region to the front of the AGT LRU queue
for (std::deque <Addr>::iterator lit = lruAGT.begin();
lit != lruAGT.end(); lit ++) {
if ((*lit) == region_base) {
lruAGT.erase(lit);
lruAGT.push_front(region_base);
break;
}
}
} else if (FT.find(region_base) != FT.end()) {
//move entry from FT to AGT (second access starts a generation)
AGT[region_base].insert(FT[region_base].second);
AGTPC[region_base] = FT[region_base];
lruAGT.push_front(region_base);
//Record latest offset
AGT[region_base].insert(offset);
//Recycle FT entry
FT.erase(region_base);
//Make space for next entry (LRU eviction of AGT + its trigger key)
while (AGT.size() > Max_Contexts) {
AGT.erase(lruAGT.back());
AGTPC.erase(lruAGT.back());
lruAGT.pop_back();
}
} else {
// Trigger Access: first touch of the region, bounded FIFO allocation
FT[region_base] = std::make_pair (pc,offset);
fifoFT.push_front(region_base);
while (FT.size() > Max_Contexts) {
FT.erase(fifoFT.back());
fifoFT.pop_back();
}
}
//Prediction: on a PHT hit, prefetch every recorded offset relative to
//the current region base
std::pair <Addr, Addr> pc_offset = std::make_pair(pc,offset);
if (PHT.find(pc_offset) != PHT.end()) {
for (std::set<Addr>::iterator it = PHT[pc_offset].begin();
it != PHT[pc_offset].end(); it ++) {
Addr pref_addr = blockAddress(region_base + (*it));
addresses.push_back(AddrPriority(pref_addr,0));
}
//Move the hit pattern to the front of the PHT LRU queue
for (std::deque < std::pair <Addr,Addr> >::iterator lit
= lruPHT.begin(); lit != lruPHT.end(); lit ++) {
if ((*lit) == pc_offset) {
lruPHT.erase(lit);
lruPHT.push_front(pc_offset);
break;
}
}
}
}
} // namespace prefetch
} // namespace gem5

82
src/mem/cache/prefetch/sms.hh vendored Normal file
View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2024 Samsung Electronics
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
 * Describes an SMS (Spatial Memory Streaming) prefetcher.
*/
#ifndef __MEM_CACHE_PREFETCH_SMS_HH__
#define __MEM_CACHE_PREFETCH_SMS_HH__
#include <set>
#include "mem/cache/prefetch/queued.hh"
#include "mem/packet.hh"
namespace gem5
{
struct SmsPrefetcherParams;
namespace prefetch
{
class Sms : public Queued
{
  private:
    // Capacity of the FT and AGT (entries each). //= 64;
    const int Max_Contexts; //= 64;
    // Capacity of the PHT — presumably an entry count; //= 512;
    // TODO(review): confirm against the eviction logic in sms.cc.
    const uint64_t MAX_PHTSize; //= 512;
    // Spatial region size in bytes; addresses are grouped into
    // regions of this alignment for pattern learning. //= 4096;
    const Addr Region_Size; //= 4096;
    // Active Generation Table: region base -> set of block offsets
    // touched while the region's generation is active.
    std::map< Addr, std::set<Addr> > AGT;
    // Per-region (pc, offset) of the trigger access, kept in lockstep
    // with AGT so the pattern can later be filed under its trigger.
    std::map< Addr, std::pair<Addr,Addr> > AGTPC;
    // Filter Table: region base -> (pc, offset) of the first access;
    // a second access to the region promotes the entry into the AGT.
    std::map< Addr, std::pair<Addr,Addr> > FT;
    // Pattern History Table: (pc, offset) trigger -> learned set of
    // block offsets to prefetch.
    std::map< std::pair <Addr,Addr> , std::set<Addr> > PHT;
    // FIFO replacement order for FT entries.
    std::deque<Addr> fifoFT;
    // LRU replacement order for AGT entries (front = most recent).
    std::deque<Addr> lruAGT;
    // LRU replacement order for PHT entries (front = most recent).
    std::deque< std::pair <Addr,Addr> > lruPHT;
    using EvictionInfo = CacheDataUpdateProbeArg;
    // Probe hook: invoked on cache evictions (generation end).
    void notifyEvict(const EvictionInfo &info) override;
  public:
    Sms(const SmsPrefetcherParams &p);
    ~Sms() = default;
    // Queued-prefetcher entry point: trains the tables on this access
    // and appends predicted prefetch addresses for it.
    void calculatePrefetch(const PrefetchInfo &pfi,
                           std::vector<AddrPriority> &addresses,
                           const CacheAccessor &cache) override;
};
} // namespace prefetch
} // namespace gem5
#endif // __MEM_CACHE_PREFETCH_SMS_HH__

View File

@@ -51,37 +51,33 @@ maskLowOrderBits(Addr addr, unsigned int number)
}
Addr
getOffset(Addr addr)
getOffset(Addr addr, int cacheLineBits)
{
return bitSelect(addr, 0, RubySystem::getBlockSizeBits() - 1);
}
Addr
makeLineAddress(Addr addr)
{
return mbits<Addr>(addr, 63, RubySystem::getBlockSizeBits());
assert(cacheLineBits < 64);
return bitSelect(addr, 0, cacheLineBits - 1);
}
Addr
makeLineAddress(Addr addr, int cacheLineBits)
{
assert(cacheLineBits < 64);
return maskLowOrderBits(addr, cacheLineBits);
}
// returns the next stride address based on line address
Addr
makeNextStrideAddress(Addr addr, int stride)
makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes)
{
return makeLineAddress(addr) +
static_cast<int>(RubySystem::getBlockSizeBytes()) * stride;
return makeLineAddress(addr, floorLog2(cacheLineBytes))
+ cacheLineBytes * stride;
}
std::string
printAddress(Addr addr)
printAddress(Addr addr, int cacheLineBits)
{
std::stringstream out;
out << "[" << std::hex << "0x" << addr << "," << " line 0x"
<< makeLineAddress(addr) << std::dec << "]";
<< makeLineAddress(addr, cacheLineBits) << std::dec << "]";
return out.str();
}

View File

@@ -33,6 +33,7 @@
#include <iomanip>
#include <iostream>
#include "base/intmath.hh"
#include "base/types.hh"
namespace gem5
@@ -44,11 +45,10 @@ namespace ruby
// selects bits inclusive
Addr bitSelect(Addr addr, unsigned int small, unsigned int big);
Addr maskLowOrderBits(Addr addr, unsigned int number);
Addr getOffset(Addr addr);
Addr makeLineAddress(Addr addr);
Addr getOffset(Addr addr, int cacheLineBits);
Addr makeLineAddress(Addr addr, int cacheLineBits);
Addr makeNextStrideAddress(Addr addr, int stride);
std::string printAddress(Addr addr);
Addr makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes);
std::string printAddress(Addr addr, int cacheLineBits);
} // namespace ruby
} // namespace gem5

View File

@@ -40,8 +40,8 @@
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/WriteMask.hh"
#include "mem/ruby/system/RubySystem.hh"
namespace gem5
{
@@ -51,17 +51,22 @@ namespace ruby
DataBlock::DataBlock(const DataBlock &cp)
{
assert(cp.isAlloc());
assert(cp.getBlockSize() > 0);
assert(!m_alloc);
uint8_t *block_update;
size_t block_bytes = RubySystem::getBlockSizeBytes();
m_data = new uint8_t[block_bytes];
memcpy(m_data, cp.m_data, block_bytes);
m_block_size = cp.getBlockSize();
m_data = new uint8_t[m_block_size];
memcpy(m_data, cp.m_data, m_block_size);
m_alloc = true;
m_block_size = m_block_size;
// If this data block is involved in an atomic operation, the effect
// of applying the atomic operations on the data block are recorded in
// m_atomicLog. If so, we must copy over every entry in the change log
for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
block_update = new uint8_t[block_bytes];
memcpy(block_update, cp.m_atomicLog[i], block_bytes);
block_update = new uint8_t[m_block_size];
memcpy(block_update, cp.m_atomicLog[i], m_block_size);
m_atomicLog.push_back(block_update);
}
}
@@ -69,21 +74,44 @@ DataBlock::DataBlock(const DataBlock &cp)
void
DataBlock::alloc()
{
m_data = new uint8_t[RubySystem::getBlockSizeBytes()];
assert(!m_alloc);
if (!m_block_size) {
return;
}
m_data = new uint8_t[m_block_size];
m_alloc = true;
clear();
}
void
DataBlock::realloc(int blk_size)
{
m_block_size = blk_size;
assert(m_block_size > 0);
if (m_alloc) {
delete [] m_data;
m_alloc = false;
}
alloc();
}
void
DataBlock::clear()
{
memset(m_data, 0, RubySystem::getBlockSizeBytes());
assert(m_alloc);
assert(m_block_size > 0);
memset(m_data, 0, m_block_size);
}
bool
DataBlock::equal(const DataBlock& obj) const
{
size_t block_bytes = RubySystem::getBlockSizeBytes();
assert(m_alloc);
assert(m_block_size > 0);
size_t block_bytes = m_block_size;
// Check that the block contents match
if (memcmp(m_data, obj.m_data, block_bytes)) {
return false;
@@ -102,7 +130,9 @@ DataBlock::equal(const DataBlock& obj) const
void
DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask)
{
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
assert(m_alloc);
assert(m_block_size > 0);
for (int i = 0; i < m_block_size; i++) {
if (mask.getMask(i, 1)) {
m_data[i] = dblk.m_data[i];
}
@@ -113,7 +143,9 @@ void
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask,
bool isAtomicNoReturn)
{
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
assert(m_alloc);
assert(m_block_size > 0);
for (int i = 0; i < m_block_size; i++) {
m_data[i] = dblk.m_data[i];
}
mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn);
@@ -122,7 +154,9 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask,
void
DataBlock::print(std::ostream& out) const
{
int size = RubySystem::getBlockSizeBytes();
assert(m_alloc);
assert(m_block_size > 0);
int size = m_block_size;
out << "[ ";
for (int i = 0; i < size; i++) {
out << std::setw(2) << std::setfill('0') << std::hex
@@ -147,6 +181,7 @@ DataBlock::popAtomicLogEntryFront()
void
DataBlock::clearAtomicLogEntries()
{
assert(m_alloc);
for (auto log : m_atomicLog) {
delete [] log;
}
@@ -156,35 +191,59 @@ DataBlock::clearAtomicLogEntries()
const uint8_t*
DataBlock::getData(int offset, int len) const
{
assert(offset + len <= RubySystem::getBlockSizeBytes());
assert(m_alloc);
assert(m_block_size > 0);
assert(offset + len <= m_block_size);
return &m_data[offset];
}
uint8_t*
DataBlock::getDataMod(int offset)
{
assert(m_alloc);
return &m_data[offset];
}
void
DataBlock::setData(const uint8_t *data, int offset, int len)
{
assert(m_alloc);
memcpy(&m_data[offset], data, len);
}
void
DataBlock::setData(PacketPtr pkt)
{
int offset = getOffset(pkt->getAddr());
assert(offset + pkt->getSize() <= RubySystem::getBlockSizeBytes());
assert(m_alloc);
assert(m_block_size > 0);
int offset = getOffset(pkt->getAddr(), floorLog2(m_block_size));
assert(offset + pkt->getSize() <= m_block_size);
pkt->writeData(&m_data[offset]);
}
DataBlock &
DataBlock::operator=(const DataBlock & obj)
{
// Reallocate if needed
if (m_alloc && m_block_size != obj.getBlockSize()) {
delete [] m_data;
m_block_size = obj.getBlockSize();
alloc();
} else if (!m_alloc) {
m_block_size = obj.getBlockSize();
alloc();
// Assume this will be realloc'd later if zero.
if (m_block_size == 0) {
return *this;
}
} else {
assert(m_alloc && m_block_size == obj.getBlockSize());
}
assert(m_block_size > 0);
uint8_t *block_update;
size_t block_bytes = RubySystem::getBlockSizeBytes();
size_t block_bytes = m_block_size;
// Copy entire block contents from obj to current block
memcpy(m_data, obj.m_data, block_bytes);
// If this data block is involved in an atomic operation, the effect

View File

@@ -61,8 +61,14 @@ class WriteMask;
class DataBlock
{
public:
DataBlock()
// Ideally this should nost be called. We allow default so that protocols
// do not need to be changed.
DataBlock() = default;
DataBlock(int blk_size)
{
assert(!m_alloc);
m_block_size = blk_size;
alloc();
}
@@ -101,10 +107,16 @@ class DataBlock
bool equal(const DataBlock& obj) const;
void print(std::ostream& out) const;
int getBlockSize() const { return m_block_size; }
void setBlockSize(int block_size) { realloc(block_size); }
bool isAlloc() const { return m_alloc; }
void realloc(int blk_size);
private:
void alloc();
uint8_t *m_data;
bool m_alloc;
uint8_t *m_data = nullptr;
bool m_alloc = false;
int m_block_size = 0;
// Tracks block changes when atomic ops are applied
std::deque<uint8_t*> m_atomicLog;
@@ -124,18 +136,21 @@ DataBlock::assign(uint8_t *data)
inline uint8_t
DataBlock::getByte(int whichByte) const
{
assert(m_alloc);
return m_data[whichByte];
}
inline void
DataBlock::setByte(int whichByte, uint8_t data)
{
assert(m_alloc);
m_data[whichByte] = data;
}
inline void
DataBlock::copyPartial(const DataBlock & dblk, int offset, int len)
{
assert(m_alloc);
setData(&dblk.m_data[offset], offset, len);
}

View File

@@ -30,6 +30,8 @@
#include <algorithm>
#include "mem/ruby/system/RubySystem.hh"
namespace gem5
{
@@ -37,6 +39,11 @@ namespace ruby
{
NetDest::NetDest()
{
}
NetDest::NetDest(RubySystem *ruby_system)
: m_ruby_system(ruby_system)
{
resize();
}
@@ -44,6 +51,7 @@ NetDest::NetDest()
void
NetDest::add(MachineID newElement)
{
assert(m_bits.size() > 0);
assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize());
m_bits[vecIndex(newElement)].add(bitIndex(newElement.num));
}
@@ -51,6 +59,7 @@ NetDest::add(MachineID newElement)
void
NetDest::addNetDest(const NetDest& netDest)
{
assert(m_bits.size() > 0);
assert(m_bits.size() == netDest.getSize());
for (int i = 0; i < m_bits.size(); i++) {
m_bits[i].addSet(netDest.m_bits[i]);
@@ -60,6 +69,8 @@ NetDest::addNetDest(const NetDest& netDest)
void
NetDest::setNetDest(MachineType machine, const Set& set)
{
assert(m_ruby_system != nullptr);
// assure that there is only one set of destinations for this machine
assert(MachineType_base_level((MachineType)(machine + 1)) -
MachineType_base_level(machine) == 1);
@@ -69,12 +80,14 @@ NetDest::setNetDest(MachineType machine, const Set& set)
void
NetDest::remove(MachineID oldElement)
{
assert(m_bits.size() > 0);
m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num));
}
void
NetDest::removeNetDest(const NetDest& netDest)
{
assert(m_bits.size() > 0);
assert(m_bits.size() == netDest.getSize());
for (int i = 0; i < m_bits.size(); i++) {
m_bits[i].removeSet(netDest.m_bits[i]);
@@ -84,6 +97,7 @@ NetDest::removeNetDest(const NetDest& netDest)
void
NetDest::clear()
{
assert(m_bits.size() > 0);
for (int i = 0; i < m_bits.size(); i++) {
m_bits[i].clear();
}
@@ -101,6 +115,8 @@ NetDest::broadcast()
void
NetDest::broadcast(MachineType machineType)
{
assert(m_ruby_system != nullptr);
for (NodeID i = 0; i < MachineType_base_count(machineType); i++) {
MachineID mach = {machineType, i};
add(mach);
@@ -111,6 +127,9 @@ NetDest::broadcast(MachineType machineType)
std::vector<NodeID>
NetDest::getAllDest()
{
assert(m_ruby_system != nullptr);
assert(m_bits.size() > 0);
std::vector<NodeID> dest;
dest.clear();
for (int i = 0; i < m_bits.size(); i++) {
@@ -127,6 +146,8 @@ NetDest::getAllDest()
int
NetDest::count() const
{
assert(m_bits.size() > 0);
int counter = 0;
for (int i = 0; i < m_bits.size(); i++) {
counter += m_bits[i].count();
@@ -137,12 +158,14 @@ NetDest::count() const
NodeID
NetDest::elementAt(MachineID index)
{
assert(m_bits.size() > 0);
return m_bits[vecIndex(index)].elementAt(bitIndex(index.num));
}
MachineID
NetDest::smallestElement() const
{
assert(m_bits.size() > 0);
assert(count() > 0);
for (int i = 0; i < m_bits.size(); i++) {
for (NodeID j = 0; j < m_bits[i].getSize(); j++) {
@@ -158,6 +181,9 @@ NetDest::smallestElement() const
MachineID
NetDest::smallestElement(MachineType machine) const
{
assert(m_bits.size() > 0);
assert(m_ruby_system != nullptr);
int size = m_bits[MachineType_base_level(machine)].getSize();
for (NodeID j = 0; j < size; j++) {
if (m_bits[MachineType_base_level(machine)].isElement(j)) {
@@ -173,6 +199,7 @@ NetDest::smallestElement(MachineType machine) const
bool
NetDest::isBroadcast() const
{
assert(m_bits.size() > 0);
for (int i = 0; i < m_bits.size(); i++) {
if (!m_bits[i].isBroadcast()) {
return false;
@@ -185,6 +212,7 @@ NetDest::isBroadcast() const
bool
NetDest::isEmpty() const
{
assert(m_bits.size() > 0);
for (int i = 0; i < m_bits.size(); i++) {
if (!m_bits[i].isEmpty()) {
return false;
@@ -197,8 +225,9 @@ NetDest::isEmpty() const
NetDest
NetDest::OR(const NetDest& orNetDest) const
{
assert(m_bits.size() > 0);
assert(m_bits.size() == orNetDest.getSize());
NetDest result;
NetDest result(m_ruby_system);
for (int i = 0; i < m_bits.size(); i++) {
result.m_bits[i] = m_bits[i].OR(orNetDest.m_bits[i]);
}
@@ -209,8 +238,9 @@ NetDest::OR(const NetDest& orNetDest) const
NetDest
NetDest::AND(const NetDest& andNetDest) const
{
assert(m_bits.size() > 0);
assert(m_bits.size() == andNetDest.getSize());
NetDest result;
NetDest result(m_ruby_system);
for (int i = 0; i < m_bits.size(); i++) {
result.m_bits[i] = m_bits[i].AND(andNetDest.m_bits[i]);
}
@@ -221,6 +251,7 @@ NetDest::AND(const NetDest& andNetDest) const
bool
NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const
{
assert(m_bits.size() > 0);
assert(m_bits.size() == other_netDest.getSize());
for (int i = 0; i < m_bits.size(); i++) {
if (!m_bits[i].intersectionIsEmpty(other_netDest.m_bits[i])) {
@@ -233,6 +264,7 @@ NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const
bool
NetDest::isSuperset(const NetDest& test) const
{
assert(m_bits.size() > 0);
assert(m_bits.size() == test.getSize());
for (int i = 0; i < m_bits.size(); i++) {
@@ -246,12 +278,15 @@ NetDest::isSuperset(const NetDest& test) const
bool
NetDest::isElement(MachineID element) const
{
assert(m_bits.size() > 0);
return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num));
}
void
NetDest::resize()
{
assert(m_ruby_system != nullptr);
m_bits.resize(MachineType_base_level(MachineType_NUM));
assert(m_bits.size() == MachineType_NUM);
@@ -263,6 +298,7 @@ NetDest::resize()
void
NetDest::print(std::ostream& out) const
{
assert(m_bits.size() > 0);
out << "[NetDest (" << m_bits.size() << ") ";
for (int i = 0; i < m_bits.size(); i++) {
@@ -277,6 +313,7 @@ NetDest::print(std::ostream& out) const
bool
NetDest::isEqual(const NetDest& n) const
{
assert(m_bits.size() > 0);
assert(m_bits.size() == n.m_bits.size());
for (unsigned int i = 0; i < m_bits.size(); ++i) {
if (!m_bits[i].isEqual(n.m_bits[i]))
@@ -285,5 +322,19 @@ NetDest::isEqual(const NetDest& n) const
return true;
}
int
NetDest::MachineType_base_count(const MachineType& obj)
{
assert(m_ruby_system != nullptr);
return m_ruby_system->MachineType_base_count(obj);
}
int
NetDest::MachineType_base_number(const MachineType& obj)
{
assert(m_ruby_system != nullptr);
return m_ruby_system->MachineType_base_number(obj);
}
} // namespace ruby
} // namespace gem5

View File

@@ -41,6 +41,8 @@ namespace gem5
namespace ruby
{
class RubySystem;
// NetDest specifies the network destination of a Message
class NetDest
{
@@ -48,6 +50,7 @@ class NetDest
// Constructors
// creates and empty set
NetDest();
NetDest(RubySystem *ruby_system);
explicit NetDest(int bit_size);
NetDest& operator=(const Set& obj);
@@ -98,6 +101,8 @@ class NetDest
void print(std::ostream& out) const;
void setRubySystem(RubySystem *rs) { m_ruby_system = rs; resize(); }
private:
// returns a value >= MachineType_base_level("this machine")
// and < MachineType_base_level("next highest machine")
@@ -112,6 +117,12 @@ class NetDest
NodeID bitIndex(NodeID index) const { return index; }
std::vector<Set> m_bits; // a vector of bit vectors - i.e. Sets
// Needed to call MacheinType_base_count/level
RubySystem *m_ruby_system = nullptr;
int MachineType_base_count(const MachineType& obj);
int MachineType_base_number(const MachineType& obj);
};
inline std::ostream&

View File

@@ -38,13 +38,14 @@ namespace ruby
using stl_helpers::operator<<;
SubBlock::SubBlock(Addr addr, int size)
SubBlock::SubBlock(Addr addr, int size, int cl_bits)
{
m_address = addr;
resize(size);
for (int i = 0; i < size; i++) {
setByte(i, 0);
}
m_cache_line_bits = cl_bits;
}
void
@@ -52,7 +53,7 @@ SubBlock::internalMergeFrom(const DataBlock& data)
{
int size = getSize();
assert(size > 0);
int offset = getOffset(m_address);
int offset = getOffset(m_address, m_cache_line_bits);
for (int i = 0; i < size; i++) {
this->setByte(i, data.getByte(offset + i));
}
@@ -63,7 +64,7 @@ SubBlock::internalMergeTo(DataBlock& data) const
{
int size = getSize();
assert(size > 0);
int offset = getOffset(m_address);
int offset = getOffset(m_address, m_cache_line_bits);
for (int i = 0; i < size; i++) {
// This will detect crossing a cache line boundary
data.setByte(offset + i, this->getByte(i));

View File

@@ -45,7 +45,7 @@ class SubBlock
{
public:
SubBlock() { }
SubBlock(Addr addr, int size);
SubBlock(Addr addr, int size, int cl_bits);
~SubBlock() { }
Addr getAddress() const { return m_address; }
@@ -74,6 +74,7 @@ class SubBlock
// Data Members (m_ prefix)
Addr m_address;
std::vector<uint8_t> m_data;
int m_cache_line_bits;
};
inline std::ostream&

Some files were not shown because too many files have changed in this diff Show More