diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml
index 8e828675af..f40f3faff9 100644
--- a/.github/workflows/ci-tests.yaml
+++ b/.github/workflows/ci-tests.yaml
@@ -5,7 +5,7 @@ name: CI Tests
 
 on:
   pull_request:
-    types: [opened, edited, synchronize, ready_for_review]
+    types: [opened, synchronize, ready_for_review]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
@@ -21,17 +21,48 @@ jobs:
     - uses: actions/setup-python@v5
     - uses: pre-commit/action@v3.0.1
 
+  get-date:
+    # We use the date to label caches. A cache is a "hit" if the requested
+    # binary and date are the same as what is stored in the cache.
+    # This essentially means the first job to run on a given day for a given
+    # binary will always be a "miss" and will have to build the binary then
+    # upload it as that day's binary. While this isn't the most
+    # efficient way to do this, the alternative was to take a hash of the
+    # `src` directory contents and use it as a key. We found there to be bugs
+    # with the hash function where this task would timeout. This approach is
+    # simple, works, and still provides some level of caching.
+    runs-on: ubuntu-latest
+    outputs:
+      date: ${{ steps.date.outputs.date }}
+    steps:
+    - name: Get the current date
+      id: date
+      run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
   unittests-all-opt:
     runs-on: [self-hosted, linux, x64]
     if: github.event.pull_request.draft == false
     container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
-    needs: [pre-commit] # only runs if pre-commit passes.
+    needs: [pre-commit, get-date] # only runs if pre-commit passes.
     timeout-minutes: 60
     steps:
     - uses: actions/checkout@v4
+
+
+    # Restore the cache if available. As this just builds the unittests
+    # we only obtain the cache and do not provide it if it is not
+    # available.
+    - name: Cache build/ALL
+      uses: actions/cache/restore@v4
+      with:
+        path: build/ALL
+        key: testlib-build-all-${{ needs.get-date.outputs.date }}
+        restore-keys: |
+          testlib-build-all
+
     - name: CI Unittests
       working-directory: ${{ github.workspace }}
-      run: scons build/ALL/unittests.opt -j $(nproc)
+      run: scons --no-compress-debug build/ALL/unittests.opt -j $(nproc)
     - run: echo "This job's status is ${{ job.status }}."
 
   testlib-quick-matrix:
@@ -83,14 +114,24 @@ jobs:
     runs-on: [self-hosted, linux, x64]
     if: github.event.pull_request.draft == false
     container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
-    needs: [pre-commit, testlib-quick-matrix]
+    needs: [pre-commit, testlib-quick-matrix, get-date]
     strategy:
       matrix:
        build-target: ${{ fromJson(needs.testlib-quick-matrix.outputs.build-matrix) }}
     steps:
     - uses: actions/checkout@v4
+
+    - name: Cache build/ALL
+      uses: actions/cache@v4
+      if: ${{ endsWith(matrix.build-target, 'build/ALL/gem5.opt') }}
+      with:
+        path: build/ALL
+        key: testlib-build-all-${{ needs.get-date.outputs.date }}
+        restore-keys: |
+          testlib-build-all
+
     - name: Build gem5
-      run: scons ${{ matrix.build-target }} -j $(nproc)
+      run: scons --no-compress-debug ${{ matrix.build-target }} -j $(nproc)
 
     # Upload the gem5 binary as an artifact.
     # Note: the "achor.txt" file is a hack to make sure the paths are
@@ -199,13 +240,23 @@ jobs:
     runs-on: [self-hosted, linux, x64]
     container: ghcr.io/gem5/gcn-gpu:latest
     timeout-minutes: 180
-    needs: [pre-commit]
+    needs: [pre-commit, get-date]
     steps:
     - uses: actions/checkout@v4
 
+    # Obtain the cache if available. If not available this will upload
+    # this job's instance of the cache.
+    - name: Cache build/VEGA_X86
+      uses: actions/cache@v4
+      with:
+        path: build/VEGA_X86
+        key: testlib-build-vega-${{ needs.get-date.outputs.date }}
+        restore-keys: |
+          testlib-build-vega
+
     # Build the VEGA_X86/gem5.opt binary.
     - name: Build VEGA_X86/gem5.opt
-      run: scons build/VEGA_X86/gem5.opt -j`nproc`
+      run: scons --no-compress-debug build/VEGA_X86/gem5.opt -j`nproc`
 
     # Run the GPU tests.
     - name: Run Testlib GPU Tests
diff --git a/.github/workflows/compiler-tests.yaml b/.github/workflows/compiler-tests.yaml
index eb570916bc..c44d2d9161 100644
--- a/.github/workflows/compiler-tests.yaml
+++ b/.github/workflows/compiler-tests.yaml
@@ -13,8 +13,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        image: [gcc-version-13, gcc-version-12, gcc-version-11, gcc-version-10, clang-version-18, clang-version-17, clang-version-16, clang-version-15,
-          clang-version-14, ubuntu-22.04_all-dependencies, ubuntu-24.04_all-dependencies, ubuntu-24.04_min-dependencies]
+        image: [gcc-version-14, gcc-version-13, gcc-version-12, gcc-version-11, gcc-version-10, clang-version-18, clang-version-17, clang-version-16,
+          clang-version-15, clang-version-14, ubuntu-22.04_all-dependencies, ubuntu-24.04_all-dependencies, ubuntu-24.04_min-dependencies]
         opts: [.opt, .fast]
     runs-on: [self-hosted, linux, x64]
     timeout-minutes: 2880 # 48 hours
@@ -32,7 +32,7 @@ jobs:
       matrix:
         gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, MIPS, 'NULL', NULL_MESI_Two_Level,
          NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base, VEGA_X86]
-        image: [gcc-version-13, clang-version-18]
+        image: [gcc-version-14, clang-version-18]
        opts: [.opt]
     runs-on: [self-hosted, linux, x64]
     timeout-minutes: 2880 # 48 hours
diff --git a/.github/workflows/daily-tests.yaml b/.github/workflows/daily-tests.yaml
index 54711ad63d..584cce0d90 100644
--- a/.github/workflows/daily-tests.yaml
+++ b/.github/workflows/daily-tests.yaml
@@ -8,21 +8,32 @@ on:
   workflow_dispatch:
 
 jobs:
+
+  get-date:
+    runs-on: ubuntu-latest
+    outputs:
+      date: ${{ steps.date.outputs.date }}
+    steps:
+    - name: Get the current date
+      id: date
+      run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
   # this builds both unittests.fast and unittests.debug
   unittests-fast-debug:
     strategy:
       matrix:
        type: [fast, debug]
     runs-on: [self-hosted, linux, x64]
     container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
     timeout-minutes: 60
+    needs: get-date
     steps:
     - uses: actions/checkout@v4
     - name: Cache build/ALL
       uses: actions/cache/restore@v4
       with:
         path: build/ALL
-        key: testlib-build-all-${{ hashFiles('src/**') }}
+        key: testlib-build-all-${{ needs.get-date.outputs.date }}
         restore-keys: |
           testlib-build-all
     - name: ALL/unittests.${{ matrix.type }} UnitTests
@@ -38,6 +49,7 @@ jobs:
     runs-on: [self-hosted, linux, x64]
     container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
     timeout-minutes: 1440 # 24 hours for entire matrix to run
+    needs: get-date
     steps:
     - name: Clean runner
       run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
@@ -47,13 +59,13 @@ jobs:
       uses: actions/cache@v4
       with:
         path: build/NULL
-        key: testlib-build-null-${{ hashFiles('src/**') }}
+        key: testlib-build-null-${{ needs.get-date.outputs.date }}
 
     - name: Restore build/ALL cache
       uses: actions/cache@v4
       with:
         path: build/ALL
-        key: testlib-build-all-${{ hashFiles('src/**') }}
+        key: testlib-build-all-${{ needs.get-date.outputs.date }}
 
     - name: long ${{ matrix.test-type }} tests
       working-directory: ${{ github.workspace }}/tests
@@ -81,6 +93,7 @@ jobs:
       gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
     container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
     timeout-minutes: 1440 # 24 hours
+    needs: get-date
     steps:
     - name: Clean runner
       run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
@@ -90,7 +103,7 @@ jobs:
       uses: actions/cache@v4
       with:
         path: build/ALL
-        key: testlib-build-all-${{ hashFiles('src/**') }}
+        key: testlib-build-all-${{ needs.get-date.outputs.date }}
         restore-keys: |
           testlib-build-all
 
@@ -113,6 +126,7 @@ jobs:
     runs-on: [self-hosted, linux, x64]
     container: ghcr.io/gem5/gcn-gpu:latest
     timeout-minutes: 720 # 12 hours
+    needs: get-date
     steps:
     - uses: actions/checkout@v4
 
@@ -123,7 +137,7 @@ jobs:
       uses: actions/cache@v4
       with:
         path: build/VEGA_X86
-        key: testlib-build-vega-${{ hashFiles('src/**') }}
+        key: testlib-build-vega-${{ needs.get-date.outputs.date }}
         restore-keys: |
           testlib-build-vega
 
diff --git a/.github/workflows/weekly-tests.yaml b/.github/workflows/weekly-tests.yaml
index 7ada70fddb..6baec1fa68 100644
--- a/.github/workflows/weekly-tests.yaml
+++ b/.github/workflows/weekly-tests.yaml
@@ -9,6 +9,15 @@ on:
 
 jobs:
 
+  get-date:
+    runs-on: ubuntu-latest
+    outputs:
+      date: ${{ steps.date.outputs.date }}
+    steps:
+    - name: Get the current date
+      id: date
+      run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
   # start running the very-long tests
   testlib-very-long-tests:
     strategy:
@@ -18,6 +27,7 @@ jobs:
     runs-on: [self-hosted, linux, x64]
     container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
     timeout-minutes: 4320 # 3 days
+    needs: get-date
     steps:
     - name: Clean runner
       run: rm -rf ./* || true rm -rf ./.??* || true rm -rf ~/.cache || true
@@ -27,7 +37,7 @@ jobs:
       uses: actions/cache@v4
       with:
         path: build/ALL
-        key: testlib-build-all-${{ hashFiles('src/**') }}
+        key: testlib-build-all-${{ needs.get-date.outputs.date }}
         restore-keys: |
           testlib-build-all
 
@@ -49,6 +59,7 @@ jobs:
     runs-on: [self-hosted, linux, x64]
     container: ghcr.io/gem5/gcn-gpu:latest
     timeout-minutes: 4320 # 3 days
+    needs: get-date
     steps:
     - uses: actions/checkout@v4
 
@@ -59,7 +70,7 @@ jobs:
       uses: actions/cache@v4
       with:
         path: build/VEGA_X86
-        key: testlib-build-vega-${{ hashFiles('src/**') }}
+        key: testlib-build-vega-${{ needs.get-date.outputs.date }}
         restore-keys: |
           testlib-build-vega
 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7e17adca7f..03e39a3639 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,11 +49,11 @@ exclude: |
     tests/.*/ref/.*
   )$
 
-default_stages: [commit]
+default_stages: [pre-commit]
 
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.5.0
+  rev: v5.0.0
   hooks:
   - id: trailing-whitespace
   - id: end-of-file-fixer
@@ -69,7 +69,7 @@ repos:
   - id: destroyed-symlinks
   - id: requirements-txt-fixer
 - repo: https://github.com/PyCQA/isort
-  rev: 5.11.5
+  rev: 5.13.2
   hooks:
   - id: isort
 - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
@@ -77,11 +77,11 @@
repos: hooks: - id: yamlfmt - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/asottile/pyupgrade - rev: v3.14.0 + rev: v3.17.0 hooks: - id: pyupgrade # Python 3.8 is the earliest version supported. diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..9543f965b7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.analysis.extraPaths": [ + "src/python", + "ext", + "tests" + ] +} diff --git a/configs/common/HMC.py b/configs/common/HMC.py index 98ff091115..0dfbebb3e5 100644 --- a/configs/common/HMC.py +++ b/configs/common/HMC.py @@ -568,9 +568,9 @@ def config_hmc_dev(opt, system, hmc_host): # Attach 4 serial link to 4 crossbar/s for i in range(opt.num_serial_links): if opt.enable_link_monitor: - system.hmc_host.seriallink[ - i - ].mem_side_port = system.hmc_dev.lmonitor[i].cpu_side_port + system.hmc_host.seriallink[i].mem_side_port = ( + system.hmc_dev.lmonitor[i].cpu_side_port + ) system.hmc_dev.lmonitor[i].mem_side_port = system.hmc_dev.xbar[ i ].cpu_side_ports @@ -613,14 +613,12 @@ def config_hmc_dev(opt, system, hmc_host): ] # Connect the bridge between corssbars - system.hmc_dev.xbar[ - i - ].mem_side_ports = system.hmc_dev.buffers[ - index - ].cpu_side_port - system.hmc_dev.buffers[ - index - ].mem_side_port = system.hmc_dev.xbar[j].cpu_side_ports + system.hmc_dev.xbar[i].mem_side_ports = ( + system.hmc_dev.buffers[index].cpu_side_port + ) + system.hmc_dev.buffers[index].mem_side_port = ( + system.hmc_dev.xbar[j].cpu_side_ports + ) else: # Don't connect the xbar to itself pass @@ -629,49 +627,49 @@ def config_hmc_dev(opt, system, hmc_host): # can only direct traffic to it local vaults if opt.arch == "mixed": system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4]) - system.hmc_dev.xbar[ - 3 - ].mem_side_ports = system.hmc_dev.buffer30.cpu_side_port + system.hmc_dev.xbar[3].mem_side_ports = ( + system.hmc_dev.buffer30.cpu_side_port + 
) system.hmc_dev.buffer30.mem_side_port = system.hmc_dev.xbar[ 0 ].cpu_side_ports system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8]) - system.hmc_dev.xbar[ - 3 - ].mem_side_ports = system.hmc_dev.buffer31.cpu_side_port + system.hmc_dev.xbar[3].mem_side_ports = ( + system.hmc_dev.buffer31.cpu_side_port + ) system.hmc_dev.buffer31.mem_side_port = system.hmc_dev.xbar[ 1 ].cpu_side_ports system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12]) - system.hmc_dev.xbar[ - 3 - ].mem_side_ports = system.hmc_dev.buffer32.cpu_side_port + system.hmc_dev.xbar[3].mem_side_ports = ( + system.hmc_dev.buffer32.cpu_side_port + ) system.hmc_dev.buffer32.mem_side_port = system.hmc_dev.xbar[ 2 ].cpu_side_ports system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4]) - system.hmc_dev.xbar[ - 2 - ].mem_side_ports = system.hmc_dev.buffer20.cpu_side_port + system.hmc_dev.xbar[2].mem_side_ports = ( + system.hmc_dev.buffer20.cpu_side_port + ) system.hmc_dev.buffer20.mem_side_port = system.hmc_dev.xbar[ 0 ].cpu_side_ports system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8]) - system.hmc_dev.xbar[ - 2 - ].mem_side_ports = system.hmc_dev.buffer21.cpu_side_port + system.hmc_dev.xbar[2].mem_side_ports = ( + system.hmc_dev.buffer21.cpu_side_port + ) system.hmc_dev.buffer21.mem_side_port = system.hmc_dev.xbar[ 1 ].cpu_side_ports system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16]) - system.hmc_dev.xbar[ - 2 - ].mem_side_ports = system.hmc_dev.buffer23.cpu_side_port + system.hmc_dev.xbar[2].mem_side_ports = ( + system.hmc_dev.buffer23.cpu_side_port + ) system.hmc_dev.buffer23.mem_side_port = system.hmc_dev.xbar[ 3 ].cpu_side_ports diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py index 3e332d76b4..be928651ae 100644 --- a/configs/common/Simulation.py +++ b/configs/common/Simulation.py @@ -541,9 +541,9 @@ def run(options, root, testsys, cpu_class): IndirectBPClass = ObjectList.indirect_bp_list.get( options.indirect_bp_type ) - 
switch_cpus[ - i - ].branchPred.indirectBranchPred = IndirectBPClass() + switch_cpus[i].branchPred.indirectBranchPred = ( + IndirectBPClass() + ) switch_cpus[i].createThreads() # If elastic tracing is enabled attach the elastic trace probe diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py index 826d4e19f4..36aa64eca5 100644 --- a/configs/common/cores/arm/HPI.py +++ b/configs/common/cores/arm/HPI.py @@ -1683,6 +1683,15 @@ class HPI_MMU(ArmMMU): class HPI_BTB(SimpleBTB): numEntries = 128 tagBits = 18 + associativity = 1 + instShiftAmt = 2 + btbReplPolicy = LRURP() + btbIndexingPolicy = BTBSetAssociative( + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + assoc=Parent.associativity, + tag_bits=Parent.tagBits, + ) class HPI_BP(TournamentBP): diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py index 45bb391bb1..ee42c3c062 100644 --- a/configs/common/cores/arm/O3_ARM_v7a.py +++ b/configs/common/cores/arm/O3_ARM_v7a.py @@ -111,6 +111,15 @@ class O3_ARM_v7a_FUP(FUPool): class O3_ARM_v7a_BTB(SimpleBTB): numEntries = 2048 tagBits = 18 + associativity = 1 + instShiftAmt = 2 + btbReplPolicy = LRURP() + btbIndexingPolicy = BTBSetAssociative( + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + assoc=Parent.associativity, + tag_bits=Parent.tagBits, + ) # Bi-Mode Branch Predictor diff --git a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py index f3b55fd3a8..8ea04aa5f7 100644 --- a/configs/common/cores/arm/ex5_big.py +++ b/configs/common/cores/arm/ex5_big.py @@ -108,6 +108,15 @@ class ex5_big_FUP(FUPool): class ex5_big_BTB(SimpleBTB): numEntries = 4096 tagBits = 18 + associativity = 1 + instShiftAmt = 2 + btbReplPolicy = LRURP() + btbIndexingPolicy = BTBSetAssociative( + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + assoc=Parent.associativity, + tag_bits=Parent.tagBits, + ) # Bi-Mode Branch Predictor diff --git 
a/configs/deprecated/example/fs.py b/configs/deprecated/example/fs.py index 7426c47c7e..df77b6d830 100644 --- a/configs/deprecated/example/fs.py +++ b/configs/deprecated/example/fs.py @@ -213,9 +213,9 @@ def build_test_system(np, isa: ISA): IndirectBPClass = ObjectList.indirect_bp_list.get( args.indirect_bp_type ) - test_sys.cpu[ - i - ].branchPred.indirectBranchPred = IndirectBPClass() + test_sys.cpu[i].branchPred.indirectBranchPred = ( + IndirectBPClass() + ) test_sys.cpu[i].createThreads() # If elastic tracing is enabled when not restoring from checkpoint and diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 1ae6edf391..d512594afe 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -935,9 +935,9 @@ gpu_port_idx = gpu_port_idx - args.num_cp * 2 token_port_idx = 0 for i in range(len(system.ruby._cpu_ports)): if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer): - system.cpu[shader_idx].CUs[ - token_port_idx - ].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort + system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = ( + system.ruby._cpu_ports[i].gmTokenPort + ) token_port_idx += 1 wavefront_size = args.wf_size diff --git a/configs/example/gem5_library/arm-demo-ubuntu-run.py b/configs/example/gem5_library/arm-demo-ubuntu-run.py new file mode 100644 index 0000000000..9b39c34330 --- /dev/null +++ b/configs/example/gem5_library/arm-demo-ubuntu-run.py @@ -0,0 +1,92 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script further shows an example of booting an ARM based full system Ubuntu +disk image. This simulation boots the disk image using the ArmDemoBoard. 
+ +Usage +----- + +```bash +scons build/ARM/gem5.opt -j $(nproc) +./build/ARM/gem5.opt configs/example/gem5_library/arm-demo-ubuntu-run.py +``` +""" +import argparse + +from gem5.isas import ISA +from gem5.prebuilt.demo.arm_demo_board import ArmDemoBoard +from gem5.resources.resource import obtain_resource +from gem5.simulate.exit_event import ExitEvent +from gem5.simulate.simulator import Simulator +from gem5.utils.requires import requires + +# This runs a check to ensure the gem5 binary interpreting this file is compiled to include the ARM ISA. +requires(isa_required=ISA.ARM) + +parser = argparse.ArgumentParser( + description="An example configuration script to run the ArmDemoBoard." +) + +parser.add_argument( + "--use-kvm", + action="store_true", + help="Use KVM cores instead of Timing.", +) +args = parser.parse_args() + +board = ArmDemoBoard(use_kvm=args.use_kvm) + +board.set_workload( + obtain_resource( + "arm-ubuntu-24.04-boot-with-systemd", resource_version="2.0.0" + ) +) + + +def exit_event_handler(): + print("First exit: kernel booted") + yield False # gem5 is now executing systemd startup + print("Second exit: Started `after_boot.sh` script") + # The after_boot.sh script is executed after the kernel and systemd have + # booted. + yield False # gem5 is now executing the `after_boot.sh` script + print("Third exit: Finished `after_boot.sh` script") + # The after_boot.sh script will run a script if it is passed via + # m5 readfile. This is the last exit event before the simulation exits. + yield True + + +# We define the system with the aforementioned system defined. 
+simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.EXIT: exit_event_handler(), + }, +) + +simulator.run() diff --git a/configs/example/gem5_library/riscv-rvv-example.py b/configs/example/gem5_library/riscv-rvv-example.py new file mode 100755 index 0000000000..57a6fd7afd --- /dev/null +++ b/configs/example/gem5_library/riscv-rvv-example.py @@ -0,0 +1,120 @@ +# Copyright (c) 2024 Barcelona Supercomputing Center +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +""" +This script demonstrates how to run RISC-V vector-enabled binaries in SE mode +with gem5. It accepts the number of CORES, VLEN, and ELEN as optional +parameters, as well as the resource name to run. If no resource name is +provided, a list of available resources will be displayed. If one is given the +simulation will then execute the specified resource binary with the selected +parameters until completion. + + +Usage +----- + +# Compile gem5 for RISC-V +scons build/RISCV/gem5.opt + +# Run the simulation +./build/RISCV/gem5.opt configs/example/gem5_library/riscv-rvv-example.py \ + [-c CORES] [-v VLEN] [-e ELEN] + +""" + +import argparse + +from m5.objects import RiscvO3CPU + +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( + PrivateL1PrivateL2CacheHierarchy, +) +from gem5.components.memory import SingleChannelDDR3_1600 +from gem5.components.processors.base_cpu_core import BaseCPUCore +from gem5.components.processors.base_cpu_processor import BaseCPUProcessor +from gem5.isas import ISA +from gem5.resources.resource import obtain_resource +from gem5.simulate.simulator import Simulator +from gem5.utils.requires import requires + + +class RVVCore(BaseCPUCore): + def __init__(self, elen, vlen, cpu_id): + super().__init__(core=RiscvO3CPU(cpu_id=cpu_id), isa=ISA.RISCV) + self.core.isa[0].elen = elen + self.core.isa[0].vlen = vlen + + +requires(isa_required=ISA.RISCV) + +resources = [ + "rvv-branch", + "rvv-index", + "rvv-matmul", + "rvv-memcpy", + "rvv-reduce", + "rvv-saxpy", + "rvv-sgemm", + "rvv-strcmp", + "rvv-strcpy", + "rvv-strlen", + "rvv-strlen-fault", + "rvv-strncpy", +] + +parser = argparse.ArgumentParser() +parser.add_argument("resource", type=str, choices=resources) +parser.add_argument("-c", "--cores", required=False, type=int, default=1) +parser.add_argument("-v", "--vlen", required=False, type=int, default=256) +parser.add_argument("-e", "--elen", 
required=False, type=int, default=64) + +args = parser.parse_args() + +cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( + l1d_size="32KiB", l1i_size="32KiB", l2_size="512KiB" +) + +memory = SingleChannelDDR3_1600() + +processor = BaseCPUProcessor( + cores=[RVVCore(args.elen, args.vlen, i) for i in range(args.cores)] +) + +board = SimpleBoard( + clk_freq="1GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, +) + +binary = obtain_resource(args.resource) +board.set_se_binary_workload(binary) + +simulator = Simulator(board=board, full_system=False) +print("Beginning simulation!") +simulator.run() diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py index eb95526509..8cb29b07ba 100644 --- a/configs/example/gpufs/runfs.py +++ b/configs/example/gpufs/runfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Advanced Micro Devices, Inc. +# Copyright (c) 2021-2024 Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -82,10 +82,6 @@ def addRunFSOptions(parser): help="The second disk image to mount (/dev/sdb)", ) parser.add_argument("--kernel", default=None, help="Linux kernel to boot") - parser.add_argument("--gpu-rom", default=None, help="GPU BIOS to load") - parser.add_argument( - "--gpu-mmio-trace", default=None, help="GPU MMIO trace to load" - ) parser.add_argument( "--checkpoint-before-mmios", default=False, @@ -241,16 +237,6 @@ def runGpuFSSystem(args): math.ceil(float(n_cu) / args.cu_per_scalar_cache) ) - # Verify MMIO trace is valid. This is only needed for Vega10 simulations. - # The md5sum refers to the md5sum of the Vega10 MMIO hardware trace in - # the gem5-resources repository. By checking it here, we avoid potential - # errors that would cause the driver not to load and simulations to fail. 
- if args.gpu_device == "Vega10": - mmio_file = open(args.gpu_mmio_trace, "rb") - mmio_md5 = hashlib.md5(mmio_file.read()).hexdigest() - if mmio_md5 != "c4ff3326ae8a036e329b8b595c83bd6d": - m5.util.panic("MMIO file does not match gem5 resources") - system = makeGpuFSSystem(args) root = Root( diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py index dedbcc9324..bdeda9024a 100644 --- a/configs/example/gpufs/system/amdgpu.py +++ b/configs/example/gpufs/system/amdgpu.py @@ -176,8 +176,6 @@ def createGPU(system, args): def connectGPU(system, args): system.pc.south_bridge.gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0) - system.pc.south_bridge.gpu.trace_file = args.gpu_mmio_trace - system.pc.south_bridge.gpu.rom_binary = args.gpu_rom system.pc.south_bridge.gpu.checkpoint_before_mmios = ( args.checkpoint_before_mmios ) diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 1ce261d764..b650659303 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -336,9 +336,9 @@ def makeGpuFSSystem(args): token_port_idx = 0 for i in range(len(system.ruby._cpu_ports)): if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer): - system.cpu[shader_idx].CUs[ - token_port_idx - ].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort + system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = ( + system.ruby._cpu_ports[i].gmTokenPort + ) token_port_idx += 1 wavefront_size = args.wf_size @@ -346,9 +346,9 @@ def makeGpuFSSystem(args): # The pipeline issues wavefront_size number of uncoalesced requests # in one GPU issue cycle. Hence wavefront_size mem ports. 
for j in range(wavefront_size): - system.cpu[shader_idx].CUs[i].memory_port[ - j - ] = system.ruby._cpu_ports[gpu_port_idx].in_ports[j] + system.cpu[shader_idx].CUs[i].memory_port[j] = ( + system.ruby._cpu_ports[gpu_port_idx].in_ports[j] + ) gpu_port_idx += 1 for i in range(args.num_compute_units): diff --git a/configs/example/lupv/run_lupv.py b/configs/example/lupv/run_lupv.py index f6f938b16c..57bf6ca6b8 100644 --- a/configs/example/lupv/run_lupv.py +++ b/configs/example/lupv/run_lupv.py @@ -110,8 +110,7 @@ board.set_kernel_disk_workload( # Begin running of the simulation. print("Running with ISA: " + processor.get_isa().name) print() -root = Root(full_system=True, system=board) -board._pre_instantiate() +root = board._pre_instantiate() m5.instantiate() print("Beginning simulation!") diff --git a/configs/example/read_config.py b/configs/example/read_config.py index 27e23b69ee..9f86c3af49 100644 --- a/configs/example/read_config.py +++ b/configs/example/read_config.py @@ -250,9 +250,11 @@ class ConfigManager: obj, param_name, [ - self.objects_by_name[name] - if name != "Null" - else m5.params.NULL + ( + self.objects_by_name[name] + if name != "Null" + else m5.params.NULL + ) for name in param_values ], ) diff --git a/configs/example/ruby_gpu_random_test.py b/configs/example/ruby_gpu_random_test.py index bfcd2c953d..eb7dd3acbd 100644 --- a/configs/example/ruby_gpu_random_test.py +++ b/configs/example/ruby_gpu_random_test.py @@ -371,6 +371,7 @@ for dma_idx in range(n_DMAs): num_lanes=1, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 @@ -393,6 +394,7 @@ for cu_idx in range(n_CUs): num_lanes=args.wf_size, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 diff --git a/configs/learning_gem5/part3/msi_caches.py b/configs/learning_gem5/part3/msi_caches.py index c198662c5e..b719c7ab60 100644 --- 
a/configs/learning_gem5/part3/msi_caches.py +++ b/configs/learning_gem5/part3/msi_caches.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -191,7 +192,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/ruby_caches_MI_example.py b/configs/learning_gem5/part3/ruby_caches_MI_example.py index baee120bb9..583041a674 100644 --- a/configs/learning_gem5/part3/ruby_caches_MI_example.py +++ b/configs/learning_gem5/part3/ruby_caches_MI_example.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -180,7 +181,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. 
self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/test_caches.py b/configs/learning_gem5/part3/test_caches.py index 4e8e8febda..be2d46253e 100644 --- a/configs/learning_gem5/part3/test_caches.py +++ b/configs/learning_gem5/part3/test_caches.py @@ -79,6 +79,7 @@ class TestCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.clk_domain, + ruby_system=self, ) for i in range(num_testers) ] diff --git a/configs/ruby/AMD_Base_Constructor.py b/configs/ruby/AMD_Base_Constructor.py index ff4246a7e0..7d40862517 100644 --- a/configs/ruby/AMD_Base_Constructor.py +++ b/configs/ruby/AMD_Base_Constructor.py @@ -84,14 +84,14 @@ class CPCntrl(AMD_Base_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index 313d1d514a..15108bb674 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -114,14 +114,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - 
self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -169,7 +169,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): # TCP_Controller inherits this from RubyController self.mandatory_queue_latency = options.mandatory_queue_latency - self.coalescer = VIPERCoalescer() + self.coalescer = VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -182,7 +182,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): options.max_coalesces_per_cycle ) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -211,7 +211,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.L1cache.create(options) self.issue_latency = 1 - self.coalescer = VIPERCoalescer() + self.coalescer = VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -219,7 +219,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.coalescer.support_inst_reqs = False self.coalescer.is_cpu_sequencer = False - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -387,7 +387,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -686,7 +688,7 @@ def construct_gpudirs(options, system, 
ruby_system, network): dir_cntrl.addr_ranges = dram_intf.range # Append - exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i) + exec("ruby_system.gpu_dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) mem_ctrls.append(mem_ctrl) diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index e0de4e0636..9054fefc01 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index e6c4e81f91..d7ad3bdc04 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 500afbc199..6e1e0b97f3 100644 --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -94,7 +94,7 @@ def create_system( is_icache=False, ) - prefetcher = RubyPrefetcher() + prefetcher = RubyPrefetcher(block_size=options.cacheline_size) clk_domain = cpus[i].clk_domain diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py index aeab96a85f..1095defc57 100644 --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -112,14 +112,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = 
RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -194,7 +194,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index e427a39de8..0a6671aa4b 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -308,7 +308,9 @@ def create_directories(options, bootmem, ruby_system, system): for i in range(options.num_dirs): dir_cntrl = Directory_Controller() dir_cntrl.version = i - dir_cntrl.directory = RubyDirectoryMemory() + dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) dir_cntrl.ruby_system = ruby_system exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) @@ -316,7 +318,9 @@ def create_directories(options, bootmem, ruby_system, system): if bootmem is not None: rom_dir_cntrl = Directory_Controller() - rom_dir_cntrl.directory = RubyDirectoryMemory() + rom_dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) rom_dir_cntrl.ruby_system = ruby_system rom_dir_cntrl.version = i + 1 rom_dir_cntrl.memory = bootmem.port diff --git a/src/arch/amdgpu/vega/operand.hh b/src/arch/amdgpu/vega/operand.hh index 1bb9b43d1f..8e76405562 100644 --- a/src/arch/amdgpu/vega/operand.hh +++ b/src/arch/amdgpu/vega/operand.hh @@ -960,11 +960,14 @@ class PackedReg uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1; value &= elem_mask; + // Clear the bits where the value goes so that operator| can be used. 
elem_mask <<= qw_lbit; - qword &= elem_mask; + qword &= ~elem_mask; - value <<= qw_lbit; - qword |= value; + // Promote to 64-bit to prevent shifting out of range + uint64_t value64 = value; + value64 <<= qw_lbit; + qword |= value64; dwords[udw] = uint32_t(qword >> 32); dwords[ldw] = uint32_t(qword & mask(32)); diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index 9fc4be0e9a..3e898c5a47 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -53,8 +53,6 @@ namespace gem5 namespace ArmISA { -GenericISA::BasicDecodeCache Decoder::defaultCache; - Decoder::Decoder(const ArmDecoderParams ¶ms) : InstDecoder(params, &data), dvmEnabled(params.dvm_enabled), diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 75488b6750..57c29546ae 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -94,7 +94,7 @@ class Decoder : public InstDecoder enums::DecoderFlavor decoderFlavor; /// A cache of decoded instruction objects. - static GenericISA::BasicDecodeCache defaultCache; + GenericISA::BasicDecodeCache defaultCache; friend class GenericISA::BasicDecodeCache; /** diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh index a76439574a..bcd067c284 100644 --- a/src/arch/arm/faults.hh +++ b/src/arch/arm/faults.hh @@ -264,7 +264,7 @@ class ArmFaultVals : public ArmFault static FaultVals vals; public: - ArmFaultVals(ExtMachInst mach_inst = 0, uint32_t _iss = 0) : + ArmFaultVals(ExtMachInst mach_inst = 0, uint32_t _iss = 0) : ArmFault(mach_inst, _iss) {} FaultName name() const override { return vals.name; } FaultOffset offset(ThreadContext *tc) override; diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index c8508e16e1..45b0985838 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011, 2016-2019 ARM Limited +// Copyright (c) 2010-2011, 2016-2019, 2024 ARM Limited // All rights reserved // // The license 
below extends only to copyright in the software and shall @@ -1891,6 +1891,150 @@ let {{ return new NVrsqrteD(machInst, vd, vm); } } + } else if ((b & 0x1c) == 0x00) { + if (bits(b, 1)) { + switch(size) { + case 1: + if (q) { + return new NVcvt2uhAQ(machInst, vd, vm); + } else { + return new NVcvt2uhAD(machInst, vd, vm); + } + case 2: + if (q) { + return new NVcvt2usAQ(machInst, vd, vm); + } else { + return new NVcvt2usAD(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2shAQ(machInst, vd, vm); + } else { + return new NVcvt2shAD(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2ssAQ(machInst, vd, vm); + } else { + return new NVcvt2ssAD(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } + } else if ((b & 0x1c) == 0x04) { + if (bits(b, 1)) { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2uhNQ(machInst, vd, vm); + } else { + return new NVcvt2uhND(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2usNQ(machInst, vd, vm); + } else { + return new NVcvt2usND(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2shNQ(machInst, vd, vm); + } else { + return new NVcvt2shND(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2ssNQ(machInst, vd, vm); + } else { + return new NVcvt2ssND(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } + } else if ((b & 0x1c) == 0x08) { + if (bits(b, 1)) { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2uhPQ(machInst, vd, vm); + } else { + return new NVcvt2uhPD(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2usPQ(machInst, vd, vm); + } else { + return new NVcvt2usPD(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2shPQ(machInst, vd, vm); + } 
else { + return new NVcvt2shPD(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2ssPQ(machInst, vd, vm); + } else { + return new NVcvt2ssPD(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } + } else if ((b & 0x1c) == 0x0c) { + if (bits(b, 1)) { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2uhMQ(machInst, vd, vm); + } else { + return new NVcvt2uhMD(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2usMQ(machInst, vd, vm); + } else { + return new NVcvt2usMD(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0b01: + if (q) { + return new NVcvt2shMQ(machInst, vd, vm); + } else { + return new NVcvt2shMD(machInst, vd, vm); + } + case 0b10: + if (q) { + return new NVcvt2ssMQ(machInst, vd, vm); + } else { + return new NVcvt2ssMD(machInst, vd, vm); + } + default: + return new Unknown(machInst); + } + } } else { return new Unknown(machInst); } diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 5f39e48cce..04d6929ae0 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011, 2015, 2019 ARM Limited +// Copyright (c) 2010-2011, 2015, 2019, 2024 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -3579,6 +3579,128 @@ let {{ ''' twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) + vcvthp2hCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); + float mid = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, srcElem1); + if (flushToZero(mid)) + fpscr.idc = 1; + destElem = vfpFpToFixed(mid, %s, 16, 0, true, %s); + __asm__ __volatile__("" :: "m" (destElem)); + finishVfp(fpscr, state, true); + FpscrExc = fpscr; + ''' + + vcvtahp2uhCode = vcvthp2hCode % ("false", 
"VfpRoundAway") + twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAD", "SimdCvtOp", + ("uint16_t",), 2, vcvtahp2uhCode) + twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAQ", "SimdCvtOp", + ("uint16_t",), 4, vcvtahp2uhCode) + + vcvtnhp2uhCode = vcvthp2hCode % ("false", "VfpRoundNearest") + twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhND", "SimdCvtOp", + ("uint16_t",), 2, vcvtnhp2uhCode) + twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhNQ", "SimdCvtOp", + ("uint16_t",), 4, vcvtnhp2uhCode) + + vcvtphp2uhCode = vcvthp2hCode % ("false", "VfpRoundUpward") + twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPD", "SimdCvtOp", + ("uint16_t",), 2, vcvtphp2uhCode) + twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPQ", "SimdCvtOp", + ("uint16_t",), 4, vcvtphp2uhCode) + + vcvtmhp2uhCode = vcvthp2hCode % ("false", "VfpRoundDown") + twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMD", "SimdCvtOp", + ("uint16_t",), 2, vcvtmhp2uhCode) + twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMQ", "SimdCvtOp", + ("uint16_t",), 4, vcvtmhp2uhCode) + + vcvtahp2shCode = vcvthp2hCode % ("true", "VfpRoundAway") + twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAD", "SimdCvtOp", + ("int16_t",), 2, vcvtahp2shCode) + twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAQ", "SimdCvtOp", + ("int16_t",), 4, vcvtahp2shCode) + + vcvtnhp2shCode = vcvthp2hCode % ("true", "VfpRoundNearest") + twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shND", "SimdCvtOp", + ("int16_t",), 2, vcvtnhp2shCode) + twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shNQ", "SimdCvtOp", + ("int16_t",), 4, vcvtnhp2shCode) + + vcvtphp2shCode = vcvthp2hCode % ("true", "VfpRoundUpward") + twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPD", "SimdCvtOp", + ("int16_t",), 2, vcvtphp2shCode) + twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPQ", "SimdCvtOp", + ("int16_t",), 4, vcvtphp2shCode) + + vcvtmhp2shCode = vcvthp2hCode % ("true", "VfpRoundDown") + twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMD", "SimdCvtOp", + ("int16_t",), 2, vcvtmhp2shCode) + twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMQ", "SimdCvtOp", + ("int16_t",), 4, vcvtmhp2shCode) 
+ + vcvtsp2sCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); + float mid = bitsToFp(srcElem1, (float)0.0); + if (flushToZero(mid)) + fpscr.idc = 1; + destElem = vfpFpToFixed(mid, %s, 32, 0, true, %s); + __asm__ __volatile__("" :: "m" (destElem)); + finishVfp(fpscr, state, true); + FpscrExc = fpscr; + ''' + + vcvtasp2usCode = vcvtsp2sCode % ("false", "VfpRoundAway") + twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAD", "SimdCvtOp", + ("uint32_t",), 2, vcvtasp2usCode) + twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAQ", "SimdCvtOp", + ("uint32_t",), 4, vcvtasp2usCode) + + vcvtnsp2usCode = vcvtsp2sCode % ("false", "VfpRoundNearest") + twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usND", "SimdCvtOp", + ("uint32_t",), 2, vcvtnsp2usCode) + twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usNQ", "SimdCvtOp", + ("uint32_t",), 4, vcvtnsp2usCode) + + vcvtpsp2usCode = vcvtsp2sCode % ("false", "VfpRoundUpward") + twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPD", "SimdCvtOp", + ("uint32_t",), 2, vcvtpsp2usCode) + twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPQ", "SimdCvtOp", + ("uint32_t",), 4, vcvtpsp2usCode) + + vcvtmsp2usCode = vcvtsp2sCode % ("false", "VfpRoundDown") + twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMD", "SimdCvtOp", + ("uint32_t",), 2, vcvtmsp2usCode) + twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMQ", "SimdCvtOp", + ("uint32_t",), 4, vcvtmsp2usCode) + + vcvtasp2ssCode = vcvtsp2sCode % ("true", "VfpRoundAway") + twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAD", "SimdCvtOp", + ("int32_t",), 2, vcvtasp2ssCode) + twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAQ", "SimdCvtOp", + ("int32_t",), 4, vcvtasp2ssCode) + + vcvtnsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundNearest") + twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssND", "SimdCvtOp", + ("int32_t",), 2, vcvtnsp2ssCode) + twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssNQ", "SimdCvtOp", + ("int32_t",), 4, vcvtnsp2ssCode) + + vcvtpsp2ssCode = vcvtsp2sCode % ("true", 
"VfpRoundUpward") + twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPD", "SimdCvtOp", + ("int32_t",), 2, vcvtpsp2ssCode) + twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPQ", "SimdCvtOp", + ("int32_t",), 4, vcvtpsp2ssCode) + + vcvtmsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundDown") + twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMD", "SimdCvtOp", + ("int32_t",), 2, vcvtmsp2ssCode) + twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMQ", "SimdCvtOp", + ("int32_t",), 4, vcvtmsp2ssCode) + vrsqrteCode = ''' destElem = unsignedRSqrtEstimate(srcElem1); ''' diff --git a/src/arch/generic/interrupts.hh b/src/arch/generic/interrupts.hh index 510775594e..c2ffce038d 100644 --- a/src/arch/generic/interrupts.hh +++ b/src/arch/generic/interrupts.hh @@ -89,6 +89,12 @@ class BaseInterrupts : public SimObject { panic("Interrupts::clearAll unimplemented!\n"); } + + virtual bool + isWakeUp() const + { + return true; + } }; } // namespace gem5 diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py index 7cc95ed6e8..0499beab83 100755 --- a/src/arch/isa_parser/isa_parser.py +++ b/src/arch/isa_parser/isa_parser.py @@ -111,11 +111,12 @@ class Template: operands = SubOperandList(self.parser, compositeCode, d.operands) - myDict[ - "reg_idx_arr_decl" - ] = "RegId srcRegIdxArr[%d]; RegId destRegIdxArr[%d]" % ( - d.operands.numSrcRegs + d.srcRegIdxPadding, - d.operands.numDestRegs + d.destRegIdxPadding, + myDict["reg_idx_arr_decl"] = ( + "RegId srcRegIdxArr[%d]; RegId destRegIdxArr[%d]" + % ( + d.operands.numSrcRegs + d.srcRegIdxPadding, + d.operands.numDestRegs + d.destRegIdxPadding, + ) ) # The reinterpret casts are largely because an array with a known @@ -821,7 +822,7 @@ class ISAParser(Grammar): "DBLCOLON", "ASTERISK", # C preprocessor directives - "CPPDIRECTIVE" + "CPPDIRECTIVE", # The following are matched but never returned. 
commented out to # suppress PLY warning # newfile directive diff --git a/src/arch/micro_asm.py b/src/arch/micro_asm.py index 0329800896..5b4f79fce3 100644 --- a/src/arch/micro_asm.py +++ b/src/arch/micro_asm.py @@ -140,9 +140,9 @@ def handle_statement(parser, container, statement): if statement.is_microop: if statement.mnemonic not in parser.microops.keys(): raise Exception(f"Unrecognized mnemonic: {statement.mnemonic}") - parser.symbols[ - "__microopClassFromInsideTheAssembler" - ] = parser.microops[statement.mnemonic] + parser.symbols["__microopClassFromInsideTheAssembler"] = ( + parser.microops[statement.mnemonic] + ) try: microop = eval( f"__microopClassFromInsideTheAssembler({statement.params})", @@ -166,9 +166,9 @@ def handle_statement(parser, container, statement): elif statement.is_directive: if statement.name not in container.directives.keys(): raise Exception(f"Unrecognized directive: {statement.name}") - parser.symbols[ - "__directiveFunctionFromInsideTheAssembler" - ] = container.directives[statement.name] + parser.symbols["__directiveFunctionFromInsideTheAssembler"] = ( + container.directives[statement.name] + ) try: eval( f"__directiveFunctionFromInsideTheAssembler({statement.params})", diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py index f87941d413..05854f48c5 100644 --- a/src/arch/riscv/RiscvISA.py +++ b/src/arch/riscv/RiscvISA.py @@ -114,6 +114,13 @@ class RiscvISA(BaseISA): enable_Zicbom_fs = Param.Bool(True, "Enable Zicbom extension in FS mode") enable_Zicboz_fs = Param.Bool(True, "Enable Zicboz extension in FS mode") + enable_Zcd = Param.Bool( + True, + "Enable Zcd extensions. " + "Setting this option to false implies that Zcmp and Zcmt are enabled, " + "as c.fsdsp overlaps with them."
+ "Refs: https://github.com/riscv/riscv-isa-manual/blob/main/src/zc.adoc", + ) wfi_resume_on_pending = Param.Bool( False, diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index ee5d313587..557be1cbef 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -44,6 +44,7 @@ Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst) ISA *isa = dynamic_cast(p.isa); vlen = isa->getVecLenInBits(); elen = isa->getVecElemLenInBits(); + _enableZcd = isa->enableZcd(); reset(); } @@ -127,6 +128,7 @@ Decoder::decode(PCStateBase &_next_pc) emi.vtype8 = next_pc.vtype() & 0xff; emi.vill = next_pc.vtype().vill; emi.rv_type = static_cast(next_pc.rvType()); + emi.enable_zcd = _enableZcd; return decode(emi, next_pc.instAddr()); } diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index bf863fda22..d44455cd0b 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -62,6 +62,7 @@ class Decoder : public InstDecoder uint32_t vlen; uint32_t elen; + bool _enableZcd; virtual StaticInstPtr decodeInst(ExtMachInst mach_inst); diff --git a/src/arch/riscv/insts/SConscript b/src/arch/riscv/insts/SConscript index 2822cf86b4..2519b3e07d 100644 --- a/src/arch/riscv/insts/SConscript +++ b/src/arch/riscv/insts/SConscript @@ -34,3 +34,4 @@ Source('mem.cc', tags='riscv isa') Source('standard.cc', tags='riscv isa') Source('static_inst.cc', tags='riscv isa') Source('vector.cc', tags='riscv isa') +Source('zcmp.cc', tags='riscv isa') diff --git a/src/arch/riscv/insts/zcmp.cc b/src/arch/riscv/insts/zcmp.cc new file mode 100644 index 0000000000..018ea45a60 --- /dev/null +++ b/src/arch/riscv/insts/zcmp.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2024 Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/riscv/insts/zcmp.hh" + +#include + +#include "arch/riscv/regs/int.hh" +#include "arch/riscv/utility.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +CmMacroInst::CmMacroInst( + const char* mnem, ExtMachInst machInst, OpClass opClass) + : RiscvMacroInst(mnem, machInst, opClass), rlist(machInst.rlist) +{ +} + +// Ref: https://github.com/riscv-software-src/riscv-isa-sim/blob/f7d0dba60/ +// riscv/decode.h#L168 +uint64_t +CmMacroInst::stackAdj() const +{ + uint64_t stack_adj_base = 0; + switch (machInst.rlist) { + case 15: + stack_adj_base += 16; + [[fallthrough]]; + case 14: + if (machInst.rv_type == RV64) { + stack_adj_base += 16; + } + [[fallthrough]]; + case 13: + case 12: + stack_adj_base += 16; + [[fallthrough]]; + case 11: + case 10: + if (machInst.rv_type == RV64) { + stack_adj_base += 16; + } + [[fallthrough]]; + case 9: + case 8: + stack_adj_base += 16; + [[fallthrough]]; + case 7: + case 6: + if (machInst.rv_type == RV64) { + stack_adj_base += 16; + } + [[fallthrough]]; + case 5: + case 4: + stack_adj_base += 16; + break; + } + + return stack_adj_base + machInst.spimm * 16; +} + +std::string +CmMacroInst::getRlistStr() const +{ + std::string s = ""; + switch (machInst.rlist) { + case 15: + s = csprintf("{%s, %s-%s}", registerName(ReturnAddrReg), + registerName(int_reg::S0), + registerName(PushPopRegList[0])); + break; + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + case 8: + case 7: + case 6: + s = csprintf("{%s, %s-%s}", registerName(ReturnAddrReg), + registerName(int_reg::S0), + registerName(PushPopRegList[16-machInst.rlist])); + break; + case 5: + s = csprintf("{%s, %s}", registerName(ReturnAddrReg), + registerName(int_reg::S0)); + break; + case 4: + s = csprintf("{%s}", registerName(ReturnAddrReg)); + break; + default: + break; + } + + return s; +} + +} // namespace RiscvISA +} // namespace gem5 diff --git a/src/arch/riscv/insts/zcmp.hh b/src/arch/riscv/insts/zcmp.hh new file mode 100644 index 
0000000000..5f0d734b10 --- /dev/null +++ b/src/arch/riscv/insts/zcmp.hh @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ARCH_RISCV_INSTS_ZCMP_HH__ +#define __ARCH_RISCV_INSTS_ZCMP_HH__ + +#include + +#include "arch/riscv/insts/static_inst.hh" +#include "cpu/static_inst.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +class CmMacroInst : public RiscvMacroInst +{ + public: + CmMacroInst(const char* mnem, ExtMachInst machInst, OpClass opClass); + + protected: + using RiscvMacroInst::RiscvMacroInst; + + uint64_t stackAdj() const; + std::string getRlistStr() const; + + uint64_t rlist; +}; + +} // namespace RiscvISA +} // namespace gem5 + +#endif // __ARCH_RISCV_INSTS_ZCMP_HH__ diff --git a/src/arch/riscv/interrupts.hh b/src/arch/riscv/interrupts.hh index a10479fb65..54cf501f0a 100644 --- a/src/arch/riscv/interrupts.hh +++ b/src/arch/riscv/interrupts.hh @@ -95,6 +95,11 @@ class Interrupts : public BaseInterrupts void clearAll() override; + bool isWakeUp() const override + { + return checkNonMaskableInterrupt() || (ip & ie).any(); + } + uint64_t readIP() const { return (uint64_t)ip.to_ulong(); } uint64_t readIE() const { return (uint64_t)ie.to_ulong(); } void setIP(const uint64_t& val) { ip = val; } diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index bcc22d7cb0..7f4d97f4e9 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -260,7 +260,7 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); ISA::ISA(const Params &p) : BaseISA(p, "riscv"), _rvType(p.riscv_type), enableRvv(p.enable_rvv), vlen(p.vlen), elen(p.elen), _privilegeModeSet(p.privilege_mode_set), - _wfiResumeOnPending(p.wfi_resume_on_pending) + _wfiResumeOnPending(p.wfi_resume_on_pending), _enableZcd(p.enable_Zcd) { _regClasses.push_back(&intRegClass); _regClasses.push_back(&floatRegClass); diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index 29a75854c7..cda2df41e6 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -108,6 +108,14 @@ class ISA : public BaseISA */ const bool _wfiResumeOnPending; + /** + * Enable Zcd extensions. 
+ * Set the option to false implies the Zcmp and Zcmt is enable as c.fsdsp + * is overlap with them. + * Refs: https://github.com/riscv/riscv-isa-manual/blob/main/src/zc.adoc + */ + bool _enableZcd; + public: using Params = RiscvISAParams; @@ -184,6 +192,8 @@ class ISA : public BaseISA bool resumeOnPending() { return _wfiResumeOnPending; } + bool enableZcd() { return _enableZcd; } + virtual Addr getFaultHandlerAddr( RegIndex idx, uint64_t cause, bool intr) const; }; diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa index 0b4fae7b82..5fc624acc1 100644 --- a/src/arch/riscv/isa/bitfields.isa +++ b/src/arch/riscv/isa/bitfields.isa @@ -34,6 +34,7 @@ // Bitfield definitions. // def bitfield RVTYPE rv_type; +def bitfield ENABLE_ZCD enable_zcd; def bitfield QUADRANT <1:0>; def bitfield OPCODE5 <6:2>; @@ -103,10 +104,13 @@ def bitfield CFUNCT1 <12>; def bitfield CFUNCT1BIT6 <6>; def bitfield CFUNCT2HIGH <11:10>; def bitfield CFUNCT2LOW <6:5>; +def bitfield CFUNCT2MID <9:8>; def bitfield RC1 <11:7>; def bitfield RC2 <6:2>; def bitfield RP1 <9:7>; def bitfield RP2 <4:2>; +def bitfield R1S <9:7>; +def bitfield R2S <4:2>; def bitfield FC1 <11:7>; def bitfield FC2 <6:2>; def bitfield FP2 <4:2>; diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index c1dc790f26..90efb8ad82 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -54,23 +54,25 @@ decode QUADRANT default Unknown::unknown() { Rp2 = rvSext(sp + imm); }}, uint64_t); format CompressedLoad { - 0x1: c_fld({{ - offset = CIMM3 << 3 | CIMM2 << 6; - }}, {{ - STATUS status = xc->readMiscReg(MISCREG_STATUS); - if (status.fs == FPUStatus::OFF) - return std::make_shared("FPU is off", - machInst); + 0x1: decode ENABLE_ZCD { + 0x1: c_fld({{ + offset = CIMM3 << 3 | CIMM2 << 6; + }}, {{ + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (status.fs == FPUStatus::OFF) + return std::make_shared("FPU is off", + machInst); - // Mutating any floating 
point register changes the FS bit - // of the STATUS CSR. - status.fs = FPUStatus::DIRTY; - xc->setMiscReg(MISCREG_STATUS, status); + // Mutating any floating point register changes the FS bit + // of the STATUS CSR. + status.fs = FPUStatus::DIRTY; + xc->setMiscReg(MISCREG_STATUS, status); - Fp2_bits = Mem; - }}, {{ - EA = rvSext(Rp1 + offset); - }}); + Fp2_bits = Mem; + }}, {{ + EA = rvSext(Rp1 + offset); + }}); + } 0x2: c_lw({{ offset = CIMM2<1:1> << 2 | CIMM3 << 3 | @@ -152,18 +154,20 @@ decode QUADRANT default Unknown::unknown() { } } format CompressedStore { - 0x5: c_fsd({{ - offset = CIMM3 << 3 | CIMM2 << 6; - }}, {{ - STATUS status = xc->readMiscReg(MISCREG_STATUS); - if (status.fs == FPUStatus::OFF) - return std::make_shared("FPU is off", - machInst); + 0x5: decode ENABLE_ZCD { + 0x1: c_fsd({{ + offset = CIMM3 << 3 | CIMM2 << 6; + }}, {{ + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (status.fs == FPUStatus::OFF) + return std::make_shared("FPU is off", + machInst); - Mem = Fp2_bits; - }}, {{ - EA = rvSext(Rp1 + offset); - }}); + Mem = Fp2_bits; + }}, {{ + EA = rvSext(Rp1 + offset); + }}); + } 0x6: c_sw({{ offset = CIMM2<1:1> << 2 | CIMM3 << 3 | @@ -381,23 +385,25 @@ decode QUADRANT default Unknown::unknown() { Rc1 = rvSext(Rc1 << imm); }}, uint64_t); format CompressedLoad { - 0x1: c_fldsp({{ - offset = CIMM5<4:3> << 3 | - CIMM1 << 5 | - CIMM5<2:0> << 6; - }}, {{ - STATUS status = xc->readMiscReg(MISCREG_STATUS); - if (status.fs == FPUStatus::OFF) - return std::make_shared("FPU is off", - machInst); + 0x1: decode ENABLE_ZCD { + 0x1: c_fldsp({{ + offset = CIMM5<4:3> << 3 | + CIMM1 << 5 | + CIMM5<2:0> << 6; + }}, {{ + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (status.fs == FPUStatus::OFF) + return std::make_shared("FPU is off", + machInst); - status.fs = FPUStatus::DIRTY; - xc->setMiscReg(MISCREG_STATUS, status); + status.fs = FPUStatus::DIRTY; + xc->setMiscReg(MISCREG_STATUS, status); - Fc1_bits = Mem; - }}, {{ - EA = rvSext(sp + offset); 
- }}); + Fc1_bits = Mem; + }}, {{ + EA = rvSext(sp + offset); + }}); + } 0x2: c_lwsp({{ offset = CIMM5<4:2> << 2 | CIMM1 << 5 | @@ -480,19 +486,35 @@ decode QUADRANT default Unknown::unknown() { } } format CompressedStore { - 0x5: c_fsdsp({{ - offset = CIMM6<5:3> << 3 | - CIMM6<2:0> << 6; - }}, {{ - STATUS status = xc->readMiscReg(MISCREG_STATUS); - if (status.fs == FPUStatus::OFF) - return std::make_shared("FPU is off", - machInst); + 0x5: decode ENABLE_ZCD { + 0x0: decode CFUNCT6LOW3 { + 0x3: decode CFUNCT2LOW { + 0x1: CmMvsa01::cm_mvsa01(); + 0x3: CmMva01s::cm_mva01s(); + } + 0x6: decode CFUNCT2MID { + 0x0: CmPush::cm_push(); + 0x2: CmPop::cm_pop(); + } + 0x7: decode CFUNCT2MID { + 0x0: CmPop::cm_popretz(is_ret=True, has_a0=True); + 0x2: CmPop::cm_popret(is_ret=True); + } + } + 0x1: c_fsdsp({{ + offset = CIMM6<5:3> << 3 | + CIMM6<2:0> << 6; + }}, {{ + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (status.fs == FPUStatus::OFF) + return std::make_shared("FPU is off", + machInst); - Mem_ud = Fc2_bits; - }}, {{ - EA = rvSext(sp + offset); - }}); + Mem_ud = Fc2_bits; + }}, {{ + EA = rvSext(sp + offset); + }}); + } 0x6: c_swsp({{ offset = CIMM6<5:2> << 2 | CIMM6<1:0> << 6; diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 0102df17d7..377bc5d061 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -40,6 +40,7 @@ ##include "vector_conf.isa" ##include "vector_arith.isa" ##include "vector_mem.isa" +##include "zcmp.isa" // Include formats for nonstandard extensions ##include "compressed.isa" diff --git a/src/arch/riscv/isa/formats/zcmp.isa b/src/arch/riscv/isa/formats/zcmp.isa new file mode 100644 index 0000000000..263c880022 --- /dev/null +++ b/src/arch/riscv/isa/formats/zcmp.isa @@ -0,0 +1,782 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2015 RISC-V Foundation +// Copyright (c) 2016 The University of Virginia +// Copyright (c) 2024 Google LLC +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Cmpush template. 
+def template CmPushDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst); + + protected: + using %(base_class)s::%(base_class)s; + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; + }; +}}; + + +def template CmPushConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + StaticInstPtr cur_inst = nullptr; + if (rlist < 4) { + cur_inst = new Unknown(machInst); + cur_inst->setFlag(IsMicroop); + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + } else { + int start_reg = 0; + if (rlist != 15) { + start_reg = (16-rlist); + } + + int offset = 0; + for (int i = start_reg; i < PushPopRegList.size(); i++) { + offset -= rvSelect(4, 8); + + if (machInst.rv_type == RV32) { + cur_inst = new %(class_name)s32MicroInst( + machInst, PushPopRegList[i], offset); + } else { + cur_inst = new %(class_name)s64MicroInst( + machInst, PushPopRegList[i], offset); + } + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + } + + cur_inst = new %(class_name)sSpAdjMicroInst(machInst, -stackAdj()); + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + } + + microops.front()->setFirstMicroop(); + microops.back()->setLastMicroop(); + } +}}; + +def template CmPushExecute {{ + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << getRlistStr() << ", " << (int64_t)-stackAdj(); + return ss.str(); + } +}}; + +def template CmStoreMicroDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegId push_reg, int64_t offset); + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc( + Packet *, ExecContext *, 
trace::InstRecord *) const override; + std::string generateDisassembly( + Addr, const loader::SymbolTable *) const override; + + protected: + using %(base_class)s::%(base_class)s; + + private: + %(reg_idx_arr_decl)s; + + int64_t offset; + Request::Flags memAccessFlags; + }; +}}; + +def template CmStoreMicroConstructor {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegId push_reg, int64_t offset) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s), + offset(offset) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template CmStoreMicroExecute {{ + Fault + %(class_name)s::execute( + ExecContext *xc, trace::InstRecord *traceData) const + { + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + %(memacc_code)s; + + { + Fault fault = + writeMemAtomicLE(xc, traceData, Mem, EA, memAccessFlags, + nullptr); + if (fault != NoFault) + return fault; + } + + %(op_wb)s; + + return NoFault; + } + + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + offset << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); + } +}}; + +def template CmStoreMicroInitiateAcc {{ + Fault + %(class_name)s::initiateAcc(ExecContext *xc, + trace::InstRecord *traceData) const + { + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + %(memacc_code)s; + + { + Fault fault = writeMemTimingLE(xc, traceData, Mem, EA, + memAccessFlags, nullptr); + if (fault != NoFault) + return fault; + } + + %(op_wb)s; + + return NoFault; + } +}}; + +def template CmStoreMicroCompleteAcc {{ + Fault + %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template SpAdjMicroDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, int64_t adj); + protected: + using %(base_class)s::%(base_class)s; + + 
Fault execute(ExecContext *, trace::InstRecord *) const override; + std::string generateDisassembly( + Addr, const loader::SymbolTable *) const override; + + private: + %(reg_idx_arr_decl)s; + + int64_t adj; + }; +}}; + +def template SpAdjMicroConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, int64_t adj) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s), adj(adj) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template SpAdjMicroExecute {{ + Fault + %(class_name)s::execute( + ExecContext *xc, trace::InstRecord *traceData) const + { + %(op_decl)s; + %(op_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; + } + + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ' ' + << registerName(srcRegIdx(0)) << ' ' << adj; + return ss.str(); + } +}}; + +// Cmpop decode template. +def template CmPopDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst); + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; + + protected: + using %(base_class)s::%(base_class)s; + }; +}}; + + +def template CmPopConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + StaticInstPtr cur_inst = nullptr; + if (rlist < 4) { + cur_inst = new Unknown(machInst); + cur_inst->setFlag(IsMicroop); + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + } else { + int start_reg = 0; + if (rlist != 15) { + start_reg = (16-rlist); + } + + int offset = stackAdj(); + for (int i = start_reg; i < PushPopRegList.size(); i++) { + offset -= rvSelect(4, 8); + + if (machInst.rv_type == RV32) { + cur_inst = new %(class_name)s32MicroInst( + machInst, PushPopRegList[i], offset); + } else { + cur_inst = new %(class_name)s64MicroInst( + machInst, 
PushPopRegList[i], offset); + } + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + } + + cur_inst = new %(class_name)sSpAdjMicroInst(machInst, stackAdj()); + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + + %(move_a0_desc)s; + %(return_desc)s; + } + + microops.front()->setFirstMicroop(); + microops.back()->setLastMicroop(); + } +}}; + +def template CmPopExecute {{ + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << getRlistStr() << ", " << stackAdj(); + return ss.str(); + } +}}; + +def template CmLoadMicroDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegId pop_reg, int64_t offset); + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc( + Packet *, ExecContext *, trace::InstRecord *) const override; + std::string generateDisassembly( + Addr, const loader::SymbolTable *) const override; + + protected: + using %(base_class)s::%(base_class)s; + + private: + %(reg_idx_arr_decl)s; + + int64_t offset; + Request::Flags memAccessFlags; + }; +}}; + +def template CmLoadMicroConstructor {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegId pop_reg, int64_t offset) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s), + offset(offset) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template CmLoadMicroExecute {{ + Fault + %(class_name)s::execute( + ExecContext *xc, trace::InstRecord *traceData) const + { + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + { + Fault fault = + readMemAtomicLE(xc, traceData, EA, Mem, memAccessFlags); + if (fault != NoFault) + return fault; + } + + %(memacc_code)s; + + %(op_wb)s; + + return NoFault; + } + + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable 
*symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + offset << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); + } +}}; + +def template CmLoadMicroInitiateAcc {{ + Fault + %(class_name)s::initiateAcc(ExecContext *xc, + trace::InstRecord *traceData) const + { + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + return initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + } +}}; + +def template CmLoadMicroCompleteAcc {{ + Fault + %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const + { + %(op_decl)s; + %(op_rd)s; + + getMemLE(pkt, Mem, traceData); + + %(memacc_code)s; + %(op_wb)s; + + return NoFault; + } +}}; + +def template CmRetMicroDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst); + + protected: + using %(base_class)s::%(base_class)s; + + Fault execute(ExecContext *, trace::InstRecord *) const override; + + std::string + generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; + + std::unique_ptr branchTarget( + ThreadContext *tc) const override; + + using StaticInst::branchTarget; + + private: + %(reg_idx_arr_decl)s; + }; +}}; + +def template CmRetMicroConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template CmRetMicroExecute {{ + Fault + %(class_name)s::execute( + ExecContext *xc, trace::InstRecord *traceData) const + { + %(op_decl)s; + %(op_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; + } + + std::unique_ptr + %(class_name)s::branchTarget(ThreadContext *tc) const + { + PCStateBase *pc_ptr = tc->pcState().clone(); + pc_ptr->as().set(rvSext(tc->getReg(srcRegIdx(0)) & ~0x1)); + return std::unique_ptr{pc_ptr}; + } + + std::string + %(class_name)s::generateDisassembly( + Addr pc, const 
loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(0)); + return ss.str(); + } +}}; + +// Cmmvsa01 decode template +def template CmMvDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst); + + protected: + using %(base_class)s::%(base_class)s; + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; + }; +}}; + +def template CmMvsa01Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + StaticInstPtr cur_inst; + cur_inst = new %(class_name)sMvMicroInst( + machInst, int_reg::A0, StackRegs[machInst.r1s]); + microops.emplace_back(cur_inst); + cur_inst = new %(class_name)sMvMicroInst( + machInst, int_reg::A1, StackRegs[machInst.r2s]); + microops.emplace_back(cur_inst); + + microops.front()->setFirstMicroop(); + microops.back()->setLastMicroop(); + } +}}; + +def template CmMva01sConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + StaticInstPtr cur_inst; + cur_inst = new %(class_name)sMvMicroInst( + machInst, StackRegs[machInst.r1s], int_reg::A0); + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + cur_inst = new %(class_name)sMvMicroInst( + machInst, StackRegs[machInst.r2s], int_reg::A1); + cur_inst->setDelayedCommit(); + microops.emplace_back(cur_inst); + + microops.front()->setFirstMicroop(); + microops.back()->setLastMicroop(); + } +}}; + +def template CmMvExecute {{ + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(StackRegs[machInst.r1s]) + << ", " << registerName(StackRegs[machInst.r2s]); + return ss.str(); + } +}}; + +def template CmMvMicroDeclare {{ + class %(class_name)s : public %(base_class)s + { + 
public: + %(class_name)s(ExtMachInst machInst, RegId push_reg, RegId pop_reg); + protected: + using %(base_class)s::%(base_class)s; + + Fault execute(ExecContext *, trace::InstRecord *) const override; + std::string generateDisassembly( + Addr, const loader::SymbolTable *) const override; + + private: + %(reg_idx_arr_decl)s; + }; +}}; + +def template CmMvMicroConstructor {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegId push_reg, RegId pop_reg) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template CmMvMicroExecute {{ + Fault + %(class_name)s::execute( + ExecContext *xc, trace::InstRecord *traceData) const + { + %(op_decl)s; + %(op_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; + } + + std::string + %(class_name)s::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ' ' + << registerName(srcRegIdx(0)); + return ss.str(); + } +}}; + +def format CmPush(*flags) {{ + code = '' + macro_iop = InstObjParams(name, Name, 'CmMacroInst', code, flags) + header_output = CmPushDeclare.subst(macro_iop) + decoder_output = CmPushConstructor.subst(macro_iop) + exec_output = CmPushExecute.subst(macro_iop) + decode_block = BasicDecode.subst(macro_iop) + + memacc_code = 'Mem_sw = CmPushReg_sw;' + ea_code = 'EA = rvSext(sp + offset);' + micro32_iop = InstObjParams('lw', f'{Name}32MicroInst', 'RiscvMicroInst', + {'ea_code': ea_code, 'memacc_code': memacc_code}, + flags) + + mem_flags = [getAlignFlag(micro32_iop)] + s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';' + micro32_iop.constructor += s + + header_output += CmStoreMicroDeclare.subst(micro32_iop) + decoder_output += CmStoreMicroConstructor.subst(micro32_iop) + exec_output += CmStoreMicroExecute.subst(micro32_iop) \ + + CmStoreMicroInitiateAcc.subst(micro32_iop) \ + + CmStoreMicroCompleteAcc.subst(micro32_iop) + + memacc_code = 
'Mem = CmPushReg;' + ea_code = 'EA = rvSext(sp + offset);' + micro64_iop = InstObjParams('ld', f'{Name}64MicroInst', 'RiscvMicroInst', + {'ea_code': ea_code, 'memacc_code': memacc_code}, + flags) + + mem_flags = [getAlignFlag(micro64_iop)] + s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';' + micro64_iop.constructor += s + + header_output += CmStoreMicroDeclare.subst(micro64_iop) + decoder_output += CmStoreMicroConstructor.subst(micro64_iop) + exec_output += CmStoreMicroExecute.subst(micro64_iop) \ + + CmStoreMicroInitiateAcc.subst(micro64_iop) \ + + CmStoreMicroCompleteAcc.subst(micro64_iop) + + code = 'spd = rvSext(sp + adj);' + sp_adj_iop = InstObjParams('addi', f'{Name}SpAdjMicroInst', + 'RiscvMicroInst', code, flags) + + header_output += SpAdjMicroDeclare.subst(sp_adj_iop) + decoder_output += SpAdjMicroConstructor.subst(sp_adj_iop) + exec_output += SpAdjMicroExecute.subst(sp_adj_iop) +}}; + +def format CmPop(is_ret=False, has_a0=False, *flags) {{ + code = '' + flags = [] + has_a0 = eval(has_a0) + is_ret = eval(is_ret) + move_a0_desc = '' + return_desc = '' + + if has_a0: + move_a0_desc = rf''' + cur_inst = new {Name}MvMicroInst( + machInst, ReturnValueReg, int_reg::Zero); + microops.emplace_back(cur_inst); + ''' + + if is_ret: + return_desc = rf''' + cur_inst = new {Name}RetMicroInst(machInst); + microops.emplace_back(cur_inst); + ''' + + macro_iop = InstObjParams(name, Name, 'CmMacroInst', + {'code': code, 'move_a0_desc': move_a0_desc, + 'return_desc': return_desc}, + flags) + header_output = CmPopDeclare.subst(macro_iop) + decoder_output = CmPopConstructor.subst(macro_iop) + exec_output = CmPopExecute.subst(macro_iop) + decode_block = BasicDecode.subst(macro_iop) + + memacc_code = 'CmPopReg_sw = Mem_sw;' + ea_code = 'EA = rvSext(sp + offset);' + micro32_iop = InstObjParams('lw', f'{Name}32MicroInst', 'RiscvMicroInst', + {'ea_code': ea_code, 'memacc_code': memacc_code}, + flags) + + mem_flags = [getAlignFlag(micro32_iop)] + s = '\n\tmemAccessFlags = ' + 
'|'.join(mem_flags) + ';' + micro32_iop.constructor += s + + header_output += CmLoadMicroDeclare.subst(micro32_iop) + decoder_output += CmLoadMicroConstructor.subst(micro32_iop) + exec_output += CmLoadMicroExecute.subst(micro32_iop) \ + + CmLoadMicroInitiateAcc.subst(micro32_iop) \ + + CmLoadMicroCompleteAcc.subst(micro32_iop) + + memacc_code = 'CmPopReg = Mem;' + ea_code = 'EA = rvSext(sp + offset);' + micro64_iop = InstObjParams('ld', f'{Name}64MicroInst', 'RiscvMicroInst', + {'ea_code': ea_code, 'memacc_code': memacc_code}, + flags) + + mem_flags = [getAlignFlag(micro64_iop)] + s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';' + micro64_iop.constructor += s + + header_output += CmLoadMicroDeclare.subst(micro64_iop) + decoder_output += CmLoadMicroConstructor.subst(micro64_iop) + exec_output += CmLoadMicroExecute.subst(micro64_iop) \ + + CmLoadMicroInitiateAcc.subst(micro64_iop) \ + + CmLoadMicroCompleteAcc.subst(micro64_iop) + + code = 'spd = rvSext(sp + adj);' + sp_adj_iop = InstObjParams('addi', f'{Name}SpAdjMicroInst', + 'RiscvMicroInst', code, flags) + + header_output += SpAdjMicroDeclare.subst(sp_adj_iop) + decoder_output += SpAdjMicroConstructor.subst(sp_adj_iop) + exec_output += SpAdjMicroExecute.subst(sp_adj_iop) + + if has_a0: + code = 'CmPopReg = CmPushReg;' + has_a0_iop = InstObjParams('mv', f'{Name}MvMicroInst', + 'RiscvMicroInst', code, flags) + + header_output += CmMvMicroDeclare.subst(has_a0_iop) + decoder_output += CmMvMicroConstructor.subst(has_a0_iop) + exec_output += CmMvMicroExecute.subst(has_a0_iop) + + if is_ret: + code = 'NPC = rvSext(ra & (~0x1));' + ret_flags = ['IsIndirectControl', 'IsUncondControl', 'IsReturn'] + is_ret_iop = InstObjParams('jr', f'{Name}RetMicroInst', + 'RiscvMicroInst', code, ret_flags) + + header_output += CmRetMicroDeclare.subst(is_ret_iop) + decoder_output += CmRetMicroConstructor.subst(is_ret_iop) + exec_output += CmRetMicroExecute.subst(is_ret_iop) +}}; + +def format CmMvsa01() {{ + code = '' + flags = [] + 
iop = InstObjParams(name, Name, 'RiscvMacroInst', code, flags) + header_output = CmMvDeclare.subst(iop) + decoder_output = CmMvsa01Constructor.subst(iop) + exec_output = CmMvExecute.subst(iop) + decode_block = BasicDecode.subst(iop) + + code = 'CmPopReg = CmPushReg;' + micro_iop = InstObjParams('mv', f'{Name}MvMicroInst', 'RiscvMicroInst', + code, flags) + + header_output += CmMvMicroDeclare.subst(micro_iop) + decoder_output += CmMvMicroConstructor.subst(micro_iop) + exec_output += CmMvMicroExecute.subst(micro_iop) +}}; + +def format CmMva01s() {{ + code = '' + flags = [] + iop = InstObjParams(name, Name, 'RiscvMacroInst', code, flags) + header_output = CmMvDeclare.subst(iop) + decoder_output = CmMva01sConstructor.subst(iop) + exec_output = CmMvExecute.subst(iop) + decode_block = BasicDecode.subst(iop) + + code = 'CmPopReg = CmPushReg;' + micro_iop = InstObjParams('mv', f'{Name}MvMicroInst', 'RiscvMicroInst', + code, flags) + + header_output += CmMvMicroDeclare.subst(micro_iop) + decoder_output += CmMvMicroConstructor.subst(micro_iop) + exec_output += CmMvMicroExecute.subst(micro_iop) +}}; diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index b37e62bca8..4d53958723 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -55,6 +55,7 @@ output header {{ #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/insts/unknown.hh" #include "arch/riscv/insts/vector.hh" +#include "arch/riscv/insts/zcmp.hh" #include "arch/riscv/interrupts.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" diff --git a/src/arch/riscv/isa/operands.isa b/src/arch/riscv/isa/operands.isa index de36d902b1..e2a7522b94 100644 --- a/src/arch/riscv/isa/operands.isa +++ b/src/arch/riscv/isa/operands.isa @@ -70,10 +70,14 @@ def operands {{ 'Rp2': IntReg('ud', 'RP2 + 8', 'IsInteger', 3), 'ra': IntReg('ud', 'ReturnAddrReg', 'IsInteger', 1), 'sp': IntReg('ud', 'StackPointerReg', 'IsInteger', 2), + 'spd': IntReg('ud', 
'StackPointerReg', 'IsInteger', 1), 'a0': IntReg('ud', '10', 'IsInteger', 1), 'a1': IntReg('ud', '11', 'IsInteger', 2), + 'CmPushReg': IntReg('ud', 'push_reg', 'IsInteger', 3), + 'CmPopReg': IntReg('ud', 'pop_reg', 'IsInteger', 1), + 'Fd': FloatRegOp('df', 'FD', 'IsFloating', 1), 'Fd_bits': FloatRegOp('ud', 'FD', 'IsFloating', 1), 'Fs1': FloatRegOp('df', 'FS1', 'IsFloating', 2), diff --git a/src/arch/riscv/linux/linux.hh b/src/arch/riscv/linux/linux.hh index 997eb6af4c..17281340d7 100644 --- a/src/arch/riscv/linux/linux.hh +++ b/src/arch/riscv/linux/linux.hh @@ -34,6 +34,7 @@ #include "arch/riscv/utility.hh" #include "kern/linux/flag_tables.hh" #include "kern/linux/linux.hh" +#include "base/bitfield.hh" namespace gem5 { @@ -42,6 +43,101 @@ class RiscvLinux : public Linux { public: static const ByteOrder byteOrder = ByteOrder::little; + + enum RiscvHwprobeKey + { + Mvendorid, + Marchid, + Mimpid, + BaseBehavior, + IMAExt0, + Cpuperf0, + ZicbozBlockSize, + HighestVirtAddress, + TimeCsrFreq, + MisalignedScalarPerf + }; + + /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ + #define RISCV_HWPROBE_MAX_KEY 9 + + BitUnion64(key_base_behavior_t) + Bitfield<0> ima; + EndBitUnion(key_base_behavior_t) + + BitUnion64(key_ima_ext_0_t) + Bitfield<49> ZAWRS; + Bitfield<48> ZCMOP; + Bitfield<47> ZCF; + Bitfield<46> ZCD; + Bitfield<45> ZCB; + Bitfield<44> ZCA; + Bitfield<43> ZIMOP; + Bitfield<42> ZVE64D; + Bitfield<41> ZVE64F; + Bitfield<40> ZVE64X; + Bitfield<39> ZVE32F; + Bitfield<38> ZVE32X; + Bitfield<37> ZIHINTPAUSE; + Bitfield<36> ZICOND; + Bitfield<35> ZACAS; + Bitfield<34> ZTSO; + Bitfield<33> ZFA; + Bitfield<32> ZVFHMIN; + Bitfield<31> ZVFH; + Bitfield<30> ZIHINTNTL; + Bitfield<29> ZFHMIN; + Bitfield<28> ZFH; + Bitfield<27> ZVKT; + Bitfield<26> ZVKSH; + Bitfield<25> ZVKSED; + Bitfield<24> ZVKNHB; + Bitfield<22> ZVKNHA; + Bitfield<21> ZVKNED; + Bitfield<20> ZVKG; + Bitfield<19> ZVKB; + Bitfield<18> ZVBC; + Bitfield<17> ZVBB; + Bitfield<16> ZKT; + Bitfield<15> ZKSH; + Bitfield<14> ZKSED; + Bitfield<13> ZKNH; + Bitfield<12> ZKNE; + Bitfield<11> ZKND; + Bitfield<10> ZBKX; + Bitfield<9> ZBKC; + Bitfield<8> ZBKB; + Bitfield<7> ZBC; + Bitfield<6> ZICBOZ; + Bitfield<5> ZBS; + Bitfield<4> ZBB; + Bitfield<3> ZBA; + Bitfield<2> V; + Bitfield<1> C; + Bitfield<0> FD; + EndBitUnion(key_ima_ext_0_t) + + enum MisalignedScalarPerf + { + Unknown, + Emulated, + Slow, + Fast, + Unsupported + }; + + /* Flags */ + #define RISCV_HWPROBE_WHICH_CPUS (1 << 0) + + struct riscv_hwprobe { + int64_t key; + uint64_t value; + }; + + typedef struct cpumask { + size_t size; + uint64_t bits[]; + } cpumask_t; }; class RiscvLinux64 : public RiscvLinux, public OpenFlagTable @@ -195,6 +291,21 @@ class RiscvLinux64 : public RiscvLinux, public OpenFlagTable uint32_t mem_unit; }; + struct tgt_clone_args + { + uint64_t flags; + uint64_t pidfd; + uint64_t child_tid; + uint64_t parent_tid; + uint64_t exit_signal; + uint64_t stack; + uint64_t stack_size; + uint64_t tls; + uint64_t set_tid; + uint64_t set_tid_size; + uint64_t cgroup; + }; + static void archClone(uint64_t flags, 
Process *pp, Process *cp, diff --git a/src/arch/riscv/linux/se_workload.cc b/src/arch/riscv/linux/se_workload.cc index c1af16fb3b..d3015202b7 100644 --- a/src/arch/riscv/linux/se_workload.cc +++ b/src/arch/riscv/linux/se_workload.cc @@ -44,6 +44,8 @@ #include #include "arch/riscv/process.hh" +#include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/regs/misc.hh" #include "base/loader/object_file.hh" #include "base/trace.hh" #include "cpu/thread_context.hh" @@ -134,6 +136,388 @@ unameFunc32(SyscallDesc *desc, ThreadContext *tc, VPtr name) return 0; } +static inline void +cpumask_set_cpu(unsigned int cpu, RiscvLinux::cpumask_t *dstp) +{ + assert(cpu < dstp->size * 8); + auto &bits = dstp->bits[cpu / sizeof(uint64_t)]; + bits = insertBits(bits, cpu % sizeof(uint64_t), 1); +} + +static inline void +cpumask_clear_cpu(unsigned int cpu, RiscvLinux::cpumask_t *dstp) +{ + assert(cpu < dstp->size * 8); + auto &bits = dstp->bits[cpu / sizeof(uint64_t)]; + bits = insertBits(bits, cpu % sizeof(uint64_t), 0); +} + +static inline bool +cpumask_test_cpu(unsigned int cpu, const RiscvLinux::cpumask_t *cpumask) +{ + assert(cpu < cpumask->size * 8); + return bits(cpumask->bits[cpu / sizeof(uint64_t)], cpu % sizeof(uint64_t)) != 0; +} + +static inline void +cpumask_and(RiscvLinux::cpumask_t *dstp, const RiscvLinux::cpumask_t *src1p, + const RiscvLinux::cpumask_t *src2p) +{ + assert(dstp->size == src1p->size); + assert(dstp->size == src2p->size); + for (size_t i = 0; i < dstp->size / sizeof(dstp->bits[0]); i++) { + dstp->bits[i] = src1p->bits[i] & src2p->bits[i]; + } +} + +static inline bool +cpumask_empty(const RiscvLinux::cpumask_t *dstp) +{ + for (size_t i = 0; i < dstp->size / sizeof(dstp->bits[0]); i++) { + if (dstp->bits[i] != 0) { + return false; + } + } + return true; +} + +static inline void +cpumask_copy(RiscvLinux::cpumask_t *dstp, const RiscvLinux::cpumask_t *srcp) +{ + assert(dstp->size == srcp->size); + memcpy(dstp->bits, srcp->bits, srcp->size); +} + +static 
inline void +cpumask_clear(RiscvLinux::cpumask_t *dstp) +{ + memset(dstp->bits, 0, dstp->size); +} + +static inline RiscvLinux::cpumask_t * +cpumask_malloc(ThreadContext *tc) +{ + RiscvLinux::cpumask_t *cpumask; + + /* 8-bytes up-boundary alignment */ + size_t size = (tc->getSystemPtr()->threads.size() + sizeof(cpumask->bits[0]) - 1) / + sizeof(cpumask->bits[0]) * sizeof(cpumask->bits[0]); + cpumask = (RiscvLinux::cpumask_t *)malloc(sizeof(cpumask->size) + size); + if (cpumask != nullptr) { + cpumask->size = size; + cpumask_clear(cpumask); + } + + return cpumask; +} + +static inline void +cpumask_free(RiscvLinux::cpumask_t *cpu_online_mask) +{ + free(cpu_online_mask); +} + +static inline bool +riscv_hwprobe_key_is_valid(int64_t key) +{ + return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY; +} + +static inline bool +hwprobe_key_is_bitmask(int64_t key) +{ + switch (key) { + case RiscvLinux::BaseBehavior: + case RiscvLinux::IMAExt0: + case RiscvLinux::Cpuperf0: + return true; + } + + return false; +} + +static inline bool +riscv_hwprobe_pair_cmp(RiscvLinux::riscv_hwprobe *pair, + RiscvLinux::riscv_hwprobe *other_pair) +{ + if (pair->key != other_pair->key) { + return false; + } + + if (hwprobe_key_is_bitmask(pair->key)) { + return (pair->value & other_pair->value) == other_pair->value; + } + + return pair->value == other_pair->value; +} + +static inline RiscvLinux::cpumask_t * +get_cpu_online_mask(ThreadContext *tc) +{ + RiscvLinux::cpumask_t *cpu_online_mask = cpumask_malloc(tc); + if (cpu_online_mask != nullptr) { + for (int i = 0; i < tc->getSystemPtr()->threads.size(); i++) { + CPU_SET(i, (cpu_set_t *)&cpu_online_mask->bits); + } + } + + return cpu_online_mask; +} + +static void +hwprobe_one_pair(ThreadContext *tc, RiscvLinux::riscv_hwprobe *pair, + RiscvLinux::cpumask_t *cpus) +{ + switch (pair->key) { + case RiscvLinux::Mvendorid: + pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MVENDORID).physIndex); + break; + case RiscvLinux::Marchid: + pair->value = 
tc->readMiscRegNoEffect(CSRData.at(CSR_MARCHID).physIndex); + break; + case RiscvLinux::Mimpid: + pair->value = tc->readMiscRegNoEffect(CSRData.at(CSR_MIMPID).physIndex); + break; + case RiscvLinux::BaseBehavior: + { + MISA misa = tc->readMiscRegNoEffect(MISCREG_ISA); + RiscvLinux::key_base_behavior_t *base_behavior = + (RiscvLinux::key_base_behavior_t *)&pair->value; + if (misa.rvi && misa.rvm && misa.rva) { + base_behavior->ima = 1; + } + } + break; + case RiscvLinux::IMAExt0: + { + MISA misa = tc->readMiscRegNoEffect(MISCREG_ISA); + RiscvLinux::key_ima_ext_0_t *ext = (RiscvLinux::key_ima_ext_0_t *)&pair->value; + if (misa.rvf && misa.rvd) ext->FD = 1; + if (misa.rvc) ext->C = 1; + if (misa.rvv) ext->V = 1; + ext->ZBA = 1; + ext->ZBB = 1; + ext->ZBS = 1; + ext->ZICBOZ = 1; + ext->ZBC = 1; + ext->ZBKB = 1; + ext->ZBKC = 1; + ext->ZBKX = 1; + ext->ZKND = 1; + ext->ZKNE = 1; + ext->ZKNH = 1; + ext->ZKSED = 1; + ext->ZKSH = 1; + ext->ZKT = 1; + ext->ZFH = 1; + ext->ZFHMIN = 1; + ext->ZVFH = 1; + ext->ZVFHMIN = 1; + ext->ZICOND = 1; + ext->ZVE64D = 1; + ext->ZCB = 1; + ext->ZCD = 1; + ext->ZCF = 1; + } + break; + case RiscvLinux::Cpuperf0: + case RiscvLinux::MisalignedScalarPerf: + pair->value = RiscvLinux::Slow; + break; + case RiscvLinux::ZicbozBlockSize: + pair->value = tc->getSystemPtr()->cacheLineSize(); + break; + case RiscvLinux::HighestVirtAddress: + pair->value = tc->getProcessPtr()->memState->getMmapEnd(); + break; + + /* + * For forward compatibility, unknown keys don't fail the whole + * call, but get their element key set to -1 and value set to 0 + * indicating they're unrecognized. + */ + default: + pair->key = -1; + pair->value = 0; + break; + } +} + +template +static int +hwprobe_get_values(ThreadContext *tc, VPtr<> pairs, typename OS::size_t pair_count, + typename OS::size_t cpusetsize, VPtr<> cpus_user, unsigned int flags) +{ + /* Check the reserved flags. 
*/ + if (flags != 0) { + return -EINVAL; + } + + RiscvLinux::cpumask_t *cpu_online_mask = get_cpu_online_mask(tc); + if (cpu_online_mask == nullptr) { + return -ENOMEM; + } + + RiscvLinux::cpumask_t *cpus = cpumask_malloc(tc); + if (cpus == nullptr) { + cpumask_free(cpu_online_mask); + return -ENOMEM; + } + + if (cpusetsize > cpu_online_mask->size) { + cpusetsize = cpu_online_mask->size; + } + + RiscvLinux::riscv_hwprobe *pair; + BufferArg pairs_buf(pairs, sizeof(RiscvLinux::riscv_hwprobe) * pair_count); + + /* + * The interface supports taking in a CPU mask, and returns values that + * are consistent across that mask. Allow userspace to specify NULL and + * 0 as a shortcut to all online CPUs. + */ + if (cpusetsize == 0 && !cpus_user) { + cpumask_copy(cpus, cpu_online_mask); + cpusetsize = cpu_online_mask->size; + } else { + BufferArg cpus_user_buf(cpus_user, cpusetsize); + cpus_user_buf.copyIn(SETranslatingPortProxy(tc)); + + cpu_online_mask->size = cpusetsize; + cpus->size = cpusetsize; + memcpy(cpus->bits, cpus_user_buf.bufferPtr(), cpusetsize); + + /* + * Userspace must provide at least one online CPU, without that + * there's no way to define what is supported. 
+ */ + cpumask_and(cpus, cpus, cpu_online_mask); + if (cpumask_empty(cpus)) { + cpumask_free(cpu_online_mask); + cpumask_free(cpus); + return -EINVAL; + } + } + + pairs_buf.copyIn(SETranslatingPortProxy(tc)); + pair = (RiscvLinux::riscv_hwprobe *)pairs_buf.bufferPtr(); + + for (size_t i = 0; i < pair_count; i++, pair++) { + pair->value = 0; + hwprobe_one_pair(tc, pair, cpus); + } + + pairs_buf.copyOut(SETranslatingPortProxy(tc)); + + cpumask_free(cpu_online_mask); + cpumask_free(cpus); + + return 0; +} + +template +static int +hwprobe_get_cpus(ThreadContext *tc, VPtr<> pairs, typename OS::size_t pair_count, + typename OS::size_t cpusetsize, VPtr<> cpus_user, unsigned int flags) +{ + if (flags != RISCV_HWPROBE_WHICH_CPUS) { + return -EINVAL; + } + + if (cpusetsize == 0 || !cpus_user) { + return -EINVAL; + } + + RiscvLinux::cpumask_t *cpu_online_mask = get_cpu_online_mask(tc); + if (cpu_online_mask == nullptr) { + return -ENOMEM; + } + + RiscvLinux::cpumask_t *cpus = cpumask_malloc(tc); + if (cpus == nullptr) { + cpumask_free(cpu_online_mask); + return -ENOMEM; + } + + RiscvLinux::cpumask_t *one_cpu = cpumask_malloc(tc); + if (one_cpu == nullptr) { + cpumask_free(cpu_online_mask); + cpumask_free(cpus); + return -ENOMEM; + } + + if (cpusetsize > cpu_online_mask->size) { + cpusetsize = cpu_online_mask->size; + } + + RiscvLinux::riscv_hwprobe *pair; + BufferArg cpus_user_buf(cpus_user, cpusetsize); + cpus_user_buf.copyIn(SETranslatingPortProxy(tc)); + memcpy(cpus->bits, cpus_user_buf.bufferPtr(), cpusetsize); + + if (cpumask_empty(cpus)) { + cpumask_copy(cpus, cpu_online_mask); + cpusetsize = cpu_online_mask->size; + } + + cpumask_and(cpus, cpus, cpu_online_mask); + + BufferArg pairs_buf(pairs, sizeof(RiscvLinux::riscv_hwprobe) * pair_count); + pairs_buf.copyIn(SETranslatingPortProxy(tc)); + pair = (RiscvLinux::riscv_hwprobe *)pairs_buf.bufferPtr(); + + for (size_t i = 0; i < pair_count; i++, pair++) { + if (!riscv_hwprobe_key_is_valid(pair->key)) { + *pair = 
(RiscvLinux::riscv_hwprobe){ .key = -1, .value = 0 }; + memset(cpus_user_buf.bufferPtr(), 0, cpusetsize); + break; + } + + RiscvLinux::riscv_hwprobe tmp = + (RiscvLinux::riscv_hwprobe){ .key = pair->key, .value = 0 }; + + for (int cpu = 0; cpu < cpusetsize * 8; cpu++) { + if (!cpumask_test_cpu(cpu, cpus)) { + continue; + } + + cpumask_set_cpu(cpu, one_cpu); + + hwprobe_one_pair(tc, &tmp, one_cpu); + + if (!riscv_hwprobe_pair_cmp(&tmp, pair)) { + cpumask_clear_cpu(cpu, cpus); + } + + cpumask_clear_cpu(cpu, one_cpu); + } + } + + pairs_buf.copyOut(SETranslatingPortProxy(tc)); + cpus_user_buf.copyOut(SETranslatingPortProxy(tc)); + + cpumask_free(cpu_online_mask); + cpumask_free(cpus); + cpumask_free(one_cpu); + + return 0; +} + +template +static SyscallReturn +riscvHWProbeFunc(SyscallDesc *desc, ThreadContext *tc, VPtr<> pairs, + typename OS::size_t pair_count, typename OS::size_t cpusetsize, + VPtr<> cpus_user, unsigned int flags) +{ + if (flags & RISCV_HWPROBE_WHICH_CPUS) { + return hwprobe_get_cpus(tc, pairs, pair_count, cpusetsize, + cpus_user, flags); + } + + return hwprobe_get_values(tc, pairs, pair_count, cpusetsize, + cpus_user, flags); +} + SyscallDescTable EmuLinux::syscallDescs64 = { { 0, "io_setup" }, { 1, "io_destroy" }, @@ -382,6 +766,7 @@ SyscallDescTable EmuLinux::syscallDescs64 = { { 241, "perf_event_open" }, { 242, "accept4" }, { 243, "recvmmsg" }, + { 258, "riscv_hwprobe", riscvHWProbeFunc }, { 260, "wait4", wait4Func }, { 261, "prlimit64", prlimitFunc }, { 262, "fanotify_init" }, @@ -410,6 +795,33 @@ SyscallDescTable EmuLinux::syscallDescs64 = { { 285, "copy_file_range" }, { 286, "preadv2" }, { 287, "pwritev2" }, + { 424, "pidfd_send_signal" }, + { 425, "io_uring_setup" }, + { 426, "io_uring_enter" }, + { 427, "io_uring_register" }, + { 428, "open_tree" }, + { 429, "move_mount" }, + { 430, "fsopen" }, + { 431, "fsconfig" }, + { 432, "fsmount" }, + { 433, "fspick" }, + { 434, "pidfd_open" }, + { 435, "clone3", clone3Func }, + { 436, "close_range" }, 
+ { 437, "openat2" }, + { 438, "pidfd_getfd" }, + { 439, "faccessat2" }, + { 440, "process_madvise" }, + { 441, "epoll_pwait2" }, + { 442, "mount_setattr" }, + { 443, "quotactl_fd" }, + { 444, "landlock_create_ruleset" }, + { 445, "landlock_add_rule" }, + { 446, "landlock_restrict_self" }, + { 447, "memfd_secret" }, + { 448, "process_mrelease" }, + { 449, "futex_waitv" }, + { 450, "set_mempolicy_home_node" }, { 1024, "open", openFunc }, { 1025, "link", linkFunc }, { 1026, "unlink", unlinkFunc }, @@ -721,6 +1133,7 @@ SyscallDescTable EmuLinux::syscallDescs32 = { { 241, "perf_event_open" }, { 242, "accept4" }, { 243, "recvmmsg" }, + { 258, "riscv_hwprobe", riscvHWProbeFunc }, { 260, "wait4", wait4Func }, { 261, "prlimit64", prlimitFunc }, { 262, "fanotify_init" }, diff --git a/src/arch/riscv/regs/int.hh b/src/arch/riscv/regs/int.hh index 4ac01c60c1..dc7e37cdbe 100644 --- a/src/arch/riscv/regs/int.hh +++ b/src/arch/riscv/regs/int.hh @@ -149,6 +149,18 @@ inline constexpr RegId ArgumentRegs[] = { int_reg::A4, int_reg::A5, int_reg::A6, int_reg::A7 }; +const std::vector PushPopRegList = { + int_reg::S11, int_reg::S10, int_reg::S9, int_reg::S8, + int_reg::S7, int_reg::S6, int_reg::S5, int_reg::S4, + int_reg::S3, int_reg::S2, int_reg::S1, int_reg::S0, + int_reg::Ra +}; + +inline constexpr RegId StackRegs[] = { + int_reg::S0, int_reg::S1, int_reg::S2, int_reg::S3, + int_reg::S4, int_reg::S5, int_reg::S6, int_reg::S7, +}; + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index c7edffc2f7..8b72c782a9 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -58,6 +58,7 @@ BitUnion64(ExtMachInst) // Decoder state Bitfield<63, 62> rv_type; Bitfield<61> compressed; + Bitfield<60> enable_zcd; // More bits for vector extension Bitfield<57, 41> vl; // [0, 2**16] Bitfield<40> vill; @@ -126,6 +127,8 @@ BitUnion64(ExtMachInst) Bitfield< 6, 2> rc2; Bitfield< 9, 7> rp1; Bitfield< 4, 2> rp2; + Bitfield< 9, 7> r1s; + 
Bitfield< 4, 2> r2s; Bitfield<11, 7> fc1; Bitfield< 6, 2> fc2; Bitfield< 4, 2> fp2; @@ -144,6 +147,8 @@ BitUnion64(ExtMachInst) Bitfield<12, 10> cimm3; Bitfield< 6, 5> cimm2; Bitfield<12> cimm1; + Bitfield< 7, 4> rlist; + Bitfield< 3, 2> spimm; // Pseudo instructions Bitfield<31, 25> m5func; // vector diff --git a/src/arch/x86/decoder.cc b/src/arch/x86/decoder.cc index af2456d6ab..ec595856a2 100644 --- a/src/arch/x86/decoder.cc +++ b/src/arch/x86/decoder.cc @@ -41,8 +41,6 @@ namespace gem5 namespace X86ISA { -X86ISAInst::MicrocodeRom Decoder::microcodeRom; - Decoder::State Decoder::doResetState() { @@ -671,9 +669,6 @@ Decoder::doImmediateState() return nextState; } -Decoder::InstBytes Decoder::dummy; -Decoder::InstCacheMap Decoder::instCacheMap; - StaticInstPtr Decoder::decode(ExtMachInst mach_inst, Addr addr) { diff --git a/src/arch/x86/decoder.hh b/src/arch/x86/decoder.hh index e4b1de96d7..eee48c1f76 100644 --- a/src/arch/x86/decoder.hh +++ b/src/arch/x86/decoder.hh @@ -60,19 +60,19 @@ class Decoder : public InstDecoder // These are defined and documented in decoder_tables.cc static const uint8_t SizeTypeToSize[3][10]; typedef const uint8_t ByteTable[256]; - static ByteTable Prefixes[2]; + static const ByteTable Prefixes[2]; - static ByteTable UsesModRMOneByte; - static ByteTable UsesModRMTwoByte; - static ByteTable UsesModRMThreeByte0F38; - static ByteTable UsesModRMThreeByte0F3A; + static const ByteTable UsesModRMOneByte; + static const ByteTable UsesModRMTwoByte; + static const ByteTable UsesModRMThreeByte0F38; + static const ByteTable UsesModRMThreeByte0F3A; - static ByteTable ImmediateTypeOneByte; - static ByteTable ImmediateTypeTwoByte; - static ByteTable ImmediateTypeThreeByte0F38; - static ByteTable ImmediateTypeThreeByte0F3A; + static const ByteTable ImmediateTypeOneByte; + static const ByteTable ImmediateTypeTwoByte; + static const ByteTable ImmediateTypeThreeByte0F38; + static const ByteTable ImmediateTypeThreeByte0F3A; - static 
X86ISAInst::MicrocodeRom microcodeRom; + X86ISAInst::MicrocodeRom microcodeRom; protected: using MachInst = uint64_t; @@ -88,7 +88,7 @@ class Decoder : public InstDecoder {} }; - static InstBytes dummy; + InstBytes dummy; // The bytes to be predecoded. MachInst fetchChunk; @@ -244,7 +244,7 @@ class Decoder : public InstDecoder decode_cache::InstMap *instMap = nullptr; typedef std::unordered_map< CacheKey, decode_cache::InstMap *> InstCacheMap; - static InstCacheMap instCacheMap; + InstCacheMap instCacheMap; StaticInstPtr decodeInst(ExtMachInst mach_inst); diff --git a/src/base/stats/units.hh b/src/base/stats/units.hh index 1d7d640ddb..eb4bcd240f 100644 --- a/src/base/stats/units.hh +++ b/src/base/stats/units.hh @@ -350,9 +350,9 @@ class Rate : public Base "otherwise, it would be a Ratio"); private: - Rate() {} + Rate() {} public: - Rate(Rate const&) = delete; + Rate(Rate const&) = delete; void operator=(Rate const&) = delete; static Rate* get() diff --git a/src/cpu/base.cc b/src/cpu/base.cc index ec219aa9f1..cc093e7000 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -240,7 +240,11 @@ BaseCPU::postInterrupt(ThreadID tid, int int_num, int index) // Only wake up syscall emulation if it is not waiting on a futex. // This is to model the fact that instructions such as ARM SEV // should wake up a WFE sleep, but not a futex syscall WAIT. - if (FullSystem || !system->futexMap.is_waiting(threadContexts[tid])) + // + // For RISC-V, the WFI sleep wake up is implementation defined. 
+ // The SiFive WFI wake up the hart only if mip & mie != 0 + if ((FullSystem && interrupts[tid]->isWakeUp()) || + !system->futexMap.is_waiting(threadContexts[tid])) wakeup(tid); } @@ -855,13 +859,13 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent) "Simulator op (including micro ops) rate (op/s)") { simInsts - .functor(BaseCPU::numSimulatedInsts) + .functor(BaseCPU::GlobalStats::numSimulatedInsts) .precision(0) .prereq(simInsts) ; simOps - .functor(BaseCPU::numSimulatedOps) + .functor(BaseCPU::GlobalStats::numSimulatedOps) .precision(0) .prereq(simOps) ; diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 0be0eda344..28cd90f3e2 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -156,6 +156,30 @@ class BaseCPU : public ClockedObject statistics::Formula hostInstRate; statistics::Formula hostOpRate; + + Counter previousInsts = 0; + Counter previousOps = 0; + + static Counter + numSimulatedInsts() + { + return totalNumSimulatedInsts() - (globalStats->previousInsts); + } + + static Counter + numSimulatedOps() + { + return totalNumSimulatedOps() - (globalStats->previousOps); + } + + void + resetStats() override + { + previousInsts = totalNumSimulatedInsts(); + previousOps = totalNumSimulatedOps(); + + statistics::Group::resetStats(); + } }; /** @@ -609,7 +633,7 @@ class BaseCPU : public ClockedObject static int numSimulatedCPUs() { return cpuList.size(); } static Counter - numSimulatedInsts() + totalNumSimulatedInsts() { Counter total = 0; @@ -621,7 +645,7 @@ class BaseCPU : public ClockedObject } static Counter - numSimulatedOps() + totalNumSimulatedOps() { Counter total = 0; diff --git a/src/cpu/o3/FUPool.py b/src/cpu/o3/FUPool.py index 67f523787b..b82b450700 100644 --- a/src/cpu/o3/FUPool.py +++ b/src/cpu/o3/FUPool.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017 ARM Limited +# Copyright (c) 2017, 2024 Arm Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -57,6 +57,7 @@ class DefaultFUPool(FUPool): 
FP_MultDiv(), ReadPort(), SIMD_Unit(), + Matrix_Unit(), PredALU(), WritePort(), RdWrPort(), diff --git a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py index ab01b4aa27..5606046f5e 100644 --- a/src/cpu/o3/FuncUnitConfig.py +++ b/src/cpu/o3/FuncUnitConfig.py @@ -1,4 +1,4 @@ -# Copyright (c) 2010, 2017, 2020 ARM Limited +# Copyright (c) 2010, 2017, 2020, 2024 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -109,10 +109,27 @@ class SIMD_Unit(FUDesc): OpDesc(opClass="SimdExt"), OpDesc(opClass="SimdFloatExt"), OpDesc(opClass="SimdConfig"), + OpDesc(opClass="SimdAes"), + OpDesc(opClass="SimdAesMix"), + OpDesc(opClass="SimdSha1Hash"), + OpDesc(opClass="SimdSha1Hash2"), + OpDesc(opClass="SimdSha256Hash"), + OpDesc(opClass="SimdSha256Hash2"), + OpDesc(opClass="SimdShaSigma2"), + OpDesc(opClass="SimdShaSigma3"), ] count = 4 +class Matrix_Unit(FUDesc): + opList = [ + OpDesc(opClass="Matrix"), + OpDesc(opClass="MatrixMov"), + OpDesc(opClass="MatrixOP"), + ] + count = 1 + + class PredALU(FUDesc): opList = [OpDesc(opClass="SimdPredAlu")] count = 1 diff --git a/src/cpu/o3/probe/elastic_trace.cc b/src/cpu/o3/probe/elastic_trace.cc index a56ef17749..2988e83038 100644 --- a/src/cpu/o3/probe/elastic_trace.cc +++ b/src/cpu/o3/probe/elastic_trace.cc @@ -122,7 +122,7 @@ ElasticTrace::regEtraceListeners() { assert(!allProbesReg); inform("@%llu: No. of instructions committed = %llu, registering elastic" - " probe listeners", curTick(), cpu->numSimulatedInsts()); + " probe listeners", curTick(), cpu->totalNumSimulatedInsts()); // Create new listeners: provide method to be called upon a notify() for // each probe point. 
listeners.push_back(new ProbeListenerArg(this, diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index a10b2c2cef..5b90826315 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -38,6 +38,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from m5.objects.ClockedObject import ClockedObject +from m5.objects.IndexingPolicies import * +from m5.objects.ReplacementPolicies import * from m5.params import * from m5.proxy import * from m5.SimObject import * @@ -83,6 +85,38 @@ class BranchTargetBuffer(ClockedObject): numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") +class BTBIndexingPolicy(SimObject): + type = "BTBIndexingPolicy" + abstract = True + cxx_class = "gem5::IndexingPolicyTemplate" + cxx_header = "cpu/pred/btb_entry.hh" + cxx_template_params = ["class Types"] + + # Get the associativity + assoc = Param.Int(Parent.assoc, "associativity") + + +class BTBSetAssociative(BTBIndexingPolicy): + type = "BTBSetAssociative" + cxx_class = "gem5::BTBSetAssociative" + cxx_header = "cpu/pred/btb_entry.hh" + + # Get the number of entries in the BTB from the parent + num_entries = Param.Unsigned( + Parent.numEntries, "Number of entries in the BTB" + ) + + # Set shift for the index. Ignore lower 2 bits for a 4 byte instruction. + set_shift = Param.Unsigned(2, "Number of bits to shift PC to get index") + + # Total number of bits in the tag. 
+ # This is above the index and offset bit + tag_bits = Param.Unsigned(64, "number of bits in the tag") + + # Number of threads sharing the BTB + numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") + + class SimpleBTB(BranchTargetBuffer): type = "SimpleBTB" cxx_class = "gem5::branch_prediction::SimpleBTB" @@ -93,6 +127,19 @@ class SimpleBTB(BranchTargetBuffer): instShiftAmt = Param.Unsigned( Parent.instShiftAmt, "Number of bits to shift instructions by" ) + associativity = Param.Unsigned(1, "BTB associativity") + btbReplPolicy = Param.BaseReplacementPolicy( + LRURP(), "BTB replacement policy" + ) + btbIndexingPolicy = Param.BTBIndexingPolicy( + BTBSetAssociative( + assoc=Parent.associativity, + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + numThreads=1, + ), + "BTB indexing policy", + ) class IndirectPredictor(SimObject): diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index ec3102cada..6c03dd8a1b 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -45,7 +45,7 @@ SimObject('BranchPredictor.py', sim_objects=[ 'BranchPredictor', 'IndirectPredictor', 'SimpleIndirectPredictor', - 'BranchTargetBuffer', 'SimpleBTB', + 'BranchTargetBuffer', 'SimpleBTB', 'BTBIndexingPolicy', 'BTBSetAssociative', 'ReturnAddrStack', 'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor', 'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB', diff --git a/src/cpu/pred/btb_entry.hh b/src/cpu/pred/btb_entry.hh new file mode 100644 index 0000000000..a445ac4775 --- /dev/null +++ b/src/cpu/pred/btb_entry.hh @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2024 Pranith Kumar + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a BTB entry and BTB indexing policy. 
+ */ + +#ifndef __CPU_PRED_BTB_ENTRY_HH__ +#define __CPU_PRED_BTB_ENTRY_HH__ + +#include + +#include "arch/generic/pcstate.hh" +#include "base/intmath.hh" +#include "base/types.hh" +#include "cpu/static_inst.hh" +#include "mem/cache/replacement_policies/replaceable_entry.hh" +#include "mem/cache/tags/indexing_policies/base.hh" +#include "params/BTBIndexingPolicy.hh" +#include "params/BTBSetAssociative.hh" + +namespace gem5 { + +class BTBTagType +{ + public: + struct KeyType + { + Addr address; + ThreadID tid; + }; + using Params = BTBIndexingPolicyParams; +}; + +using BTBIndexingPolicy = IndexingPolicyTemplate; +template class IndexingPolicyTemplate; + +class BTBSetAssociative : public BTBIndexingPolicy +{ + public: + PARAMS(BTBSetAssociative); + using KeyType = BTBTagType::KeyType; + + BTBSetAssociative(const Params &p) + : BTBIndexingPolicy(p, p.num_entries, p.set_shift), + tagMask(mask(p.tag_bits)) + { + setNumThreads(p.numThreads); + } + + protected: + /** + * Extract the set index for the instruction PC based on tid. + */ + uint32_t + extractSet(const KeyType &key) const + { + return ((key.address >> setShift) + ^ (key.tid << (tagShift - setShift - log2NumThreads))) + & setMask; + } + + public: + /** + * Find all possible entries for insertion and replacement of an address. + */ + std::vector + getPossibleEntries(const KeyType &key) const override + { + auto set_idx = extractSet(key); + + assert(set_idx < sets.size()); + + return sets[set_idx]; + } + + /** + * Set number of threads sharing the BTB + */ + void + setNumThreads(unsigned num_threads) + { + log2NumThreads = log2i(num_threads); + } + + /** + * Generate the tag from the given address. 
+ */ + Addr + extractTag(const Addr addr) const override + { + return (addr >> tagShift) & tagMask; + } + + Addr regenerateAddr(const KeyType &key, + const ReplaceableEntry* entry) const override + { + panic("Not implemented!"); + return 0; + } + + private: + const uint64_t tagMask; + unsigned log2NumThreads; +}; + +namespace branch_prediction +{ + +class BTBEntry : public ReplaceableEntry +{ + public: + using IndexingPolicy = gem5::BTBIndexingPolicy; + using KeyType = gem5::BTBTagType::KeyType; + using TagExtractor = std::function; + + /** Default constructor */ + BTBEntry(TagExtractor ext) + : inst(nullptr), extractTag(ext), valid(false), tag({MaxAddr, -1}) + {} + + /** Update the target and instruction in the BTB entry. + * During insertion, only the tag (key) is updated. + */ + void + update(const PCStateBase &_target, + StaticInstPtr _inst) + { + set(target, _target); + inst = _inst; + } + + /** + * Checks if the given tag information corresponds to this entry's. + */ + bool + match(const KeyType &key) const + { + return isValid() && (tag.address == extractTag(key.address)) + && (tag.tid == key.tid); + } + + /** + * Insert the block by assigning it a tag and marking it valid. Touches + * block if it hadn't been touched previously. + */ + void + insert(const KeyType &key) + { + setValid(); + setTag({extractTag(key.address), key.tid}); + } + + /** Copy constructor */ + BTBEntry(const BTBEntry &other) + { + valid = other.valid; + tag = other.tag; + inst = other.inst; + extractTag = other.extractTag; + set(target, other.target); + } + + /** Assignment operator */ + BTBEntry& operator=(const BTBEntry &other) + { + valid = other.valid; + tag = other.tag; + inst = other.inst; + extractTag = other.extractTag; + set(target, other.target); + + return *this; + } + + /** + * Checks if the entry is valid. + */ + bool isValid() const { return valid; } + + /** + * Get tag associated to this block. + */ + KeyType getTag() const { return tag; } + + /** Invalidate the block. 
Its contents are no longer valid. */ + void + invalidate() + { + valid = false; + setTag({MaxAddr, -1}); + } + + /** The entry's target. */ + std::unique_ptr target; + + /** Pointer to the static branch inst at this address */ + StaticInstPtr inst; + + std::string + print() const override + { + return csprintf("tag: %#x tid: %d valid: %d | %s", tag.address, tag.tid, + isValid(), ReplaceableEntry::print()); + } + + protected: + /** + * Set tag associated to this block. + */ + void setTag(KeyType _tag) { tag = _tag; } + + /** Set valid bit. The block must be invalid beforehand. */ + void + setValid() + { + assert(!isValid()); + valid = true; + } + + private: + /** Callback used to extract the tag from the entry */ + TagExtractor extractTag; + + /** + * Valid bit. The contents of this entry are only valid if this bit is set. + * @sa invalidate() + * @sa insert() + */ + bool valid; + + /** The entry's tag. */ + KeyType tag; +}; + +} // namespace gem5::branch_prediction +/** + * This helper generates a tag extractor function object + * which will be typically used by Replaceable entries indexed + * with the BaseIndexingPolicy. + * It allows to "decouple" indexing from tagging. Those entries + * would call the functor without directly holding a pointer + * to the indexing policy which should reside in the cache. 
+ */ +static constexpr auto +genTagExtractor(BTBIndexingPolicy *ip) +{ + return [ip] (Addr addr) { return ip->extractTag(addr); }; +} + +} + +#endif //__CPU_PRED_BTB_ENTRY_HH__ diff --git a/src/cpu/pred/simple_btb.cc b/src/cpu/pred/simple_btb.cc index c78caac7a8..0260ced8b3 100644 --- a/src/cpu/pred/simple_btb.cc +++ b/src/cpu/pred/simple_btb.cc @@ -44,84 +44,38 @@ #include "base/trace.hh" #include "debug/BTB.hh" -namespace gem5 -{ - -namespace branch_prediction +namespace gem5::branch_prediction { SimpleBTB::SimpleBTB(const SimpleBTBParams &p) : BranchTargetBuffer(p), - numEntries(p.numEntries), - tagBits(p.tagBits), - instShiftAmt(p.instShiftAmt), - log2NumThreads(floorLog2(p.numThreads)) + btb("simpleBTB", p.numEntries, p.associativity, + p.btbReplPolicy, p.btbIndexingPolicy, + BTBEntry(genTagExtractor(p.btbIndexingPolicy))) { DPRINTF(BTB, "BTB: Creating BTB object.\n"); - if (!isPowerOf2(numEntries)) { + if (!isPowerOf2(p.numEntries)) { fatal("BTB entries is not a power of 2!"); } - - btb.resize(numEntries); - - for (unsigned i = 0; i < numEntries; ++i) { - btb[i].valid = false; - } - - idxMask = numEntries - 1; - - tagMask = (1 << tagBits) - 1; - - tagShiftAmt = instShiftAmt + floorLog2(numEntries); } void SimpleBTB::memInvalidate() { - for (unsigned i = 0; i < numEntries; ++i) { - btb[i].valid = false; - } + btb.clear(); } -inline -unsigned -SimpleBTB::getIndex(Addr instPC, ThreadID tid) -{ - // Need to shift PC over by the word offset. 
- return ((instPC >> instShiftAmt) - ^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads))) - & idxMask; -} - -inline -Addr -SimpleBTB::getTag(Addr instPC) -{ - return (instPC >> tagShiftAmt) & tagMask; -} - -SimpleBTB::BTBEntry * +BTBEntry * SimpleBTB::findEntry(Addr instPC, ThreadID tid) { - unsigned btb_idx = getIndex(instPC, tid); - Addr inst_tag = getTag(instPC); - - assert(btb_idx < numEntries); - - if (btb[btb_idx].valid - && inst_tag == btb[btb_idx].tag - && btb[btb_idx].tid == tid) { - return &btb[btb_idx]; - } - - return nullptr; + return btb.findEntry({instPC, tid}); } bool SimpleBTB::valid(ThreadID tid, Addr instPC) { - BTBEntry *entry = findEntry(instPC, tid); + BTBEntry *entry = btb.findEntry({instPC, tid}); return entry != nullptr; } @@ -134,11 +88,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type) { stats.lookups[type]++; - BTBEntry *entry = findEntry(instPC, tid); + BTBEntry *entry = btb.accessEntry({instPC, tid}); if (entry) { return entry->target.get(); } + stats.misses[type]++; return nullptr; } @@ -146,31 +101,27 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type) const StaticInstPtr SimpleBTB::getInst(ThreadID tid, Addr instPC) { - BTBEntry *entry = findEntry(instPC, tid); + BTBEntry *entry = btb.findEntry({instPC, tid}); if (entry) { return entry->inst; } + return nullptr; } void SimpleBTB::update(ThreadID tid, Addr instPC, - const PCStateBase &target, - BranchType type, StaticInstPtr inst) + const PCStateBase &target, + BranchType type, StaticInstPtr inst) { - unsigned btb_idx = getIndex(instPC, tid); - - assert(btb_idx < numEntries); - stats.updates[type]++; - btb[btb_idx].tid = tid; - btb[btb_idx].valid = true; - set(btb[btb_idx].target, target); - btb[btb_idx].tag = getTag(instPC); - btb[btb_idx].inst = inst; + BTBEntry *victim = btb.findVictim({instPC, tid}); + + btb.insertEntry({instPC, tid}, victim); + victim->update(target, inst); } -} // namespace branch_prediction -} // namespace gem5 + +} // 
namespace gem5::branch_prediction diff --git a/src/cpu/pred/simple_btb.hh b/src/cpu/pred/simple_btb.hh index 3c76890348..b1ef2a9fa5 100644 --- a/src/cpu/pred/simple_btb.hh +++ b/src/cpu/pred/simple_btb.hh @@ -41,15 +41,16 @@ #ifndef __CPU_PRED_SIMPLE_BTB_HH__ #define __CPU_PRED_SIMPLE_BTB_HH__ +#include "base/cache/associative_cache.hh" #include "base/logging.hh" #include "base/types.hh" #include "cpu/pred/btb.hh" +#include "cpu/pred/btb_entry.hh" +#include "mem/cache/replacement_policies/replaceable_entry.hh" +#include "mem/cache/tags/indexing_policies/base.hh" #include "params/SimpleBTB.hh" -namespace gem5 -{ - -namespace branch_prediction +namespace gem5::branch_prediction { class SimpleBTB : public BranchTargetBuffer @@ -60,44 +61,13 @@ class SimpleBTB : public BranchTargetBuffer void memInvalidate() override; bool valid(ThreadID tid, Addr instPC) override; const PCStateBase *lookup(ThreadID tid, Addr instPC, - BranchType type = BranchType::NoBranch) override; + BranchType type = BranchType::NoBranch) override; void update(ThreadID tid, Addr instPC, const PCStateBase &target_pc, - BranchType type = BranchType::NoBranch, - StaticInstPtr inst = nullptr) override; + BranchType type = BranchType::NoBranch, + StaticInstPtr inst = nullptr) override; const StaticInstPtr getInst(ThreadID tid, Addr instPC) override; - private: - struct BTBEntry - { - /** The entry's tag. */ - Addr tag = 0; - - /** The entry's target. */ - std::unique_ptr target; - - /** The entry's thread id. */ - ThreadID tid; - - /** Whether or not the entry is valid. */ - bool valid = false; - - /** Pointer to the static branch instruction at this address */ - StaticInstPtr inst = nullptr; - }; - - - /** Returns the index into the BTB, based on the branch's PC. - * @param inst_PC The branch to look up. - * @return Returns the index into the BTB. - */ - inline unsigned getIndex(Addr instPC, ThreadID tid); - - /** Returns the tag bits of a given address. - * @param inst_PC The branch's address. 
- * @return Returns the tag bits. - */ - inline Addr getTag(Addr instPC); /** Internal call to find an address in the BTB * @param instPC The branch's address. @@ -106,31 +76,9 @@ class SimpleBTB : public BranchTargetBuffer BTBEntry *findEntry(Addr instPC, ThreadID tid); /** The actual BTB. */ - std::vector btb; - - /** The number of entries in the BTB. */ - unsigned numEntries; - - /** The index mask. */ - unsigned idxMask; - - /** The number of tag bits per entry. */ - unsigned tagBits; - - /** The tag mask. */ - unsigned tagMask; - - /** Number of bits to shift PC when calculating index. */ - unsigned instShiftAmt; - - /** Number of bits to shift PC when calculating tag. */ - unsigned tagShiftAmt; - - /** Log2 NumThreads used for hashing threadid */ - unsigned log2NumThreads; + AssociativeCache btb; }; -} // namespace branch_prediction -} // namespace gem5 +} // namespace gem5::branch_prediction #endif // __CPU_PRED_SIMPLE_BTB_HH__ diff --git a/src/cpu/testers/gpu_ruby_test/TesterThread.py b/src/cpu/testers/gpu_ruby_test/TesterThread.py index 49388a76e1..6ddfc66ddc 100644 --- a/src/cpu/testers/gpu_ruby_test/TesterThread.py +++ b/src/cpu/testers/gpu_ruby_test/TesterThread.py @@ -41,3 +41,4 @@ class TesterThread(ClockedObject): thread_id = Param.Int("Unique TesterThread ID") num_lanes = Param.Int("Number of lanes this thread has") deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold") + cache_line_size = Param.UInt32("Size of cache line in cache") diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.cc b/src/cpu/testers/gpu_ruby_test/address_manager.cc index a0c0670a8f..83d8a1a277 100644 --- a/src/cpu/testers/gpu_ruby_test/address_manager.cc +++ b/src/cpu/testers/gpu_ruby_test/address_manager.cc @@ -64,7 +64,9 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic) std::shuffle( randAddressMap.begin(), randAddressMap.end(), - std::default_random_engine(random_mt.random(0,UINT_MAX)) + // TODO: This is a bug unrelated to 
this draft PR but the GPU tester is + // useful for testing this PR. + std::default_random_engine(random_mt.random(0,UINT_MAX-1)) ); // initialize atomic locations diff --git a/src/cpu/testers/gpu_ruby_test/dma_thread.cc b/src/cpu/testers/gpu_ruby_test/dma_thread.cc index 1d6f46c44b..2c4c610c51 100644 --- a/src/cpu/testers/gpu_ruby_test/dma_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/dma_thread.cc @@ -70,7 +70,7 @@ DmaThread::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -127,7 +127,7 @@ DmaThread::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -211,7 +211,7 @@ DmaThread::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -" " Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - resp_cmd.toString(), ruby::printAddress(addr)); + resp_cmd.toString(), printAddress(addr)); if (resp_cmd == MemCmd::SwapResp) { // response to a pending atomic diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc index ae4078ee6c..516e77ddae 100644 --- a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -67,7 +67,7 @@ GpuWavefront::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -124,7 +124,7 @@ GpuWavefront::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: 
Issuing Store - Addr %s - " "Value %d\n", this->getName(), - curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -178,7 +178,7 @@ GpuWavefront::issueAtomicOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); // must be aligned with store size assert(address % sizeof(Value) == 0); @@ -268,7 +268,7 @@ GpuWavefront::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - " "Addr %s\n", this->getName(), curEpisode->getEpisodeId(), resp_cmd.toString(), - ruby::printAddress(addr)); + printAddress(addr)); // whether the transaction is done after this hitCallback bool isTransactionDone = true; diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.cc b/src/cpu/testers/gpu_ruby_test/tester_thread.cc index ce3a1bccc6..dbcfba8c3c 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.cc @@ -43,6 +43,7 @@ TesterThread::TesterThread(const Params &p) : ClockedObject(p), threadEvent(this, "TesterThread tick"), deadlockCheckEvent(this), + cacheLineSize(p.cache_line_size), threadId(p.thread_id), numLanes(p.num_lanes), tester(nullptr), addrManager(nullptr), port(nullptr), @@ -383,7 +384,7 @@ TesterThread::validateAtomicResp(Location loc, int lane, Value ret_val) ss << threadName << ": Atomic Op returned unexpected value\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << "\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tAtomic Op's return value " << ret_val << "\n"; // print out basic info @@ -409,7 +410,7 @@ TesterThread::validateLoadResp(Location loc, int lane, Value ret_val) << "\tTesterThread " << threadId << "\n" << "\tEpisode " << 
curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << "\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tLoaded value " << ret_val << "\n" << "\tLast writer " << addrManager->printLastWriter(loc) << "\n"; @@ -467,7 +468,7 @@ TesterThread::printOutstandingReqs(const OutstandingReqTable& table, for (const auto& m : table) { for (const auto& req : m.second) { - ss << "\t\t\tAddr " << ruby::printAddress(m.first) + ss << "\t\t\tAddr " << printAddress(m.first) << ": delta (curCycle - issueCycle) = " << (cur_cycle - req.issueCycle) << std::endl; } @@ -488,4 +489,10 @@ TesterThread::printAllOutstandingReqs(std::stringstream& ss) const << pendingFenceCount << std::endl; } +std::string +TesterThread::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, cacheLineSize * 8); +} + } // namespace gem5 diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.hh b/src/cpu/testers/gpu_ruby_test/tester_thread.hh index 9877d63c24..f31a5a3dea 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.hh +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.hh @@ -132,6 +132,7 @@ class TesterThread : public ClockedObject {} }; + int cacheLineSize; // the unique global id of this thread int threadId; // width of this thread (1 for cpu thread & wf size for gpu wavefront) @@ -204,6 +205,7 @@ class TesterThread : public ClockedObject void printOutstandingReqs(const OutstandingReqTable& table, std::stringstream& ss) const; + std::string printAddress(Addr addr) const; }; } // namespace gem5 diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc index 5a83d9ca27..b9c777526a 100644 --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -124,7 +124,8 @@ Check::initiatePrefetch() // push the subblock onto the sender state. 
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "successfully initiated prefetch.\n"); @@ -161,7 +162,8 @@ Check::initiateFlush() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating Flush - successful\n"); @@ -207,7 +209,8 @@ Check::initiateAction() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(writeAddr, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating action - successful\n"); @@ -261,7 +264,8 @@ Check::initiateCheck() // push the subblock onto the sender state. 
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating check - successful\n"); @@ -291,7 +295,9 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) // This isn't exactly right since we now have multi-byte checks // assert(getAddress() == address); - assert(ruby::makeLineAddress(m_address) == ruby::makeLineAddress(address)); + int block_size_bits = CACHE_LINE_BITS; + assert(ruby::makeLineAddress(m_address, block_size_bits) == + ruby::makeLineAddress(address, block_size_bits)); assert(data != NULL); DPRINTF(RubyTest, "RubyTester Callback\n"); @@ -342,7 +348,7 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) } DPRINTF(RubyTest, "proc: %d, Address: 0x%x\n", proc, - ruby::makeLineAddress(m_address)); + ruby::makeLineAddress(m_address, block_size_bits)); DPRINTF(RubyTest, "Callback done\n"); debugPrint(); } diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh index 78e2bda77e..0270b800d7 100644 --- a/src/cpu/testers/rubytest/Check.hh +++ b/src/cpu/testers/rubytest/Check.hh @@ -47,6 +47,7 @@ class SubBlock; const int CHECK_SIZE_BITS = 2; const int CHECK_SIZE = (1 << CHECK_SIZE_BITS); +const int CACHE_LINE_BITS = 6; class Check { diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh index 9397126180..d306c405ef 100644 --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -90,7 +90,9 @@ class RubyTester : public ClockedObject { ruby::SubBlock subBlock; - SenderState(Addr addr, int size) : subBlock(addr, size) {} + SenderState(Addr addr, int size, int cl_size) + : subBlock(addr, size, cl_size) + {} }; diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py index 35ffcfe528..b1f597aba8 
100644 --- a/src/dev/amdgpu/AMDGPU.py +++ b/src/dev/amdgpu/AMDGPU.py @@ -81,8 +81,6 @@ class AMDGPUDevice(PciDevice): InterruptPin = 2 ExpansionROM = 0 - rom_binary = Param.String("ROM binary dumped from hardware") - trace_file = Param.String("MMIO trace collected on hardware") checkpoint_before_mmios = Param.Bool( False, "Take a checkpoint before the device begins sending MMIOs" ) diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index c82d0de60c..50d152cda1 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -58,12 +58,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) init_interrupt_count(0), _lastVMID(0), deviceMem(name() + ".deviceMem", p.memories, false, "", false) { - // Loading the rom binary dumped from hardware. - std::ifstream romBin; - romBin.open(p.rom_binary, std::ios::binary); - romBin.read((char *)rom.data(), ROM_SIZE); - romBin.close(); - // System pointer needs to be explicitly set for device memory since // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode. // Note this means the cache line size is system wide. 
@@ -92,10 +86,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) panic("Unknown GPU device %s\n", p.device_name); } - if (p.trace_file != "") { - mmioReader.readMMIOTrace(p.trace_file); - } - int sdma_id = 0; for (auto& s : p.sdmas) { s->setGPUDevice(this); diff --git a/src/dev/pci/host.cc b/src/dev/pci/host.cc index e7dea6c359..80cd9b5a5d 100644 --- a/src/dev/pci/host.cc +++ b/src/dev/pci/host.cc @@ -168,9 +168,14 @@ GenericPciHost::write(PacketPtr pkt) pkt->getSize()); PciDevice *const pci_dev(getDevice(dev_addr.first)); - panic_if(!pci_dev, - "%02x:%02x.%i: Write to config space on non-existent PCI device\n", - dev_addr.first.bus, dev_addr.first.dev, dev_addr.first.func); + warn_if(!pci_dev, + "%02x:%02x.%i: Write to config space on non-existent PCI device\n", + dev_addr.first.bus, dev_addr.first.dev, dev_addr.first.func); + + if (!pci_dev) { + pkt->makeAtomicResponse(); + return 20000; // 20ns default from PciDevice.py + } // @todo Remove this after testing pkt->headerDelay = pkt->payloadDelay = 0; diff --git a/src/dev/riscv/clint.cc b/src/dev/riscv/clint.cc index fc959aced4..a18555fc87 100644 --- a/src/dev/riscv/clint.cc +++ b/src/dev/riscv/clint.cc @@ -53,7 +53,7 @@ Clint::Clint(const Params ¶ms) : BasicPioDevice(params, params.pio_size), system(params.system), nThread(params.num_threads), - signal(params.name + ".signal", 0, this), + signal(params.name + ".signal", 0, this, INT_RTC), reset(params.name + ".reset"), resetMtimecmp(params.reset_mtimecmp), registers(params.name + ".registers", params.pio_addr, this, @@ -69,9 +69,11 @@ Clint::Clint(const Params ¶ms) : void Clint::raiseInterruptPin(int id) { - // Increment mtime + // Increment mtime when received RTC signal uint64_t& mtime = registers.mtime.get(); - mtime++; + if (id == INT_RTC) { + mtime++; + } for (int context_id = 0; context_id < nThread; context_id++) { @@ -261,7 +263,7 @@ Clint::doReset() { registers.msip[i].reset(); } // We need to update the mtip interrupt bits when reset - 
raiseInterruptPin(0); + raiseInterruptPin(INT_RESET); } } // namespace gem5 diff --git a/src/dev/riscv/clint.hh b/src/dev/riscv/clint.hh index 38f2117a16..2478eee0db 100644 --- a/src/dev/riscv/clint.hh +++ b/src/dev/riscv/clint.hh @@ -91,6 +91,13 @@ class Clint : public BasicPioDevice void raiseInterruptPin(int id); void lowerInterruptPin(int id) {} + // Interrupt ID + enum InterruptId + { + INT_RTC = 0, // received from RTC(signal port) + INT_RESET, // received from reset port + }; + // Register bank public: diff --git a/src/dev/virtio/base.hh b/src/dev/virtio/base.hh index 41ebb741d1..c31cd298b9 100644 --- a/src/dev/virtio/base.hh +++ b/src/dev/virtio/base.hh @@ -477,7 +477,7 @@ class VirtQueue : public Serializable Index index; }; - VirtRing(PortProxy &proxy, ByteOrder bo, uint16_t size) : + VirtRing(PortProxy &proxy, ByteOrder bo, uint16_t size) : header{0, 0}, ring(size), _proxy(proxy), _base(0), byteOrder(bo) {} @@ -550,7 +550,7 @@ class VirtQueue : public Serializable private: // Remove default constructor - VirtRing(); + VirtRing(); /** Guest physical memory proxy */ PortProxy &_proxy; diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh index 2b24828259..a2027f25f1 100644 --- a/src/mem/cache/cache_blk.hh +++ b/src/mem/cache/cache_blk.hh @@ -461,7 +461,7 @@ class CacheBlk : public TaggedEntry protected: /** The current coherence status of this block. 
@sa CoherenceBits */ - unsigned coherence; + unsigned coherence = 0; // The following setters have been marked as protected because their // respective variables should only be modified at 2 moments: diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index 9864c922f6..85cc628d5b 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -599,6 +599,22 @@ class BOPPrefetcher(QueuedPrefetcher): on_inst = False +class SmsPrefetcher(QueuedPrefetcher): + # Paper: https://web.eecs.umich.edu/~twenisch/papers/isca06.pdf + type = "SmsPrefetcher" + cxx_class = "gem5::prefetch::Sms" + cxx_header = "mem/cache/prefetch/sms.hh" + ft_size = Param.Unsigned(64, "Size of Filter and Active generation table") + pht_size = Param.Unsigned(16384, "Size of pattern history table") + region_size = Param.Unsigned(4096, "Spatial region size") + + queue_squash = True + queue_filter = True + cache_snoop = True + prefetch_on_access = True + on_inst = False + + class SBOOEPrefetcher(QueuedPrefetcher): type = "SBOOEPrefetcher" cxx_class = "gem5::prefetch::SBOOE" diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript index 8ce15e9688..c971b7541b 100644 --- a/src/mem/cache/prefetch/SConscript +++ b/src/mem/cache/prefetch/SConscript @@ -31,8 +31,9 @@ Import('*') SimObject('Prefetcher.py', sim_objects=[ 'BasePrefetcher', 'MultiPrefetcher', 'QueuedPrefetcher', 'StridePrefetcherHashedSetAssociative', 'StridePrefetcher', - 'TaggedPrefetcher', 'IndirectMemoryPrefetcher', 'SignaturePathPrefetcher', - 'SignaturePathPrefetcherV2', 'AccessMapPatternMatching', 'AMPMPrefetcher', + 'SmsPrefetcher', 'TaggedPrefetcher', 'IndirectMemoryPrefetcher', + 'SignaturePathPrefetcher', 'SignaturePathPrefetcherV2', + 'AccessMapPatternMatching', 'AMPMPrefetcher', 'DeltaCorrelatingPredictionTables', 'DCPTPrefetcher', 'IrregularStreamBufferPrefetcher', 'SlimAMPMPrefetcher', 'BOPPrefetcher', 'SBOOEPrefetcher', 
'STeMSPrefetcher', 'PIFPrefetcher']) @@ -47,6 +48,7 @@ Source('indirect_memory.cc') Source('pif.cc') Source('queued.cc') Source('sbooe.cc') +Source('sms.cc') Source('signature_path.cc') Source('signature_path_v2.cc') Source('slim_ampm.cc') diff --git a/src/mem/cache/prefetch/sms.cc b/src/mem/cache/prefetch/sms.cc new file mode 100644 index 0000000000..2ad4ef92e3 --- /dev/null +++ b/src/mem/cache/prefetch/sms.cc @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2024 Samsung Electronics + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Describes a SMS prefetcher based on template policies. + */ + +#include "mem/cache/prefetch/sms.hh" + +#include "debug/HWPrefetch.hh" +#include "params/SmsPrefetcher.hh" + +namespace gem5 +{ + +namespace prefetch +{ + +Sms::Sms(const SmsPrefetcherParams &p) + : Queued(p), Max_Contexts(p.ft_size), MAX_PHTSize(p.pht_size), + Region_Size(p.region_size) +{ + AGT.clear(); + AGTPC.clear(); + FT.clear(); + PHT.clear(); + fifoFT.clear(); + lruAGT.clear(); + lruPHT.clear(); + +} +void +Sms::notifyEvict(const EvictionInfo &info) +{ + //Check if any active generation has ended + Addr region_base = roundDown(info.addr, Region_Size); + std::pair pc_offset = AGTPC[region_base]; + if (AGT.find(region_base) != AGT.end()) { + //remove old recording + if (PHT.find(pc_offset) != PHT.end()) { + PHT[pc_offset].clear(); + } + //Move from AGT to PHT + for (std::set::iterator it = AGT[region_base].begin(); + it != AGT[region_base].end(); it ++) { + PHT[pc_offset].insert(*it); + } + lruPHT.push_front(pc_offset); + } + + while (PHT.size() > MAX_PHTSize) { + PHT.erase(lruPHT.back()); + lruPHT.pop_back(); + } + + AGTPC.erase(region_base); + AGT.erase(region_base); +} +void +Sms::calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses, + const CacheAccessor &cache) +{ + + if (!pfi.hasPC()) { + DPRINTF(HWPrefetch, "Ignoring request with no PC.\n"); + return; + } + + Addr blk_addr = blockAddress(pfi.getAddr()); + Addr pc = 
pfi.getPC(); + Addr region_base = roundDown(blk_addr, Region_Size); + Addr offset = blk_addr - region_base; + + //Training + if (AGT.find(region_base) != AGT.end()) { + assert (FT.find(region_base) == FT.end()); + // Record Pattern + AGT[region_base].insert(offset); + //update LRU + for (std::deque ::iterator lit = lruAGT.begin(); + lit != lruAGT.end(); lit ++) { + if ((*lit) == region_base) { + lruAGT.erase(lit); + lruAGT.push_front(region_base); + break; + } + } + } else if (FT.find(region_base) != FT.end()) { + //move entry from FT to AGT + AGT[region_base].insert(FT[region_base].second); + AGTPC[region_base] = FT[region_base]; + lruAGT.push_front(region_base); + //Record latest offset + AGT[region_base].insert(offset); + //Recycle FT entry + FT.erase(region_base); + //Make space for next entry + while (AGT.size() > Max_Contexts) { + AGT.erase(lruAGT.back()); + AGTPC.erase(lruAGT.back()); + lruAGT.pop_back(); + } + } else { + // Trigger Access + FT[region_base] = std::make_pair (pc,offset); + fifoFT.push_front(region_base); + while (FT.size() > Max_Contexts) { + FT.erase(fifoFT.back()); + fifoFT.pop_back(); + } + } + + //Prediction + std::pair pc_offset = std::make_pair(pc,offset); + if (PHT.find(pc_offset) != PHT.end()) { + for (std::set::iterator it = PHT[pc_offset].begin(); + it != PHT[pc_offset].end(); it ++) { + Addr pref_addr = blockAddress(region_base + (*it)); + addresses.push_back(AddrPriority(pref_addr,0)); + } + for (std::deque < std::pair >::iterator lit + = lruPHT.begin(); lit != lruPHT.end(); lit ++) { + if ((*lit) == pc_offset) { + lruPHT.erase(lit); + lruPHT.push_front(pc_offset); + break; + } + } + } + +} + +} // namespace prefetch +} // namespace gem5 diff --git a/src/mem/cache/prefetch/sms.hh b/src/mem/cache/prefetch/sms.hh new file mode 100644 index 0000000000..4bda1694dd --- /dev/null +++ b/src/mem/cache/prefetch/sms.hh @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024 Samsung Electronics + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Describes a SMS prefetcher. 
+ */ + +#ifndef __MEM_CACHE_PREFETCH_SMS_HH__ +#define __MEM_CACHE_PREFETCH_SMS_HH__ + +#include + +#include "mem/cache/prefetch/queued.hh" +#include "mem/packet.hh" + +namespace gem5 +{ + +struct SmsPrefetcherParams; + +namespace prefetch +{ + + +class Sms : public Queued +{ + + private: + const int Max_Contexts; //= 64; + const uint64_t MAX_PHTSize; //= 512; + const Addr Region_Size; //= 4096; + + std::map< Addr, std::set > AGT; + std::map< Addr, std::pair > AGTPC; + std::map< Addr, std::pair > FT; + std::map< std::pair , std::set > PHT; + std::deque fifoFT; + std::deque lruAGT; + std::deque< std::pair > lruPHT; + + using EvictionInfo = CacheDataUpdateProbeArg; + void notifyEvict(const EvictionInfo &info) override; + + public: + Sms(const SmsPrefetcherParams &p); + ~Sms() = default; + + void calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses, + const CacheAccessor &cache) override; +}; + +} // namespace prefetch +} // namespace gem5 + +#endif // __MEM_CACHE_PREFETCH_SMS_HH__ diff --git a/src/mem/ruby/common/Address.cc b/src/mem/ruby/common/Address.cc index fcf291af51..8b120324c7 100644 --- a/src/mem/ruby/common/Address.cc +++ b/src/mem/ruby/common/Address.cc @@ -51,37 +51,33 @@ maskLowOrderBits(Addr addr, unsigned int number) } Addr -getOffset(Addr addr) +getOffset(Addr addr, int cacheLineBits) { - return bitSelect(addr, 0, RubySystem::getBlockSizeBits() - 1); -} - -Addr -makeLineAddress(Addr addr) -{ - return mbits(addr, 63, RubySystem::getBlockSizeBits()); + assert(cacheLineBits < 64); + return bitSelect(addr, 0, cacheLineBits - 1); } Addr makeLineAddress(Addr addr, int cacheLineBits) { + assert(cacheLineBits < 64); return maskLowOrderBits(addr, cacheLineBits); } // returns the next stride address based on line address Addr -makeNextStrideAddress(Addr addr, int stride) +makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes) { - return makeLineAddress(addr) + - static_cast(RubySystem::getBlockSizeBytes()) * stride; + return 
makeLineAddress(addr, floorLog2(cacheLineBytes)) + + cacheLineBytes * stride; } std::string -printAddress(Addr addr) +printAddress(Addr addr, int cacheLineBits) { std::stringstream out; out << "[" << std::hex << "0x" << addr << "," << " line 0x" - << makeLineAddress(addr) << std::dec << "]"; + << makeLineAddress(addr, cacheLineBits) << std::dec << "]"; return out.str(); } diff --git a/src/mem/ruby/common/Address.hh b/src/mem/ruby/common/Address.hh index 565c3c1fb7..51e0b5417a 100644 --- a/src/mem/ruby/common/Address.hh +++ b/src/mem/ruby/common/Address.hh @@ -33,6 +33,7 @@ #include #include +#include "base/intmath.hh" #include "base/types.hh" namespace gem5 @@ -44,11 +45,10 @@ namespace ruby // selects bits inclusive Addr bitSelect(Addr addr, unsigned int small, unsigned int big); Addr maskLowOrderBits(Addr addr, unsigned int number); -Addr getOffset(Addr addr); -Addr makeLineAddress(Addr addr); +Addr getOffset(Addr addr, int cacheLineBits); Addr makeLineAddress(Addr addr, int cacheLineBits); -Addr makeNextStrideAddress(Addr addr, int stride); -std::string printAddress(Addr addr); +Addr makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes); +std::string printAddress(Addr addr, int cacheLineBits); } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc index 8f47d0026b..bbc0fd21c8 100644 --- a/src/mem/ruby/common/DataBlock.cc +++ b/src/mem/ruby/common/DataBlock.cc @@ -40,8 +40,8 @@ #include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/WriteMask.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -51,17 +51,22 @@ namespace ruby DataBlock::DataBlock(const DataBlock &cp) { + assert(cp.isAlloc()); + assert(cp.getBlockSize() > 0); + assert(!m_alloc); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); - m_data = new uint8_t[block_bytes]; - memcpy(m_data, cp.m_data, block_bytes); + m_block_size = 
cp.getBlockSize(); + m_data = new uint8_t[m_block_size]; + memcpy(m_data, cp.m_data, m_block_size); m_alloc = true; + m_block_size = m_block_size; // If this data block is involved in an atomic operation, the effect // of applying the atomic operations on the data block are recorded in // m_atomicLog. If so, we must copy over every entry in the change log for (size_t i = 0; i < cp.m_atomicLog.size(); i++) { - block_update = new uint8_t[block_bytes]; - memcpy(block_update, cp.m_atomicLog[i], block_bytes); + block_update = new uint8_t[m_block_size]; + memcpy(block_update, cp.m_atomicLog[i], m_block_size); m_atomicLog.push_back(block_update); } } @@ -69,21 +74,44 @@ DataBlock::DataBlock(const DataBlock &cp) void DataBlock::alloc() { - m_data = new uint8_t[RubySystem::getBlockSizeBytes()]; + assert(!m_alloc); + + if (!m_block_size) { + return; + } + + m_data = new uint8_t[m_block_size]; m_alloc = true; clear(); } +void +DataBlock::realloc(int blk_size) +{ + m_block_size = blk_size; + assert(m_block_size > 0); + + if (m_alloc) { + delete [] m_data; + m_alloc = false; + } + alloc(); +} + void DataBlock::clear() { - memset(m_data, 0, RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + memset(m_data, 0, m_block_size); } bool DataBlock::equal(const DataBlock& obj) const { - size_t block_bytes = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + size_t block_bytes = m_block_size; // Check that the block contents match if (memcmp(m_data, obj.m_data, block_bytes)) { return false; @@ -102,7 +130,9 @@ DataBlock::equal(const DataBlock& obj) const void DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { if (mask.getMask(i, 1)) { m_data[i] = dblk.m_data[i]; } @@ -113,7 +143,9 @@ void DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, 
bool isAtomicNoReturn) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { m_data[i] = dblk.m_data[i]; } mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn); @@ -122,7 +154,9 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, void DataBlock::print(std::ostream& out) const { - int size = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + int size = m_block_size; out << "[ "; for (int i = 0; i < size; i++) { out << std::setw(2) << std::setfill('0') << std::hex @@ -147,6 +181,7 @@ DataBlock::popAtomicLogEntryFront() void DataBlock::clearAtomicLogEntries() { + assert(m_alloc); for (auto log : m_atomicLog) { delete [] log; } @@ -156,35 +191,59 @@ DataBlock::clearAtomicLogEntries() const uint8_t* DataBlock::getData(int offset, int len) const { - assert(offset + len <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + assert(offset + len <= m_block_size); return &m_data[offset]; } uint8_t* DataBlock::getDataMod(int offset) { + assert(m_alloc); return &m_data[offset]; } void DataBlock::setData(const uint8_t *data, int offset, int len) { + assert(m_alloc); memcpy(&m_data[offset], data, len); } void DataBlock::setData(PacketPtr pkt) { - int offset = getOffset(pkt->getAddr()); - assert(offset + pkt->getSize() <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + int offset = getOffset(pkt->getAddr(), floorLog2(m_block_size)); + assert(offset + pkt->getSize() <= m_block_size); pkt->writeData(&m_data[offset]); } DataBlock & DataBlock::operator=(const DataBlock & obj) { + // Reallocate if needed + if (m_alloc && m_block_size != obj.getBlockSize()) { + delete [] m_data; + m_block_size = obj.getBlockSize(); + alloc(); + } else if (!m_alloc) { + m_block_size = obj.getBlockSize(); + alloc(); + + // Assume this will be realloc'd later if zero. 
+ if (m_block_size == 0) { + return *this; + } + } else { + assert(m_alloc && m_block_size == obj.getBlockSize()); + } + assert(m_block_size > 0); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); + size_t block_bytes = m_block_size; // Copy entire block contents from obj to current block memcpy(m_data, obj.m_data, block_bytes); // If this data block is involved in an atomic operation, the effect diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh index 7456a25f3f..ebfa7d1383 100644 --- a/src/mem/ruby/common/DataBlock.hh +++ b/src/mem/ruby/common/DataBlock.hh @@ -61,8 +61,14 @@ class WriteMask; class DataBlock { public: - DataBlock() + // Ideally this should not be called. We allow default so that protocols + do not need to be changed. + DataBlock() = default; + + DataBlock(int blk_size) { + assert(!m_alloc); + m_block_size = blk_size; alloc(); } @@ -101,10 +107,16 @@ class DataBlock bool equal(const DataBlock& obj) const; void print(std::ostream& out) const; + int getBlockSize() const { return m_block_size; } + void setBlockSize(int block_size) { realloc(block_size); } + bool isAlloc() const { return m_alloc; } + void realloc(int blk_size); + private: void alloc(); - uint8_t *m_data; - bool m_alloc; + uint8_t *m_data = nullptr; + bool m_alloc = false; + int m_block_size = 0; // Tracks block changes when atomic ops are applied std::deque m_atomicLog; @@ -124,18 +136,21 @@ DataBlock::assign(uint8_t *data) inline uint8_t DataBlock::getByte(int whichByte) const { + assert(m_alloc); return m_data[whichByte]; } inline void DataBlock::setByte(int whichByte, uint8_t data) { + assert(m_alloc); m_data[whichByte] = data; } inline void DataBlock::copyPartial(const DataBlock & dblk, int offset, int len) { + assert(m_alloc); setData(&dblk.m_data[offset], offset, len); } diff --git a/src/mem/ruby/common/NetDest.cc b/src/mem/ruby/common/NetDest.cc index ba64f2febd..944315b97f 100644 ---
a/src/mem/ruby/common/NetDest.cc +++ b/src/mem/ruby/common/NetDest.cc @@ -30,6 +30,8 @@ #include +#include "mem/ruby/system/RubySystem.hh" + namespace gem5 { @@ -38,12 +40,18 @@ namespace ruby NetDest::NetDest() { - resize(); +} + +NetDest::NetDest(RubySystem *ruby_system) + : m_ruby_system(ruby_system) +{ + resize(); } void NetDest::add(MachineID newElement) { + assert(m_bits.size() > 0); assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize()); m_bits[vecIndex(newElement)].add(bitIndex(newElement.num)); } @@ -51,6 +59,7 @@ NetDest::add(MachineID newElement) void NetDest::addNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].addSet(netDest.m_bits[i]); @@ -60,6 +69,8 @@ NetDest::addNetDest(const NetDest& netDest) void NetDest::setNetDest(MachineType machine, const Set& set) { + assert(m_ruby_system != nullptr); + // assure that there is only one set of destinations for this machine assert(MachineType_base_level((MachineType)(machine + 1)) - MachineType_base_level(machine) == 1); @@ -69,12 +80,14 @@ NetDest::setNetDest(MachineType machine, const Set& set) void NetDest::remove(MachineID oldElement) { + assert(m_bits.size() > 0); m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num)); } void NetDest::removeNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].removeSet(netDest.m_bits[i]); @@ -84,6 +97,7 @@ NetDest::removeNetDest(const NetDest& netDest) void NetDest::clear() { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].clear(); } @@ -101,6 +115,8 @@ NetDest::broadcast() void NetDest::broadcast(MachineType machineType) { + assert(m_ruby_system != nullptr); + for (NodeID i = 0; i < MachineType_base_count(machineType); i++) { MachineID mach = {machineType, i}; add(mach); @@ -111,6 +127,9 @@ 
NetDest::broadcast(MachineType machineType) std::vector NetDest::getAllDest() { + assert(m_ruby_system != nullptr); + assert(m_bits.size() > 0); + std::vector dest; dest.clear(); for (int i = 0; i < m_bits.size(); i++) { @@ -127,6 +146,8 @@ NetDest::getAllDest() int NetDest::count() const { + assert(m_bits.size() > 0); + int counter = 0; for (int i = 0; i < m_bits.size(); i++) { counter += m_bits[i].count(); @@ -137,12 +158,14 @@ NetDest::count() const NodeID NetDest::elementAt(MachineID index) { + assert(m_bits.size() > 0); return m_bits[vecIndex(index)].elementAt(bitIndex(index.num)); } MachineID NetDest::smallestElement() const { + assert(m_bits.size() > 0); assert(count() > 0); for (int i = 0; i < m_bits.size(); i++) { for (NodeID j = 0; j < m_bits[i].getSize(); j++) { @@ -158,6 +181,9 @@ NetDest::smallestElement() const MachineID NetDest::smallestElement(MachineType machine) const { + assert(m_bits.size() > 0); + assert(m_ruby_system != nullptr); + int size = m_bits[MachineType_base_level(machine)].getSize(); for (NodeID j = 0; j < size; j++) { if (m_bits[MachineType_base_level(machine)].isElement(j)) { @@ -173,6 +199,7 @@ NetDest::smallestElement(MachineType machine) const bool NetDest::isBroadcast() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isBroadcast()) { return false; @@ -185,6 +212,7 @@ NetDest::isBroadcast() const bool NetDest::isEmpty() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isEmpty()) { return false; @@ -197,8 +225,9 @@ NetDest::isEmpty() const NetDest NetDest::OR(const NetDest& orNetDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == orNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].OR(orNetDest.m_bits[i]); } @@ -209,8 +238,9 @@ NetDest::OR(const NetDest& orNetDest) const NetDest NetDest::AND(const NetDest& andNetDest) const { + 
assert(m_bits.size() > 0); assert(m_bits.size() == andNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].AND(andNetDest.m_bits[i]); } @@ -221,6 +251,7 @@ NetDest::AND(const NetDest& andNetDest) const bool NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == other_netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].intersectionIsEmpty(other_netDest.m_bits[i])) { @@ -233,6 +264,7 @@ NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const bool NetDest::isSuperset(const NetDest& test) const { + assert(m_bits.size() > 0); assert(m_bits.size() == test.getSize()); for (int i = 0; i < m_bits.size(); i++) { @@ -246,12 +278,15 @@ NetDest::isSuperset(const NetDest& test) const bool NetDest::isElement(MachineID element) const { + assert(m_bits.size() > 0); return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num)); } void NetDest::resize() { + assert(m_ruby_system != nullptr); + m_bits.resize(MachineType_base_level(MachineType_NUM)); assert(m_bits.size() == MachineType_NUM); @@ -263,6 +298,7 @@ NetDest::resize() void NetDest::print(std::ostream& out) const { + assert(m_bits.size() > 0); out << "[NetDest (" << m_bits.size() << ") "; for (int i = 0; i < m_bits.size(); i++) { @@ -277,6 +313,7 @@ NetDest::print(std::ostream& out) const bool NetDest::isEqual(const NetDest& n) const { + assert(m_bits.size() > 0); assert(m_bits.size() == n.m_bits.size()); for (unsigned int i = 0; i < m_bits.size(); ++i) { if (!m_bits[i].isEqual(n.m_bits[i])) @@ -285,5 +322,19 @@ NetDest::isEqual(const NetDest& n) const return true; } +int +NetDest::MachineType_base_count(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(obj); +} + +int +NetDest::MachineType_base_number(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + 
return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/NetDest.hh b/src/mem/ruby/common/NetDest.hh index e71b876754..83f340a478 100644 --- a/src/mem/ruby/common/NetDest.hh +++ b/src/mem/ruby/common/NetDest.hh @@ -41,6 +41,8 @@ namespace gem5 namespace ruby { +class RubySystem; + // NetDest specifies the network destination of a Message class NetDest { @@ -48,6 +50,7 @@ class NetDest // Constructors // creates and empty set NetDest(); + NetDest(RubySystem *ruby_system); explicit NetDest(int bit_size); NetDest& operator=(const Set& obj); @@ -98,6 +101,8 @@ class NetDest void print(std::ostream& out) const; + void setRubySystem(RubySystem *rs) { m_ruby_system = rs; resize(); } + private: // returns a value >= MachineType_base_level("this machine") // and < MachineType_base_level("next highest machine") @@ -112,6 +117,12 @@ class NetDest NodeID bitIndex(NodeID index) const { return index; } std::vector m_bits; // a vector of bit vectors - i.e. 
Sets + + // Needed to call MachineType_base_count/number + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/common/SubBlock.cc b/src/mem/ruby/common/SubBlock.cc index 92cfd8b633..be0adc1233 100644 --- a/src/mem/ruby/common/SubBlock.cc +++ b/src/mem/ruby/common/SubBlock.cc @@ -38,13 +38,14 @@ namespace ruby using stl_helpers::operator<<; -SubBlock::SubBlock(Addr addr, int size) +SubBlock::SubBlock(Addr addr, int size, int cl_bits) { m_address = addr; resize(size); for (int i = 0; i < size; i++) { setByte(i, 0); } + m_cache_line_bits = cl_bits; } void @@ -52,7 +53,7 @@ SubBlock::internalMergeFrom(const DataBlock& data) { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { this->setByte(i, data.getByte(offset + i)); } @@ -63,7 +64,7 @@ SubBlock::internalMergeTo(DataBlock& data) const { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { // This will detect crossing a cache line boundary data.setByte(offset + i, this->getByte(i)); diff --git a/src/mem/ruby/common/SubBlock.hh b/src/mem/ruby/common/SubBlock.hh index e1a83600c2..3790bbac58 100644 --- a/src/mem/ruby/common/SubBlock.hh +++ b/src/mem/ruby/common/SubBlock.hh @@ -45,7 +45,7 @@ class SubBlock { public: SubBlock() { } - SubBlock(Addr addr, int size); + SubBlock(Addr addr, int size, int cl_bits); ~SubBlock() { } Addr getAddress() const { return m_address; } @@ -74,6 +74,7 @@ class SubBlock // Data Members (m_ prefix) Addr m_address; std::vector m_data; + int m_cache_line_bits; }; inline std::ostream& diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc index 1fa03c951e..f176aec9fc 100644 ---
a/src/mem/ruby/common/WriteMask.cc +++ b/src/mem/ruby/common/WriteMask.cc @@ -39,13 +39,13 @@ namespace ruby { WriteMask::WriteMask() - : mSize(RubySystem::getBlockSizeBytes()), mMask(mSize, false), - mAtomic(false) + : mSize(0), mMask(mSize, false), mAtomic(false) {} void WriteMask::print(std::ostream& out) const { + assert(mSize > 0); std::string str(mSize,'0'); for (int i = 0; i < mSize; i++) { str[i] = mMask[i] ? ('1') : ('0'); @@ -59,6 +59,7 @@ void WriteMask::performAtomic(uint8_t * p, std::deque& log, bool isAtomicNoReturn) const { + assert(mSize > 0); int offset; uint8_t *block_update; // Here, operations occur in FIFO order from the mAtomicOp diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh index 8c6b8ce976..e620997cd8 100644 --- a/src/mem/ruby/common/WriteMask.hh +++ b/src/mem/ruby/common/WriteMask.hh @@ -78,6 +78,17 @@ class WriteMask ~WriteMask() {} + int getBlockSize() const { return mSize; } + void + setBlockSize(int size) + { + // This should only be used once if the default ctor was used. Probably + // by src/mem/ruby/protocol/RubySlicc_MemControl.sm. 
+ assert(mSize == 0); + assert(size > 0); + mSize = size; + } + void clear() { @@ -87,6 +98,7 @@ class WriteMask bool test(int offset) const { + assert(mSize > 0); assert(offset < mSize); return mMask[offset]; } @@ -94,6 +106,7 @@ class WriteMask void setMask(int offset, int len, bool val = true) { + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { mMask[offset + i] = val; @@ -102,6 +115,7 @@ class WriteMask void fillMask() { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { mMask[i] = true; } @@ -111,6 +125,7 @@ class WriteMask getMask(int offset, int len) const { bool tmp = true; + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { tmp = tmp & mMask.at(offset + i); @@ -122,6 +137,7 @@ class WriteMask isOverlap(const WriteMask &readMask) const { bool tmp = false; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -135,6 +151,7 @@ class WriteMask containsMask(const WriteMask &readMask) const { bool tmp = true; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -146,6 +163,7 @@ class WriteMask bool isEmpty() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (mMask.at(i)) { return false; @@ -157,6 +175,7 @@ class WriteMask bool isFull() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (!mMask.at(i)) { return false; @@ -168,6 +187,7 @@ class WriteMask void andMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) && (writeMask.mMask.at(i)); @@ -182,6 +202,7 @@ class WriteMask void orMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) || (writeMask.mMask.at(i)); @@ -196,6 +217,7 @@ class WriteMask void setInvertedMask(const WriteMask & 
writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = !writeMask.mMask.at(i); @@ -205,6 +227,7 @@ class WriteMask int firstBitSet(bool val, int offset = 0) const { + assert(mSize > 0); for (int i = offset; i < mSize; ++i) if (mMask[i] == val) return i; @@ -214,6 +237,7 @@ class WriteMask int count(int offset = 0) const { + assert(mSize > 0); int count = 0; for (int i = offset; i < mSize; ++i) count += mMask[i]; diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc index 9a4439a538..8b3a724469 100644 --- a/src/mem/ruby/network/MessageBuffer.cc +++ b/src/mem/ruby/network/MessageBuffer.cc @@ -47,7 +47,6 @@ #include "base/random.hh" #include "base/stl_helpers.hh" #include "debug/RubyQueue.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -216,6 +215,7 @@ random_time() void MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO) { // record current time incase we have a pop that also adjusts my size @@ -237,7 +237,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, // is turned on and this buffer allows it if ((m_randomization == MessageRandomization::disabled) || ((m_randomization == MessageRandomization::ruby_system) && - !RubySystem::getRandomization())) { + !ruby_is_random)) { // No randomization arrival_time = current_time + delta; } else { @@ -265,7 +265,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, } // If running a cache trace, don't worry about the last arrival checks - if (!RubySystem::getWarmupEnabled()) { + if (!ruby_warmup) { m_last_arrival_time = arrival_time; } @@ -447,7 +447,6 @@ MessageBuffer::stallMessage(Addr addr, Tick current_time) { DPRINTF(RubyQueue, "Stalling due to %#x\n", addr); assert(isReady(current_time)); - assert(getOffset(addr) == 0); MsgPtr message = m_prio_heap.front(); // Since the message will 
just be moved to stall map, indicate that the @@ -479,7 +478,8 @@ MessageBuffer::deferEnqueueingMessage(Addr addr, MsgPtr message) } void -MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) +MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup) { assert(!isDeferredMsgMapEmpty(addr)); std::vector& msg_vec = m_deferred_msg_map[addr]; @@ -487,7 +487,7 @@ MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) // enqueue all deferred messages associated with this address for (MsgPtr m : msg_vec) { - enqueue(m, curTime, delay); + enqueue(m, curTime, delay, ruby_is_random, ruby_warmup); } msg_vec.clear(); diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh index 03a0454433..b45e531d11 100644 --- a/src/mem/ruby/network/MessageBuffer.hh +++ b/src/mem/ruby/network/MessageBuffer.hh @@ -90,13 +90,14 @@ class MessageBuffer : public SimObject Tick readyTime() const; void - delayHead(Tick current_time, Tick delta) + delayHead(Tick current_time, Tick delta, bool ruby_is_random, + bool ruby_warmup) { MsgPtr m = m_prio_heap.front(); std::pop_heap(m_prio_heap.begin(), m_prio_heap.end(), std::greater()); m_prio_heap.pop_back(); - enqueue(m, current_time, delta); + enqueue(m, current_time, delta, ruby_is_random, ruby_warmup); } bool areNSlotsAvailable(unsigned int n, Tick curTime); @@ -124,6 +125,7 @@ class MessageBuffer : public SimObject const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); } void enqueue(MsgPtr message, Tick curTime, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO = false); // Defer enqueueing a message to a later cycle by putting it aside and not @@ -135,7 +137,8 @@ class MessageBuffer : public SimObject // enqueue all previously deferred messages that are associated with the // input address - void enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay); + void 
enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup); bool isDeferredMsgMapEmpty(Addr addr) const; //! Updates the delay cycles of the message at the head of the queue, diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc index 757ed9498e..480b5bcef0 100644 --- a/src/mem/ruby/network/Network.cc +++ b/src/mem/ruby/network/Network.cc @@ -65,7 +65,8 @@ Network::Network(const Params &p) "%s: data message size > cache line size", name()); m_data_msg_size = p.data_msg_size + m_control_msg_size; - params().ruby_system->registerNetwork(this); + m_ruby_system = p.ruby_system; + m_ruby_system->registerNetwork(this); // Populate localNodeVersions with the version of each MachineType in // this network. This will be used to compute a global to local ID. @@ -102,7 +103,8 @@ Network::Network(const Params &p) m_topology_ptr = new Topology(m_nodes, p.routers.size(), m_virtual_networks, - p.ext_links, p.int_links); + p.ext_links, p.int_links, + m_ruby_system); // Allocate to and from queues // Queues that are getting messages from protocol @@ -246,7 +248,7 @@ Network::addressToNodeID(Addr addr, MachineType mtype) } } } - return MachineType_base_count(mtype); + return m_ruby_system->MachineType_base_count(mtype); } NodeID @@ -256,5 +258,23 @@ Network::getLocalNodeID(NodeID global_id) const return globalToLocalMap.at(global_id); } +bool +Network::getRandomization() const +{ + return m_ruby_system->getRandomization(); +} + +bool +Network::getWarmupEnabled() const +{ + return m_ruby_system->getWarmupEnabled(); +} + +int +Network::MachineType_base_number(const MachineType& obj) +{ + return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh index 8ca68a0279..c0d21af240 100644 --- a/src/mem/ruby/network/Network.hh +++ b/src/mem/ruby/network/Network.hh @@ -78,6 +78,7 @@ namespace ruby class NetDest; 
class MessageBuffer; +class RubySystem; class Network : public ClockedObject { @@ -147,6 +148,10 @@ class Network : public ClockedObject NodeID getLocalNodeID(NodeID global_id) const; + bool getRandomization() const; + bool getWarmupEnabled() const; + RubySystem *getRubySystem() const { return m_ruby_system; } + protected: // Private copy constructor and assignment operator Network(const Network& obj); @@ -176,6 +181,12 @@ class Network : public ClockedObject // Global NodeID to local node map. If there are not multiple networks in // the same RubySystem, this is a one-to-one mapping of global to local. std::unordered_map globalToLocalMap; + + // For accessing whether randomization/warmup are turned on. We cannot store + those values in the constructor in case we are constructed first. + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/network/Topology.cc b/src/mem/ruby/network/Topology.cc index 39444c9023..b2cd7897f8 100644 --- a/src/mem/ruby/network/Topology.cc +++ b/src/mem/ruby/network/Topology.cc @@ -37,6 +37,7 @@ #include "mem/ruby/network/BasicLink.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -56,10 +57,12 @@ const int INFINITE_LATENCY = 10000; // Yes, this is a big hack Topology::Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links) - : m_nodes(MachineType_base_number(MachineType_NUM)), + const std::vector &int_links, + RubySystem *ruby_system) + : m_nodes(ruby_system->MachineType_base_number(MachineType_NUM)), m_number_of_switches(num_routers), m_vnets(num_vnets), - m_ext_link_vector(ext_links), m_int_link_vector(int_links) + m_ext_link_vector(ext_links), m_int_link_vector(int_links), + m_ruby_system(ruby_system) { // Total nodes/controllers in network assert(m_nodes >
1); @@ -78,7 +81,8 @@ Topology::Topology(uint32_t num_nodes, uint32_t num_routers, AbstractController *abs_cntrl = ext_link->params().ext_node; BasicRouter *router = ext_link->params().int_node; - int machine_base_idx = MachineType_base_number(abs_cntrl->getType()); + int machine_base_idx = + ruby_system->MachineType_base_number(abs_cntrl->getType()); int ext_idx1 = machine_base_idx + abs_cntrl->getVersion(); int ext_idx2 = ext_idx1 + m_nodes; int int_idx = router->params().router_id + 2*m_nodes; @@ -189,7 +193,7 @@ Topology::createLinks(Network *net) for (int i = 0; i < topology_weights[0].size(); i++) { for (int j = 0; j < topology_weights[0][i].size(); j++) { std::vector routingMap; - routingMap.resize(m_vnets); + routingMap.resize(m_vnets, m_ruby_system); // Not all sources and destinations are connected // by direct links. We only construct the links @@ -264,7 +268,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtInLink(src, dest - (2 * m_nodes), link, @@ -287,7 +291,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtOutLink(src - (2 * m_nodes), node, link, @@ -309,7 +313,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeInternalLink(src - (2 * m_nodes), @@ -413,16 +417,17 @@ 
Topology::shortest_path_to_node(SwitchID src, SwitchID next, const Matrix &weights, const Matrix &dist, int vnet) { - NetDest result; + NetDest result(m_ruby_system); int d = 0; int machines; int max_machines; machines = MachineType_NUM; - max_machines = MachineType_base_number(MachineType_NUM); + max_machines = m_ruby_system->MachineType_base_number(MachineType_NUM); for (int m = 0; m < machines; m++) { - for (NodeID i = 0; i < MachineType_base_count((MachineType)m); i++) { + for (NodeID i = 0; + i < m_ruby_system->MachineType_base_count((MachineType)m); i++) { // we use "d+max_machines" below since the "destination" // switches for the machines are numbered // [MachineType_base_number(MachineType_NUM)... diff --git a/src/mem/ruby/network/Topology.hh b/src/mem/ruby/network/Topology.hh index 301811e6ab..7ab395762a 100644 --- a/src/mem/ruby/network/Topology.hh +++ b/src/mem/ruby/network/Topology.hh @@ -80,7 +80,8 @@ class Topology public: Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links); + const std::vector &int_links, + RubySystem *ruby_system); uint32_t numSwitches() const { return m_number_of_switches; } void createLinks(Network *net); @@ -108,7 +109,7 @@ class Topology const Matrix &weights, const Matrix &dist, int vnet); - const uint32_t m_nodes; + uint32_t m_nodes; const uint32_t m_number_of_switches; int m_vnets; @@ -116,6 +117,8 @@ class Topology std::vector m_int_link_vector; LinkMap m_link_map; + + RubySystem *m_ruby_system = nullptr; }; inline std::ostream& diff --git a/src/mem/ruby/network/garnet/NetworkInterface.cc b/src/mem/ruby/network/garnet/NetworkInterface.cc index 31d625c4d5..8564baca6d 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.cc +++ b/src/mem/ruby/network/garnet/NetworkInterface.cc @@ -41,6 +41,7 @@ #include "mem/ruby/network/garnet/Credit.hh" #include "mem/ruby/network/garnet/flitBuffer.hh" #include "mem/ruby/slicc_interface/Message.hh" +#include 
"mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -244,7 +245,9 @@ NetworkInterface::wakeup() outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { // Space is available. Enqueue to protocol buffer. outNode_ptr[vnet]->enqueue(t_flit->get_msg_ptr(), curTime, - cyclesToTicks(Cycles(1))); + cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Simply send a credit back since we are not buffering // this flit in the NI @@ -332,7 +335,9 @@ NetworkInterface::checkStallQueue() if (outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { outNode_ptr[vnet]->enqueue(stallFlit->get_msg_ptr(), - curTime, cyclesToTicks(Cycles(1))); + curTime, cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Send back a credit with free signal now that the // VC is no longer stalled. @@ -699,6 +704,12 @@ NetworkInterface::functionalWrite(Packet *pkt) return num_functional_writes; } +int +NetworkInterface::MachineType_base_number(const MachineType& obj) +{ + return m_net_ptr->getRubySystem()->MachineType_base_number(obj); +} + } // namespace garnet } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/garnet/NetworkInterface.hh b/src/mem/ruby/network/garnet/NetworkInterface.hh index d42db5ee2a..cd7bb3b171 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.hh +++ b/src/mem/ruby/network/garnet/NetworkInterface.hh @@ -306,6 +306,8 @@ class NetworkInterface : public ClockedObject, public Consumer InputPort *getInportForVnet(int vnet); OutputPort *getOutportForVnet(int vnet); + + int MachineType_base_number(const MachineType& obj); }; } // namespace garnet diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 74d78e3aae..20d57f04be 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -268,7 +268,8 @@ PerfectSwitch::operateMessageBuffer(MessageBuffer *buffer, int vnet) 
buffer->getIncomingLink(), vnet, outgoing, vnet); out_port.buffers[vnet]->enqueue(msg_ptr, current_time, - out_port.latency); + out_port.latency, m_switch->getNetPtr()->getRandomization(), + m_switch->getNetPtr()->getWarmupEnabled()); } } } diff --git a/src/mem/ruby/network/simple/Switch.hh b/src/mem/ruby/network/simple/Switch.hh index 86abfda871..e6e22022bc 100644 --- a/src/mem/ruby/network/simple/Switch.hh +++ b/src/mem/ruby/network/simple/Switch.hh @@ -104,6 +104,7 @@ class Switch : public BasicRouter void print(std::ostream& out) const; void init_net_ptr(SimpleNetwork* net_ptr) { m_network_ptr = net_ptr; } + SimpleNetwork* getNetPtr() const { return m_network_ptr; } bool functionalRead(Packet *); bool functionalRead(Packet *, WriteMask&); diff --git a/src/mem/ruby/network/simple/Throttle.cc b/src/mem/ruby/network/simple/Throttle.cc index 20cebccabb..fc5649330f 100644 --- a/src/mem/ruby/network/simple/Throttle.cc +++ b/src/mem/ruby/network/simple/Throttle.cc @@ -199,7 +199,9 @@ Throttle::operateVnet(int vnet, int channel, int &total_bw_remaining, // Move the message in->dequeue(current_time); out->enqueue(msg_ptr, current_time, - m_switch->cyclesToTicks(m_link_latency)); + m_switch->cyclesToTicks(m_link_latency), + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); // Count the message (*(throttleStats. 
diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index 05fc486c63..ce40c35a9f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -34,6 +34,7 @@ #include "base/stl_helpers.hh" #include "mem/ruby/profiler/Profiler.hh" #include "mem/ruby/protocol/RubyRequest.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -307,7 +308,8 @@ AddressProfiler::addTraceSample(Addr data_addr, Addr pc_addr, } // record data address trace info - data_addr = makeLineAddress(data_addr); + int block_size_bits = m_profiler->m_ruby_system->getBlockSizeBits(); + data_addr = makeLineAddress(data_addr, block_size_bits); lookupTraceForAddress(data_addr, m_dataAccessTrace). update(type, access_mode, id, sharing_miss); diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm index ca606a5921..43fb96c375 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm @@ -95,7 +95,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 5d98a73041..d1e1ffb7b0 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -121,7 +121,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; int WTcnt, default="0"; int Fcnt, default="0"; bool inFlush, default="false"; diff --git a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm 
b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm index bcf99ff362..ed5e40cfa1 100644 --- a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm index 2b5935dee5..29f6d8e87d 100644 --- a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm index 5d85ad2fc6..bac7fd1b12 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -181,7 +181,7 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -195,8 +195,8 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int 
blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm index 2464e038ff..3f1ba2540f 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm @@ -155,7 +155,7 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -169,8 +169,8 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 17a92f5f90..5b5ab3148a 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -183,7 +183,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm 
b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm index 4e9e9597aa..b53ebe8ee2 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm @@ -192,7 +192,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm index 4a513d6d3f..b6410d12e7 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm @@ -143,7 +143,7 @@ machine(MachineType:Directory, "Directory protocol") bool isPresent(Addr); } - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // ** OBJECTS ** TBETable TBEs, template="", constructor="m_number_of_TBEs"; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm index 865fce4e3c..24f8146a02 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm @@ -198,7 +198,7 @@ machine(MachineType:L1Cache, "Token protocol") TBETable L1_TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; PersistentTable persistentTable; TimerTable useTimerTable; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm index 7f2bdf94e0..8d035a61bb 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm @@ -171,7 +171,7 @@ machine(MachineType:Directory, 
"Token protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick clockEdge(Cycles c); diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index 8f0341f328..97770e3516 100644 --- a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -72,6 +72,8 @@ structure(WriteMask, external="yes", desc="...") { int count(); int count(int); bool test(int); + int getBlockSize(); + void setBlockSize(int); } structure(DataBlock, external = "yes", desc="..."){ diff --git a/src/mem/ruby/protocol/RubySlicc_MemControl.sm b/src/mem/ruby/protocol/RubySlicc_MemControl.sm index 012b169dea..848ada4d12 100644 --- a/src/mem/ruby/protocol/RubySlicc_MemControl.sm +++ b/src/mem/ruby/protocol/RubySlicc_MemControl.sm @@ -89,7 +89,9 @@ structure(MemoryMsg, desc="...", interface="Message") { if ((MessageSize == MessageSizeType:Response_Data) || (MessageSize == MessageSizeType:Writeback_Data)) { WriteMask read_mask; - read_mask.setMask(addressOffset(addr, makeLineAddress(addr)), Len, true); + read_mask.setBlockSize(mask.getBlockSize()); + read_mask.setMask(addressOffset(addr, + makeLineAddress(addr, mask.getBlockSize())), Len, true); if (MessageSize != MessageSizeType:Writeback_Data) { read_mask.setInvertedMask(mask); } diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 4e0e4f4511..848d16491d 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -94,7 +94,7 @@ structure (Set, external = "yes", non_obj="yes") { NodeID smallestElement(); } -structure (NetDest, external = "yes", non_obj="yes") { +structure (NetDest, external = "yes", non_obj="yes", implicit_ctor="m_ruby_system") { void setSize(int); void 
setSize(int, int); void add(NodeID); diff --git a/src/mem/ruby/protocol/RubySlicc_Util.sm b/src/mem/ruby/protocol/RubySlicc_Util.sm index 104c7c034c..93976bc4e1 100644 --- a/src/mem/ruby/protocol/RubySlicc_Util.sm +++ b/src/mem/ruby/protocol/RubySlicc_Util.sm @@ -52,6 +52,7 @@ Addr intToAddress(int addr); int addressOffset(Addr addr, Addr base); int max_tokens(); Addr makeLineAddress(Addr addr); +Addr makeLineAddress(Addr addr, int cacheLineBits); int getOffset(Addr addr); int mod(int val, int mod); Addr bitSelect(Addr addr, int small, int big); diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index dcd142ea47..a644bbe506 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -574,7 +574,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // CacheEntry structure(CacheEntry, interface="AbstractCacheEntry") { diff --git a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm index aa27c40964..f7616e9ec4 100644 --- a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm +++ b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm @@ -192,7 +192,7 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // Helper class for tracking expected response and data messages structure(ExpectedMap, external ="yes") { diff --git a/src/mem/ruby/protocol/chi/CHI-mem.sm b/src/mem/ruby/protocol/chi/CHI-mem.sm index 46f57456a5..58f22d2007 100644 --- a/src/mem/ruby/protocol/chi/CHI-mem.sm +++ 
b/src/mem/ruby/protocol/chi/CHI-mem.sm @@ -157,7 +157,7 @@ machine(MachineType:Memory, "Memory controller interface") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // TBE fields structure(TBE, desc="...") { diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh index 0e00a60c28..1305deddce 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh @@ -59,6 +59,8 @@ namespace gem5 namespace ruby { +class RubySystem; + class AbstractCacheEntry : public ReplaceableEntry { private: @@ -78,16 +80,15 @@ class AbstractCacheEntry : public ReplaceableEntry // The methods below are those called by ruby runtime, add when it // is absolutely necessary and should all be virtual function. - virtual DataBlock& + [[noreturn]] virtual DataBlock& getDataBlk() { panic("getDataBlk() not implemented!"); - - // Dummy return to appease the compiler - static DataBlock b; - return b; } + virtual void initBlockSize(int block_size) { }; + virtual void setRubySystem(RubySystem *rs) { }; + int validBlocks; virtual int& getNumValidBlocks() { diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 36092387ac..0bcc662629 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -89,6 +89,9 @@ AbstractController::init() getMemReqQueue()->setConsumer(this); } + downstreamDestinations.setRubySystem(m_ruby_system); + upstreamDestinations.setRubySystem(m_ruby_system); + // Initialize the addr->downstream machine mappings. Multiple machines // in downstream_destinations can have the same address range if they have // different types. 
If this is the case, mapAddressToDownstreamMachine @@ -268,7 +271,7 @@ AbstractController::serviceMemoryQueue() } const MemoryMsg *mem_msg = (const MemoryMsg*)mem_queue->peek(); - unsigned int req_size = RubySystem::getBlockSizeBytes(); + unsigned int req_size = m_ruby_system->getBlockSizeBytes(); if (mem_msg->m_Len > 0) { req_size = mem_msg->m_Len; } @@ -294,7 +297,7 @@ AbstractController::serviceMemoryQueue() SenderState *s = new SenderState(mem_msg->m_Sender); pkt->pushSenderState(s); - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { // Use functional rather than timing accesses during warmup mem_queue->dequeue(clockEdge()); memoryPort.sendFunctional(pkt); @@ -382,7 +385,10 @@ AbstractController::recvTimingResp(PacketPtr pkt) return false; } - std::shared_ptr msg = std::make_shared(clockEdge()); + int blk_size = m_ruby_system->getBlockSizeBytes(); + + std::shared_ptr msg = + std::make_shared(clockEdge(), blk_size, m_ruby_system); (*msg).m_addr = pkt->getAddr(); (*msg).m_Sender = m_machineID; @@ -396,7 +402,7 @@ AbstractController::recvTimingResp(PacketPtr pkt) // Copy data from the packet (*msg).m_DataBlk.setData(pkt->getPtr(), 0, - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); } else if (pkt->isWrite()) { (*msg).m_Type = MemoryRequestType_MEMORY_WB; (*msg).m_MessageSize = MessageSizeType_Writeback_Control; @@ -404,7 +410,8 @@ AbstractController::recvTimingResp(PacketPtr pkt) panic("Incorrect packet type received from memory controller!"); } - memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); delete pkt; return true; } @@ -471,6 +478,45 @@ AbstractController::sendRetryRespToMem() { } } +Addr +AbstractController::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr 
+AbstractController::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +AbstractController::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +NetDest +AbstractController::broadcast(MachineType type) +{ + assert(m_ruby_system != nullptr); + NodeID type_count = m_ruby_system->MachineType_base_count(type); + + NetDest dest; + for (NodeID i = 0; i < type_count; i++) { + MachineID mach = {type, i}; + dest.add(mach); + } + return dest; +} + +int +AbstractController::machineCount(MachineType machType) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(machType); +} + bool AbstractController::MemoryPort::recvTimingResp(PacketPtr pkt) { diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index ce6a6972af..79f67073a6 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -72,6 +72,7 @@ namespace ruby class Network; class GPUCoalescer; class DMASequencer; +class RubySystem; // used to communicate that an in_port peeked the wrong message type class RejectException: public std::exception @@ -229,6 +230,11 @@ class AbstractController : public ClockedObject, public Consumer /** List of upstream destinations (towards the CPU) */ const NetDest& allUpstreamDest() const { return upstreamDestinations; } + // Helper methods for commonly used functions called in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: //! Profiles original cache requests including PUTs void profileRequest(const std::string &request); @@ -452,6 +458,13 @@ class AbstractController : public ClockedObject, public Consumer {} }; + RubySystem *m_ruby_system = nullptr; + + // Formerly in RubySlicc_ComponentMapping.hh. 
Moved here to access + // RubySystem pointer. + NetDest broadcast(MachineType type); + int machineCount(MachineType machType); + private: /** The address range to which the controller responds on the CPU side. */ const AddrRangeList addrRanges; diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index 5c824c4a38..31fb5e8e92 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -62,10 +62,12 @@ typedef std::shared_ptr MsgPtr; class Message { public: - Message(Tick curTime) - : m_time(curTime), + Message(Tick curTime, int block_size, const RubySystem *rs) + : m_block_size(block_size), + m_time(curTime), m_LastEnqueueTime(curTime), - m_DelayedTicks(0), m_msg_counter(0) + m_DelayedTicks(0), m_msg_counter(0), + p_ruby_system(rs) { } Message(const Message &other) = default; @@ -121,6 +123,9 @@ class Message int getVnet() const { return vnet; } void setVnet(int net) { vnet = net; } + protected: + int m_block_size = 0; + private: Tick m_time; Tick m_LastEnqueueTime; // my last enqueue time @@ -130,6 +135,9 @@ class Message // Variables for required network traversal int incoming_link; int vnet; + + // Needed to call MacheinType_base_count/level + const RubySystem *p_ruby_system = nullptr; }; inline bool diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index a258a18f9a..58eae229be 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -86,11 +86,12 @@ class RubyRequest : public Message bool m_isSLCSet; bool m_isSecure; - RubyRequest(Tick curTime, uint64_t _paddr, int _len, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No, ContextID _proc_id = 100, ContextID _core_id = 99) - : Message(curTime), + : Message(curTime, block_size, 
rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -99,13 +100,16 @@ class RubyRequest : public Message m_Prefetch(_pb), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), m_tlbiTransactionUid(0), m_isSecure(m_pkt ? m_pkt->req->isSecure() : false) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -116,10 +120,10 @@ class RubyRequest : public Message } /** RubyRequest for memory management commands */ - RubyRequest(Tick curTime, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, ContextID _proc_id, ContextID _core_id) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(0), m_Type(_type), m_ProgramCounter(_pc), @@ -128,6 +132,8 @@ class RubyRequest : public Message m_Prefetch(PrefetchBit_No), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), @@ -144,14 +150,14 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -170,7 +176,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), 
m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -180,15 +187,15 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, std::vector< std::pair > _atomicOps, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -207,7 +214,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -218,7 +226,12 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime) : Message(curTime) {} + RubyRequest(Tick curTime, int block_size, RubySystem *rs) + : Message(curTime, block_size, rs), + m_writeMask(block_size), + m_WTData(block_size) + { + } MsgPtr clone() const { return std::shared_ptr(new RubyRequest(*this)); } diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh index 9a433d1cee..1195089fc3 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -41,17 +41,6 @@ namespace gem5 namespace ruby { -inline 
NetDest -broadcast(MachineType type) -{ - NetDest dest; - for (NodeID i = 0; i < MachineType_base_count(type); i++) { - MachineID mach = {type, i}; - dest.add(mach); - } - return dest; -} - inline MachineID mapAddressToRange(Addr addr, MachineType type, int low_bit, int num_bits, int cluster_id = 0) @@ -77,12 +66,6 @@ machineIDToMachineType(MachineID machID) return machID.type; } -inline int -machineCount(MachineType machType) -{ - return MachineType_base_count(machType); -} - inline MachineID createMachineID(MachineType type, NodeID id) { diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh index 8df56c7013..f4a49463a8 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh @@ -233,8 +233,9 @@ addressOffset(Addr addr, Addr base) inline bool testAndRead(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -259,8 +260,10 @@ testAndRead(Addr addr, DataBlock& blk, Packet *pkt) inline bool testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + assert(blk.getBlockSize() == mask.getBlockSize()); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -288,8 +291,9 @@ testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) inline bool testAndWrite(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = 
makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { const uint8_t *data = pkt->getConstPtr(); diff --git a/src/mem/ruby/structures/ALUFreeListArray.cc b/src/mem/ruby/structures/ALUFreeListArray.cc index 87b5cbfbd2..3e25e5b599 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.cc +++ b/src/mem/ruby/structures/ALUFreeListArray.cc @@ -57,10 +57,10 @@ namespace ruby * - The same line has been accessed in the past accessLatency ticks */ -ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Tick access_latency) +ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks) { this->numALUs = num_ALUs; - this->accessLatency = access_latency; + this->accessClocks = access_clocks; } bool ALUFreeListArray::tryAccess(Addr addr) @@ -85,7 +85,7 @@ bool ALUFreeListArray::tryAccess(Addr addr) } // Block access if the line is already being used - if (record.lineAddr == makeLineAddress(addr)) { + if (record.lineAddr == makeLineAddress(addr, m_block_size_bits)) { return false; } } @@ -99,7 +99,9 @@ void ALUFreeListArray::reserve(Addr addr) // the access is valid // Add record to queue - accessQueue.push_front(AccessRecord(makeLineAddress(addr), curTick())); + accessQueue.push_front( + AccessRecord(makeLineAddress(addr, m_block_size_bits), curTick()) + ); } } // namespace ruby diff --git a/src/mem/ruby/structures/ALUFreeListArray.hh b/src/mem/ruby/structures/ALUFreeListArray.hh index bed1b00b5c..5c4fdd95f9 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.hh +++ b/src/mem/ruby/structures/ALUFreeListArray.hh @@ -32,6 +32,7 @@ #include +#include "base/intmath.hh" #include "mem/ruby/common/TypeDefines.hh" #include "sim/cur_tick.hh" @@ -45,7 +46,8 @@ class ALUFreeListArray { private: unsigned int numALUs; - Tick accessLatency; 
+ Cycles accessClocks; + Tick accessLatency = 0; class AccessRecord { @@ -62,14 +64,33 @@ class ALUFreeListArray // Queue of accesses from past accessLatency cycles std::deque accessQueue; + int m_block_size_bits = 0; + public: - ALUFreeListArray(unsigned int num_ALUs, Tick access_latency); + ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks); bool tryAccess(Addr addr); void reserve(Addr addr); - Tick getLatency() const { return accessLatency; } + Tick + getLatency() const + { + assert(accessLatency > 0); + return accessLatency; + } + + void + setClockPeriod(Tick clockPeriod) + { + accessLatency = accessClocks * clockPeriod; + } + + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } }; } // namespace ruby diff --git a/src/mem/ruby/structures/BankedArray.cc b/src/mem/ruby/structures/BankedArray.cc index 0f01d5c396..2c2202dec5 100644 --- a/src/mem/ruby/structures/BankedArray.cc +++ b/src/mem/ruby/structures/BankedArray.cc @@ -42,8 +42,7 @@ namespace ruby { BankedArray::BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs) - : m_ruby_system(rs) + unsigned int startIndexBit) { this->banks = banks; this->accessLatency = accessLatency; @@ -78,6 +77,8 @@ BankedArray::reserve(int64_t idx) if (accessLatency == 0) return; + assert(clockPeriod > 0); + unsigned int bank = mapIndexToBank(idx); assert(bank < banks); @@ -95,7 +96,7 @@ BankedArray::reserve(int64_t idx) busyBanks[bank].idx = idx; busyBanks[bank].startAccess = curTick(); busyBanks[bank].endAccess = curTick() + - (accessLatency-1) * m_ruby_system->clockPeriod(); + (accessLatency-1) * clockPeriod; } unsigned int diff --git a/src/mem/ruby/structures/BankedArray.hh b/src/mem/ruby/structures/BankedArray.hh index c757759296..ecc984a617 100644 --- a/src/mem/ruby/structures/BankedArray.hh +++ b/src/mem/ruby/structures/BankedArray.hh @@ -48,6 +48,7 @@ class BankedArray private: unsigned int banks; Cycles accessLatency; + Tick 
clockPeriod = 0; unsigned int bankBits; unsigned int startIndexBit; RubySystem *m_ruby_system; @@ -69,7 +70,7 @@ class BankedArray public: BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs); + unsigned int startIndexBit); // Note: We try the access based on the cache index, not the address // This is so we don't get aliasing on blocks being replaced @@ -78,6 +79,8 @@ class BankedArray void reserve(int64_t idx); Cycles getLatency() const { return accessLatency; } + + void setClockPeriod(Tick _clockPeriod) { clockPeriod = _clockPeriod; } }; } // namespace ruby diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 90d67fb29b..6bc35bac7d 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -69,12 +69,9 @@ operator<<(std::ostream& out, const CacheMemory& obj) CacheMemory::CacheMemory(const Params &p) : SimObject(p), - dataArray(p.dataArrayBanks, p.dataAccessLatency, - p.start_index_bit, p.ruby_system), - tagArray(p.tagArrayBanks, p.tagAccessLatency, - p.start_index_bit, p.ruby_system), - atomicALUArray(p.atomicALUs, p.atomicLatency * - p.ruby_system->clockPeriod()), + dataArray(p.dataArrayBanks, p.dataAccessLatency, p.start_index_bit), + tagArray(p.tagArrayBanks, p.tagAccessLatency, p.start_index_bit), + atomicALUArray(p.atomicALUs, p.atomicLatency), cacheMemoryStats(this) { m_cache_size = p.size; @@ -88,12 +85,25 @@ CacheMemory::CacheMemory(const Params &p) m_replacementPolicy_ptr) ? 
true : false; } +void +CacheMemory::setRubySystem(RubySystem* rs) +{ + dataArray.setClockPeriod(rs->clockPeriod()); + tagArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setBlockSize(rs->getBlockSizeBytes()); + + if (m_block_size == 0) { + m_block_size = rs->getBlockSizeBytes(); + } + + m_ruby_system = rs; +} + void CacheMemory::init() { - if (m_block_size == 0) { - m_block_size = RubySystem::getBlockSizeBytes(); - } + assert(m_block_size != 0); m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size; assert(m_cache_num_sets > 1); m_cache_num_set_bits = floorLog2(m_cache_num_sets); @@ -286,6 +296,9 @@ CacheMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(cacheAvail(address)); DPRINTF(RubyCache, "allocating address: %#x\n", address); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); + // Find the first open slot int64_t cacheSet = addressToCacheSet(address); std::vector &set = m_cache[cacheSet]; diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index de7c327f63..912ae22d1f 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -154,6 +154,8 @@ class CacheMemory : public SimObject void htmAbortTransaction(); void htmCommitTransaction(); + void setRubySystem(RubySystem* rs); + public: int getCacheSize() const { return m_cache_size; } int getCacheAssoc() const { return m_cache_assoc; } @@ -213,6 +215,14 @@ class CacheMemory : public SimObject */ bool m_use_occupancy; + RubySystem *m_ruby_system = nullptr; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, floorLog2(m_block_size)); + } + private: struct CacheMemoryStats : public statistics::Group { diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc index 620254b82c..7469f72451 100644 --- a/src/mem/ruby/structures/DirectoryMemory.cc +++ 
b/src/mem/ruby/structures/DirectoryMemory.cc @@ -64,12 +64,14 @@ DirectoryMemory::DirectoryMemory(const Params &p) } m_size_bits = floorLog2(m_size_bytes); m_num_entries = 0; + m_block_size = p.block_size; + m_ruby_system = p.ruby_system; } void DirectoryMemory::init() { - m_num_entries = m_size_bytes / RubySystem::getBlockSizeBytes(); + m_num_entries = m_size_bytes / m_block_size; m_entries = new AbstractCacheEntry*[m_num_entries]; for (int i = 0; i < m_num_entries; i++) m_entries[i] = NULL; @@ -108,7 +110,7 @@ DirectoryMemory::mapAddressToLocalIdx(Addr address) } ret += r.size(); } - return ret >> RubySystem::getBlockSizeBits(); + return ret >> (floorLog2(m_block_size)); } AbstractCacheEntry* @@ -133,6 +135,8 @@ DirectoryMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(idx < m_num_entries); assert(m_entries[idx] == NULL); entry->changePermission(AccessPermission_Read_Only); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); m_entries[idx] = entry; return entry; diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh index 8a4532864d..6e77e2a4ca 100644 --- a/src/mem/ruby/structures/DirectoryMemory.hh +++ b/src/mem/ruby/structures/DirectoryMemory.hh @@ -104,6 +104,9 @@ class DirectoryMemory : public SimObject uint64_t m_size_bytes; uint64_t m_size_bits; uint64_t m_num_entries; + uint32_t m_block_size; + + RubySystem *m_ruby_system = nullptr; /** * The address range for which the directory responds. Normally diff --git a/src/mem/ruby/structures/DirectoryMemory.py b/src/mem/ruby/structures/DirectoryMemory.py index 85f05367cf..202617bceb 100644 --- a/src/mem/ruby/structures/DirectoryMemory.py +++ b/src/mem/ruby/structures/DirectoryMemory.py @@ -49,3 +49,7 @@ class RubyDirectoryMemory(SimObject): addr_ranges = VectorParam.AddrRange( Parent.addr_ranges, "Address range this directory responds to" ) + block_size = Param.UInt32( + "Size of a block in bytes. 
Usually same as cache line size." + ) + ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/PerfectCacheMemory.hh b/src/mem/ruby/structures/PerfectCacheMemory.hh index 664d10f202..0966ca80d2 100644 --- a/src/mem/ruby/structures/PerfectCacheMemory.hh +++ b/src/mem/ruby/structures/PerfectCacheMemory.hh @@ -74,6 +74,8 @@ class PerfectCacheMemory public: PerfectCacheMemory(); + void setBlockSize(const int block_size) { m_block_size = block_size; } + // tests to see if an address is present in the cache bool isTagPresent(Addr address) const; @@ -108,6 +110,8 @@ class PerfectCacheMemory // Data Members (m_prefix) std::unordered_map > m_map; + + int m_block_size = 0; }; template @@ -130,7 +134,7 @@ template inline bool PerfectCacheMemory::isTagPresent(Addr address) const { - return m_map.count(makeLineAddress(address)) > 0; + return m_map.count(makeLineAddress(address, floorLog2(m_block_size))) > 0; } template @@ -149,7 +153,8 @@ PerfectCacheMemory::allocate(Addr address) PerfectCacheLineState line_state; line_state.m_permission = AccessPermission_Invalid; line_state.m_entry = ENTRY(); - m_map[makeLineAddress(address)] = line_state; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + m_map.emplace(line_addr, line_state); } // deallocate entry @@ -157,7 +162,8 @@ template inline void PerfectCacheMemory::deallocate(Addr address) { - [[maybe_unused]] auto num_erased = m_map.erase(makeLineAddress(address)); + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + [[maybe_unused]] auto num_erased = m_map.erase(line_addr); assert(num_erased == 1); } @@ -175,7 +181,8 @@ template inline ENTRY* PerfectCacheMemory::lookup(Addr address) { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } // looks an address up in the cache @@ -183,14 +190,16 @@ template inline const ENTRY* PerfectCacheMemory::lookup(Addr 
address) const { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } template inline AccessPermission PerfectCacheMemory::getPermission(Addr address) const { - return m_map[makeLineAddress(address)].m_permission; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return m_map[line_addr].m_permission; } template @@ -198,8 +207,8 @@ inline void PerfectCacheMemory::changePermission(Addr address, AccessPermission new_perm) { - Addr line_address = makeLineAddress(address); - PerfectCacheLineState& line_state = m_map[line_address]; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + PerfectCacheLineState& line_state = m_map[line_addr]; line_state.m_permission = new_perm; } diff --git a/src/mem/ruby/structures/PersistentTable.hh b/src/mem/ruby/structures/PersistentTable.hh index 5382269273..1162e1dda1 100644 --- a/src/mem/ruby/structures/PersistentTable.hh +++ b/src/mem/ruby/structures/PersistentTable.hh @@ -63,6 +63,12 @@ class PersistentTable // Destructor ~PersistentTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + // Public Methods void persistentRequestLock(Addr address, MachineID locker, AccessType type); @@ -82,9 +88,17 @@ class PersistentTable PersistentTable(const PersistentTable& obj); PersistentTable& operator=(const PersistentTable& obj); + int m_block_size_bits = 0; + // Data Members (m_prefix) typedef std::unordered_map AddressMap; AddressMap m_map; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, m_block_size_bits); + } }; inline std::ostream& diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py index 2f457f5c4a..4b1023fc61 100644 --- a/src/mem/ruby/structures/RubyCache.py +++ b/src/mem/ruby/structures/RubyCache.py @@ -54,4 +54,3 @@ class RubyCache(SimObject): dataAccessLatency = 
Param.Cycles(1, "cycles for a data array access") tagAccessLatency = Param.Cycles(1, "cycles for a tag array access") resourceStalls = Param.Bool(False, "stall if there is a resource failure") - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/RubyPrefetcher.cc b/src/mem/ruby/structures/RubyPrefetcher.cc index e45eff2c2f..bffcfe2327 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.cc +++ b/src/mem/ruby/structures/RubyPrefetcher.cc @@ -56,13 +56,15 @@ namespace ruby RubyPrefetcher::RubyPrefetcher(const Params &p) : SimObject(p), m_num_streams(p.num_streams), - m_array(p.num_streams), m_train_misses(p.train_misses), + m_array(p.num_streams, p.block_size), m_train_misses(p.train_misses), m_num_startup_pfs(p.num_startup_pfs), unitFilter(p.unit_filter), negativeFilter(p.unit_filter), nonUnitFilter(p.nonunit_filter), m_prefetch_cross_pages(p.cross_page), pageShift(p.page_shift), + m_block_size_bits(floorLog2(p.block_size)), + m_block_size_bytes(p.block_size), rubyPrefetcherStats(this) { assert(m_num_streams > 0); @@ -90,7 +92,7 @@ void RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) { DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); - Addr line_addr = makeLineAddress(address); + Addr line_addr = makeLineAddress(address, m_block_size_bits); rubyPrefetcherStats.numMissObserved++; // check to see if we have already issued a prefetch for this block @@ -214,7 +216,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // initialize the stream prefetcher PrefetchEntry *mystream = &(m_array[index]); - mystream->m_address = makeLineAddress(address); + mystream->m_address = makeLineAddress(address, m_block_size_bits); mystream->m_stride = stride; mystream->m_use_time = m_controller->curCycle(); mystream->m_is_valid = true; @@ -222,7 +224,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // create a number of initial prefetches for this stream Addr page_addr = 
pageAddress(mystream->m_address); - Addr line_addr = makeLineAddress(mystream->m_address); + Addr line_addr = makeLineAddress(mystream->m_address, m_block_size_bits); // insert a number of prefetches into the prefetch table for (int k = 0; k < m_num_startup_pfs; k++) { @@ -312,8 +314,7 @@ RubyPrefetcher::accessNonunitFilter(Addr line_addr, // This stride HAS to be the multiplicative constant of // dataBlockBytes (bc makeNextStrideAddress is // calculated based on this multiplicative constant!) - const int stride = entry.stride / - RubySystem::getBlockSizeBytes(); + const int stride = entry.stride / m_block_size_bytes; // clear this filter entry entry.clear(); diff --git a/src/mem/ruby/structures/RubyPrefetcher.hh b/src/mem/ruby/structures/RubyPrefetcher.hh index 51e1b3c480..5627410713 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.hh +++ b/src/mem/ruby/structures/RubyPrefetcher.hh @@ -68,10 +68,10 @@ class PrefetchEntry { public: /// constructor - PrefetchEntry() + PrefetchEntry(int block_size) { // default: 1 cache-line stride - m_stride = (1 << RubySystem::getBlockSizeBits()); + m_stride = (1 << floorLog2(block_size)); m_use_time = Cycles(0); m_is_valid = false; } @@ -239,6 +239,16 @@ class RubyPrefetcher : public SimObject const unsigned pageShift; + int m_block_size_bits = 0; + int m_block_size_bytes = 0; + + Addr + makeNextStrideAddress(Addr addr, int stride) const + { + return ruby::makeNextStrideAddress(addr, stride, + m_block_size_bytes); + } + struct RubyPrefetcherStats : public statistics::Group { RubyPrefetcherStats(statistics::Group *parent); diff --git a/src/mem/ruby/structures/RubyPrefetcher.py b/src/mem/ruby/structures/RubyPrefetcher.py index d4189ae7d5..155b7c314d 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.py +++ b/src/mem/ruby/structures/RubyPrefetcher.py @@ -62,6 +62,9 @@ class RubyPrefetcher(SimObject): page_shift = Param.UInt32( 12, "Number of bits to mask to get a page number" ) + block_size = Param.UInt32( + "Size of block to 
prefetch, usually cache line size" + ) class Prefetcher(RubyPrefetcher): diff --git a/src/mem/ruby/structures/RubyPrefetcherProxy.cc b/src/mem/ruby/structures/RubyPrefetcherProxy.cc index 2a29fbc88e..a6fed8258c 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.cc +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.cc @@ -66,7 +66,7 @@ RubyPrefetcherProxy::RubyPrefetcherProxy(AbstractController* _parent, prefetcher->setParentInfo( cacheCntrl->params().system, cacheCntrl->getProbeManager(), - RubySystem::getBlockSizeBytes()); + cacheCntrl->m_ruby_system->getBlockSizeBytes()); } } @@ -112,7 +112,7 @@ RubyPrefetcherProxy::issuePrefetch() if (pkt) { DPRINTF(HWPrefetch, "Next prefetch ready %s\n", pkt->print()); - unsigned blk_size = RubySystem::getBlockSizeBytes(); + unsigned blk_size = cacheCntrl->m_ruby_system->getBlockSizeBytes(); Addr line_addr = pkt->getBlockAddr(blk_size); if (issuedPfPkts.count(line_addr) == 0) { @@ -126,6 +126,8 @@ RubyPrefetcherProxy::issuePrefetch() std::shared_ptr msg = std::make_shared(cacheCntrl->clockEdge(), + blk_size, + cacheCntrl->m_ruby_system, pkt->getAddr(), blk_size, 0, // pc @@ -136,7 +138,10 @@ RubyPrefetcherProxy::issuePrefetch() // enqueue request into prefetch queue to the cache pfQueue->enqueue(msg, cacheCntrl->clockEdge(), - cacheCntrl->cyclesToTicks(Cycles(1))); + cacheCntrl->cyclesToTicks(Cycles(1)), + cacheCntrl->m_ruby_system->getRandomization(), + cacheCntrl->m_ruby_system->getWarmupEnabled() + ); // track all pending PF requests issuedPfPkts[line_addr] = pkt; @@ -230,5 +235,19 @@ RubyPrefetcherProxy::regProbePoints() cacheCntrl->getProbeManager(), "Data Update"); } +Addr +RubyPrefetcherProxy::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPrefetcherProxy::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git 
a/src/mem/ruby/structures/RubyPrefetcherProxy.hh b/src/mem/ruby/structures/RubyPrefetcherProxy.hh index 34c40154b6..e7c044edf8 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.hh +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.hh @@ -142,6 +142,9 @@ class RubyPrefetcherProxy : public CacheAccessor, public Named */ ProbePointArg *ppDataUpdate; + Addr makeLineAddress(Addr addr) const; + Addr getOffset(Addr addr) const; + public: /** Accessor functions */ diff --git a/src/mem/ruby/structures/TBETable.hh b/src/mem/ruby/structures/TBETable.hh index 9030d52d9f..72770ce42f 100644 --- a/src/mem/ruby/structures/TBETable.hh +++ b/src/mem/ruby/structures/TBETable.hh @@ -70,6 +70,8 @@ class TBETable return (m_number_of_TBEs - m_map.size()) >= n; } + void setBlockSize(const int block_size) { m_block_size = block_size; } + ENTRY *getNullEntry(); ENTRY *lookup(Addr address); @@ -85,7 +87,8 @@ class TBETable std::unordered_map m_map; private: - int m_number_of_TBEs; + int m_number_of_TBEs = 0; + int m_block_size = 0; }; template @@ -101,7 +104,7 @@ template inline bool TBETable::isPresent(Addr address) const { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, floorLog2(m_block_size))); assert(m_map.size() <= m_number_of_TBEs); return !!m_map.count(address); } @@ -112,7 +115,8 @@ TBETable::allocate(Addr address) { assert(!isPresent(address)); assert(m_map.size() < m_number_of_TBEs); - m_map[address] = ENTRY(); + assert(m_block_size > 0); + m_map.emplace(address, ENTRY(m_block_size)); } template diff --git a/src/mem/ruby/structures/TimerTable.cc b/src/mem/ruby/structures/TimerTable.cc index f8f24dbfc0..a9ce92252e 100644 --- a/src/mem/ruby/structures/TimerTable.cc +++ b/src/mem/ruby/structures/TimerTable.cc @@ -70,7 +70,7 @@ TimerTable::nextAddress() const void TimerTable::set(Addr address, Tick ready_time) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); 
assert(!m_map.count(address)); m_map[address] = ready_time; @@ -87,7 +87,7 @@ TimerTable::set(Addr address, Tick ready_time) void TimerTable::unset(Addr address) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); assert(m_map.count(address)); m_map.erase(address); diff --git a/src/mem/ruby/structures/TimerTable.hh b/src/mem/ruby/structures/TimerTable.hh index e676359fd4..92c485ab57 100644 --- a/src/mem/ruby/structures/TimerTable.hh +++ b/src/mem/ruby/structures/TimerTable.hh @@ -48,6 +48,12 @@ class TimerTable public: TimerTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + void setConsumer(Consumer* consumer_ptr) { @@ -88,6 +94,8 @@ class TimerTable //! Consumer to signal a wakeup() Consumer* m_consumer_ptr; + int m_block_size_bits = 0; + std::string m_name; }; diff --git a/src/mem/ruby/structures/WireBuffer.cc b/src/mem/ruby/structures/WireBuffer.cc index a839fe7cc7..3ebbe2a305 100644 --- a/src/mem/ruby/structures/WireBuffer.cc +++ b/src/mem/ruby/structures/WireBuffer.cc @@ -36,7 +36,6 @@ #include "base/cprintf.hh" #include "base/stl_helpers.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -74,7 +73,8 @@ WireBuffer::~WireBuffer() } void -WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) +WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool /*ruby_is_random*/, bool /*ruby_warmup*/) { m_msg_counter++; Tick arrival_time = current_time + delta; diff --git a/src/mem/ruby/structures/WireBuffer.hh b/src/mem/ruby/structures/WireBuffer.hh index b26043b09a..75dfc154c8 100644 --- a/src/mem/ruby/structures/WireBuffer.hh +++ b/src/mem/ruby/structures/WireBuffer.hh @@ -78,7 +78,10 @@ class WireBuffer : public SimObject void setDescription(const std::string& name) { m_description = name; }; std::string getDescription() { return m_description; }; - void enqueue(MsgPtr message, Tick current_time, Tick delta); + // 
ruby_is_random and ruby_warmup are not used, but this method signature + // must match that of MessageBuffer. + void enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random = false, bool ruby_warmup = false); void dequeue(Tick current_time); const Message* peek(); void recycle(Tick current_time, Tick recycle_latency); diff --git a/src/mem/ruby/structures/WireBuffer.py b/src/mem/ruby/structures/WireBuffer.py index ca67e7cb31..8cb2cfe4d6 100644 --- a/src/mem/ruby/structures/WireBuffer.py +++ b/src/mem/ruby/structures/WireBuffer.py @@ -35,5 +35,3 @@ class RubyWireBuffer(SimObject): type = "RubyWireBuffer" cxx_class = "gem5::ruby::WireBuffer" cxx_header = "mem/ruby/structures/WireBuffer.hh" - - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc index 3326856849..426c604cb0 100644 --- a/src/mem/ruby/system/CacheRecorder.cc +++ b/src/mem/ruby/system/CacheRecorder.cc @@ -49,31 +49,25 @@ TraceRecord::print(std::ostream& out) const << m_type << ", Time: " << m_time << "]"; } -CacheRecorder::CacheRecorder() - : m_uncompressed_trace(NULL), - m_uncompressed_trace_size(0), - m_block_size_bytes(RubySystem::getBlockSizeBytes()) -{ -} - CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes) + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes) : m_uncompressed_trace(uncompressed_trace), m_uncompressed_trace_size(uncompressed_trace_size), m_ruby_port_map(ruby_port_map), m_bytes_read(0), m_records_read(0), m_records_flushed(0), - m_block_size_bytes(block_size_bytes) + m_block_size_bytes(trace_block_size_bytes) { if (m_uncompressed_trace != NULL) { - if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) { + if (m_block_size_bytes < system_block_size_bytes) { // Block sizes larger than when the trace was recorded are not // supported, as we cannot reliably turn 
accesses to smaller blocks // into larger ones. panic("Recorded cache block size (%d) < current block size (%d) !!", - m_block_size_bytes, RubySystem::getBlockSizeBytes()); + m_block_size_bytes, system_block_size_bytes); } } } @@ -125,7 +119,7 @@ CacheRecorder::enqueueNextFetchRequest() DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord); for (int rec_bytes_read = 0; rec_bytes_read < m_block_size_bytes; - rec_bytes_read += RubySystem::getBlockSizeBytes()) { + rec_bytes_read += m_block_size_bytes) { RequestPtr req; MemCmd::Command requestType; @@ -133,19 +127,19 @@ CacheRecorder::enqueueNextFetchRequest() requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } else if (traceRecord->m_type == RubyRequestType_IFETCH) { requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), + m_block_size_bytes, Request::INST_FETCH, Request::funcRequestorId); } else { requestType = MemCmd::WriteReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh index 021da6a4da..982e8b0592 100644 --- a/src/mem/ruby/system/CacheRecorder.hh +++ b/src/mem/ruby/system/CacheRecorder.hh @@ -73,13 +73,15 @@ class TraceRecord class CacheRecorder { public: - CacheRecorder(); - ~CacheRecorder(); - + // Construction requires block size. 
+ CacheRecorder() = delete; CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes); + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes); + ~CacheRecorder(); + void addRecord(int cntrl, Addr data_addr, Addr pc_addr, RubyRequestType type, Tick time, DataBlock& data); diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index aa3fc66814..cd9d62d12a 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -73,7 +73,7 @@ void DMASequencer::init() { RubyPort::init(); - m_data_block_mask = mask(RubySystem::getBlockSizeBits()); + m_data_block_mask = mask(m_ruby_system->getBlockSizeBits()); } RequestStatus @@ -110,8 +110,10 @@ DMASequencer::makeRequest(PacketPtr pkt) DPRINTF(RubyDma, "DMA req created: addr %p, len %d\n", line_addr, len); + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = paddr; msg->getLineAddress() = line_addr; @@ -145,8 +147,8 @@ DMASequencer::makeRequest(PacketPtr pkt) int offset = paddr & m_data_block_mask; - msg->getLen() = (offset + len) <= RubySystem::getBlockSizeBytes() ? - len : RubySystem::getBlockSizeBytes() - offset; + msg->getLen() = (offset + len) <= m_ruby_system->getBlockSizeBytes() ? 
+ len : m_ruby_system->getBlockSizeBytes() - offset; if (write && (data != NULL)) { if (active_request.data != NULL) { @@ -157,7 +159,8 @@ DMASequencer::makeRequest(PacketPtr pkt) m_outstanding_count++; assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); return RequestStatus_Issued; @@ -183,8 +186,10 @@ DMASequencer::issueNext(const Addr& address) return; } + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = active_request.start_paddr + active_request.bytes_completed; @@ -196,9 +201,9 @@ DMASequencer::issueNext(const Addr& address) msg->getLen() = (active_request.len - - active_request.bytes_completed < RubySystem::getBlockSizeBytes() ? + active_request.bytes_completed < m_ruby_system->getBlockSizeBytes() ? active_request.len - active_request.bytes_completed : - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); if (active_request.write) { msg->getDataBlk(). 
@@ -207,7 +212,8 @@ DMASequencer::issueNext(const Addr& address) } assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); DPRINTF(RubyDma, "DMA request bytes issued %d, bytes completed %d, total len %d\n", diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 072c63efd7..4d66dc6c1b 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -142,8 +142,8 @@ UncoalescedTable::updateResources() // are accessed directly using the makeRequest() command // instead of accessing through the port. This makes // sending tokens through the port unnecessary - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!coalescer->getRubySystem()->getWarmupEnabled() && + !coalescer->getRubySystem()->getCooldownEnabled()) { if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) { DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num); @@ -177,7 +177,7 @@ UncoalescedTable::printRequestTable(std::stringstream& ss) ss << "Listing pending packets from " << instMap.size() << " instructions"; for (auto& inst : instMap) { - ss << "\tAddr: " << printAddress(inst.first) << " with " + ss << "\tAddr: " << coalescer->printAddress(inst.first) << " with " << inst.second.size() << " pending packets" << std::endl; } } @@ -590,7 +590,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, // When the Ruby system is cooldown phase, the requests come from // the cache recorder. These requests do not get coalesced and // do not return valid data. 
- if (RubySystem::getCooldownEnabled()) + if (m_ruby_system->getCooldownEnabled()) continue; if (pkt->getPtr()) { @@ -700,8 +700,8 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // When Ruby is in warmup or cooldown phase, the requests come from // the cache recorder. There is no dynamic instruction associated // with these requests either - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { if (!m_usingRubyTester) { num_packets = 0; for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { @@ -985,8 +985,8 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -1015,9 +1015,9 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 42efe41cb7..08412baad1 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -341,6 +341,8 @@ class GPUCoalescer : public RubyPort void insertKernel(int wavefront_id, PacketPtr pkt); + RubySystem *getRubySystem() { return m_ruby_system; } + GMTokenPort& getGMTokenPort() { return gmTokenPort; } statistics::Histogram& 
getOutstandReqHist() { return m_outstandReqHist; } diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 2630a6a27c..127f3c7802 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -326,6 +326,8 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) panic("Ruby supports atomic accesses only in noncaching mode\n"); } + RubySystem *rs = owner.m_ruby_system; + // Check for pio requests and directly send them to the dedicated // pio port. if (pkt->cmd != MemCmd::MemSyncReq) { @@ -343,12 +345,11 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) return owner.ticksToCycles(req_ticks); } - assert(getOffset(pkt->getAddr()) + pkt->getSize() <= - RubySystem::getBlockSizeBytes()); + assert(owner.getOffset(pkt->getAddr()) + pkt->getSize() <= + rs->getBlockSizeBytes()); } // Find the machine type of memory controller interface - RubySystem *rs = owner.m_ruby_system; static int mem_interface_type = -1; if (mem_interface_type == -1) { if (rs->m_abstract_controls[MachineType_Directory].size() != 0) { @@ -404,7 +405,7 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt) } assert(pkt->getAddr() + pkt->getSize() <= - makeLineAddress(pkt->getAddr()) + RubySystem::getBlockSizeBytes()); + owner.makeLineAddress(pkt->getAddr()) + rs->getBlockSizeBytes()); if (access_backing_store) { // The attached physmem contains the official version of data. @@ -501,7 +502,7 @@ RubyPort::ruby_stale_translation_callback(Addr txnId) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? 
auto request = std::make_shared( - 0, RubySystem::getBlockSizeBytes(), Request::TLBI_EXT_SYNC, + 0, m_ruby_system->getBlockSizeBytes(), Request::TLBI_EXT_SYNC, Request::funcRequestorId); // Store the txnId in extraData instead of the address request->setExtraData(txnId); @@ -701,7 +702,7 @@ RubyPort::ruby_eviction_callback(Addr address) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? auto request = std::make_shared( - address, RubySystem::getBlockSizeBytes(), 0, + address, m_ruby_system->getBlockSizeBytes(), 0, Request::funcRequestorId); // Use a single packet to signal all snooping ports of the invalidation. @@ -739,5 +740,23 @@ RubyPort::functionalWrite(Packet *func_pkt) return num_written; } +Addr +RubyPort::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPort::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +RubyPort::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 66fe0a7686..39535930b3 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -181,6 +181,11 @@ class RubyPort : public ClockedObject virtual int functionalWrite(Packet *func_pkt); + // Helper methods wrapping commonly used functions declared in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index 21062eac14..fd7b262cb1 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -66,15 +66,8 @@ namespace gem5
namespace ruby { -bool RubySystem::m_randomization; -uint32_t RubySystem::m_block_size_bytes; -uint32_t RubySystem::m_block_size_bits; -uint32_t RubySystem::m_memory_size_bits; -bool RubySystem::m_warmup_enabled = false; // To look forward to allowing multiple RubySystem instances, track the number // of RubySystems that need to be warmed up on checkpoint restore. -unsigned RubySystem::m_systems_to_warmup = 0; -bool RubySystem::m_cooldown_enabled = false; RubySystem::RubySystem(const Params &p) : ClockedObject(p), m_access_backing_store(p.access_backing_store), @@ -212,8 +205,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, // Create the CacheRecorder and record the cache trace m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, - ruby_port_map, - block_size_bytes); + ruby_port_map, block_size_bytes, + m_block_size_bytes); } void @@ -331,7 +324,7 @@ RubySystem::serialize(CheckpointOut &cp) const // Store the cache-block size, so we are able to restore on systems // with a different cache-block size. CacheRecorder depends on the // correct cache-block size upon unserializing. - uint64_t block_size_bytes = getBlockSizeBytes(); + uint64_t block_size_bytes = m_block_size_bytes; SERIALIZE_SCALAR(block_size_bytes); // Check that there's a valid trace to use. If not, then memory won't @@ -416,7 +409,6 @@ RubySystem::unserialize(CheckpointIn &cp) readCompressedTrace(cache_trace_file, uncompressed_trace, cache_trace_size); m_warmup_enabled = true; - m_systems_to_warmup++; // Create the cache recorder that will hang around until startup. 
makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes); @@ -467,10 +459,7 @@ RubySystem::startup() delete m_cache_recorder; m_cache_recorder = NULL; - m_systems_to_warmup--; - if (m_systems_to_warmup == 0) { - m_warmup_enabled = false; - } + m_warmup_enabled = false; // Restore eventq head eventq->replaceHead(eventq_head); @@ -509,7 +498,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; @@ -625,7 +614,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); DPRINTF(RubySystem, "Functional Read request for %#x\n", address); @@ -726,7 +715,7 @@ bool RubySystem::functionalWrite(PacketPtr pkt) { Addr addr(pkt->getAddr()); - Addr line_addr = makeLineAddress(addr); + Addr line_addr = makeLineAddress(addr, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; DPRINTF(RubySystem, "Functional Write request for %#x\n", addr); diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index e16d699204..7e18770230 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -68,12 +68,12 @@ class RubySystem : public ClockedObject ~RubySystem(); // config accessors - static int getRandomization() { return m_randomization; } - static uint32_t getBlockSizeBytes() { return m_block_size_bytes; } - static uint32_t getBlockSizeBits() { return m_block_size_bits; } - static uint32_t getMemorySizeBits() { return m_memory_size_bits; } - static bool getWarmupEnabled() { return m_warmup_enabled; } - static bool getCooldownEnabled() { return m_cooldown_enabled; } + int getRandomization() { return m_randomization; } + uint32_t getBlockSizeBytes() { 
return m_block_size_bytes; } + uint32_t getBlockSizeBits() { return m_block_size_bits; } + uint32_t getMemorySizeBits() { return m_memory_size_bits; } + bool getWarmupEnabled() { return m_warmup_enabled; } + bool getCooldownEnabled() { return m_cooldown_enabled; } memory::SimpleMemory *getPhysMem() { return m_phys_mem; } Cycles getStartCycle() { return m_start_cycle; } @@ -134,14 +134,13 @@ class RubySystem : public ClockedObject void processRubyEvent(); private: // configuration parameters - static bool m_randomization; - static uint32_t m_block_size_bytes; - static uint32_t m_block_size_bits; - static uint32_t m_memory_size_bits; + bool m_randomization; + uint32_t m_block_size_bytes; + uint32_t m_block_size_bits; + uint32_t m_memory_size_bits; - static bool m_warmup_enabled; - static unsigned m_systems_to_warmup; - static bool m_cooldown_enabled; + bool m_warmup_enabled = false; + bool m_cooldown_enabled = false; memory::SimpleMemory *m_phys_mem; const bool m_access_backing_store; @@ -158,6 +157,11 @@ class RubySystem : public ClockedObject Profiler* m_profiler; CacheRecorder* m_cache_recorder; std::vector > m_abstract_controls; + std::map m_num_controllers; + + // These are auto-generated by SLICC based on the built protocol. 
+ int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; } // namespace ruby diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 4b0c6a239c..e2f49f5dff 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -73,6 +73,8 @@ Sequencer::Sequencer(const Params &p) { m_outstanding_count = 0; + m_ruby_system = p.ruby_system; + m_dataCache_ptr = p.dcache; m_max_outstanding_requests = p.max_outstanding_requests; m_deadlock_threshold = p.deadlock_threshold; @@ -726,7 +728,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, printAddress(request_address)); // update the data unless it is a non-data-carrying flush - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { data.setData(pkt); } else if (!pkt->isFlush()) { if ((type == RubyRequestType_LD) || @@ -782,11 +784,11 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { assert(pkt->req); delete pkt; rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { delete pkt; rs->m_cache_recorder->enqueueNextFlushRequest(); } else { @@ -852,8 +854,8 @@ Sequencer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. 
They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -873,9 +875,9 @@ Sequencer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); @@ -910,14 +912,16 @@ Sequencer::invL1() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(RubySequencer, @@ -1080,11 +1084,14 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) pc = pkt->req->getPC(); } + int blk_size = m_ruby_system->getBlockSizeBytes(); + // check if the packet has data as for example prefetch and flush // requests do not std::shared_ptr msg; if (pkt->req->isMemMgmt()) { - msg = std::make_shared(clockEdge(), + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, pc, secondary_type, RubyAccessMode_Supervisor, pkt, 
proc_id, core_id); @@ -1111,8 +1118,10 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) msg->m_tlbiTransactionUid); } } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, secondary_type, + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, + pkt->getAddr(), pkt->getSize(), + pc, secondary_type, RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, core_id); @@ -1147,7 +1156,9 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) assert(latency > 0); assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } template @@ -1194,7 +1205,7 @@ Sequencer::incrementUnaddressedTransactionCnt() // Limit m_unaddressedTransactionCnt to 32 bits, // top 32 bits should always be zeroed out uint64_t aligned_txid = \ - m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits(); + m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits(); if (aligned_txid > 0xFFFFFFFFull) { m_unaddressedTransactionCnt = 0; @@ -1206,7 +1217,7 @@ Sequencer::getCurrentUnaddressedTransactionID() const { return ( uint64_t(m_version & 0xFFFFFFFF) << 32) | - (m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits() + (m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits() ); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 1f60d2638f..ee16d2fe2e 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -254,6 +254,8 @@ class Sequencer : public RubyPort RubyRequestType primary_type, RubyRequestType secondary_type); + RubySystem *m_ruby_system; + private: int m_max_outstanding_requests; diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 3f570fb952..0994bb4afe 100644 --- a/src/mem/ruby/system/Sequencer.py +++ 
b/src/mem/ruby/system/Sequencer.py @@ -83,7 +83,7 @@ class RubyPort(ClockedObject): using_ruby_tester = Param.Bool(False, "") no_retry_on_stall = Param.Bool(False, "") - ruby_system = Param.RubySystem(Parent.any, "") + ruby_system = Param.RubySystem("Parent RubySystem object") system = Param.System(Parent.any, "system object") support_data_reqs = Param.Bool(True, "data cache requests supported") support_inst_reqs = Param.Bool(True, "inst cache requests supported") diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc index 47ceced3a7..67dd88fb2e 100644 --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -135,9 +135,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) // Creating WriteMask that records written bytes // and atomic operations. This enables partial writes // and partial reads of those writes - DataBlock dataBlock; + uint32_t blockSize = m_ruby_system->getBlockSizeBytes(); + DataBlock dataBlock(blockSize); dataBlock.clear(); - uint32_t blockSize = RubySystem::getBlockSizeBytes(); std::vector accessMask(blockSize,false); std::vector< std::pair > atomicOps; uint32_t tableSize = crequest->getPackets().size(); @@ -159,15 +159,17 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) } std::shared_ptr msg; if (pkt->isAtomicOp()) { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, 100, blockSize, accessMask, dataBlock, atomicOps, crequest->getSeqNum()); } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, 
proc_id, 100, blockSize, accessMask, @@ -195,7 +197,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(crequest->getRubyType())); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } void @@ -241,7 +245,7 @@ VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum) std::remove_if( m_writeCompletePktMap[key].begin(), m_writeCompletePktMap[key].end(), - [addr](PacketPtr writeCompletePkt) -> bool { + [this,addr](PacketPtr writeCompletePkt) -> bool { if (makeLineAddress(writeCompletePkt->getAddr()) == addr) { RubyPort::SenderState *ss = safe_cast @@ -296,14 +300,15 @@ VIPERCoalescer::invTCP() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(GPUCoalescer, @@ -343,16 +348,17 @@ VIPERCoalescer::invTCC(PacketPtr pkt) RubyRequestType request_type = RubyRequestType_InvL2; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Sending L2 
invalidate to 0x%x\n", addr); assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_pending_invl2s[addr].push_back(pkt); } diff --git a/src/mem/ruby/system/VIPERSequencer.cc b/src/mem/ruby/system/VIPERSequencer.cc index ac840777d4..b8b806aa9c 100644 --- a/src/mem/ruby/system/VIPERSequencer.cc +++ b/src/mem/ruby/system/VIPERSequencer.cc @@ -81,8 +81,8 @@ VIPERSequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, // subBlock with the recieved data. The tester will later access // this state. assert(!m_usingRubyTester); - assert(!RubySystem::getWarmupEnabled()); - assert(!RubySystem::getCooldownEnabled()); + assert(!m_ruby_system->getWarmupEnabled()); + assert(!m_ruby_system->getCooldownEnabled()); ruby_hit_callback(pkt); testDrainComplete(); } diff --git a/src/mem/slicc/ast/CheckProbeStatementAST.py b/src/mem/slicc/ast/CheckProbeStatementAST.py index 10945cfc30..14f6f7e4fa 100644 --- a/src/mem/slicc/ast/CheckProbeStatementAST.py +++ b/src/mem/slicc/ast/CheckProbeStatementAST.py @@ -49,7 +49,8 @@ class CheckProbeStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count($address_code) == 1) && (m_block_map[$address_code] == &$in_port_code)) { - $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py index 14b2e48cd3..4bb446aee2 100644 --- a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py +++ b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py @@ -68,7 +68,8 @@ class DeferEnqueueingStatementAST(StatementAST): # Declare message code( 
"std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge()," + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py index c2d47af9ce..b026f6e7a9 100644 --- a/src/mem/slicc/ast/EnqueueStatementAST.py +++ b/src/mem/slicc/ast/EnqueueStatementAST.py @@ -76,7 +76,8 @@ class EnqueueStatementAST(StatementAST): # Declare message code( "std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge(), " + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements @@ -89,17 +90,21 @@ class EnqueueStatementAST(StatementAST): bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False) code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled(), " + "$bypass_strict_fifo_code);" ) else: code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) else: code( "(${{self.queue_name.var.code}}).enqueue(out_msg, " - "clockEdge(), cyclesToTicks(Cycles(1)));" + "clockEdge(), cyclesToTicks(Cycles(1))," + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) # End scope diff --git a/src/mem/slicc/ast/LocalVariableAST.py b/src/mem/slicc/ast/LocalVariableAST.py index b4ac8f446b..43ab110a67 100644 --- a/src/mem/slicc/ast/LocalVariableAST.py +++ b/src/mem/slicc/ast/LocalVariableAST.py @@ -73,6 +73,8 @@ class LocalVariableAST(StatementAST): ) ): 
code += f"{type.c_ident}* {ident}" + elif "implicit_ctor" in type: + code += f"{type.c_ident} {ident}({type['implicit_ctor']})" else: code += f"{type.c_ident} {ident}" return type diff --git a/src/mem/slicc/ast/PeekStatementAST.py b/src/mem/slicc/ast/PeekStatementAST.py index 00edff4e7b..415f4ec465 100644 --- a/src/mem/slicc/ast/PeekStatementAST.py +++ b/src/mem/slicc/ast/PeekStatementAST.py @@ -93,7 +93,8 @@ class PeekStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count(in_msg_ptr->m_$address_field) == 1) && (m_block_map[in_msg_ptr->m_$address_field] != &$qcode)) { - $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index b523522501..6202d2d239 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -352,7 +352,6 @@ class $c_ident : public AbstractController public: typedef ${c_ident}Params Params; $c_ident(const Params &p); - static int getNumControllers(); void init(); MessageBuffer *getMandatoryQueue() const; @@ -449,9 +448,8 @@ int m_counters[${ident}_State_NUM][${ident}_Event_NUM]; int m_event_counters[${ident}_Event_NUM]; bool m_possible[${ident}_State_NUM][${ident}_Event_NUM]; -static std::vector eventVec; -static std::vector > transVec; -static int m_num_controllers; +std::vector eventVec; +std::vector > transVec; // Internal functions """ @@ -625,10 +623,6 @@ namespace gem5 namespace ruby { -int $c_ident::m_num_controllers = 0; -std::vector $c_ident::eventVec; -std::vector > $c_ident::transVec; - // for adding information to the protocol debug trace std::stringstream ${ident}_transitionComment; @@ -644,8 +638,9 @@ $c_ident::$c_ident(const Params &p) { m_machineID.type = MachineType_${ident}; m_machineID.num = m_version; - m_num_controllers++; + 
p.ruby_system->m_num_controllers[MachineType_${ident}]++; p.ruby_system->registerAbstractController(this); + m_ruby_system = p.ruby_system; m_in_ports = $num_in_ports; """ @@ -699,7 +694,7 @@ void $c_ident::initNetQueues() { MachineType machine_type = string_to_MachineType("${{self.ident}}"); - [[maybe_unused]] int base = MachineType_base_number(machine_type); + [[maybe_unused]] int base = m_ruby_system->MachineType_base_number(machine_type); """ ) @@ -776,6 +771,17 @@ $c_ident::init() comment = f"Type {vtype.ident} default" code('*$vid = ${{vtype["default"]}}; // $comment') + # For objects that require knowing the cache line size, + # set the value here. + if vtype.c_ident in ("TBETable"): + block_size_func = "m_ruby_system->getBlockSizeBytes()" + code(f"(*{vid}).setBlockSize({block_size_func});") + + for param in self.config_parameters: + if param.type_ast.type.ident == "CacheMemory": + assert param.pointer + code(f"m_{param.ident}_ptr->setRubySystem(m_ruby_system);") + # Set the prefetchers code() for prefetcher in self.prefetchers: @@ -942,7 +948,9 @@ $c_ident::regStats() "${c_ident}." + ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector(profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); @@ -961,7 +969,9 @@ $c_ident::regStats() "." 
+ ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector( profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); transVec[state].push_back(t); @@ -1062,9 +1072,12 @@ $c_ident::regStats() void $c_ident::collateStats() { + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1080,7 +1093,7 @@ $c_ident::collateStats() for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1125,12 +1138,6 @@ $c_ident::getTransitionCount(${ident}_State state, return m_counters[state][event]; } -int -$c_ident::getNumControllers() -{ - return m_num_controllers; -} - MessageBuffer* $c_ident::getMandatoryQueue() const { @@ -1181,6 +1188,7 @@ void $c_ident::set_cache_entry(${{self.EntryType.c_ident}}*& m_cache_entry_ptr, AbstractCacheEntry* m_new_cache_entry) { m_cache_entry_ptr = (${{self.EntryType.c_ident}}*)m_new_cache_entry; + m_cache_entry_ptr->setRubySystem(m_ruby_system); } void @@ -1200,6 +1208,7 @@ void $c_ident::set_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr, ${{self.TBEType.c_ident}}* m_new_tbe) { m_tbe_ptr = m_new_tbe; + m_tbe_ptr->setRubySystem(m_ruby_system); } void diff --git a/src/mem/slicc/symbols/Type.py 
b/src/mem/slicc/symbols/Type.py index 535a4165b3..53c8ff877e 100644 --- a/src/mem/slicc/symbols/Type.py +++ b/src/mem/slicc/symbols/Type.py @@ -119,6 +119,10 @@ class Type(Symbol): def isMessage(self): return "message" in self + @property + def isTBE(self): + return "tbe" in self + @property def isBuffer(self): return "buffer" in self @@ -250,18 +254,54 @@ namespace gem5 namespace ruby { +class RubySystem; + $klass ${{self.c_ident}}$parent { public: - ${{self.c_ident}} """, klass="class", ) if self.isMessage: - code("(Tick curTime) : %s(curTime) {" % self["interface"]) + code( + "${{self.c_ident}}(Tick curTime, int blockSize, RubySystem* rs) : %s(curTime, blockSize, rs)" + % self["interface"] + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") + + code("{") + elif self.isTBE: + code("${{self.c_ident}}(int block_size)") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(block_size)") + ctor_count += 1 + + code("{") else: - code("()\n\t\t{") + code("${{self.c_ident}}()") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(0)") + ctor_count += 1 + + code("{") code.indent() if not self.isGlobal: @@ -280,6 +320,12 @@ $klass ${{self.c_ident}}$parent code(" // default value of $tid") else: code("// m_$ident has no default") + + # These parts of Messages need RubySystem pointers. For things + # like Entry which only store NetDest, RubySystem is not needed. 
+ if self.isMessage and dm.real_c_type == "NetDest": + code("// m_$ident requires RubySystem") + code("m_$ident.setRubySystem(rs);") code.dedent() code("}") @@ -300,21 +346,45 @@ $klass ${{self.c_ident}}$parent params = ", ".join(params) if self.isMessage: - params = "const Tick curTime, " + params + params = ( + "const Tick curTime, const int blockSize, const RubySystem *rs, " + + params + ) code("${{self.c_ident}}($params)") # Call superclass constructor if "interface" in self: if self.isMessage: - code(' : ${{self["interface"]}}(curTime)') + code( + ' : ${{self["interface"]}}(curTime, blockSize, rs)' + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") else: code(' : ${{self["interface"]}}()') + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(local_{dm.ident})") + else: + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(local_{dm.ident})") + ctor_count += 1 + code("{") code.indent() for dm in self.data_members.values(): - code("m_${{dm.ident}} = local_${{dm.ident}};") + if not dm.real_c_type in ("DataBlock", "WriteMask"): + code("m_${{dm.ident}} = local_${{dm.ident}};") code.dedent() code("}") @@ -342,6 +412,35 @@ clone() const ) if not self.isGlobal: + # Block size setter for fields that require block size + # Intentionally do not begin function name with "set" in case + # the user has a field named BlockSize which would conflict + # with the method generated below. 
+ code("\nvoid initBlockSize(int block_size)") + code("{") + code("\tblock_size_bits = floorLog2(block_size);") + + needs_block_size = ( + "DataBlock", + "WriteMask", + "PersistentTable", + "TimerTable", + "PerfectCacheMemory", + ) + + for dm in self.data_members.values(): + if dm.real_c_type in needs_block_size: + code(f"\tm_{dm.ident}.setBlockSize(block_size);") + code("}\n") + + code("\nvoid setRubySystem(RubySystem *ruby_system)") + code("{") + for dm in self.data_members.values(): + if dm.real_c_type in ("NetDest"): + code(f"// m_{dm.ident} requires RubySystem") + code(f"\tm_{dm.ident}.setRubySystem(ruby_system);") + code("}\n") + # const Get methods for each field code("// Const accessors methods for each field") for dm in self.data_members.values(): @@ -393,6 +492,9 @@ set${{dm.ident}}(const ${{dm.real_c_type}}& local_${{dm.ident}}) code(" //private:") code.indent() + # block_size_bits for print methods + code("int block_size_bits = 0;") + # Data members for each field for dm in self.data_members.values(): if "abstract" not in dm: @@ -473,7 +575,7 @@ ${{self.c_ident}}::print(std::ostream& out) const if dm.type.c_ident == "Addr": code( """ -out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}) << " ";""" +out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}, block_size_bits) << " ";""" ) else: code('out << "${{dm.ident}} = " << m_${{dm.ident}} << " ";' "") @@ -846,7 +948,7 @@ ${{self.c_ident}}_from_base_level(int type) * \\return the base number of components for each machine */ int -${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) { int base = 0; switch(obj) { @@ -860,7 +962,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) # Check if there is a defined machine with this type if enum.primary: code( - " base += ${{enum.ident}}_Controller::getNumControllers();" + "\tbase += m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code(" base += 
0;") @@ -882,7 +984,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) * \\return the total number of components for each machine */ int -${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) { switch(obj) { """ @@ -893,7 +995,7 @@ ${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) code("case ${{self.c_ident}}_${{enum.ident}}:") if enum.primary: code( - "return ${{enum.ident}}_Controller::getNumControllers();" + "return m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code("return 0;") diff --git a/src/python/SConscript b/src/python/SConscript index 3aed9f03e3..afe786536c 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -280,6 +280,7 @@ PySource('gem5.components.processors', PySource('gem5.prebuilt', 'gem5/prebuilt/__init__.py') PySource('gem5.prebuilt.demo', 'gem5/prebuilt/demo/__init__.py') PySource('gem5.prebuilt.demo', 'gem5/prebuilt/demo/x86_demo_board.py') +PySource('gem5.prebuilt.demo', 'gem5/prebuilt/demo/arm_demo_board.py') PySource('gem5.prebuilt.riscvmatched', 'gem5/prebuilt/riscvmatched/__init__.py') PySource('gem5.prebuilt.riscvmatched', diff --git a/src/python/gem5/components/boards/abstract_board.py b/src/python/gem5/components/boards/abstract_board.py index 83ca32d9c0..cd6f559937 100644 --- a/src/python/gem5/components/boards/abstract_board.py +++ b/src/python/gem5/components/boards/abstract_board.py @@ -41,6 +41,7 @@ from m5.objects import ( ClockDomain, IOXBar, Port, + Root, SrcClockDomain, System, VoltageDomain, @@ -117,12 +118,6 @@ class AbstractBoard: # Simulator module. self._checkpoint = None - # Setup the board and memory system's memory ranges. - self._setup_memory_ranges() - - # Setup board properties unique to the board being constructed. - self._setup_board() - # A private variable to record whether `_connect_things` has been # been called. 
self._connect_things_called = False @@ -194,6 +189,9 @@ class AbstractBoard: """ self._is_fs = is_fs + self._setup_memory_ranges() + self._setup_board() + def is_fullsystem(self) -> bool: """ Returns ``True`` if the board is to be run in FS mode. Otherwise the board @@ -252,11 +250,14 @@ class AbstractBoard: @abstractmethod def _setup_board(self) -> None: """ - This function is called in the AbstractBoard constructor, before the - memory, processor, and cache hierarchy components are incorporated via - ``_connect_thing()``, but after the ``_setup_memory_ranges()`` function. - This function should be overridden by boards to specify components, - connections unique to that board. + This function is called at the end of `_set_fullsystem`. The reason for + this is the board's configuration varies significantly depending on + whether it is to be run in FS or SE mode. This function is therefore + called when a workload is set --- after construction but before + `_pre_instantiate` is called. + + As `_setup_memory_ranges()` is set in the constructor, this function + can be considered to have been called prior to `_setup_board`. """ raise NotImplementedError @@ -330,10 +331,18 @@ class AbstractBoard: """ Set the memory ranges for this board and memory system. - This is called in the constructor, prior to ``_setup_board`` and - ``_connect_things``. It should query the board's memory to determine the - size and the set the memory ranges on the memory system and on the - board. + This is called at the end of the `_set_fullsystem` function but before + `_setup_board`. `_set_fullsystem` is called when the workload is + declared. It is before `_pre_instantiate` (but, obviously after + construction). + + It should query the board's memory + to determine the size and then set the memory ranges on the memory + system and on the board. 
+ + As this is called at the end of `_set_fullsystem`, the board's memory + can be set up differently depending on whether the board is to be run in + FS or SE mode. The simplest implementation sets the board's memory range to the size of memory and memory system's range to be the same as the board. Full @@ -391,13 +400,42 @@ class AbstractBoard: self.get_cache_hierarchy()._post_instantiate() self.get_memory()._post_instantiate() - def _pre_instantiate(self): + def _pre_instantiate(self, full_system: Optional[bool] = None) -> Root: """To be called immediately before ``m5.instantiate``. This is where - ``_connect_things`` is executed by default.""" + ``_connect_things`` is executed by default and the Root + object is created and returned. - # Connect the memory, processor, and cache hierarchy. + :param full_system: Used to pass the full system flag to the board from + the Simulator module. **Note**: This was + implemented solely to maintain backwards + compatibility while the Simulator module's + `full_system` flag is in a state of deprecation. This + parameter will be removed when it is. When this + occurs whether a simulation is to be run in FS or + SE mode will be determined by the board set.""" + + # 1. Connect the memory, processor, and cache hierarchy. self._connect_things() + # 2. Create the root object + root = Root( + full_system=( + full_system + if full_system is not None + else self.is_fullsystem() + ), + board=self, + ) + + # 3. Call any of the components' `_pre_instantiate` functions. + self.get_processor()._pre_instantiate(root) + self.get_memory()._pre_instantiate(root) + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._pre_instantiate(root) + + # 4. Return the root object. 
+ return root + def _connect_things_check(self): """ Here we check that connect things has been called and throw an diff --git a/src/python/gem5/components/boards/abstract_system_board.py b/src/python/gem5/components/boards/abstract_system_board.py index 8fe48920b5..a8765ee909 100644 --- a/src/python/gem5/components/boards/abstract_system_board.py +++ b/src/python/gem5/components/boards/abstract_system_board.py @@ -36,7 +36,6 @@ from .abstract_board import AbstractBoard class AbstractSystemBoard(System, AbstractBoard): - """ An abstract board for cases where boards should inherit from System. """ diff --git a/src/python/gem5/components/boards/arm_board.py b/src/python/gem5/components/boards/arm_board.py index 0a0cd2fa28..2da8cd18f2 100644 --- a/src/python/gem5/components/boards/arm_board.py +++ b/src/python/gem5/components/boards/arm_board.py @@ -28,6 +28,7 @@ import os from abc import ABCMeta from typing import ( List, + Optional, Sequence, Tuple, ) @@ -274,11 +275,15 @@ class ArmBoard(ArmSystem, AbstractBoard, KernelDiskWorkload): @overrides(AbstractBoard) def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: - all_ports = [ - (self.realview.bootmem.range, self.realview.bootmem.port), - ] + self.get_memory().get_mem_ports() + # Note: Ruby needs to create a directory for the realview bootmem + if self.get_cache_hierarchy().is_ruby(): + all_ports = [ + (self.realview.bootmem.range, self.realview.bootmem.port), + ] + self.get_memory().get_mem_ports() - return all_ports + return all_ports + + return super().get_mem_ports() @overrides(AbstractBoard) def has_io_bus(self) -> bool: @@ -327,8 +332,8 @@ class ArmBoard(ArmSystem, AbstractBoard, KernelDiskWorkload): self.system_port = port @overrides(AbstractBoard) - def _pre_instantiate(self): - super()._pre_instantiate() + def _pre_instantiate(self, full_system: Optional[bool] = None) -> None: + super()._pre_instantiate(full_system=full_system) # Add the PCI devices. 
self.pci_devices = self._pci_devices diff --git a/src/python/gem5/components/boards/riscv_board.py b/src/python/gem5/components/boards/riscv_board.py index e8e27029f2..e14833c996 100644 --- a/src/python/gem5/components/boards/riscv_board.py +++ b/src/python/gem5/components/boards/riscv_board.py @@ -26,7 +26,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os -from typing import List +from typing import ( + List, + Optional, +) import m5 from m5.objects import ( @@ -498,7 +501,7 @@ class RiscvBoard(AbstractSystemBoard, KernelDiskWorkload): return "/dev/vda" @overrides(AbstractSystemBoard) - def _pre_instantiate(self): + def _pre_instantiate(self, full_system: Optional[bool] = None): if len(self._bootloader) > 0: self.workload.bootloader_addr = 0x0 self.workload.bootloader_filename = self._bootloader[0] @@ -507,7 +510,7 @@ class RiscvBoard(AbstractSystemBoard, KernelDiskWorkload): else: self.workload.kernel_addr = 0x0 self.workload.entry_point = 0x80000000 - self._connect_things() + super()._pre_instantiate(full_system=full_system) @overrides(KernelDiskWorkload) def _add_disk_to_board(self, disk_image: AbstractResource): diff --git a/src/python/gem5/components/boards/test_board.py b/src/python/gem5/components/boards/test_board.py index 2599c6853d..6acce79b1c 100644 --- a/src/python/gem5/components/boards/test_board.py +++ b/src/python/gem5/components/boards/test_board.py @@ -44,7 +44,6 @@ from .abstract_system_board import AbstractSystemBoard class TestBoard(AbstractSystemBoard): - """This is a Testing Board used to run traffic generators on a simple architecture. 
diff --git a/src/python/gem5/components/cachehierarchies/abstract_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/abstract_cache_hierarchy.py index b0435543af..dc20c14f70 100644 --- a/src/python/gem5/components/cachehierarchies/abstract_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/abstract_cache_hierarchy.py @@ -42,7 +42,10 @@ from abc import ( ) from typing import Callable -from m5.objects import SubSystem +from m5.objects import ( + Root, + SubSystem, +) from m5.util.fdthelper import * from ..boards.abstract_board import AbstractBoard @@ -139,6 +142,18 @@ class AbstractCacheHierarchy(SubSystem): """ raise NotImplementedError + def _pre_instantiate(self, root: Root) -> None: + """Called in the `AbstractBoard`'s `_pre_instantiate` method. This is + called after `connect_things`, after the creation of the root object + (which is passed in as an argument), but before `m5.instantiate`). + + Subclasses should override this method to set up any connections. + + At present there is no general task that must be specified here and is + default or applicable to all cache hierarchies. + """ + pass + def _post_instantiate(self): """Called to set up anything needed after ``m5.instantiate``.""" pass diff --git a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py index 29df2a969c..42c4e2258c 100644 --- a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py @@ -82,6 +82,7 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): @overrides(AbstractCacheHierarchy) def incorporate_cache(self, board: AbstractBoard) -> None: + super().incorporate_cache(board) self.ruby_system = RubySystem() # Ruby's global network. 
@@ -137,7 +138,9 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. - self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_core_cluster( @@ -167,12 +170,16 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): ) cluster.icache.sequencer = RubySequencer( - version=core_num, dcache=NULL, clk_domain=cluster.icache.clk_domain + version=core_num, + dcache=NULL, + clk_domain=cluster.icache.clk_domain, + ruby_system=self.ruby_system, ) cluster.dcache.sequencer = RubySequencer( version=core_num, dcache=cluster.dcache.cache, clk_domain=cluster.dcache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -223,7 +230,11 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): board.get_clock_domain(), ) version = len(board.get_processor().get_cores()) + i - ctrl.sequencer = RubySequencer(version=version, in_ports=port) + ctrl.sequencer = RubySequencer( + version=version, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.sequencer.dcache = NULL ctrl.ruby_system = self.ruby_system @@ -234,3 +245,10 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): dma_controllers.append(ctrl) return dma_controllers + + @overrides(AbstractRubyCacheHierarchy) + def _reset_version_numbers(self): + from .nodes.abstract_node import AbstractNode + + AbstractNode._version = 0 + MemoryController._version = 0 diff --git a/src/python/gem5/components/cachehierarchies/classic/no_cache.py b/src/python/gem5/components/cachehierarchies/classic/no_cache.py index e6ec89b660..c3c791f4e0 100644 --- a/src/python/gem5/components/cachehierarchies/classic/no_cache.py +++ b/src/python/gem5/components/cachehierarchies/classic/no_cache.py @@ -124,7 +124,7 @@ class 
NoCache(AbstractClassicCacheHierarchy): # Set up the system port for functional access from the simulator. board.connect_system_port(self.membus.cpu_side_ports) - for _, port in board.get_memory().get_mem_ports(): + for _, port in board.get_mem_ports(): self.membus.mem_side_ports = port def _setup_coherent_io_bridge(self, board: AbstractBoard) -> None: diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py index 8f63d3320f..9382d11036 100644 --- a/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py @@ -96,7 +96,7 @@ class PrivateL1CacheHierarchy(AbstractClassicCacheHierarchy): # Set up the system port for functional access from the simulator. board.connect_system_port(self.membus.cpu_side_ports) - for _, port in board.get_memory().get_mem_ports(): + for _, port in board.get_mem_ports(): self.membus.mem_side_ports = port self.l1icaches = [ diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py index 049d0fb102..354d9d064d 100644 --- a/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py @@ -126,7 +126,7 @@ class PrivateL1PrivateL2CacheHierarchy( # Set up the system port for functional access from the simulator. 
board.connect_system_port(self.membus.cpu_side_ports) - for _, port in board.get_memory().get_mem_ports(): + for _, port in board.get_mem_ports(): self.membus.mem_side_ports = port self.l2buses = [ diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py index 4a896b2292..1f0d62d541 100644 --- a/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py @@ -119,7 +119,7 @@ class PrivateL1SharedL2CacheHierarchy( # Set up the system port for functional access from the simulator. board.connect_system_port(self.membus.cpu_side_ports) - for _, port in board.get_memory().get_mem_ports(): + for _, port in board.get_mem_ports(): self.membus.mem_side_ports = port self.l1icaches = [ diff --git a/src/python/gem5/components/cachehierarchies/ruby/abstract_ruby_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/abstract_ruby_cache_hierarchy.py index 3528b74495..6e7e957934 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/abstract_ruby_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/abstract_ruby_cache_hierarchy.py @@ -37,6 +37,18 @@ class AbstractRubyCacheHierarchy(AbstractCacheHierarchy): def __init__(self): super().__init__() + def _reset_version_numbers(self): + """Needed for multiple ruby systems so that each system starts at 0. + + Note: This needs to be overridden by the protocol since we don't know + the machine classes at this point. 
+ """ + raise NotImplementedError + + @overrides(AbstractCacheHierarchy) + def incorporate_cache(self, board): + self._reset_version_numbers() + @overrides(AbstractCacheHierarchy) def is_ruby(self) -> bool: return True diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py index 6d203f978a..ef90ac79f6 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -80,7 +80,7 @@ class L1Cache(L0Cache_Controller): replacement_policy=LRURP(), ) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 32 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py index ff2b8e3dd9..7c473f8be9 100644 --- 
a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -75,7 +75,7 @@ class L2Cache(L1Cache_Controller): self.l2_select_num_bits = int(math.log(num_l3Caches, 2)) self.cluster_id = cluster_id self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.transitions_per_cycle = 32 # l1_request_latency, l1_response_latency, to_l2_latency are # ruby backend terminology. diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. 
self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py index 7787644c9b..13625beea7 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py @@ -73,7 +73,7 @@ class L1Cache(AbstractL1Cache): ) self.l2_select_num_bits = int(math.log(num_l2Caches, 2)) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 4 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py index 3d1ae54104..79e40e9e01 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py @@ -41,7 +41,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. 
self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py index 9aa0dc4a36..212c06c4c3 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py @@ -143,6 +143,7 @@ class CoreComplex(SubSystem, RubyNetworkComponent): version=core_id, dcache=cluster.l1_cache.Dcache, clk_domain=cluster.l1_cache.clk_domain, + ruby_system=self._ruby_system, ) if self._board.has_io_bus(): diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py index f7d4d63de1..d576ae6ae4 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py @@ -38,6 +38,7 @@ from ......components.cachehierarchies.ruby.caches.mesi_three_level.directory im from ......components.cachehierarchies.ruby.caches.mesi_three_level.dma_controller import ( DMAController, ) +from ......utils.override import overrides from ......utils.requires import requires from ....abstract_three_level_cache_hierarchy import ( AbstractThreeLevelCacheHierarchy, @@ -95,6 +96,7 @@ class OctopiCache( requires( coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL ) + super().incorporate_cache(board) cache_line_size = board.get_cache_line_size() @@ -151,7 +153,9 @@ class OctopiCache( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_directory_controllers(self, board): @@ -228,7 +232,11 @@ class OctopiCache( if board.has_dma_ports(): self.ruby_system.dma_controllers = [ DMAController( - dma_sequencer=DMASequencer(version=i + 1, in_ports=port), + dma_sequencer=DMASequencer( + version=i + 1, + in_ports=port, + ruby_system=self.ruby_system, + ), ruby_system=self.ruby_system, ) for i, port in enumerate(board.get_dma_ports()) @@ -261,3 +269,15 @@ class OctopiCache( ] for link in self.dma_int_links: self.ruby_system.network._add_int_link(link) + + @overrides(AbstractRubyCacheHierarchy) + def _reset_version_numbers(self): + from ....caches.mesi_three_level.l1_cache import L1Cache + from ....caches.mesi_three_level.l2_cache import L2Cache + from ....caches.mesi_three_level.l3_cache import L3Cache + + Directory._version = 0 + L1Cache._version = 0 + L2Cache._version = 0 + L3Cache._version = 0 + DMAController._version = 0 diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py index 66fea95636..501fbab081 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py @@ -33,6 +33,7 @@ from m5.objects import ( ) from ....coherence_protocol import CoherenceProtocol +from ....utils.override import overrides from ....utils.requires import requires requires(coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL) @@ -87,6 +88,7 @@ class MESIThreeLevelCacheHierarchy( self._num_l3_banks = num_l3_banks def incorporate_cache(self, board: AbstractBoard) -> None: + super().incorporate_cache(board) cache_line_size = board.get_cache_line_size() 
self.ruby_system = RubySystem() @@ -118,6 +120,7 @@ class MESIThreeLevelCacheHierarchy( version=core_idx, dcache=l1_cache.Dcache, clk_domain=l1_cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -196,7 +199,12 @@ class MESIThreeLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController( - DMASequencer(version=i, in_ports=port), self.ruby_system + DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ), + self.ruby_system, ) self._dma_controllers.append(ctrl) @@ -223,5 +231,15 @@ class MESIThreeLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. - self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) + + @overrides(AbstractRubyCacheHierarchy) + def _reset_version_numbers(self): + Directory._version = 0 + L1Cache._version = 0 + L2Cache._version = 0 + L3Cache._version = 0 + DMAController._version = 0 diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py index 004c2ff9d2..52a14c7681 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py @@ -33,6 +33,7 @@ from m5.objects import ( ) from ....coherence_protocol import CoherenceProtocol +from ....utils.override import overrides from ....utils.requires import requires requires(coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL) @@ -83,6 +84,7 @@ class MESITwoLevelCacheHierarchy( self._num_l2_banks = num_l2_banks def incorporate_cache(self, board: AbstractBoard) -> None: + super().incorporate_cache(board) cache_line_size = 
board.get_cache_line_size() self.ruby_system = RubySystem() @@ -109,7 +111,10 @@ class MESITwoLevelCacheHierarchy( ) cache.sequencer = RubySequencer( - version=i, dcache=cache.L1Dcache, clk_domain=cache.clk_domain + version=i, + dcache=cache.L1Dcache, + clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -163,7 +168,11 @@ class MESITwoLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController(self.ruby_system.network, cache_line_size) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) self._dma_controllers.append(ctrl) ctrl.ruby_system = self.ruby_system @@ -188,5 +197,14 @@ class MESITwoLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. - self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) + + @overrides(AbstractRubyCacheHierarchy) + def _reset_version_numbers(self): + Directory._version = 0 + L1Cache._version = 0 + L2Cache._version = 0 + DMAController._version = 0 diff --git a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py index 478c793560..271bc42536 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py @@ -32,6 +32,7 @@ from m5.objects import ( ) from ....coherence_protocol import CoherenceProtocol +from ....utils.override import overrides from ....utils.requires import requires requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE) @@ -65,6 +66,7 @@ class 
MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): @overrides(AbstractCacheHierarchy) def incorporate_cache(self, board: AbstractBoard) -> None: + super().incorporate_cache(board) self.ruby_system = RubySystem() # Ruby's global network. @@ -95,6 +97,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): version=i, dcache=cache.cacheMemory, clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -140,7 +143,11 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): ctrl = DMAController( self.ruby_system.network, board.get_cache_line_size() ) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.ruby_system = self.ruby_system ctrl.dma_sequencer.ruby_system = self.ruby_system @@ -167,5 +174,13 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) + + @overrides(AbstractRubyCacheHierarchy) + def _reset_version_numbers(self): + Directory._version = 0 + L1Cache._version = 0 + DMAController._version = 0 diff --git a/src/python/gem5/components/memory/abstract_memory_system.py b/src/python/gem5/components/memory/abstract_memory_system.py index 06fa60cad8..6d24e724b6 100644 --- a/src/python/gem5/components/memory/abstract_memory_system.py +++ b/src/python/gem5/components/memory/abstract_memory_system.py @@ -38,6 +38,7 @@ from m5.objects import ( AddrRange, MemCtrl, Port, + Root, SubSystem, ) @@ -50,6 +51,18 @@ class AbstractMemorySystem(SubSystem): def __init__(self) -> None: super().__init__() + def _pre_instantiate(self, root: Root) -> None: + """Called in the `AbstractBoard`'s `_pre_instantiate` method. This is + called after `connect_things`, after the creation of the root object + (which is passed in as an argument), but before `m5.instantiate`). + + Subclasses should override this method to set up any connections. + + At present there is no general task that must be specified here and is + default or applicable to all memory systems. 
+ """ + pass + @abstractmethod def incorporate_memory(self, board: AbstractBoard) -> None: """This function completes all of the necessary steps to add this diff --git a/src/python/gem5/components/processors/abstract_processor.py b/src/python/gem5/components/processors/abstract_processor.py index 79dba438a2..303b9658f2 100644 --- a/src/python/gem5/components/processors/abstract_processor.py +++ b/src/python/gem5/components/processors/abstract_processor.py @@ -33,7 +33,10 @@ from typing import ( Optional, ) -from m5.objects import SubSystem +from m5.objects import ( + Root, + SubSystem, +) from ...isas import ISA from ...utils.requires import requires @@ -83,3 +86,12 @@ class AbstractProcessor(SubSystem): def _post_instantiate(self) -> None: """Called to set up anything needed after ``m5.instantiate``.""" pass + + def _pre_instantiate(self, root: Root) -> None: + """Called in the `AbstractBoard`'s `_pre_instantiate` method. This is + called after `connect_things`, after the creation of the root object + (which is passed in as an argument), but before `m5.instantiate`). + + Subclasses should override this method to set up any connections. 
+ """ + pass diff --git a/src/python/gem5/components/processors/base_cpu_processor.py b/src/python/gem5/components/processors/base_cpu_processor.py index b1a63ea8ce..674148b409 100644 --- a/src/python/gem5/components/processors/base_cpu_processor.py +++ b/src/python/gem5/components/processors/base_cpu_processor.py @@ -27,12 +27,14 @@ from typing import List +import m5 from m5.objects import ( BaseAtomicSimpleCPU, BaseMinorCPU, BaseNonCachingSimpleCPU, BaseO3CPU, BaseTimingSimpleCPU, + Root, ) from m5.util import warn @@ -99,3 +101,9 @@ class BaseCPUProcessor(AbstractProcessor): board.set_mem_mode(MemMode.ATOMIC) else: raise NotImplementedError + + def _pre_instantiate(self, root: Root) -> None: + super()._pre_instantiate(root) + if any(core.is_kvm_core() for core in self.get_cores()): + m5.ticks.fixGlobalFrequency() + root.sim_quantum = m5.ticks.fromSeconds(0.001) diff --git a/src/python/gem5/components/processors/switchable_processor.py b/src/python/gem5/components/processors/switchable_processor.py index 2436c9e81f..a5a9ae2b6b 100644 --- a/src/python/gem5/components/processors/switchable_processor.py +++ b/src/python/gem5/components/processors/switchable_processor.py @@ -31,6 +31,7 @@ from typing import ( ) import m5 +from m5.objects import Root from ...utils.override import * from ..boards.abstract_board import AbstractBoard @@ -155,3 +156,24 @@ class SwitchableProcessor(AbstractProcessor): # Ensure the current processor is updated. self._current_cores = to_switch + + def _pre_instantiate(self, root: Root) -> None: + super()._pre_instantiate(root) + # The following is a bit of a hack. If a simulation is to use a KVM + # core then the `sim_quantum` value must be set. However, in the + # case of using a SwitchableProcessor the KVM cores may be + # switched out and therefore not accessible via `get_cores()`. + # This is the reason for the `isinstance` check. 
+ # + # We cannot set the `sim_quantum` value in every simulation as + # setting it causes the scheduling of exits to be off by the + # `sim_quantum` value (something necessary if we are using KVM + # cores). Ergo we only set the value if KVM cores are present. + # + # There is still a bug here in that if the user is switching to and + # from KVM and non-KVM cores via the SwitchableProcessor then the + # scheduling of exits for the non-KVM cores will be incorrect. This + # will be fixed at a later date. + if self._prepare_kvm: + m5.ticks.fixGlobalFrequency() + root.sim_quantum = m5.ticks.fromSeconds(0.001) diff --git a/src/python/gem5/prebuilt/demo/arm_demo_board.py b/src/python/gem5/prebuilt/demo/arm_demo_board.py new file mode 100644 index 0000000000..dfbc6d89e2 --- /dev/null +++ b/src/python/gem5/prebuilt/demo/arm_demo_board.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.objects import ( + ArmDefaultRelease, + VExpress_GEM5_Foundation, + VExpress_GEM5_V1, +) +from m5.util import warn + +from ...components.boards.arm_board import ArmBoard +from ...components.cachehierarchies.classic.private_l1_shared_l2_cache_hierarchy import ( + PrivateL1SharedL2CacheHierarchy, +) +from ...components.memory import DualChannelDDR4_2400 +from ...components.processors.cpu_types import CPUTypes +from ...components.processors.simple_processor import SimpleProcessor +from ...isas import ISA +from ...utils.requires import requires + + +class ArmDemoBoard(ArmBoard): + """ + This prebuilt ARM board is used for demonstration purposes. It simulates an + ARM 3GHz dual-core system with a 4GiB DDR4_2400 memory system. It uses + a PrivateL1SharedL2CacheHierarchy with l1d and l1i caches set to 64KiB and + l2 shared cache set to 8MiB + + **DISCLAIMER**: This board is solely for demonstration purposes. This board + is not known to be representative of any real-world system or produce + reliable statistical results. + """ + + def __init__(self, use_kvm: bool = False) -> None: + """ + :param use_kvm: If True, the board will use a SimpleProcessor + with cpu type of CPUTypes.KVM. If False, the board will use a SimpleProcessor with + a cpu type of CPUTypes.TIMING. + """ + requires( + isa_required=ISA.ARM, + ) + + warn( + "The ARMDemoBoard is solely for demonstration purposes. 
" + "This board is not known to be be representative of any " + "real-world system. Use with caution." + ) + cache_hierarchy = PrivateL1SharedL2CacheHierarchy( + l1d_size="64KiB", l1i_size="64KiB", l2_size="8MiB" + ) + + # Note: Normally a system with these specification would have 1 + # GiB for memory but because some benchmarks would not run with + # 1 GiB of memory so we have set it to 4 GiB. + memory = DualChannelDDR4_2400(size="4GiB") + + if use_kvm: + processor = SimpleProcessor( + cpu_type=CPUTypes.KVM, num_cores=2, isa=ISA.ARM + ) + # The ArmBoard requires a `release` to be specified. This adds all the + # extensions or features to the system. We are setting this to for_kvm() + # to enable KVM simulation. + release = ArmDefaultRelease.for_kvm() + + # The platform sets up the memory ranges of all the on-chip and off-chip + # devices present on the ARM system. ARM KVM only works with VExpress_GEM5_V1 + # on the ArmBoard at the moment. + platform = VExpress_GEM5_V1() + + else: + processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, num_cores=2, isa=ISA.ARM + ) + release = ArmDefaultRelease() + + # The platform sets up the memory ranges of all the on-chip and off-chip + # devices present on the ARM system. + platform = VExpress_GEM5_Foundation() + + super().__init__( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + release=release, + platform=platform, + ) diff --git a/src/python/gem5/prebuilt/demo/x86_demo_board.py b/src/python/gem5/prebuilt/demo/x86_demo_board.py index 793b43a3d1..ac89847f2b 100644 --- a/src/python/gem5/prebuilt/demo/x86_demo_board.py +++ b/src/python/gem5/prebuilt/demo/x86_demo_board.py @@ -24,27 +24,33 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from m5.objects import ( + IOXBar, + Pc, + Port, + X86FsLinux, +) from m5.util import warn -from ...coherence_protocol import CoherenceProtocol +from ...components.boards.se_binary_workload import SEBinaryWorkload from ...components.boards.x86_board import X86Board -from ...components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import ( - MESITwoLevelCacheHierarchy, +from ...components.cachehierarchies.classic.private_l1_shared_l2_cache_hierarchy import ( + PrivateL1SharedL2CacheHierarchy, ) -from ...components.memory.single_channel import SingleChannelDDR3_1600 +from ...components.memory.multi_channel import DualChannelDDR4_2400 from ...components.processors.cpu_types import CPUTypes from ...components.processors.simple_processor import SimpleProcessor from ...isas import ISA +from ...utils.override import overrides from ...utils.requires import requires -class X86DemoBoard(X86Board): +class X86DemoBoard(X86Board, SEBinaryWorkload): """ This prebuilt X86 board is used for demonstration purposes. It simulates - an X86 3GHz quad-core system with a 2GiB DDR3_1600 memory system. A - MESI_Two_Level cache hierarchy is set with an l1 data and instruction - cache, each 32KiB with an associativity of 8, and a single bank l2 cache of - 1MiB with an associativity of 16. + an X86 3GHz dual-core system with a 3GiB DDR4_2400 memory system. The + cache hierarchy consists of per-core private L1 instruction and data + caches (64KiB each) connected to a shared 8MiB L2 cache. **DISCLAIMER**: This board is solely for demonstration purposes. This board is not known to be representative of any real-world system or produce @@ -68,7 +74,6 @@ class X86DemoBoard(X86Board): def __init__(self): requires( isa_required=ISA.X86, - coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL, ) warn( @@ -77,18 +82,15 @@ class X86DemoBoard(X86Board): "real-world system. Use with caution." 
)
- memory = SingleChannelDDR3_1600(size="2GiB")
+ # The other demo boards have 4 GiB of memory, but X86Board can only
+ # support up to 3 GiB.
+ memory = DualChannelDDR4_2400(size="3GiB")
processor = SimpleProcessor(
- cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=4
+ cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=2
)
- cache_hierarchy = MESITwoLevelCacheHierarchy(
- l1d_size="32KiB",
- l1d_assoc=8,
- l1i_size="32KiB",
- l1i_assoc=8,
- l2_size="1MiB",
- l2_assoc=16,
- num_l2_banks=1,
+
+ cache_hierarchy = PrivateL1SharedL2CacheHierarchy(
+ l1d_size="64KiB", l1i_size="64KiB", l2_size="8MiB"
)
super().__init__(
@@ -97,3 +99,46 @@
memory=memory,
cache_hierarchy=cache_hierarchy,
)
+
+ @overrides(X86Board)
+ def _setup_board(self) -> None:
+ if self._is_fs:
+ self.pc = Pc()
+
+ self.workload = X86FsLinux()
+
+ # North Bridge
+ self.iobus = IOXBar()
+
+ # Set up all of the I/O.
+ self._setup_io_devices()
+
+ self.m5ops_base = 0xFFFF0000
+
+ @overrides(X86Board)
+ def has_io_bus(self) -> bool:
+ return self.is_fullsystem()
+
+ @overrides(X86Board)
+ def get_io_bus(self) -> IOXBar:
+ if self.has_io_bus():
+ return self.iobus
+ else:
+ raise NotImplementedError(
+ "X86DemoBoard does not have an IO bus. "
+ "Use `has_io_bus()` to check this."
+ )
+
+ @overrides(X86Board)
+ def has_coherent_io(self) -> bool:
+ return self.is_fullsystem()
+
+ @overrides(X86Board)
+ def get_mem_side_coherent_io_port(self) -> Port:
+ if self.has_coherent_io():
+ return self.iobus.mem_side_ports
+ else:
+ raise NotImplementedError(
+ "X86DemoBoard does not have any I/O ports. Use has_coherent_io"
+ " to check this." 
+ )
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
index 23a7dcc8cb..ba9588c725 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
@@ -313,7 +313,7 @@ class RISCVMatchedBoard(
memory.set_memory_range(self.mem_ranges)
@overrides(AbstractSystemBoard)
- def _pre_instantiate(self):
+ def _pre_instantiate(self, full_system: Optional[bool] = None) -> None:
if self._fs:
if len(self._bootloader) > 0:
self.workload.bootloader_addr = 0x0
@@ -326,7 +326,7 @@
self.workload.kernel_addr = 0x0
self.workload.entry_point = 0x80000000
- self._connect_things()
+ super()._pre_instantiate(full_system=full_system)
def generate_device_tree(self, outdir: str) -> None:
"""Creates the ``dtb`` and ``dts`` files.
diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py
index 49dfac2bdf..ba74361915 100644
--- a/src/python/gem5/simulate/simulator.py
+++ b/src/python/gem5/simulate/simulator.py
@@ -117,6 +117,10 @@ class Simulator:
behavior. If not set, whether or not to run in FS mode will be
determined via the board's ``is_fullsystem()`` function.
+ **Warning: This parameter is deprecated. The board
+ determines if the simulation is full system or not.
+ This parameter will be removed in a future gem5
+ release.**
:param on_exit_event: An optional map to specify what to execute on
each exit event. There are three possibilities here: a generator,
a list of functions, or a single function.
@@ -291,6 +295,15 @@
"""
+ if full_system is not None:
+ warn(
+ "Setting the full_system parameter via the Simulator "
+ "constructor is deprecated and will be removed in future "
+ "releases of gem5. "
+ "The board determines if the simulation is full system or not "
+ "via its `is_fullsystem` method."
+ ) + self.set_max_ticks(max_ticks) if id: @@ -651,45 +664,12 @@ class Simulator: if not self._instantiated: # Before anything else we run the AbstractBoard's - # `_pre_instantiate` function. - self._board._pre_instantiate() - - root = Root( - full_system=( - self._full_system - if self._full_system is not None - else self._board.is_fullsystem() - ), - board=self._board, + # `_pre_instantiate` function. This returns the root object which + # is required for instantiation. + self._root = self._board._pre_instantiate( + full_system=self._full_system ) - # We take a copy of the Root in case it's required elsewhere - # (for example, in `get_stats()`). - self._root = root - - # The following is a bit of a hack. If a simulation is to use a KVM - # core then the `sim_quantum` value must be set. However, in the - # case of using a SwitchableProcessor the KVM cores may be - # switched out and therefore not accessible via `get_cores()`. - # This is the reason for the `isinstance` check. - # - # We cannot set the `sim_quantum` value in every simulation as - # setting it causes the scheduling of exits to be off by the - # `sim_quantum` value (something necessary if we are using KVM - # cores). Ergo we only set the value of KVM cores are present. - # - # There is still a bug here in that if the user is switching to and - # from KVM and non-KVM cores via the SwitchableProcessor then the - # scheduling of exits for the non-KVM cores will be incorrect. This - # will be fixed at a later date. - processor = self._board.processor - if any(core.is_kvm_core() for core in processor.get_cores()) or ( - isinstance(processor, SwitchableProcessor) - and any(core.is_kvm_core() for core in processor._all_cores()) - ): - m5.ticks.fixGlobalFrequency() - root.sim_quantum = m5.ticks.fromSeconds(0.001) - # m5.instantiate() takes a parameter specifying the path to the # checkpoint directory. If the parameter is None, no checkpoint # will be restored. 
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index ce098bea7d..72bf692b6b 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -1259,7 +1259,9 @@ class SimObject(metaclass=MetaSimObject): if not self._ccObject: # Make sure this object is in the configuration hierarchy if not self._parent and not isRoot(self): - raise RuntimeError("Attempt to instantiate orphan node") + raise RuntimeError( + f"Attempt to instantiate orphan node {self}" + ) # Cycles in the configuration hierarchy are not supported. This # will catch the resulting recursion and stop. self._ccObject = -1 diff --git a/src/sim/signal.hh b/src/sim/signal.hh index 233de07658..e89fbe0b9f 100644 --- a/src/sim/signal.hh +++ b/src/sim/signal.hh @@ -51,12 +51,11 @@ class SignalSinkPort : public Port SignalSourcePort *_source = nullptr; State _state = {}; - OnChangeFunc _onChange; protected: // if bypass_on_change is specified true, it will not call the _onChange // function. Only _state will be updated if needed. - void + virtual void set(const State &new_state, const bool bypass_on_change = false) { if (new_state == _state) @@ -67,6 +66,8 @@ class SignalSinkPort : public Port _onChange(_state); } + OnChangeFunc _onChange; + public: SignalSinkPort(const std::string &_name, PortID _id=InvalidPortID) : Port(_name, _id) diff --git a/src/systemc/ext/core/sc_export.hh b/src/systemc/ext/core/sc_export.hh index c93f01a9a3..913cd75a9d 100644 --- a/src/systemc/ext/core/sc_export.hh +++ b/src/systemc/ext/core/sc_export.hh @@ -70,6 +70,17 @@ class sc_export : public sc_export_base virtual const char *kind() const override { return "sc_export"; } +#pragma GCC diagnostic push +/** + * The following warning is disabled because the bind methods are overloaded + * in the derived class and the base class. In GCC v13+ this + * 'overloaded-virtual' warning is strict enough to trigger here (though the + * code is correct). 
+ * Please check section 9.3 of SystemC 2.3.1 release note for more details. + */ +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 13)) +#pragma GCC diagnostic ignored "-Woverloaded-virtual" +#endif void operator () (IF &i) { bind(i); } virtual void bind(IF &i) @@ -80,6 +91,7 @@ class sc_export : public sc_export_base } interface = &i; } +#pragma GCC diagnostic pop operator IF & () { if (!interface) diff --git a/src/systemc/ext/core/sc_port.hh b/src/systemc/ext/core/sc_port.hh index bd57553559..346eb430b1 100644 --- a/src/systemc/ext/core/sc_port.hh +++ b/src/systemc/ext/core/sc_port.hh @@ -114,19 +114,27 @@ class sc_port_base : public sc_object virtual sc_port_policy _portPolicy() const = 0; }; -// The overloaded virtual is intended in SystemC, so we'll disable the warning. -// Please check section 9.3 of SystemC 2.3.1 release note for more details. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Woverloaded-virtual" template class sc_port_b : public sc_port_base { public: +#pragma GCC diagnostic push +/** + * The following warning is disabled because the bind methods are overloaded + * in the derived class and the base class. In GCC v13+ this + * 'overloaded-virtual' warning is strict enough to trigger here (though the + * code is correct). + * Please check section 9.3 of SystemC 2.3.1 release note for more details. 
+ */ +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 13)) +#pragma GCC diagnostic ignored "-Woverloaded-virtual" +#endif void operator () (IF &i) { bind(i); } void operator () (sc_port_b &p) { bind(p); } virtual void bind(IF &i) { sc_port_base::bind(i); } virtual void bind(sc_port_b &p) { sc_port_base::bind(p); } +#pragma GCC diagnostic pop IF * operator -> () @@ -248,7 +256,6 @@ class sc_port_b : public sc_port_base sc_port_b(const sc_port_b &) {} sc_port_b &operator = (const sc_port_b &) { return *this; } }; -#pragma GCC diagnostic pop template class sc_port : public sc_port_b diff --git a/src/systemc/ext/tlm_core/2/sockets/initiator_socket.hh b/src/systemc/ext/tlm_core/2/sockets/initiator_socket.hh index 4f67b59237..d4cf3849e3 100644 --- a/src/systemc/ext/tlm_core/2/sockets/initiator_socket.hh +++ b/src/systemc/ext/tlm_core/2/sockets/initiator_socket.hh @@ -51,10 +51,6 @@ template class tlm_base_target_socket; -// The overloaded virtual is intended in SystemC, so we'll disable the warning. -// Please check section 9.3 of SystemC 2.3.1 release note for more details. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Woverloaded-virtual" template , typename BW_IF=tlm_bw_transport_if<>, int N=1, sc_core::sc_port_policy POL=sc_core::SC_ONE_OR_MORE_BOUND> @@ -100,6 +96,18 @@ class tlm_base_initiator_socket : // - Binds the port of the target socket to the export of the initiator // socket // + +#pragma GCC diagnostic push +/** + * The following warning is disabled because the bind methods are overloaded + * in the derived class and the base class. In GCC v13+ this + * 'overloaded-virtual' warning is strict enough to trigger here (though the + * code is correct). + * Please check section 9.3 of SystemC 2.3.1 release note for more details. 
+ */ +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 13)) +#pragma GCC diagnostic ignored "-Woverloaded-virtual" +#endif virtual void bind(base_target_socket_type &s) { @@ -132,6 +140,7 @@ class tlm_base_initiator_socket : // virtual void bind(bw_interface_type &ifs) { (get_base_export())(ifs); } void operator() (bw_interface_type &s) { bind(s); } +#pragma GCC diagnostic pop // Implementation of tlm_base_socket_if functions virtual sc_core::sc_port_base &get_port_base() { return *this; } @@ -174,7 +183,6 @@ class tlm_base_initiator_socket : protected: export_type m_export; }; -#pragma GCC diagnostic pop // // Convenience socket classes diff --git a/src/systemc/ext/tlm_core/2/sockets/target_socket.hh b/src/systemc/ext/tlm_core/2/sockets/target_socket.hh index 5da81d892e..a3d3026614 100644 --- a/src/systemc/ext/tlm_core/2/sockets/target_socket.hh +++ b/src/systemc/ext/tlm_core/2/sockets/target_socket.hh @@ -98,8 +98,9 @@ class tlm_base_target_socket : * in the derived class and the base class. In GCC v13+ this * 'overloaded-virtual' warning is strict enough to trigger here (though the * code is correct). + * Please check section 9.3 of SystemC 2.3.1 release note for more details. 
*/ -#if defined(__GNUC__) && (__GNUC__ >= 13) +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 13)) #pragma GCC diagnostic ignored "-Woverloaded-virtual" #endif virtual void diff --git a/tests/gem5/gpu/test_gpu_pannotia.py b/tests/gem5/gpu/test_gpu_pannotia.py index 0276e79009..9250c3c47b 100644 --- a/tests/gem5/gpu/test_gpu_pannotia.py +++ b/tests/gem5/gpu/test_gpu_pannotia.py @@ -27,11 +27,14 @@ import gzip import os.path import shutil +from pathlib import Path from urllib.request import urlretrieve from testlib import * -resource_path = joinpath(absdirpath(__file__), "..", "gpu-pannotia-resources") +resource_path = joinpath( + absdirpath(__file__), "..", "resources", "gpu-pannotia" +) binary_path = joinpath(resource_path, "pannotia-bins") dataset_path = joinpath(resource_path, "pannotia-datasets") @@ -52,15 +55,14 @@ if not os.path.isdir(resource_path): os.makedirs(dataset_path) for name in binary_links.keys(): + if Path(f"{binary_path}/{name}").exists(): + continue urlretrieve(binary_links[name], f"{binary_path}/{name}") for name in dataset_links.keys(): + if Path(f"{dataset_path}/{name}").exists(): + continue urlretrieve(dataset_links[name], f"{dataset_path}/{name}") - with gzip.open(f"{dataset_path}/USA-road-d.NY.gr.gz", "rb") as f_in: - with open(f"{dataset_path}/USA-road-d.NY.gr", "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - os.remove(f"{dataset_path}/USA-road-d.NY.gr.gz") - if len(os.listdir(binary_path)) < len(binary_links): testlib.log.test_log.warn( "One or more binaries for the Pannotia GPU tests are missing! Try deleting gpu-pannotia-resources and rerunning." 
diff --git a/tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py b/tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py index 8850a27c75..be6e6009e1 100644 --- a/tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py +++ b/tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py @@ -207,15 +207,15 @@ print("Running with ISA: " + processor.get_isa().name) print("Running with protocol: " + get_runtime_coherence_protocol().name) print() -root = Root(full_system=True, system=motherboard) +# Disable the gdb ports. Required for forking. +m5.disableAllListeners() +root = motherboard._pre_instantiate() # TODO: This of annoying. Is there a way to fix this to happen # automatically when running KVM? root.sim_quantum = int(1e9) -# Disable the gdb ports. Required for forking. -m5.disableAllListeners() -motherboard._pre_instantiate() + m5.instantiate() # Simulate the inital boot with the starting KVM cpu diff --git a/tests/gem5/learning_gem5/ref/test b/tests/gem5/learning_gem5/ref/test index 309ac2fa40..1e83a06f97 100644 --- a/tests/gem5/learning_gem5/ref/test +++ b/tests/gem5/learning_gem5/ref/test @@ -1,3 +1,3 @@ Global frequency set at 1000000000 ticks per second Beginning simulation! 
-Exiting @ tick 9981 because Ruby Tester completed +Exiting @ tick 9831 because Ruby Tester completed diff --git a/tests/gem5/replacement_policies/configs/run_replacement_policy.py b/tests/gem5/replacement_policies/configs/run_replacement_policy.py index 8f52a061f6..f7ecdb71de 100644 --- a/tests/gem5/replacement_policies/configs/run_replacement_policy.py +++ b/tests/gem5/replacement_policies/configs/run_replacement_policy.py @@ -83,9 +83,8 @@ motherboard = TestBoard( memory=memory, cache_hierarchy=cache_hierarchy, ) -root = Root(full_system=False, system=motherboard) -motherboard._pre_instantiate() +root = motherboard._pre_instantiate() m5.instantiate() generator.start_traffic() diff --git a/tests/gem5/replacement_policies/run_replacement_policy.py b/tests/gem5/replacement_policies/run_replacement_policy.py index 8f52a061f6..f7ecdb71de 100644 --- a/tests/gem5/replacement_policies/run_replacement_policy.py +++ b/tests/gem5/replacement_policies/run_replacement_policy.py @@ -83,9 +83,8 @@ motherboard = TestBoard( memory=memory, cache_hierarchy=cache_hierarchy, ) -root = Root(full_system=False, system=motherboard) -motherboard._pre_instantiate() +root = motherboard._pre_instantiate() m5.instantiate() generator.start_traffic() diff --git a/tests/gem5/se_mode/rvv_intrinsic_tests/test.py b/tests/gem5/se_mode/rvv_intrinsic_tests/test.py new file mode 100644 index 0000000000..e20018ba60 --- /dev/null +++ b/tests/gem5/se_mode/rvv_intrinsic_tests/test.py @@ -0,0 +1,63 @@ +# Copyright (c) 2024 Barcelona Supercomputing Center +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import re +import sys + +from testlib import * + +resources = [ + "rvv-branch", + "rvv-index", + "rvv-matmul", + "rvv-memcpy", + "rvv-reduce", + "rvv-saxpy", + "rvv-sgemm", + "rvv-strcmp", + "rvv-strcpy", + "rvv-strlen", + "rvv-strlen-fault", + "rvv-strncpy", +] + +vlens = [2**x for x in range(7, 15)] + +for resource in resources: + out_verifier = verifier.MatchRegex(re.compile(f"^.*{resource}: pass$")) + + for vlen in vlens: + gem5_verify_config( + name=f"test-riscv-{resource}-vlen_{vlen}-O3-se-mode", + fixtures=(), + verifiers=(out_verifier,), + config=f"{config.base_dir}/configs/example/gem5_library/riscv-rvv-example.py", + config_args=[resource, f"--vlen={vlen}"], + valid_isas=(constants.all_compiled_tag,), + length=constants.quick_tag, + ) diff --git a/tests/gem5/stats/configs/pystat_vector2d_check.py b/tests/gem5/stats/configs/pystat_vector2d_check.py index 617463e56f..909de12232 100644 --- a/tests/gem5/stats/configs/pystat_vector2d_check.py +++ b/tests/gem5/stats/configs/pystat_vector2d_check.py @@ -138,9 +138,11 @@ for x in range(args.num_vectors): vectors[x_index] = { "type": "Vector", - "description": stat_tester.subdescs[x] - if x in stat_tester.subdescs - else stat_tester.description, + "description": ( + stat_tester.subdescs[x] + if x in stat_tester.subdescs + else stat_tester.description + ), "value": vector, } diff --git a/tests/gem5/traffic_gen/configs/simple_traffic_run.py b/tests/gem5/traffic_gen/configs/simple_traffic_run.py index 3a850b497d..7c264cefe9 100644 --- a/tests/gem5/traffic_gen/configs/simple_traffic_run.py +++ b/tests/gem5/traffic_gen/configs/simple_traffic_run.py @@ -202,9 +202,7 @@ motherboard = TestBoard( cache_hierarchy=cache_hierarchy, ) -root = Root(full_system=False, system=motherboard) - -motherboard._pre_instantiate() +root = motherboard._pre_instantiate() m5.instantiate() generator.start_traffic() diff --git a/util/dockerfiles/docker-bake.hcl b/util/dockerfiles/docker-bake.hcl index 05f3b4c94b..3517894684 
100644 --- a/util/dockerfiles/docker-bake.hcl +++ b/util/dockerfiles/docker-bake.hcl @@ -125,7 +125,8 @@ group "gcc-compilers" { "gcc-version-10", "gcc-version-11", "gcc-version-12", - "gcc-version-13" + "gcc-version-13", + "gcc-version-14" ] } @@ -169,6 +170,16 @@ target "gcc-version-13" { tags = ["${IMAGE_URI}/gcc-version-13:${TAG}"] } +target "gcc-version-14" { + inherits = ["common"] + annotations = ["index,manifest:org.opencontainers.image.description=An image with all dependencies for building gem5 with a GCC v14 compiler."] + args = { + version = "14" + } + context = "gcc-compiler" + tags = ["${IMAGE_URI}/gcc-version-14:${TAG}"] +} + group "ubuntu-releases" { targets=[ "ubuntu-24-04_all-dependencies", diff --git a/util/dockerfiles/gcc-compiler/Dockerfile b/util/dockerfiles/gcc-compiler/Dockerfile index f36130ebff..8fd5032113 100644 --- a/util/dockerfiles/gcc-compiler/Dockerfile +++ b/util/dockerfiles/gcc-compiler/Dockerfile @@ -3,7 +3,7 @@ FROM ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest LABEL org.opencontainers.image.source=https://github.com/gem5/gem5 LABEL org.opencontainers.image.licenses=BSD-3-Clause -ARG version=13 # Version of GCC to install in this image. Default is 13. +ARG version=14 # Version of GCC to install in this image. Default is 14. 
RUN apt -y update && \ apt -y install gcc-${version} g++-${version} && \ diff --git a/util/minorview/model.py b/util/minorview/model.py index 91979825c3..d84680fcd3 100644 --- a/util/minorview/model.py +++ b/util/minorview/model.py @@ -374,9 +374,9 @@ class TwoDColours(ColourPattern): for index, value in parsed: try: - array[index % strips][ - index / strips - ] = special_view_decoder(elemClass)(value) + array[index % strips][index / strips] = ( + special_view_decoder(elemClass)(value) + ) except: print( "Element out of range strips: %d," @@ -912,9 +912,9 @@ class BlobModel: blobs = self.unitNameToBlobs.get(unit, []) for blob in blobs: if blob.visualDecoder is not None: - event.visuals[ - blob.picChar - ] = blob.visualDecoder(pairs) + event.visuals[blob.picChar] = ( + blob.visualDecoder(pairs) + ) self.add_unit_event(event) last_time_lines[unit] = rest