From f600db4a98d4aee4e9543d6e62e5aeeb53146126 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 19 Aug 2024 10:58:06 -0700 Subject: [PATCH] gpu-compute,tests: Move GPU tests to testlib (#1270) A new host tag `gcn_gpu` has been added. This allows for selection of those GPU tests which depend upon the gcn-gpu docker image to run. In addition to this, the square GPU test has been moved to the CI tests. This ensures some GPU code is compiled and run on every PR. --- .github/workflows/ci-tests.yaml | 27 ++++ .github/workflows/daily-tests.yaml | 48 +++---- .github/workflows/weekly-tests.yaml | 94 ++++--------- configs/example/apu_se.py | 28 ++++ ext/testlib/configuration.py | 2 + tests/gem5/gpu/test_gpu_apu_se.py | 145 ++++++++++++++++++++ tests/gem5/gpu/test_gpu_ruby_random.py | 4 +- tests/gem5/gpu/test_gpu_ruby_random_wbL2.py | 4 +- 8 files changed, 255 insertions(+), 97 deletions(-) create mode 100644 tests/gem5/gpu/test_gpu_apu_se.py diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index a1adfdb8ef..efaa94a7c4 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -221,6 +221,32 @@ jobs: path: tests/testing-results retention-days: 30 + gpu-tests: + runs-on: [self-hosted, linux, x64] + container: ghcr.io/gem5/gcn-gpu:latest + timeout-minutes: 180 + needs: [pre-commit, check-for-change-id] + steps: + - uses: actions/checkout@v4 + + # Build the VEGA_X86/gem5.opt binary. + - name: Build VEGA_X86/gem5.opt + run: scons build/VEGA_X86/gem5.opt -j`nproc` + + # Run the GPU tests. + - name: Run Testlib GPU Tests + working-directory: ${{ github.workspace }}/tests + run: ./main.py run --skip-build -vvv -t $(nproc) --host gcn_gpu gem5/gpu + + # Upload the tests/testing-results directory as an artifact. 
+ - name: Upload results + if: success() || failure() + uses: actions/upload-artifact@v4 + with: + name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gpu-status-${{ steps.run-tests.outcome }}-output + path: tests/testing-results + retention-days: 30 + ci-tests: # It is 'testlib-quick' which needs to pass for the pull request to be # merged. This job is a dummy job that depends on all the other jobs. @@ -232,5 +258,6 @@ jobs: - unittests-all-opt - pre-commit - check-for-change-id + - gpu-tests steps: - run: echo "This job's status is ${{ job.status }}." diff --git a/.github/workflows/daily-tests.yaml b/.github/workflows/daily-tests.yaml index 2ad0069a3d..45e9e58dd4 100644 --- a/.github/workflows/daily-tests.yaml +++ b/.github/workflows/daily-tests.yaml @@ -80,6 +80,30 @@ jobs: retention-days: 7 - run: echo "This job's status is ${{ job.status }}." + gpu-tests: + runs-on: [self-hosted, linux, x64] + container: ghcr.io/gem5/gcn-gpu:latest + timeout-minutes: 300 + + steps: + - uses: actions/checkout@v4 + with: + ref: develop + - name: Build VEGA_X86/gem5.opt + run: scons build/VEGA_X86/gem5.opt -j`nproc` + + - name: Run Testlib GPU Tests + working-directory: ${{ github.workspace }}/tests + run: ./main.py run --length=long --skip-build -vvv -t $(nproc) --host gcn_gpu + + - name: Upload results + if: success() || failure() + uses: actions/upload-artifact@v4 + with: + name: gpu_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}} + path: tests/testing-results + retention-days: 7 + # This runs the SST-gem5 integration compilation and tests it with # ext/sst/sst/example.py. sst-test: @@ -124,30 +148,6 @@ jobs: - name: Continue gem5 within SystemC test run: LD_LIBRARY_PATH=build/ARM/:/opt/systemc/lib-linux64/ ./util/systemc/gem5_within_systemc/gem5.opt.sc m5out/config.ini - # Runs the gem5 Nighyly GPU tests. 
- gpu-tests: - runs-on: [self-hosted, linux, x64] - container: ghcr.io/gem5/gcn-gpu:latest - timeout-minutes: 720 # 12 hours - - steps: - - uses: actions/checkout@v4 - - name: Compile build/VEGA_X86/gem5.opt - run: scons build/VEGA_X86/gem5.opt -j $(nproc) - - name: Get Square test-prog from gem5-resources - run: build/VEGA_X86/gem5.opt util/obtain-resource.py square-gpu-test -p square - - name: Run Square test with VEGA_X86/gem5.opt (SE mode) - run: | - mkdir -p tests/testing-results - ./build/VEGA_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c square - - name: Get allSyncPrims-1kernel from gem5-resources - run: build/VEGA_X86/gem5.opt util/obtain-resource.py allSyncPrims-1kernel -p allSyncPrims-1kernel - - name: Run allSyncPrims-1kernel sleepMutex test with VEGA_X86/gem5.opt (SE mode) - run: ./build/VEGA_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16 - 4" - - name: Run allSyncPrims-1kernel lfTreeBarrUsing test with VEGA_X86/gem5.opt (SE mode) - run: ./build/VEGA_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq - 10 16 4" daily-tests: # The dummy job is used to indicate whether the daily tests have # passed or not. This can be used as status check for pull requests. 
diff --git a/.github/workflows/weekly-tests.yaml b/.github/workflows/weekly-tests.yaml index 1d4161731a..e80761e986 100644 --- a/.github/workflows/weekly-tests.yaml +++ b/.github/workflows/weekly-tests.yaml @@ -8,73 +8,6 @@ on: workflow_dispatch: jobs: - build-gcn-gpu-gem5: - runs-on: [self-hosted, linux, x64] - container: ghcr.io/gem5/gcn-gpu:latest - steps: - - uses: actions/checkout@v4 - - name: Build gem5 - run: scons build/VEGA_X86/gem5.opt -j $(nproc) --ignore-style - - uses: actions/upload-artifact@v4 - with: - name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-vega - path: build/VEGA_X86/gem5.opt - retention-days: 5 - - run: echo "This job's status is ${{ job.status }}." - - LULESH-tests: - runs-on: [self-hosted, linux, x64] - container: ghcr.io/gem5/gcn-gpu:latest - needs: build-gcn-gpu-gem5 - timeout-minutes: 480 # 8 hours - steps: - - uses: actions/checkout@v4 - - - name: Download build/VEGA_X86/gem5.opt - uses: actions/download-artifact@v4 - with: - name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-vega - path: build/VEGA_X86 - # `download-artifact` does not preserve permissions so we need to set - # them again. - - run: chmod u+x build/VEGA_X86/gem5.opt - - - name: Obtain LULESH - # Obtains the latest LULESH compatible with this version of gem5 via - # gem5 Resources. 
- run: build/VEGA_X86/gem5.opt util/obtain-resource.py lulesh -p lulesh - - - name: Run LULUESH tests - working-directory: ${{ github.workspace }} - run: | - build/VEGA_X86/gem5.opt configs/example/apu_se.py -n3 --mem-size=8GB --reg-alloc-policy=dynamic --dgpu --gfx-version=gfx900 -c \ - lulesh --options="0.01 2" - - HACC-tests: - runs-on: [self-hosted, linux, x64] - container: ghcr.io/gem5/gcn-gpu:latest - needs: build-gcn-gpu-gem5 - timeout-minutes: 120 # 2 hours - steps: - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - with: - name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-vega - path: build/VEGA_X86 - - run: chmod u+x build/VEGA_X86/gem5.opt - - name: make hip directory - run: mkdir hip - - name: Compile m5ops and x86 - working-directory: ${{ github.workspace }}/util/m5 - run: | - export TERM=xterm-256color - scons build/x86/out/m5 - - name: Download tests - run: build/VEGA_X86/gem5.opt util/obtain-resource.py hacc-force-tree -p hip/ForceTreeTest - - name: Run HACC tests - run: | - build/VEGA_X86/gem5.opt configs/example/apu_se.py -n3 --reg-alloc-policy=dynamic --benchmark-root=hip -c ForceTreeTest --options="0.5 0.1 64 0.1 1 N 12 rcb" - build-gem5: runs-on: [self-hosted, linux, x64] container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest @@ -127,6 +60,30 @@ jobs: retention-days: 7 - run: echo "This job's status is ${{ job.status }}." 
+ gpu-tests: + runs-on: [self-hosted, linux, x64] + container: ghcr.io/gem5/gcn-gpu:latest + timeout-minutes: 300 + + steps: + - uses: actions/checkout@v4 + with: + ref: develop + - name: Build VEGA_X86/gem5.opt + run: scons build/VEGA_X86/gem5.opt -j`nproc` + + - name: Run Testlib GPU Tests + working-directory: ${{ github.workspace }}/tests + run: ./main.py run --length=very-long --skip-build -vvv -t $(nproc) --host gcn_gpu + + - name: Upload results + if: success() || failure() + uses: actions/upload-artifact@v4.0.0 + with: + name: gpu_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}} + path: tests/testing-results + retention-days: 7 + dramsys-tests: runs-on: [self-hosted, linux, x64] container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest @@ -158,7 +115,6 @@ jobs: needs: - testlib-very-long-tests - dramsys-tests - - LULESH-tests - - HACC-tests + - gpu-tests steps: - run: echo "This weekly tests have passed." diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index eb7c625cad..fa7a35381d 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -40,6 +40,7 @@ from m5.objects import * from m5.util import addToPath from gem5.isas import ISA +from gem5.resources.resource import obtain_resource from gem5.runtime import get_supported_isas addToPath("../") @@ -402,6 +403,22 @@ parser.add_argument( help="cache replacement policy" "policy for sqc", ) +parser.add_argument( + "--download-resource", + type=str, + default=None, + required=False, + help="Download this resources prior to simulation", +) + +parser.add_argument( + "--download-dir", + type=str, + default=None, + required=False, + help="Download resources to this directory", +) + Ruby.define_options(parser) # add TLB options to the parser @@ -409,6 +426,17 @@ GPUTLBOptions.tlb_options(parser) args = parser.parse_args() +# Get the resource if specified. 
+if args.download_resource: + resources = obtain_resource( + resource_id=args.download_resource, + resource_directory=args.download_dir, + ) + + # This line seems pointless but is actually what triggers the download. + resources.get_local_path() + + # The GPU cache coherence protocols only work with the backing store args.access_backing_store = True diff --git a/ext/testlib/configuration.py b/ext/testlib/configuration.py index cebf493add..53d4476aec 100644 --- a/ext/testlib/configuration.py +++ b/ext/testlib/configuration.py @@ -267,6 +267,7 @@ def define_constants(constants): constants.host_isa_tag_type = "host" constants.host_x86_64_tag = "x86_64" constants.host_arm_tag = "aarch64" + constants.host_gcn_gpu_tag = "gcn_gpu" constants.kvm_tag = "kvm" @@ -295,6 +296,7 @@ def define_constants(constants): constants.host_isa_tag_type: ( constants.host_x86_64_tag, constants.host_arm_tag, + constants.host_gcn_gpu_tag, ), } diff --git a/tests/gem5/gpu/test_gpu_apu_se.py b/tests/gem5/gpu/test_gpu_apu_se.py new file mode 100644 index 0000000000..72a416bb23 --- /dev/null +++ b/tests/gem5/gpu/test_gpu_apu_se.py @@ -0,0 +1,145 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +from testlib import * + +if config.bin_path: + resource_path = config.bin_path +else: + resource_path = joinpath(absdirpath(__file__), "..", "resources") + +gem5_verify_config( + name="gpu-apu-se-square", + fixtures=(), + verifiers=(), + config=joinpath(config.base_dir, "configs", "example", "apu_se.py"), + config_args=[ + "--download-resource", + "square-gpu-test", + "--download-dir", + resource_path, + "--reg-alloc-policy=dynamic", + "-n3", + "-c", + joinpath(resource_path, "square-gpu-test"), + ], + valid_isas=(constants.vega_x86_tag,), + valid_hosts=(constants.host_gcn_gpu_tag,), + length=constants.quick_tag, +) + +gem5_verify_config( + name="gpu-apu-se-sleepMutex", + fixtures=(), + verifiers=(), + config=joinpath(config.base_dir, "configs", "example", "apu_se.py"), + config_args=[ + "--download-resource", + "allSyncPrims-1kernel", + "--download-dir", + resource_path, + "--reg-alloc-policy=dynamic", + "-n3", + "-c", + joinpath(resource_path, "allSyncPrims-1kernel"), + "--options", + "'sleepMutex 10 16 4'", + ], + valid_isas=(constants.vega_x86_tag,), + valid_hosts=(constants.host_gcn_gpu_tag,), + length=constants.long_tag, +) + +gem5_verify_config( + 
name="gpu-apu-se-lftreebarruniq", + fixtures=(), + verifiers=(), + config=joinpath(config.base_dir, "configs", "example", "apu_se.py"), + config_args=[ + "--download-resource", + "allSyncPrims-1kernel", + "--download-dir", + resource_path, + "--reg-alloc-policy=dynamic", + "-n3", + "-c", + joinpath(resource_path, "allSyncPrims-1kernel"), + "--options", + "'lfTreeBarrUniq 10 16 4 10 16 4'", + ], + valid_isas=(constants.vega_x86_tag,), + valid_hosts=(constants.host_gcn_gpu_tag,), + length=constants.long_tag, +) + +gem5_verify_config( + name="gpu-apu-se-lulesh", + fixtures=(), + verifiers=(), + config=joinpath(config.base_dir, "configs", "example", "apu_se.py"), + config_args=[ + "--download-resource", + "lulesh", + "--download-dir", + resource_path, + "--reg-alloc-policy=dynamic", + "-n3", + "--mem-size=8GB", + "--dgpu", + "--gfx-version", + "gfx900", + "-c", + joinpath(resource_path, "lulesh"), + "--options", + "'0.01 2'", + ], + valid_isas=(constants.vega_x86_tag,), + valid_hosts=(constants.host_gcn_gpu_tag,), + length=constants.very_long_tag, +) + +gem5_verify_config( + name="gpu-apu-se-hacc", + fixtures=(), + verifiers=(), + config=joinpath(config.base_dir, "configs", "example", "apu_se.py"), + config_args=[ + "--download-resource", + "hacc-force-tree", + "--download-dir", + resource_path, + "--reg-alloc-policy=dynamic", + "-n3", + "-c", + joinpath(resource_path, "hacc-force-tree"), + "--options", + "'0.5 0.1 64 0.1 1 N 12 rcb'", + ], + valid_isas=(constants.vega_x86_tag,), + valid_hosts=(constants.host_gcn_gpu_tag,), + length=constants.very_long_tag, +) diff --git a/tests/gem5/gpu/test_gpu_ruby_random.py b/tests/gem5/gpu/test_gpu_ruby_random.py index e29ecf24b1..7cea412ff8 100644 --- a/tests/gem5/gpu/test_gpu_ruby_random.py +++ b/tests/gem5/gpu/test_gpu_ruby_random.py @@ -52,7 +52,7 @@ gem5_verify_config( ), config_args=["--test-length", "50000", "--num-dmas", "0"], valid_isas=(constants.vega_x86_tag,), - valid_hosts=constants.supported_hosts, + 
valid_hosts=(constants.host_gcn_gpu_tag,), length=constants.long_tag, ) @@ -79,6 +79,6 @@ gem5_verify_config( ), config_args=["--test-length", "5000000", "--num-dmas", "0"], valid_isas=(constants.vega_x86_tag,), - valid_hosts=constants.supported_hosts, + valid_hosts=(constants.host_gcn_gpu_tag,), length=constants.long_tag, ) diff --git a/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py b/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py index 9af4e65a11..4e4074ec15 100644 --- a/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py +++ b/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py @@ -52,7 +52,7 @@ gem5_verify_config( ), config_args=["--WB_L2", "--test-length", "50000", "--num-dmas", "0"], valid_isas=(constants.vega_x86_tag,), - valid_hosts=constants.supported_hosts, + valid_hosts=(constants.host_gcn_gpu_tag,), length=constants.long_tag, ) @@ -79,6 +79,6 @@ gem5_verify_config( ), config_args=["--WB_L2", "--test-length", "5000000", "--num-dmas", "0"], valid_isas=(constants.vega_x86_tag,), - valid_hosts=constants.supported_hosts, + valid_hosts=(constants.host_gcn_gpu_tag,), length=constants.long_tag, )