gpu-compute,tests: Move GPU tests to testlib (#1270)

A new host tag `gcn_gpu` has been added. This allows for selection of
those GPU tests which depend upon the gcn-gpu docker image to run.

In addition to this, the square GPU test has been moved to the CI
tests. This ensures some GPU code is compiled and run on every PR.
This commit is contained in:
Bobby R. Bruce
2024-08-19 10:58:06 -07:00
committed by GitHub
parent b0d81ec8a2
commit f600db4a98
8 changed files with 255 additions and 97 deletions

View File

@@ -221,6 +221,32 @@ jobs:
path: tests/testing-results
retention-days: 30
# Builds VEGA_X86/gem5.opt inside the gcn-gpu container and runs the testlib
# tests found under gem5/gpu (relative to tests/) for the `gcn_gpu` host.
gpu-tests:
  runs-on: [self-hosted, linux, x64]
  container: ghcr.io/gem5/gcn-gpu:latest
  timeout-minutes: 180
  needs: [pre-commit, check-for-change-id]
  steps:
    - uses: actions/checkout@v4
    # Build the VEGA_X86/gem5.opt binary.
    - name: Build VEGA_X86/gem5.opt
      run: scons build/VEGA_X86/gem5.opt -j`nproc`
    # Run the GPU tests. The `id` is required: the upload step below reads
    # `steps.run-tests.outcome` to label the artifact, and that expression
    # is empty unless a step with this id exists.
    - name: Run Testlib GPU Tests
      id: run-tests
      working-directory: ${{ github.workspace }}/tests
      run: ./main.py run --skip-build -vvv -t $(nproc) --host gcn_gpu gem5/gpu
    # Upload the tests/testing-results directory as an artifact, whether the
    # previous steps succeeded or failed.
    - name: Upload results
      if: success() || failure()
      uses: actions/upload-artifact@v4
      with:
        name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gpu-status-${{ steps.run-tests.outcome }}-output
        path: tests/testing-results
        retention-days: 30
ci-tests:
# It is 'testlib-quick' which needs to pass for the pull request to be
# merged. This job is a dummy job that depends on all the other jobs.
@@ -232,5 +258,6 @@ jobs:
- unittests-all-opt
- pre-commit
- check-for-change-id
- gpu-tests
steps:
- run: echo "This job's status is ${{ job.status }}."

View File

@@ -80,6 +80,30 @@ jobs:
retention-days: 7
- run: echo "This job's status is ${{ job.status }}."
# GPU job: checks out `develop`, builds VEGA_X86/gem5.opt in the gcn-gpu
# container, then runs the `long` testlib tests selected for the `gcn_gpu`
# host and keeps the results for 7 days.
gpu-tests:
  timeout-minutes: 300
  container: ghcr.io/gem5/gcn-gpu:latest
  runs-on:
    - self-hosted
    - linux
    - x64
  steps:
    - uses: actions/checkout@v4
      with:
        ref: develop
    - name: Build VEGA_X86/gem5.opt
      run: scons build/VEGA_X86/gem5.opt -j`nproc`
    # --skip-build: the binary was already compiled by the step above.
    - name: Run Testlib GPU Tests
      run: ./main.py run --length=long --skip-build -vvv -t $(nproc) --host gcn_gpu
      working-directory: ${{ github.workspace }}/tests
    # Runs on success and on failure so failing runs still publish results.
    - name: Upload results
      uses: actions/upload-artifact@v4
      if: success() || failure()
      with:
        path: tests/testing-results
        retention-days: 7
        name: gpu_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
# This runs the SST-gem5 integration compilation and tests it with
# ext/sst/sst/example.py.
sst-test:
@@ -124,30 +148,6 @@ jobs:
- name: Continue gem5 within SystemC test
run: LD_LIBRARY_PATH=build/ARM/:/opt/systemc/lib-linux64/ ./util/systemc/gem5_within_systemc/gem5.opt.sc m5out/config.ini
# Runs the gem5 Nightly GPU tests: compiles VEGA_X86/gem5.opt, fetches each
# test program with util/obtain-resource.py, and runs it in SE mode through
# configs/example/apu_se.py.
gpu-tests:
  timeout-minutes: 720  # 12 hours
  container: ghcr.io/gem5/gcn-gpu:latest
  runs-on:
    - self-hosted
    - linux
    - x64
  steps:
    - uses: actions/checkout@v4
    - name: Compile build/VEGA_X86/gem5.opt
      run: scons build/VEGA_X86/gem5.opt -j $(nproc)
    # square
    - name: Get Square test-prog from gem5-resources
      run: build/VEGA_X86/gem5.opt util/obtain-resource.py square-gpu-test -p square
    - name: Run Square test with VEGA_X86/gem5.opt (SE mode)
      run: |
        mkdir -p tests/testing-results
        ./build/VEGA_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c square
    # allSyncPrims-1kernel: one download, two runs with different options.
    - name: Get allSyncPrims-1kernel from gem5-resources
      run: build/VEGA_X86/gem5.opt util/obtain-resource.py allSyncPrims-1kernel -p allSyncPrims-1kernel
    - name: Run allSyncPrims-1kernel sleepMutex test with VEGA_X86/gem5.opt (SE mode)
      run: ./build/VEGA_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16 4"
    - name: Run allSyncPrims-1kernel lfTreeBarrUsing test with VEGA_X86/gem5.opt (SE mode)
      run: ./build/VEGA_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq 10 16 4"
daily-tests:
# The dummy job is used to indicate whether the daily tests have
# passed or not. This can be used as status check for pull requests.

View File

@@ -8,73 +8,6 @@ on:
workflow_dispatch:
jobs:
# Compiles VEGA_X86/gem5.opt once and publishes it as an artifact so that
# jobs listing this one under `needs` can download the binary.
build-gcn-gpu-gem5:
  container: ghcr.io/gem5/gcn-gpu:latest
  runs-on:
    - self-hosted
    - linux
    - x64
  steps:
    - uses: actions/checkout@v4
    - name: Build gem5
      run: scons build/VEGA_X86/gem5.opt -j $(nproc) --ignore-style
    # The artifact name embeds the run number and attempt.
    - uses: actions/upload-artifact@v4
      with:
        path: build/VEGA_X86/gem5.opt
        retention-days: 5
        name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-vega
    - run: echo "This job's status is ${{ job.status }}."
# Downloads the VEGA_X86/gem5.opt artifact produced by build-gcn-gpu-gem5,
# obtains LULESH through gem5 Resources, and runs it via apu_se.py.
LULESH-tests:
  needs: build-gcn-gpu-gem5
  timeout-minutes: 480  # 8 hours
  container: ghcr.io/gem5/gcn-gpu:latest
  runs-on:
    - self-hosted
    - linux
    - x64
  steps:
    - uses: actions/checkout@v4
    - name: Download build/VEGA_X86/gem5.opt
      uses: actions/download-artifact@v4
      with:
        path: build/VEGA_X86
        name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-vega
    # `download-artifact` does not preserve permissions, so the executable
    # bit has to be restored by hand.
    - run: chmod u+x build/VEGA_X86/gem5.opt
    # Obtains the latest LULESH compatible with this version of gem5 via
    # gem5 Resources.
    - name: Obtain LULESH
      run: build/VEGA_X86/gem5.opt util/obtain-resource.py lulesh -p lulesh
    - name: Run LULUESH tests
      working-directory: ${{ github.workspace }}
      run: |
        build/VEGA_X86/gem5.opt configs/example/apu_se.py -n3 --mem-size=8GB --reg-alloc-policy=dynamic --dgpu --gfx-version=gfx900 -c \
        lulesh --options="0.01 2"
# Downloads the VEGA_X86/gem5.opt artifact produced by build-gcn-gpu-gem5,
# builds the x86 m5 utility, fetches the HACC ForceTreeTest program into
# hip/, and runs it via apu_se.py.
HACC-tests:
  needs: build-gcn-gpu-gem5
  timeout-minutes: 120  # 2 hours
  container: ghcr.io/gem5/gcn-gpu:latest
  runs-on:
    - self-hosted
    - linux
    - x64
  steps:
    - uses: actions/checkout@v4
    - uses: actions/download-artifact@v4
      with:
        path: build/VEGA_X86
        name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-vega
    # Restore the executable bit on the downloaded binary.
    - run: chmod u+x build/VEGA_X86/gem5.opt
    # Directory used as --benchmark-root in the final step.
    - name: make hip directory
      run: mkdir hip
    - name: Compile m5ops and x86
      working-directory: ${{ github.workspace }}/util/m5
      run: |
        export TERM=xterm-256color
        scons build/x86/out/m5
    - name: Download tests
      run: build/VEGA_X86/gem5.opt util/obtain-resource.py hacc-force-tree -p hip/ForceTreeTest
    - name: Run HACC tests
      run: |
        build/VEGA_X86/gem5.opt configs/example/apu_se.py -n3 --reg-alloc-policy=dynamic --benchmark-root=hip -c ForceTreeTest --options="0.5 0.1 64 0.1 1 N 12 rcb"
build-gem5:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/ubuntu-24.04_all-dependencies:latest
@@ -127,6 +60,30 @@ jobs:
retention-days: 7
- run: echo "This job's status is ${{ job.status }}."
# GPU job: checks out `develop`, builds VEGA_X86/gem5.opt in the gcn-gpu
# container, then runs the `very-long` testlib tests selected for the
# `gcn_gpu` host and keeps the results for 7 days.
gpu-tests:
  runs-on: [self-hosted, linux, x64]
  container: ghcr.io/gem5/gcn-gpu:latest
  timeout-minutes: 300
  steps:
    - uses: actions/checkout@v4
      with:
        ref: develop
    - name: Build VEGA_X86/gem5.opt
      run: scons build/VEGA_X86/gem5.opt -j`nproc`
    # --skip-build: the binary was already compiled by the step above.
    - name: Run Testlib GPU Tests
      working-directory: ${{ github.workspace }}/tests
      run: ./main.py run --length=very-long --skip-build -vvv -t $(nproc) --host gcn_gpu
    # Runs on success and on failure so failing runs still publish results.
    # Uses the `v4` major tag, matching the other upload-artifact steps,
    # instead of pinning the initial 4.0.0 release.
    - name: Upload results
      if: success() || failure()
      uses: actions/upload-artifact@v4
      with:
        name: gpu_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
        path: tests/testing-results
        retention-days: 7
dramsys-tests:
runs-on: [self-hosted, linux, x64]
container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
@@ -158,7 +115,6 @@ jobs:
needs:
- testlib-very-long-tests
- dramsys-tests
- LULESH-tests
- HACC-tests
- gpu-tests
steps:
- run: echo "This weekly tests have passed."

View File

@@ -40,6 +40,7 @@ from m5.objects import *
from m5.util import addToPath
from gem5.isas import ISA
from gem5.resources.resource import obtain_resource
from gem5.runtime import get_supported_isas
addToPath("../")
@@ -402,6 +403,22 @@ parser.add_argument(
help="cache replacement policy" "policy for sqc",
)
# ID of a resource to fetch before the simulation is set up. Left as None
# when the flag is absent, in which case nothing is downloaded.
parser.add_argument(
    "--download-resource",
    help="Download this resources prior to simulation",
    required=False,
    default=None,
    type=str,
)

# Destination directory passed to obtain_resource() as `resource_directory`.
parser.add_argument(
    "--download-dir",
    help="Download resources to this directory",
    required=False,
    default=None,
    type=str,
)
Ruby.define_options(parser)
# add TLB options to the parser
@@ -409,6 +426,17 @@ GPUTLBOptions.tlb_options(parser)
args = parser.parse_args()
# Optional pre-simulation download; skipped unless --download-resource was
# given on the command line.
if args.download_resource:
    resources = obtain_resource(
        resource_directory=args.download_dir,
        resource_id=args.download_resource,
    )

    # The returned path is deliberately discarded: per the original author's
    # note, it is this call that actually triggers the download.
    resources.get_local_path()
# The GPU cache coherence protocols only work with the backing store
args.access_backing_store = True

View File

@@ -267,6 +267,7 @@ def define_constants(constants):
constants.host_isa_tag_type = "host"
constants.host_x86_64_tag = "x86_64"
constants.host_arm_tag = "aarch64"
constants.host_gcn_gpu_tag = "gcn_gpu"
constants.kvm_tag = "kvm"
@@ -295,6 +296,7 @@ def define_constants(constants):
constants.host_isa_tag_type: (
constants.host_x86_64_tag,
constants.host_arm_tag,
constants.host_gcn_gpu_tag,
),
}

View File

@@ -0,0 +1,145 @@
# Copyright (c) 2024 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from testlib import *
# Directory the GPU test programs are downloaded to: the configured
# bin_path when one is set, otherwise ../resources relative to this file.
resource_path = (
    config.bin_path
    if config.bin_path
    else joinpath(absdirpath(__file__), "..", "resources")
)
# Quick test: runs the square GPU program through configs/example/apu_se.py.
# The script's --download-resource option fetches the program into
# `resource_path` before the simulation starts.
gem5_verify_config(
    name="gpu-apu-se-square",
    length=constants.quick_tag,
    valid_isas=(constants.vega_x86_tag,),
    # Only selected when the tests are run with the gcn_gpu host tag.
    valid_hosts=(constants.host_gcn_gpu_tag,),
    config=joinpath(config.base_dir, "configs", "example", "apu_se.py"),
    config_args=[
        # What to download and where to put it.
        "--download-resource",
        "square-gpu-test",
        "--download-dir",
        resource_path,
        # Simulation options.
        "--reg-alloc-policy=dynamic",
        "-n3",
        # Program to run: the freshly downloaded resource.
        "-c",
        joinpath(resource_path, "square-gpu-test"),
    ],
    fixtures=(),
    verifiers=(),
)
# Long test: runs allSyncPrims-1kernel with the sleepMutex options through
# configs/example/apu_se.py, downloading the program into `resource_path`.
gem5_verify_config(
    name="gpu-apu-se-sleepMutex",
    length=constants.long_tag,
    valid_isas=(constants.vega_x86_tag,),
    # Only selected when the tests are run with the gcn_gpu host tag.
    valid_hosts=(constants.host_gcn_gpu_tag,),
    config=joinpath(config.base_dir, "configs", "example", "apu_se.py"),
    config_args=[
        # What to download and where to put it.
        "--download-resource",
        "allSyncPrims-1kernel",
        "--download-dir",
        resource_path,
        # Simulation options.
        "--reg-alloc-policy=dynamic",
        "-n3",
        # Program to run and the arguments handed to it.
        "-c",
        joinpath(resource_path, "allSyncPrims-1kernel"),
        "--options",
        "'sleepMutex 10 16 4'",
    ],
    fixtures=(),
    verifiers=(),
)
# Long test: runs allSyncPrims-1kernel with the lfTreeBarrUniq options
# through configs/example/apu_se.py, downloading the program into
# `resource_path` first.
gem5_verify_config(
    name="gpu-apu-se-lftreebarruniq",
    fixtures=(),
    verifiers=(),
    config=joinpath(config.base_dir, "configs", "example", "apu_se.py"),
    config_args=[
        "--download-resource",
        "allSyncPrims-1kernel",
        "--download-dir",
        resource_path,
        "--reg-alloc-policy=dynamic",
        "-n3",
        "-c",
        joinpath(resource_path, "allSyncPrims-1kernel"),
        "--options",
        # Same arguments as the workflow step this test replaces
        # ("lfTreeBarrUniq 10 16 4"); the numeric triple was previously
        # listed twice.
        # NOTE(review): the value carries literal single quotes -- confirm
        # that whatever launches gem5 strips them or goes through a shell.
        "'lfTreeBarrUniq 10 16 4'",
    ],
    valid_isas=(constants.vega_x86_tag,),
    # Only selected when the tests are run with the gcn_gpu host tag.
    valid_hosts=(constants.host_gcn_gpu_tag,),
    length=constants.long_tag,
)
# Very-long test: runs LULESH through configs/example/apu_se.py with the
# --dgpu / gfx900 settings and 8GB of memory, downloading the program into
# `resource_path` first.
gem5_verify_config(
    name="gpu-apu-se-lulesh",
    length=constants.very_long_tag,
    valid_isas=(constants.vega_x86_tag,),
    # Only selected when the tests are run with the gcn_gpu host tag.
    valid_hosts=(constants.host_gcn_gpu_tag,),
    config=joinpath(config.base_dir, "configs", "example", "apu_se.py"),
    config_args=[
        # What to download and where to put it.
        "--download-resource",
        "lulesh",
        "--download-dir",
        resource_path,
        # Simulation options.
        "--reg-alloc-policy=dynamic",
        "-n3",
        "--mem-size=8GB",
        "--dgpu",
        "--gfx-version",
        "gfx900",
        # Program to run and the arguments handed to it.
        "-c",
        joinpath(resource_path, "lulesh"),
        "--options",
        "'0.01 2'",
    ],
    fixtures=(),
    verifiers=(),
)
# Very-long test: runs the hacc-force-tree program through
# configs/example/apu_se.py, downloading it into `resource_path` first.
gem5_verify_config(
    name="gpu-apu-se-hacc",
    length=constants.very_long_tag,
    valid_isas=(constants.vega_x86_tag,),
    # Only selected when the tests are run with the gcn_gpu host tag.
    valid_hosts=(constants.host_gcn_gpu_tag,),
    config=joinpath(config.base_dir, "configs", "example", "apu_se.py"),
    config_args=[
        # What to download and where to put it.
        "--download-resource",
        "hacc-force-tree",
        "--download-dir",
        resource_path,
        # Simulation options.
        "--reg-alloc-policy=dynamic",
        "-n3",
        # Program to run and the arguments handed to it.
        "-c",
        joinpath(resource_path, "hacc-force-tree"),
        "--options",
        "'0.5 0.1 64 0.1 1 N 12 rcb'",
    ],
    fixtures=(),
    verifiers=(),
)

View File

@@ -52,7 +52,7 @@ gem5_verify_config(
),
config_args=["--test-length", "50000", "--num-dmas", "0"],
valid_isas=(constants.vega_x86_tag,),
valid_hosts=constants.supported_hosts,
valid_hosts=(constants.host_gcn_gpu_tag,),
length=constants.long_tag,
)
@@ -79,6 +79,6 @@ gem5_verify_config(
),
config_args=["--test-length", "5000000", "--num-dmas", "0"],
valid_isas=(constants.vega_x86_tag,),
valid_hosts=constants.supported_hosts,
valid_hosts=(constants.host_gcn_gpu_tag,),
length=constants.long_tag,
)

View File

@@ -52,7 +52,7 @@ gem5_verify_config(
),
config_args=["--WB_L2", "--test-length", "50000", "--num-dmas", "0"],
valid_isas=(constants.vega_x86_tag,),
valid_hosts=constants.supported_hosts,
valid_hosts=(constants.host_gcn_gpu_tag,),
length=constants.long_tag,
)
@@ -79,6 +79,6 @@ gem5_verify_config(
),
config_args=["--WB_L2", "--test-length", "5000000", "--num-dmas", "0"],
valid_isas=(constants.vega_x86_tag,),
valid_hosts=constants.supported_hosts,
valid_hosts=(constants.host_gcn_gpu_tag,),
length=constants.long_tag,
)