From 866b51a1ccb01ce455dca8b4220cbff68c35e1b6 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 4 Oct 2024 06:12:13 -0700 Subject: [PATCH 1/2] misc,tests: Increase Weekly GPU test timeout The Weekly GPU tests are failing due to a timeout. I found the testing timeout was set to 5 hours; we have frequently been close to reaching this, and have recently changed the tests enough to consistently go over. The main two things that appear to have caused this are: 1. Moving the VEGA_X86 compilation into the same step as the running of the tests. 2. Reducing the number of threads per GitHub Actions runner, thus slowing job execution. In addition, we've added more tests to this weekly GPU suite, though I don't believe we have got to running these tests yet. The timeout appears to have always been triggered before this. This PR increases the timeout to 3 days and moves the compilation into a separate step. --- .github/workflows/weekly-tests.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/weekly-tests.yaml b/.github/workflows/weekly-tests.yaml index f63b2aa080..7ada70fddb 100644 --- a/.github/workflows/weekly-tests.yaml +++ b/.github/workflows/weekly-tests.yaml @@ -48,7 +48,7 @@ jobs: gpu-tests: runs-on: [self-hosted, linux, x64] container: ghcr.io/gem5/gcn-gpu:latest - timeout-minutes: 300 + timeout-minutes: 4320 # 3 days steps: - uses: actions/checkout@v4 @@ -63,9 +63,13 @@ jobs: restore-keys: | testlib-build-vega + - name: Build VEGA_X86/gem5.opt + working-directory: ${{ github.workspace }} + run: scons build/VEGA_X86/gem5.opt -j $(nproc) + - name: Run Testlib GPU Tests working-directory: ${{ github.workspace }}/tests - run: ./main.py run --length=very-long -vvv -j $(nproc) -t $(nproc) --host gcn_gpu gem5/gpu + run: ./main.py run --length=very-long -vvv --skip-build -t $(nproc) --host gcn_gpu gem5/gpu - name: Upload results if: success() || failure() From d49d0272ffc47e0a24ec99970fdc6fe2c5044eee Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" Date: Fri, 4 Oct 2024 07:36:46 -0700 Subject: [PATCH 2/2] misc,tests: Increase Daily GPU test timeout --- .github/workflows/daily-tests.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/daily-tests.yaml b/.github/workflows/daily-tests.yaml index f36e722b8f..54711ad63d 100644 --- a/.github/workflows/daily-tests.yaml +++ b/.github/workflows/daily-tests.yaml @@ -112,7 +112,7 @@ jobs: gpu-tests: runs-on: [self-hosted, linux, x64] container: ghcr.io/gem5/gcn-gpu:latest - timeout-minutes: 300 + timeout-minutes: 720 # 12 hours steps: - uses: actions/checkout@v4 @@ -127,9 +127,13 @@ jobs: restore-keys: | testlib-build-vega + - name: Build VEGA_X86/gem5.opt + working-directory: ${{ github.workspace }} + run: scons build/VEGA_X86/gem5.opt -j $(nproc) + - name: Run Testlib GPU Tests working-directory: ${{ github.workspace }}/tests - run: ./main.py run --length=long -vvv -t $(nproc) -j $(nproc) --host gcn_gpu gem5/gpu + run: ./main.py run --length=long -vvv --skip-build -t $(nproc) --host gcn_gpu gem5/gpu - name: Upload results if: success() || failure()