From 4b9c4e1e17a80c9ceb4f9c7ccd1fa843bf2f8a25 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 16 Oct 2023 13:33:30 -0700 Subject: [PATCH 1/3] misc: Add `--all` to Runner `docker system prune` Without `--all` `docker system prune --force --volumes` will remove everything except non-dangling images. For an image to be considered dangling it must be untagged and not used by a container at that time. As most of the images we download are tagged (e.g., `:latest`) then most of our images are never removed without the inclusion of `--all` which will remove any image not currently used by a container. Images were starting to accumulate on runners. This will ensure they do not and are cleaned after each job run. Change-Id: I6d8441a11d22fdcf827e9c44422dbcf02cf600e0 --- util/github-runners-vagrant/action-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/github-runners-vagrant/action-run.sh b/util/github-runners-vagrant/action-run.sh index b802a7c739..b3c343e495 100755 --- a/util/github-runners-vagrant/action-run.sh +++ b/util/github-runners-vagrant/action-run.sh @@ -77,5 +77,5 @@ while true; do # 4. Cleanup the machine rm -rf "${WORK_DIR}" - docker system prune --force --volumes + docker system prune --force --volumes --all done From d18087af969a4263db6d6fb68d95169f8c99b79c Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 16 Oct 2023 21:14:09 -0700 Subject: [PATCH 2/3] util: Add halt-helper.sh This script helps us safely halt vagrant VMs. 
Change-Id: I2f2f36b93f82e07756d069334db178604a9915b3
---
 util/github-runners-vagrant/halt-helper.sh | 94 ++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100755 util/github-runners-vagrant/halt-helper.sh

diff --git a/util/github-runners-vagrant/halt-helper.sh b/util/github-runners-vagrant/halt-helper.sh
new file mode 100755
index 0000000000..9b4caf7e28
--- /dev/null
+++ b/util/github-runners-vagrant/halt-helper.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This script will try to safely halt each VM specified in the Vagrantfile.
+# A VM is skipped if it is currently running a job and returned to after
+# attempted shutdowns on the other VMs. This cycle continues indefinitely until
+# all the runners are shutdown.
+#
+# This script is useful as the VMs occasionally need to be halted to apply
+# patches and do maintenance. This script allows us to do this without
+# interrupting any jobs that may be running.
+
+while true; do
+    # This will list all the VMs still running. If there are no VMs running,
+    # we infer all have been shutdown and we exit the script. Otherwise, we
+    # iterate over the VMs in an attempt to shut them down.
+    #
+    # Only rows whose state column is exactly "running" are kept. Matching
+    # the word "running" anywhere in the output would also catch the help
+    # text `vagrant status` prints after the table (e.g., "The VM is
+    # running.") and treat the first word of that sentence as a VM name.
+    active=$(vagrant status | awk '$2 == "running" { print $1 }')
+    if [[ -z "${active}" ]]; then
+        echo "All VMs have been shutdown. Exiting."
+        exit 0
+    fi
+    echo "The following VMs are still running:"
+    echo "${active}"
+
+    # ${active} is deliberately unquoted: it holds one VM name per line and
+    # is split on whitespace to iterate over the names.
+    for virtm in ${active}; do
+        # This script will first list the contents of the "_diag" directory.
+        # This directory hosts the github action runner job logs. Each job
+        # is logged to a separate file in the directory. This script then
+        # sorts these files by name. The last file in this sorted list is the
+        # most recent file and therefore for the most recent job. We can sort
+        # them in this way because their filenames are appended with UTC
+        # timestamps.
+        #
+        # Only one job ever runs at a time on a GitHub runner so if there is
+        # any job running, it will be logged in the most recent file in the
+        # "_diag" directory.
+        #
+        # If the job has completed the last line in the file will contain the
+        # string "Job completed.". This script checks for this and, if found,
+        # we assume there are no jobs running and safely run `vagrant halt` to
+        # shutdown the VM. If the job is still running we print a message
+        # saying the job is still running and will return to it on the next
+        # iteration of the loop. Any non-zero exit status from the check,
+        # including a failure of `vagrant ssh` itself, is treated as "busy".
+        echo "Inspecting \"${virtm}\"..."
+        if vagrant ssh "${virtm}" -c 'ls _diag | sort | tail -1 | xargs -I % cat "_diag/%" | tail -1 | grep -q "Job completed"'; then
+            echo "${virtm} is Idle. Attempting shutdown"
+            # An explicit if/else is used rather than `a && b || c`, as the
+            # latter would also run `c` if `b` failed.
+            if vagrant halt "${virtm}"; then
+                echo "${virtm} successfully halted"
+            else
+                echo "${virtm} experience a failure halting"
+            fi
+        else
+            echo "${virtm} is Busy. Skipping for now."
+        fi
+    done
+    # Sleep here for 20 seconds just to ensure all the VMs have time
+    # to shutdown.
+    sleep 20
+done

From 3783afff5d5c4e2e4c7b1e3e846185900f06ab4d Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce"
Date: Mon, 16 Oct 2023 21:21:31 -0700
Subject: [PATCH 3/3] util: Enable KVM on VMs and ensure working in Runners

This patch:

1. Adds setup scripting to "provision_root.sh" to setup and enable KVM, for
the 'vagrant' user, for VMs which are capable of this.
2. Runs a check on each VM to see if KVM can be run successfully within a
docker container. If so, the GitHub Actions runner is given a 'kvm' label.
It is unknown at this time if GitHub Runners can utilize KVM but it is open
to their processes.

Change-Id: Idfcbb7bfa3e5b7cc47d29aea50fb1ebcafdb7acc --- util/github-runners-vagrant/Vagrantfile | 28 +++++++++++++++++-- util/github-runners-vagrant/provision_root.sh | 19 ++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/util/github-runners-vagrant/Vagrantfile b/util/github-runners-vagrant/Vagrantfile index 3446f703dd..0e505ba38b 100644 --- a/util/github-runners-vagrant/Vagrantfile +++ b/util/github-runners-vagrant/Vagrantfile @@ -58,8 +58,32 @@ Vagrant.configure("2") do |config| # Copy the "action-run.sh" script from the host to the VM. runner.vm.provision "file", source: "./action-run.sh", destination: "/tmp/action-run.sh" runner.vm.provision :shell, privileged: false, inline: "cp /tmp/action-run.sh ." - # Execute the actions-run.sh script on every boot. This configures the and starts the runner. - runner.vm.provision :shell, privileged: false, run: 'always', inline: "./action-run.sh #{PERSONAL_ACCESS_TOKEN} #{GITHUB_ORG} >> action-run.log 2>&1 &" + + # The following attempts to see if KVM can be used inside the docker + # container. + # + # Almost all github action jobs run within a docker container. Therefore + # to be compatible with KVM, KVM must be enabled inside the container. + # + # We use the existence of "kvm-works" in the VM home directory to + # indicate that KVM is working. It is created if the 'kvm-ok' command is + # successful. This is then passed to the action-run.sh script to indicate + # that the runner can be used for KVM via the `kvm` label. + runner.vm.provision :shell, privileged: false, run: 'always', inline: <<-SHELL + rm -f kvm-works + docker run --device /dev/kvm -v$(pwd):/work -w /work --rm ubuntu:22.04 bash -c "apt update -y && apt install -y cpu-checker && kvm-ok" + status=$? + if [[ ${status} == 0 ]]; then + echo >&1 "Success. KVM enabled." + echo "success" > kvm-works + else + echo >&2 "Failure. KVM not enabled." + fi + exit 0 + SHELL + # Execute the actions-run.sh script on every boot. 
This configures and starts the runner. + # Note the 'kvm' label is applied to this runner if the "kvm-works" file exists. See above. + runner.vm.provision :shell, privileged: false, run: 'always', inline: "./action-run.sh #{PERSONAL_ACCESS_TOKEN} #{GITHUB_ORG} $(if [ -f 'kvm-works' ]; then echo 'kvm'; fi) >> action-run.log 2>&1 &" end end end diff --git a/util/github-runners-vagrant/provision_root.sh b/util/github-runners-vagrant/provision_root.sh index 34c2d811de..d3e6bb574c 100644 --- a/util/github-runners-vagrant/provision_root.sh +++ b/util/github-runners-vagrant/provision_root.sh @@ -56,7 +56,8 @@ apt-get install -y \ apt-transport-https ca-certificates \ curl \ gnupg \ - lsb-release + lsb-release \ + cpu-checker # Install docker apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release @@ -70,6 +71,22 @@ apt-get install -y docker-ce docker-ce-cli containerd.io # work. usermod -aG docker vagrant +kvm-ok +kvm_ok_status=$? + +# `kvm-ok` will return an exit status of zero if the machine supports KVM, and non-zero +# otherwise. If the machine supports KVM, let's enable it. +if [[ ${kvm_ok_status} == 0 ]]; then + apt install -y qemu-kvm \ + virt-manager \ + libvirt-daemon-system virtinst \ + libvirt-clients bridge-utils && \ + sudo systemctl enable --now libvirtd && \ + sudo systemctl start libvirtd && \ + usermod -aG kvm vagrant && \ + usermod -aG libvirt vagrant +fi + # Cleanup apt-get autoremove -y