util: Improve GitHub Action runners: Enable KVM; Better Cleanup; Better Tooling (#470)
This PR adds the following to the GitHub Actions runners: 1. Enables KVM to be run within Docker containers within the VMs, if permitted. Note that any Docker container wanting to use KVM must be created with the `--device /dev/kvm` argument. This may make it hard or impossible to utilize with GitHub Actions. Nonetheless it is enabled. 2. Improves the docker prune step (a cleanup step carried out after each job) so it now removes all the Docker images in the VM. 3. Adds the "halt-helper.sh" script which automatically, and safely, halts (shuts down) all the VMs so maintenance tasks can be undertaken.
This commit is contained in:
28
util/github-runners-vagrant/Vagrantfile
vendored
28
util/github-runners-vagrant/Vagrantfile
vendored
@@ -58,8 +58,32 @@ Vagrant.configure("2") do |config|
|
||||
# Copy the "action-run.sh" script from the host to the VM.
|
||||
runner.vm.provision "file", source: "./action-run.sh", destination: "/tmp/action-run.sh"
|
||||
runner.vm.provision :shell, privileged: false, inline: "cp /tmp/action-run.sh ."
|
||||
# Execute the actions-run.sh script on every boot. This configures the and starts the runner.
|
||||
runner.vm.provision :shell, privileged: false, run: 'always', inline: "./action-run.sh #{PERSONAL_ACCESS_TOKEN} #{GITHUB_ORG} >> action-run.log 2>&1 &"
|
||||
|
||||
# The following attempts to see if KVM can be used inside the docker
|
||||
# container.
|
||||
#
|
||||
# Almost all github action jobs run within a docker container. Therefore
|
||||
# to be compatible with KVM, KVM must be enabled inside the docker.
|
||||
#
|
||||
# The existence of "kvm-works" in the VM home directory is how we
|
||||
# indicate that KVM is working. It is created if the 'kvm-ok' command is
|
||||
# successful. This is then passed to the action-run.sh script to indicate
|
||||
# that the runner can be used for KVM via the `kvm` label.
|
||||
runner.vm.provision :shell, privileged: false, run: 'always', inline: <<-SHELL
|
||||
rm -f kvm-works
|
||||
docker run --device /dev/kvm -v$(pwd):/work -w /work --rm ubuntu:22.04 bash -c "apt update -y && apt install -y cpu-checker && kvm-ok"
|
||||
status=$?
|
||||
if [[ ${status} == 0 ]]; then
|
||||
echo >&1 "Success. KVM enabled."
|
||||
echo "success" > kvm-works
|
||||
else
|
||||
echo >&2 "Failure. KVM not enabled."
|
||||
fi
|
||||
exit 0
|
||||
SHELL
|
||||
# Execute the actions-run.sh script on every boot. This configures and starts the runner.
|
||||
# Note the 'kvm' label is applied to this runner if the "kvm-works" file exists. See above.
|
||||
runner.vm.provision :shell, privileged: false, run: 'always', inline: "./action-run.sh #{PERSONAL_ACCESS_TOKEN} #{GITHUB_ORG} $(if [ -f 'kvm-works' ]; then echo 'kvm'; fi) >> action-run.log 2>&1 &"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -77,5 +77,5 @@ while true; do
|
||||
|
||||
# 4. Cleanup the machine
|
||||
rm -rf "${WORK_DIR}"
|
||||
docker system prune --force --volumes
|
||||
docker system prune --force --volumes --all
|
||||
done
|
||||
|
||||
83
util/github-runners-vagrant/halt-helper.sh
Executable file
83
util/github-runners-vagrant/halt-helper.sh
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (c) 2023 The Regents of the University of California
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# This script will try to safely halt each VM specified in the Vagrantfile.
|
||||
# A VM is skipped if it is currently running a job and returned to after
|
||||
# attempted shutdowns on the other VMs. This cycle continues indefinitely until
|
||||
# all the runners are shutdown.
|
||||
#
|
||||
# This script is useful as the VMs occasionally need to be halted to apply
|
||||
# patches and do maintenance. This script allows us to do this without
|
||||
# interrupting any jobs that may be running.
|
||||
|
||||
# Repeatedly sweep over the running VMs, halting every idle one, until no VM
# reports the "running" state any more.
while true; do
  # List the VMs that are still running. Only rows of the `vagrant status`
  # table whose state column (second field) is exactly "running" are
  # considered, so any explanatory prose printed around the table that
  # happens to contain the word "running" is never mistaken for a VM name.
  # If there are no VMs running, we infer all have been shutdown and we exit
  # the script. Otherwise, we iterate over the VMs in an attempt to shut them
  # down.
  active=$(vagrant status | awk '$2 == "running" { print $1 }')
  if [[ -z "${active}" ]]; then
    echo "All VMs have been shutdown. Exiting."
    exit 0
  fi
  echo "The following VMs are still running:"
  echo "${active}"

  # Word-splitting of ${active} is intentional here: it holds one VM name per
  # line and the names are iterated one at a time.
  for virtm in ${active}; do
    # The remote command first lists the contents of the "_diag" directory.
    # This directory hosts the GitHub Actions runner job logs. Each job is
    # logged to a separate file in the directory. The remote command then
    # sorts these files by name. The last file in this sorted list is the
    # most recent file and therefore the log of the most recent job. We can
    # sort them in this way because their filenames are appended with UTC
    # timestamps.
    #
    # Only one job ever runs at a time on a GitHub runner, so if there is
    # any job running, it is being logged in the most recent file in the
    # "_diag" directory.
    #
    # If the job has completed, the last line in the file will contain the
    # string "Job completed". We check for this and, if found, assume there
    # are no jobs running and safely run `vagrant halt` to shutdown the VM.
    # If the job is still running (or the check itself fails) we print a
    # message saying the VM is busy and return to it on the next iteration
    # of the outer loop.
    echo "Inspecting \"${virtm}\"..."
    if vagrant ssh "${virtm}" -c 'ls _diag | sort | tail -1 | xargs -I % cat "_diag/%" | tail -1 | grep -q "Job completed"'; then
      echo "${virtm} is Idle. Attempting shutdown"
      # Explicit if/else: with `a && b || c` the failure message would also
      # be printed if the success `echo` itself failed.
      if vagrant halt "${virtm}"; then
        echo "${virtm} successfully halted"
      else
        echo "${virtm} experience a failure halting"
      fi
    else
      echo "${virtm} is Busy. Skipping for now."
    fi
  done

  # Sleep here for 20 seconds just to ensure all the VMs have time
  # to shutdown.
  sleep 20
done
|
||||
@@ -56,7 +56,8 @@ apt-get install -y \
|
||||
apt-transport-https ca-certificates \
|
||||
curl \
|
||||
gnupg \
|
||||
lsb-release
|
||||
lsb-release \
|
||||
cpu-checker
|
||||
|
||||
# Install docker
|
||||
apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release
|
||||
@@ -70,6 +71,22 @@ apt-get install -y docker-ce docker-ce-cli containerd.io
|
||||
# work.
|
||||
usermod -aG docker vagrant
|
||||
|
||||
kvm-ok
|
||||
kvm_ok_status=$?
|
||||
|
||||
# `kvm-ok` will return exit status zero if the machine supports KVM, and non-zero
|
||||
# otherwise. If the machine supports KVM, let's enable it.
|
||||
if [[ ${kvm_ok_status} == 0 ]]; then
|
||||
apt install -y qemu-kvm \
|
||||
virt-manager \
|
||||
libvirt-daemon-system virtinst \
|
||||
libvirt-clients bridge-utils && \
|
||||
sudo systemctl enable --now libvirtd && \
|
||||
sudo systemctl start libvirtd && \
|
||||
usermod -aG kvm vagrant && \
|
||||
usermod -aG libvirt vagrant
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
apt-get autoremove -y
|
||||
|
||||
|
||||
Reference in New Issue
Block a user