# perf(ci): cache slow build steps and skip redundant work (#949)
---
name: "Build Packages and tests on Droplets"

# This workflow builds the packages for Ubuntu and Debian, then runs e2e
# integration tests on DigitalOcean droplets (on-demand VMs).
on:
  push:
    branches:
      - main
  pull_request:
    # The branches below must be a subset of the branches above
    branches:
      - main

# Cancel superseded runs when new commits are pushed to a PR branch.
# This also releases the global droplet concurrency groups earlier.
# Runs triggered by a push are never cancelled (the expression is false).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
build_deb:
  # Builds the aleph-vm .deb package once per supported distribution and
  # publishes it as a workflow artifact named after `artifact_name`.
  name: "Build ${{ matrix.os }} Package"
  runs-on: ubuntu-latest
  strategy:
    # Keep building the other distributions when one of them fails.
    fail-fast: false
    matrix:
      os:
        - "debian-12"
        - "debian-13"
        - "ubuntu-22.04"
        - "ubuntu-24.04"
      # Each entry extends the matching `os` value with its make target and
      # the file name of the package produced in packaging/target.
      include:
        - os: "debian-12"
          make_target: "all-podman-debian-12"
          artifact_name: "aleph-vm.debian-12.deb"
        - os: "debian-13"
          make_target: "all-podman-debian-13"
          artifact_name: "aleph-vm.debian-13.deb"
        - os: "ubuntu-22.04"
          make_target: "all-podman-ubuntu-2204"
          artifact_name: "aleph-vm.ubuntu-22.04.deb"
        - os: "ubuntu-24.04"
          make_target: "all-podman-ubuntu-2404"
          artifact_name: "aleph-vm.ubuntu-24.04.deb"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        submodules: true
        # Fetch the whole history for all tags and branches (required for aleph.__version__)
        fetch-depth: 0

    - name: Initialize git submodules
      run: git submodule init

    # sevctl is built with `cargo install` at a rev pinned in packaging/Makefile,
    # which takes ~2 minutes on every build. `target/bin/sevctl` is a file target
    # in the Makefile, so restoring the binary from cache skips the build entirely.
    # The key includes the Makefile (pins the sevctl rev) and the dockerfile
    # (pins the toolchain used to build it).
    - name: Cache the sevctl binary
      uses: actions/cache@v4
      with:
        path: packaging/target/bin
        # Folded scalar: the two lines below are joined with a single space.
        key: >-
          sevctl-${{ matrix.os }}-${{ hashFiles('packaging/Makefile',
          format('packaging/{0}.dockerfile', matrix.os)) }}

    # Build the package with the distribution-specific make target, then list
    # the output directory for the job log.
    - run: |
        cd packaging && make ${{ matrix.make_target }} && cd ..
        ls packaging/target

    # Each grep fails the step when the expected path is absent from the package.
    - name: Ensure that the relevant files are present in the package
      run: |
        dpkg --contents packaging/target/${{ matrix.artifact_name }} | grep /opt/firecracker/firecracker
        dpkg --contents packaging/target/${{ matrix.artifact_name }} | grep /opt/firecracker/jailer
        dpkg --contents packaging/target/${{ matrix.artifact_name }} | grep /opt/firecracker/vmlinux.bin
        dpkg --contents packaging/target/${{ matrix.artifact_name }} | grep /opt/sevctl

    - uses: actions/upload-artifact@v4
      with:
        name: ${{ matrix.artifact_name }}
        path: packaging/target/${{ matrix.artifact_name }}
build_rootfs:
  # Builds (or restores from cache) the Python runtime rootfs and uploads it
  # as a workflow artifact.
  name: "Build runtime aleph-${{ matrix.os }}-python"
  runs-on: ubuntu-latest
  strategy:
    matrix:
      os:
        - "debian-12"
      include:
        - os: "debian-12"
          artifact_name: "aleph-debian-12-python.squashfs"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Workaround github issue https://github.com/actions/runner-images/issues/7192
      run: sudo echo RESET grub-efi/install_devices | sudo debconf-communicate grub-pc

    # The runtime takes ~2 minutes to debootstrap and rarely changes. Cache it on
    # the content of its build inputs, with a monthly stamp so the packages
    # pulled by debootstrap and pip do not become indefinitely stale.
    # The key is computed in shell because hashFiles() is re-evaluated in the
    # cache post step, after the build filled the directory with the
    # debootstrapped tree (hashing it times out).
    - name: Compute runtime cache key
      id: cache-key
      run: |
        runtime_dir="runtimes/aleph-${{ matrix.os }}-python"
        # Hash the concatenated build inputs and keep the first 16 hex digits.
        input_hash=$(cat "${runtime_dir}"/*.sh "${runtime_dir}"/*.py "${runtime_dir}"/*.html | sha256sum | cut -c -16)
        echo "key=runtime-aleph-${{ matrix.os }}-python-$(date +%Y-%m)-${input_hash}" >> "$GITHUB_OUTPUT"

    - name: Cache the built runtime
      id: cache-runtime
      uses: actions/cache@v4
      with:
        path: runtimes/aleph-${{ matrix.os }}-python/rootfs.squashfs
        key: ${{ steps.cache-key.outputs.key }}

    # Only rebuild when the cache did not provide rootfs.squashfs.
    - name: Install dep and build
      if: steps.cache-runtime.outputs.cache-hit != 'true'
      run: |
        sudo apt update
        sudo apt install -y debootstrap
        cd runtimes/aleph-${{ matrix.os }}-python && sudo ./create_disk_image.sh && cd ../..

    - uses: actions/upload-artifact@v4
      with:
        name: ${{ matrix.artifact_name }}
        path: runtimes/aleph-${{ matrix.os }}-python/rootfs.squashfs
build_example_venv_volume:
  # Builds the example venv volume inside a Docker container and uploads it
  # as the `example-volume-venv.squashfs` artifact.
  name: "Build example squashfs volume using Docker"
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # The workspace is mounted at /mnt in the container; the container is
    # expected to leave volume-venv.squashfs at the workspace root, which is
    # the path uploaded below.
    - name: Build the example venv volume
      run: |
        docker build -t aleph-vm-build-squashfs -f examples/volumes/Dockerfile examples/volumes
        docker run --rm -v "$(pwd)":/mnt aleph-vm-build-squashfs

    - name: Upload the example venv volume
      uses: actions/upload-artifact@v4
      with:
        name: example-volume-venv.squashfs
        path: volume-venv.squashfs
        # The default is only a warning, which would let this job pass
        # without producing the artifact that downstream jobs download.
        if-no-files-found: error
run_on_droplet:
  # These are end-to-end tests running on ephemeral DigitalOcean "Droplet" virtual machines
  # with the different operating systems that are supported.
  #
  # The main focus of these tests is to ensure that the packaging works on all supported platforms
  # and to ensure the compatibility of dependencies (system and vendored) across these platforms.
  #
  # All supported runtimes are tested sequentially on the same droplet to avoid
  # the overhead of provisioning separate VMs (the setup is the slow part).
  name: "Test Droplet with ${{ matrix.os_config.os_name }}"
  runs-on: ubuntu-latest
  # One concurrency group per OS: the droplet name is derived from the OS
  # alias, so two runs for the same OS must not overlap.
  concurrency: "${{ matrix.os_config.concurrency_group }}"
  timeout-minutes: 20
  needs: build_deb
  strategy:
    fail-fast: false
    matrix:
      # Check compatibility with all supported OSes.
      os_config:
        - os_name: "Debian 12"
          os_image: "debian-12-x64"
          alias: "debian-12"
          package_build_command: "all-podman-debian-12"
          package_name: "aleph-vm.debian-12.deb"
          concurrency_group: "droplet-aleph-vm-debian-12"
        - os_name: "Debian 13"
          os_image: "debian-13-x64"
          alias: "debian-13"
          package_build_command: "all-podman-debian-13"
          package_name: "aleph-vm.debian-13.deb"
          concurrency_group: "droplet-aleph-vm-debian-13"
        - os_name: "Ubuntu 22.04"
          os_image: "ubuntu-22-04-x64"
          alias: "ubuntu-22-04"
          package_build_command: "all-podman-ubuntu-2204"
          package_name: "aleph-vm.ubuntu-22.04.deb"
          concurrency_group: "droplet-aleph-vm-ubuntu-22-04"
        - os_name: "Ubuntu 24.04"
          os_image: "ubuntu-24-04-x64"
          alias: "ubuntu-24-04"
          package_build_command: "all-podman-ubuntu-2404"
          package_name: "aleph-vm.ubuntu-24.04.deb"
          concurrency_group: "droplet-aleph-vm-ubuntu-24-04"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Install doctl
      uses: digitalocean/action-doctl@v2
      with:
        token: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN_V2 }}

    # The secret holds the base64-encoded private key. `mkdir -p` keeps the
    # step working if ~/.ssh already exists, and quoting the variable keeps
    # the encoded text intact.
    - name: Setup SSH private key
      run: |
        mkdir -p ~/.ssh
        echo "$DIGITALOCEAN_SSH_PRIVATE_KEY" | base64 --decode > ~/.ssh/id_ed25519
        chmod 0700 ~/.ssh
        chmod 0600 ~/.ssh/id_ed25519
      env:
        DIGITALOCEAN_SSH_PRIVATE_KEY: ${{ secrets.DIGITALOCEAN_SSH_PRIVATE_KEY }}

    - name: Create the Droplet
      run: |
        doctl compute droplet create \
        --image ${{ matrix.os_config.os_image }} \
        --size s-2vcpu-4gb-amd \
        --region ams3 \
        --vpc-uuid 5976b7bd-4417-49e8-8522-672aaa920c30 \
        --enable-ipv6 \
        --ssh-keys ab:2b:25:16:46:6f:25:d0:80:63:e5:be:67:04:cb:64 \
        aleph-vm-ci-${{ matrix.os_config.alias }}

    - uses: actions/download-artifact@v4
      name: "Download the package from artifacts."
      with:
        name: ${{ matrix.os_config.package_name }}
        path: packaging/target/

    - name: Get droplet ip and export it in env
      # The until loop waits until the network is available.
      run: |
        until (doctl compute droplet get aleph-vm-ci-${{ matrix.os_config.alias }} --output json | ./.github/scripts/extract_droplet_ipv4.py); do sleep 1; done
        echo "DROPLET_IPV4=$(doctl compute droplet get aleph-vm-ci-${{ matrix.os_config.alias }} --output json | ./.github/scripts/extract_droplet_ipv4.py)" >> "$GITHUB_ENV"

    # Retry until the droplet answers on SSH; its host keys are recorded so
    # that the following ssh/scp calls do not prompt.
    - name: Wait for the system to setup and boot
      id: system-booted
      run: |
        until ssh-keyscan -H ${DROPLET_IPV4} > ~/.ssh/known_hosts 2>/dev/null; do sleep 5; done
      timeout-minutes: 5

    - name: Install Aleph-VM on the Droplet
      env:
        # Passed through the environment instead of being spliced into the
        # script text, so the value is never parsed as shell syntax.
        SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
      run: |
        set -x
        ## Build configuration file and copy it on Droplet
        echo ALEPH_VM_SUPERVISOR_HOST=0.0.0.0 >> supervisor.env
        echo ALEPH_VM_ALLOCATION_TOKEN_HASH=9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 >> supervisor.env
        echo "ALEPH_VM_SENTRY_DSN=${SENTRY_DSN}" >> supervisor.env
        # Pin public resolvers for guest VMs: DNS auto-detection on the droplet
        # picks up DigitalOcean's VPC-internal resolver (e.g. 10.110.15.254),
        # which is unreachable from inside the VMs and breaks all guest DNS.
        echo 'ALEPH_VM_DNS_NAMESERVERS=["1.1.1.1","8.8.8.8"]' >> supervisor.env
        ssh root@${DROPLET_IPV4} mkdir -p /etc/aleph-vm/
        scp supervisor.env root@${DROPLET_IPV4}:/etc/aleph-vm/supervisor.env
        scp packaging/target/${{ matrix.os_config.package_name }} root@${DROPLET_IPV4}:/opt
        # Wait a few seconds for DigitalOcean to set up the Droplet using apt, which conflicts with our commands:
        sleep 5
        # Wait for /var/lib/apt/lists/lock to be unlocked on the remote host via SSH.
        while ssh root@${DROPLET_IPV4} lsof /var/lib/apt/lists/lock; do sleep 1; done
        ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 update"
        # A full system upgrade takes ~1 minute. Only run it on pushes to main
        # to catch incompatibilities with the latest distro packages, and skip
        # it on pull requests to keep iteration fast.
        if [ "${{ github.event_name }}" != "pull_request" ]; then
          ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 upgrade -y"
        fi
        ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 install -y docker.io apparmor-profiles"
        ssh root@${DROPLET_IPV4} "docker pull ghcr.io/aleph-im/vm-connector:alpha"
        ssh root@${DROPLET_IPV4} "docker run -d -p 127.0.0.1:4021:4021/tcp --restart=always --name vm-connector ghcr.io/aleph-im/vm-connector:alpha"
        # "--force-confold" keeps existing config files during package install/upgrade, avoiding prompts.
        ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 -o Dpkg::Options::=--force-confold install -y /opt/${{ matrix.os_config.package_name }}"

    # Poll over SSH (for at most 90 seconds) until the service is active and a
    # socket is listening on port 4020.
    - name: Wait for supervisor to be ready
      run: |
        echo "Waiting for aleph-vm-supervisor to listen on port 4020..."
        timeout 90 bash -c 'until ssh root@${DROPLET_IPV4} "systemctl is-active --quiet aleph-vm-supervisor && ss -tlnp | grep -q :4020\ " 2>/dev/null; do sleep 2; done'
        echo "Supervisor is active and listening"

    # Best-effort diagnostics: `|| true` keeps this step from adding a second
    # failure on top of the one that triggered it.
    - name: Dump supervisor logs on failure
      if: failure()
      run: |
        ssh root@${DROPLET_IPV4} "systemctl status aleph-vm-supervisor --no-pager" || true
        ssh root@${DROPLET_IPV4} "journalctl -u aleph-vm-supervisor -n 100 --no-pager" || true

    - name: "Test runtime: Debian 12, SDK 0.9.0"
      run: ./.github/scripts/test_runtime_on_droplet.sh "${DROPLET_IPV4}" "63faf8b5db1cf8d965e6a464a0cb8062af8e7df131729e48738342d956f29ace"

    - name: "Test runtime: Debian 12, pydantic V2, SDK 2.0.5"
      run: ./.github/scripts/test_runtime_on_droplet.sh "${DROPLET_IPV4}" "d2b74aa29898457bde0560e47f7cdd4e77287e9f1f7a1456161d2fd7d5c855d7"

    # --fail makes curl exit non-zero on an HTTP error status, so a broken
    # endpoint fails the step instead of passing silently.
    - name: Fetch system usage endpoint
      run: |
        curl --fail -X GET -H "Content-Type: application/json" \
        "http://${DROPLET_IPV4}:4020/about/usage/system"

    - name: Run the sevctl command to ensure it's properly packaged and working
      run: |
        ssh root@${DROPLET_IPV4} "/opt/sevctl --version"

    # Runs whenever the droplet booted and the run was not cancelled, even if
    # earlier steps failed; an error here does not fail the job.
    - name: Export aleph logs
      continue-on-error: true
      if: ${{ !cancelled() && steps.system-booted.outcome == 'success'}}
      run: |
        ssh root@${DROPLET_IPV4} "journalctl -u aleph-vm-supervisor"

    # Always delete the droplet(s) of this OS. The name column is compared
    # exactly so that droplets whose name merely contains this one are kept.
    - name: Cleanup
      if: always()
      run: |-
        DROPLET_IDS=$(doctl compute droplet list --format "ID,Name" --no-header | awk -v name="aleph-vm-ci-${{ matrix.os_config.alias }}" '$2 == name {print $1}')
        for DROPLET_ID in $DROPLET_IDS; do
          echo "Deleting droplet with ID: $DROPLET_ID"
          doctl compute droplet delete --force "$DROPLET_ID"
        done
run_new_runtime_debian_12:
  name: "Test new runtime on Droplet with Debian 12"
  # Test by building a version of the runtimes and diagnostic program from the source.
  # (other tests use the version deployed in the aleph cloud)
  runs-on: ubuntu-latest
  # The droplet name is fixed (aleph-vm-ci-runtime), so runs must not overlap.
  concurrency: droplet-aleph-vm-runtime
  timeout-minutes: 10
  needs: [build_deb, build_rootfs, build_example_venv_volume]
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        # Fetch the whole history for all tags and branches (required for aleph.__version__)
        fetch-depth: 0

    - name: Install doctl
      uses: digitalocean/action-doctl@v2
      with:
        token: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN_V2 }}

    # The secret holds the base64-encoded private key. `mkdir -p` keeps the
    # step working if ~/.ssh already exists, and quoting the variable keeps
    # the encoded text intact.
    - name: Setup SSH private key
      run: |
        mkdir -p ~/.ssh
        echo "$DIGITALOCEAN_SSH_PRIVATE_KEY" | base64 --decode > ~/.ssh/id_ed25519
        chmod 0700 ~/.ssh
        chmod 0600 ~/.ssh/id_ed25519
      env:
        DIGITALOCEAN_SSH_PRIVATE_KEY: ${{ secrets.DIGITALOCEAN_SSH_PRIVATE_KEY }}

    - name: Create the Droplet
      run: |
        doctl compute droplet create \
        --image debian-12-x64 \
        --size s-2vcpu-4gb-amd \
        --region ams3 \
        --vpc-uuid 5976b7bd-4417-49e8-8522-672aaa920c30 \
        --enable-ipv6 \
        --ssh-keys ab:2b:25:16:46:6f:25:d0:80:63:e5:be:67:04:cb:64 \
        aleph-vm-ci-runtime

    # Reuse the runtime built by the build_rootfs job instead of spending
    # ~2 minutes rebuilding it here with debootstrap.
    - uses: actions/download-artifact@v4
      name: "Download the runtime from artifacts."
      with:
        name: "aleph-debian-12-python.squashfs"
        path: runtimes/aleph-debian-12-python/

    # The Debian package is built by the build_deb job.
    - uses: actions/download-artifact@v4
      name: "Download the debian package from artifacts."
      with:
        name: "aleph-vm.debian-12.deb"
        path: packaging/target/

    # The supervisor requires the example venv volume when running with fake data
    # (ALEPH_VM_FAKE_DATA_VOLUME, asserted at startup). It is built by the
    # build_example_venv_volume job; place it where `scp -pr ./examples` ships it.
    - uses: actions/download-artifact@v4
      name: "Download the example venv volume from artifacts."
      with:
        name: "example-volume-venv.squashfs"
        path: examples/volumes/

    - name: Get droplet ip and export it in env
      # The until loop waits until the network is available.
      run: |
        until (doctl compute droplet get aleph-vm-ci-runtime --output json | ./.github/scripts/extract_droplet_ipv4.py); do sleep 1; done
        echo "DROPLET_IPV4=$(doctl compute droplet get aleph-vm-ci-runtime --output json | ./.github/scripts/extract_droplet_ipv4.py)" >> "$GITHUB_ENV"

    # Retry until the droplet answers on SSH; its host keys are recorded so
    # that the following ssh/scp calls do not prompt.
    - name: Wait for the system to setup and boot
      id: system-booted
      timeout-minutes: 3
      run: |
        until ssh-keyscan -H ${DROPLET_IPV4} > ~/.ssh/known_hosts 2>/dev/null; do sleep 5; done

    - name: Copy the runtime to the system
      run: |
        scp runtimes/aleph-debian-12-python/rootfs.squashfs root@${DROPLET_IPV4}:/opt

    - name: Install Aleph-VM on the Droplet
      run: |
        ## Build configuration file and copy it on Droplet
        echo ALEPH_VM_SUPERVISOR_HOST=0.0.0.0 >> supervisor.env
        echo ALEPH_VM_FAKE_DATA_PROGRAM=/opt/examples/example_fastapi >> supervisor.env
        echo ALEPH_VM_FAKE_DATA_RUNTIME=/opt/rootfs.squashfs >> supervisor.env
        # Pin public resolvers for guest VMs: DNS auto-detection on the droplet
        # picks up DigitalOcean's VPC-internal resolver (e.g. 10.110.15.254),
        # which is unreachable from inside the VMs and breaks all guest DNS.
        echo 'ALEPH_VM_DNS_NAMESERVERS=["1.1.1.1","8.8.8.8"]' >> supervisor.env
        ssh root@${DROPLET_IPV4} mkdir -p /etc/aleph-vm/
        scp supervisor.env root@${DROPLET_IPV4}:/etc/aleph-vm/supervisor.env
        scp packaging/target/aleph-vm.debian-12.deb root@${DROPLET_IPV4}:/opt
        # Same guard as in the run_on_droplet job: DigitalOcean sets up the
        # Droplet using apt, which conflicts with our commands. Give it a few
        # seconds to start, then wait for /var/lib/apt/lists/lock to be released.
        sleep 5
        while ssh root@${DROPLET_IPV4} lsof /var/lib/apt/lists/lock; do sleep 1; done
        ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 update"
        # A full system upgrade takes ~1 minute. Only run it on pushes to main
        # to catch incompatibilities with the latest distro packages, and skip
        # it on pull requests to keep iteration fast.
        if [ "${{ github.event_name }}" != "pull_request" ]; then
          ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 upgrade -y"
        fi
        ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 install -y docker.io apparmor-profiles"
        # NOTE(review): the run_on_droplet job uses ghcr.io/aleph-im/vm-connector:alpha
        # while this job uses alephim/vm-connector:alpha. Confirm that both
        # images are meant to be used and are kept in sync.
        ssh root@${DROPLET_IPV4} "docker run -d -p 127.0.0.1:4021:4021/tcp --restart=always --name vm-connector alephim/vm-connector:alpha"
        # Copy our example VM on the server
        scp -pr ./examples root@${DROPLET_IPV4}:/opt/
        # "--force-confold" keeps existing config files during package install/upgrade, avoiding prompts.
        ssh root@${DROPLET_IPV4} DEBIAN_FRONTEND=noninteractive "apt-get -o Dpkg::Progress-Fancy=0 -o DPkg::Lock::Timeout=-1 -o Dpkg::Options::=--force-confold install -y /opt/aleph-vm.debian-12.deb"

    # curl retries (including on refused connections) while the supervisor
    # starts, and --fail turns an HTTP error status into a step failure.
    - name: Call the runtime and example program on the Droplet
      run: |
        sleep 3
        curl --retry 5 --retry-delay 10 --retry-connrefused --max-time 120 --fail "http://${DROPLET_IPV4}:4020/about/usage/system"
        curl --retry 5 --retry-delay 10 --retry-connrefused --max-time 120 --fail "http://${DROPLET_IPV4}:4020/status/check/fastapi"

    # Runs whenever the droplet booted and the run was not cancelled, even if
    # earlier steps failed; an error here does not fail the job.
    - name: Export aleph logs
      continue-on-error: true
      if: ${{ !cancelled() && steps.system-booted.outcome == 'success'}}
      run: |
        ssh root@${DROPLET_IPV4} "journalctl -u aleph-vm-supervisor"

    # Always delete the droplet, whatever the outcome of the previous steps.
    - name: Cleanup
      if: always()
      run: |-
        doctl compute droplet delete -f aleph-vm-ci-runtime