Skip to content

fix(download-server): add VPN watchdog backoff, port validation, docs #1653

fix(download-server): add VPN watchdog backoff, port validation, docs

fix(download-server): add VPN watchdog backoff, port validation, docs #1653

# Workflow identity, triggers and run serialisation for the flake build jobs.
name: Build and Cache Nix Flake

on:
  # Every push to main
  push:
    branches:
      - main
  # Manual runs from the Actions tab
  workflow_dispatch:
  # Follow-up run once the lock-file update workflow has completed on main
  workflow_run:
    workflows:
      - Update and push flake lock
    types:
      - completed
    branches:
      - main

# One run per workflow/ref at a time; an in-progress run is not cancelled
# when a newer one arrives.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: false

jobs:
# Lightweight server builds — run on free GHA runners (fully cached, ~2 min each)
  build-servers:
    runs-on: ubuntu-latest
    # For workflow_run triggers, only build when the upstream workflow succeeded
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    strategy:
      # One failing configuration must not cancel the others
      fail-fast: false
      matrix:
        configuration:
          - '.#nixosConfigurations.download-server-1.config.system.build.toplevel'
          - '.#nixosConfigurations.home-k8s-master-1.config.system.build.toplevel'
          - '.#nixosConfigurations.home-k8s-server-1.config.system.build.toplevel'
          - '.#nixosConfigurations.home-kvm-hypervisor-1.config.system.build.toplevel'
          - '.#nixosConfigurations.home-storage-server-1.config.system.build.toplevel'
    permissions:
      contents: read
      id-token: write
    steps:
      - uses: actions/checkout@v6
        with:
          lfs: false
      # NOTE(review): this action tracks a moving branch; consider pinning it to a
      # tag or commit SHA.
      - uses: wimpysworld/nothing-but-nix@main
        with:
          hatchet-protocol: 'rampage'
      - uses: cachix/install-nix-action@v31
        with:
          nix_path: nixpkgs=channel:nixos-unstable
          github_access_token: ${{ secrets.GITHUB_TOKEN }}
          extra_nix_config: |
            max-jobs = auto
            cores = 0
            eval-cache = true
            http-connections = 128
            max-substitution-jobs = 128
            narinfo-cache-negative-ttl = 3600
            connect-timeout = 5
            stalled-download-timeout = 10
            fallback = true
            extra-substituters = https://cache.nixcache.org https://nix-community.cachix.org https://cache.garnix.io
            extra-trusted-public-keys = nixcache.org-1:fd7sIL2BDxZa68s/IqZ8kvDsxsjt3SV4mQKdROuPoak= nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs= cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g=
      # The key ends in the run id, so every run saves a fresh entry; restore-keys
      # falls back to an earlier entry for the same configuration.
      - name: Cache Nix evaluation
        uses: actions/cache@v5
        with:
          path: ~/.cache/nix
          key: nix-eval-servers-${{ matrix.configuration }}-${{ github.run_id }}
          restore-keys: |
            nix-eval-servers-${{ matrix.configuration }}-
      - uses: DeterminateSystems/flake-checker-action@v12
      - name: Check if build is needed
        id: dry-run
        env:
          # Passed through env instead of being interpolated into the script text
          CONFIGURATION: ${{ matrix.configuration }}
        run: |
          # The default shell is `bash -e`: a bare `var=$(cmd)` aborts the step as
          # soon as cmd fails, before the captured output is ever printed. Guarding
          # the assignment with `if` keeps the error text visible.
          if dry_output=$(nix build --dry-run "$CONFIGURATION" 2>&1); then
            echo "$dry_output"
            if echo "$dry_output" | grep -q 'will be built:'; then
              echo "needs_build=true" >> "$GITHUB_OUTPUT"
            else
              echo "needs_build=false" >> "$GITHUB_OUTPUT"
              echo "All derivations are cached — skipping build"
            fi
          else
            # Same policy as the other build jobs: a failed dry-run counts as
            # "needs building", so the real build reports the underlying error.
            echo "$dry_output"
            echo "::warning::dry-run failed (needs building)"
            echo "needs_build=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Install niks3
        if: steps.dry-run.outputs.needs_build == 'true'
        run: .github/scripts/install-niks3.sh
      - name: Build and push to cache in parallel
        if: steps.dry-run.outputs.needs_build == 'true'
        env:
          NIKS3_TOKEN: ${{ secrets.NIKS3_TOKEN }}
          CONFIGURATION: ${{ matrix.configuration }}
        run: |
          QUEUE=/tmp/niks3-queue
          touch "$QUEUE"
          # Background drainer: watches queue and pushes paths as they appear
          (
            BATCH=0
            while true; do
              # Claim the queue by renaming it, then recreate an empty one
              if [ -s "$QUEUE" ] && mv "$QUEUE" /tmp/niks3-processing 2>/dev/null; then
                touch "$QUEUE"
                PATHS=$(wc -l < /tmp/niks3-processing)
                BATCH=$((BATCH + 1))
                echo "[drainer] Batch $BATCH: pushing $PATHS path(s) to cache..."
                if cat /tmp/niks3-processing | xargs -r nix path-info --recursive 2>/dev/null | sort -u | xargs -r niks3 push \
                  --server-url https://api.nixcache.org \
                  --max-concurrent-uploads 10 \
                  --auth-token "$NIKS3_TOKEN" 2>&1; then
                  echo "[drainer] Batch $BATCH: done"
                else
                  # Push failures are reported but do not stop the drainer
                  echo "::warning::[drainer] Batch $BATCH: push failed with exit code $?"
                fi
                rm -f /tmp/niks3-processing
              fi
              sleep 2
            done
          ) &
          DRAINER_PID=$!
          # Build, appending output paths to queue
          # errexit is suspended so a failed build still reaches the final push
          set +e
          nix build -L --keep-going --print-out-paths --no-link "$CONFIGURATION" \
            2> >(tee /tmp/nix-build-stderr.log | grep --line-buffered -iE '(^[^ ]*> (building|configuring|installing|compiling|linking|cmake|make\[)|building .*/nix/store|copying path|fetching)' >&2) >> "$QUEUE"
          BUILD_EXIT=$?
          set -e
          if [ $BUILD_EXIT -ne 0 ]; then
            echo "::error::Build failed with exit code $BUILD_EXIT"
            echo "--- Full build stderr (last 100 lines) ---"
            tail -100 /tmp/nix-build-stderr.log
          fi
          # Stop drainer and do final push of any remaining paths
          kill $DRAINER_PID 2>/dev/null || true
          wait $DRAINER_PID 2>/dev/null || true
          # Merge any leftover queue + in-progress paths for final push
          cat "$QUEUE" /tmp/niks3-processing 2>/dev/null | sort -u > /tmp/niks3-final || true
          if [ -s /tmp/niks3-final ]; then
            FINAL_PATHS=$(wc -l < /tmp/niks3-final)
            echo "[final push] Pushing $FINAL_PATHS remaining path(s) to cache..."
            if cat /tmp/niks3-final | xargs -r nix path-info --recursive 2>/dev/null | sort -u | xargs -r niks3 push \
              --server-url https://api.nixcache.org \
              --max-concurrent-uploads 10 \
              --auth-token "$NIKS3_TOKEN" 2>&1; then
              echo "[final push] Done"
            else
              echo "::error::[final push] Failed with exit code $?"
            fi
          else
            echo "[final push] No remaining paths to push"
          fi
          # The step result reflects the build, not the cache push
          exit $BUILD_EXIT
# Heavy x86_64 desktop/laptop builds — self-hosted runner with nix
  build-desktops:
    runs-on: aws-nix-builder-amd64
    timeout-minutes: 1440
    # For workflow_run triggers, only build when the upstream workflow succeeded
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    permissions:
      contents: read
      actions: read
    env:
      # Use cluster-internal service URL — these runners are pods in the k3s cluster,
      # so the external URL (api.nixcache.org → public IP) causes hairpin NAT timeouts.
      NIKS3_SERVER_URL: http://niks3.niks3.svc.cluster.local
      # Host nix daemon post-build-hook writes to /var/tmp/niks3-queue
      NIKS3_QUEUE: /var/tmp/niks3-queue
    steps:
      - name: Skip if newer run is queued
        env:
          GH_TOKEN: ${{ github.token }}
          REPOSITORY: ${{ github.repository }}
          BRANCH: ${{ github.ref_name }}
          RUN_ID: ${{ github.run_id }}
        run: |
          # Context values arrive through env rather than being pasted into the
          # script, so a branch name can never be parsed as shell syntax.
          #
          # The REST API pretty-prints its JSON as `"id": 123`; the pattern must
          # allow whitespace after the colon, and `[0-9]+` stops a bare `"id":`
          # from matching with no digits (which made the count always 0).
          #
          # NOTE(review): this lists queued runs of every workflow on the branch
          # and also matches numeric ids of nested objects (actor, repository) —
          # confirm that scope is intended, or switch to a JSON parser.
          NEWER=$(curl -sf -H "Authorization: Bearer $GH_TOKEN" \
            "https://api.github.com/repos/$REPOSITORY/actions/runs?status=queued&branch=$BRANCH&per_page=10" \
            | grep -oE '"id":[[:space:]]*[0-9]+' | grep -oE '[0-9]+' \
            | awk -v run_id="$RUN_ID" '$1 + 0 > run_id + 0' | wc -l)
          if [ "$NEWER" -gt 0 ]; then
            echo "::notice::Skipping this run — $NEWER newer run(s) queued"
            # A non-zero exit is what keeps the remaining steps from running
            exit 1
          fi
      - uses: actions/checkout@v6
        with:
          lfs: false
      - name: Configure host Nix
        uses: ./.github/actions/setup-host-nix
      - name: Check if any desktop build is needed
        id: dry-run
        run: |
          # needs_build becomes true as soon as one configuration is not fully cached
          needs_build=false
          for cfg in ali-desktop ali-framework-laptop ali-work-laptop; do
            if dry_output=$(nix build --dry-run ".#nixosConfigurations.${cfg}.config.system.build.toplevel" 2>&1); then
              if echo "$dry_output" | grep -q 'will be built:'; then
                needs_build=true
                echo "$cfg needs building"
              else
                echo "$cfg is fully cached"
              fi
            else
              # A failed dry-run is treated as "needs building"; print what nix
              # reported so the cause is visible in this step's log.
              needs_build=true
              echo "$dry_output"
              echo "$cfg dry-run failed (needs building)"
            fi
          done
          echo "needs_build=$needs_build" >> "$GITHUB_OUTPUT"
      - name: Install niks3
        if: steps.dry-run.outputs.needs_build == 'true'
        run: .github/scripts/install-niks3.sh
      - name: Build and push to cache
        if: steps.dry-run.outputs.needs_build == 'true'
        env:
          NIKS3_TOKEN: ${{ secrets.NIKS3_TOKEN }}
        run: |
          # Provides niks3_start_drainer / niks3_stop_and_final_push
          # (defined in the sourced script, not visible here)
          source .github/scripts/niks3-background-push.sh
          niks3_start_drainer
          # `tee -a` below only ever appends, so start from an empty log; otherwise
          # output left in /tmp by an earlier job on this runner would be dumped
          # again on failure (harmless if /tmp is always fresh).
          : > /tmp/nix-build-stderr.log
          # Build each config sequentially, appending output paths to queue
          set +e
          any_failed=false
          for cfg in ali-desktop ali-framework-laptop ali-work-laptop; do
            echo "=== Building $cfg ==="
            if nix build -L --keep-going --print-out-paths --no-link \
              ".#nixosConfigurations.${cfg}.config.system.build.toplevel" \
              2> >(tee -a /tmp/nix-build-stderr.log | grep --line-buffered -iE '(^[^ ]*> (building|configuring|installing|compiling|linking|cmake|make\[)|building .*/nix/store|copying path|fetching)' >&2) >> "$NIKS3_QUEUE"; then
              echo "=== $cfg complete ==="
            else
              # Keep going so the remaining configurations still get built
              echo "::warning::$cfg build failed"
              any_failed=true
            fi
          done
          set -e
          niks3_stop_and_final_push
          # Fail the step only after the final push, and attach the full log
          if [ "$any_failed" = true ]; then
            echo "::group::Full nix build stderr log"
            cat /tmp/nix-build-stderr.log 2>/dev/null || true
            echo "::endgroup::"
            exit 1
          fi
# aarch64-linux builds — self-hosted ARM runner with nix
  build-arm64:
    runs-on: aws-nix-builder-arm64
    timeout-minutes: 1440
    # For workflow_run triggers, only build when the upstream workflow succeeded
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    permissions:
      contents: read
      actions: read
    env:
      NIKS3_SERVER_URL: http://niks3.niks3.svc.cluster.local
      # Host nix daemon post-build-hook writes to /var/tmp/niks3-queue
      NIKS3_QUEUE: /var/tmp/niks3-queue
    steps:
      - name: Skip if newer run is queued
        env:
          GH_TOKEN: ${{ github.token }}
          REPOSITORY: ${{ github.repository }}
          BRANCH: ${{ github.ref_name }}
          RUN_ID: ${{ github.run_id }}
        run: |
          # Context values arrive through env rather than being pasted into the
          # script, so a branch name can never be parsed as shell syntax.
          #
          # The REST API pretty-prints its JSON as `"id": 123`; the pattern must
          # allow whitespace after the colon, and `[0-9]+` stops a bare `"id":`
          # from matching with no digits (which made the count always 0).
          #
          # NOTE(review): this lists queued runs of every workflow on the branch
          # and also matches numeric ids of nested objects (actor, repository) —
          # confirm that scope is intended, or switch to a JSON parser.
          NEWER=$(curl -sf -H "Authorization: Bearer $GH_TOKEN" \
            "https://api.github.com/repos/$REPOSITORY/actions/runs?status=queued&branch=$BRANCH&per_page=10" \
            | grep -oE '"id":[[:space:]]*[0-9]+' | grep -oE '[0-9]+' \
            | awk -v run_id="$RUN_ID" '$1 + 0 > run_id + 0' | wc -l)
          if [ "$NEWER" -gt 0 ]; then
            echo "::notice::Skipping this run — $NEWER newer run(s) queued"
            # A non-zero exit is what keeps the remaining steps from running
            exit 1
          fi
      - uses: actions/checkout@v6
        with:
          lfs: false
      - name: Configure host Nix
        uses: ./.github/actions/setup-host-nix
      - name: Check if any arm64 build is needed
        id: dry-run
        run: |
          # needs_build becomes true as soon as one configuration is not fully cached
          needs_build=false
          for cfg in dev-vm; do
            if dry_output=$(nix build --dry-run ".#nixosConfigurations.${cfg}.config.system.build.toplevel" 2>&1); then
              if echo "$dry_output" | grep -q 'will be built:'; then
                needs_build=true
                echo "$cfg needs building"
              else
                echo "$cfg is fully cached"
              fi
            else
              # A failed dry-run is treated as "needs building"; print what nix
              # reported so the cause is visible in this step's log.
              needs_build=true
              echo "$dry_output"
              echo "$cfg dry-run failed (needs building)"
            fi
          done
          echo "needs_build=$needs_build" >> "$GITHUB_OUTPUT"
      - name: Install niks3
        if: steps.dry-run.outputs.needs_build == 'true'
        run: .github/scripts/install-niks3.sh
      - name: Build and push to cache
        if: steps.dry-run.outputs.needs_build == 'true'
        env:
          NIKS3_TOKEN: ${{ secrets.NIKS3_TOKEN }}
        run: |
          # Provides niks3_start_drainer / niks3_stop_and_final_push
          # (defined in the sourced script, not visible here)
          source .github/scripts/niks3-background-push.sh
          niks3_start_drainer
          # `tee -a` below only ever appends, so start from an empty log; otherwise
          # output left in /tmp by an earlier job on this runner would be dumped
          # again on failure (harmless if /tmp is always fresh).
          : > /tmp/nix-build-stderr.log
          # Build each config sequentially, appending output paths to queue
          set +e
          any_failed=false
          for cfg in dev-vm; do
            echo "=== Building $cfg ==="
            if nix build -L --keep-going --print-out-paths --no-link \
              ".#nixosConfigurations.${cfg}.config.system.build.toplevel" \
              2> >(tee -a /tmp/nix-build-stderr.log | grep --line-buffered -iE '(^[^ ]*> (building|configuring|installing|compiling|linking|cmake|make\[)|building .*/nix/store|copying path|fetching)' >&2) >> "$NIKS3_QUEUE"; then
              echo "=== $cfg complete ==="
            else
              # Keep going so any remaining configurations still get built
              echo "::warning::$cfg build failed"
              any_failed=true
            fi
          done
          set -e
          niks3_stop_and_final_push
          # Fail the step only after the final push, and attach the full log
          if [ "$any_failed" = true ]; then
            echo "::group::Full nix build stderr log"
            cat /tmp/nix-build-stderr.log 2>/dev/null || true
            echo "::endgroup::"
            exit 1
          fi