fix(download-server): add VPN watchdog backoff, port validation, docs #1653
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow: builds the flake's NixOS configurations and pushes the results to a binary cache. | |
| name: Build and Cache Nix Flake | |
| on: | |
| # Runs on every push to main. | |
| push: | |
| branches: | |
| - main | |
| # Can also be started manually. | |
| workflow_dispatch: | |
| # Also runs when the "Update and push flake lock" workflow completes on main | |
| # (each job additionally checks that the triggering run concluded successfully). | |
| workflow_run: | |
| workflows: ["Update and push flake lock"] | |
| types: | |
| - completed | |
| branches: | |
| - main | |
| # One concurrency group per workflow and ref; a run that is already in progress is not cancelled. | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.ref }} | |
| cancel-in-progress: false | |
| jobs: | |
| # Lightweight server builds — run on free GHA runners (fully cached, ~2 min each) | |
| # One matrix job is created per server configuration listed under matrix.configuration. | |
| build-servers: | |
| runs-on: ubuntu-latest | |
| # When triggered via workflow_run, only proceed if the triggering workflow succeeded. | |
| if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }} | |
| strategy: | |
| # A failing configuration does not cancel the remaining matrix jobs. | |
| fail-fast: false | |
| matrix: | |
| # Flake attribute paths of the system closures to build. | |
| configuration: | |
| - .#nixosConfigurations.download-server-1.config.system.build.toplevel | |
| - .#nixosConfigurations.home-k8s-master-1.config.system.build.toplevel | |
| - .#nixosConfigurations.home-k8s-server-1.config.system.build.toplevel | |
| - .#nixosConfigurations.home-kvm-hypervisor-1.config.system.build.toplevel | |
| - .#nixosConfigurations.home-storage-server-1.config.system.build.toplevel | |
| permissions: | |
| contents: read | |
| # NOTE(review): no step visible in this job obviously consumes an OIDC token — confirm id-token: write is needed. | |
| id-token: write | |
| steps: | |
| # Check out the repository; Git LFS objects are not fetched. | |
| - uses: actions/checkout@v6 | |
| with: | |
| lfs: false | |
| # NOTE(review): presumably reclaims runner disk space for the Nix store; the meaning of | |
| # hatchet-protocol 'rampage' is defined by the action — confirm against its README. | |
| # The action is referenced by branch (@main), not by a pinned version. | |
| - uses: wimpysworld/nothing-but-nix@main | |
| with: | |
| hatchet-protocol: 'rampage' | |
| # Install Nix. extra_nix_config is passed to the action as additional Nix settings, | |
| # including three extra substituters and their trusted public keys. | |
| - uses: cachix/install-nix-action@v31 | |
| with: | |
| nix_path: nixpkgs=channel:nixos-unstable | |
| github_access_token: ${{ secrets.GITHUB_TOKEN }} | |
| extra_nix_config: | | |
| max-jobs = auto | |
| cores = 0 | |
| eval-cache = true | |
| http-connections = 128 | |
| max-substitution-jobs = 128 | |
| narinfo-cache-negative-ttl = 3600 | |
| connect-timeout = 5 | |
| stalled-download-timeout = 10 | |
| fallback = true | |
| extra-substituters = https://cache.nixcache.org https://nix-community.cachix.org https://cache.garnix.io | |
| extra-trusted-public-keys = nixcache.org-1:fd7sIL2BDxZa68s/IqZ8kvDsxsjt3SV4mQKdROuPoak= nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs= cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g= | |
| # Persist ~/.cache/nix between runs. The key ends in the run id, so an exact hit is not | |
| # expected; restore-keys falls back to an earlier entry with the same per-configuration prefix. | |
| - name: Cache Nix evaluation | |
| uses: actions/cache@v5 | |
| with: | |
| path: ~/.cache/nix | |
| key: nix-eval-servers-${{ matrix.configuration }}-${{ github.run_id }} | |
| restore-keys: | | |
| nix-eval-servers-${{ matrix.configuration }}- | |
| # Run the Determinate Systems flake checker action with its default inputs. | |
| - uses: DeterminateSystems/flake-checker-action@v12 | |
| # Dry-run the build to decide whether the build/push steps are needed. | |
| # Sets the step output needs_build to 'true' or 'false'. | |
| - name: Check if build is needed | |
| id: dry-run | |
| run: | | |
| # Capture the status explicitly: under the default `bash -e` a failing command | |
| # substitution would abort the step before its output is printed. | |
| dry_status=0 | |
| dry_output=$(nix build --dry-run '${{ matrix.configuration }}' 2>&1) || dry_status=$? | |
| echo "$dry_output" | |
| if [ "$dry_status" -ne 0 ]; then | |
| # Same policy as the desktop/arm64 jobs: a failed dry-run counts as "needs building", | |
| # so the real build step runs and reports the error with its full log. | |
| echo "::warning::Dry-run failed with exit code $dry_status (treating as needs building)" | |
| echo "needs_build=true" >> "$GITHUB_OUTPUT" | |
| elif echo "$dry_output" | grep -q 'will be built:'; then | |
| echo "needs_build=true" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "needs_build=false" >> "$GITHUB_OUTPUT" | |
| echo "All derivations are cached — skipping build" | |
| fi | |
| # Only install the niks3 client when something actually has to be built and pushed. | |
| - name: Install niks3 | |
| if: steps.dry-run.outputs.needs_build == 'true' | |
| run: .github/scripts/install-niks3.sh | |
| # Builds the matrix configuration while a background loop pushes queued paths to the cache. | |
| # The step exits with nix build's status; push failures are only reported as annotations. | |
| - name: Build and push to cache in parallel | |
| if: steps.dry-run.outputs.needs_build == 'true' | |
| env: | |
| NIKS3_TOKEN: ${{ secrets.NIKS3_TOKEN }} | |
| run: | | |
| # File that nix build's stdout (the printed out paths) is appended to. | |
| QUEUE=/tmp/niks3-queue | |
| touch "$QUEUE" | |
| # Background drainer: watches queue and pushes paths as they appear | |
| ( | |
| BATCH=0 | |
| while true; do | |
| # Claim a non-empty queue by renaming it, then recreate an empty queue file. | |
| # NOTE(review): nix build keeps writing to the file it opened, so output produced after | |
| # the rename lands in /tmp/niks3-processing, not the new queue; the final push merges both. | |
| if [ -s "$QUEUE" ] && mv "$QUEUE" /tmp/niks3-processing 2>/dev/null; then | |
| touch "$QUEUE" | |
| PATHS=$(wc -l < /tmp/niks3-processing) | |
| BATCH=$((BATCH + 1)) | |
| echo "[drainer] Batch $BATCH: pushing $PATHS path(s) to cache..." | |
| # Expand the queued paths with `nix path-info --recursive`, de-duplicate, and pass them to niks3 push. | |
| # Errors from nix path-info are discarded; the if only sees the status of the last pipeline stage. | |
| if cat /tmp/niks3-processing | xargs -r nix path-info --recursive 2>/dev/null | sort -u | xargs -r niks3 push \ | |
| --server-url https://api.nixcache.org \ | |
| --max-concurrent-uploads 10 \ | |
| --auth-token "$NIKS3_TOKEN" 2>&1; then | |
| echo "[drainer] Batch $BATCH: done" | |
| else | |
| echo "::warning::[drainer] Batch $BATCH: push failed with exit code $?" | |
| fi | |
| rm -f /tmp/niks3-processing | |
| fi | |
| sleep 2 | |
| done | |
| ) & | |
| DRAINER_PID=$! | |
| # Build, appending output paths to queue | |
| # errexit is disabled so a failed build still reaches the final push below. | |
| # stderr is written in full to /tmp/nix-build-stderr.log; only lines matching the | |
| # progress pattern are echoed to the step log. | |
| set +e | |
| nix build -L --keep-going --print-out-paths --no-link '${{ matrix.configuration }}' \ | |
| 2> >(tee /tmp/nix-build-stderr.log | grep --line-buffered -iE '(^[^ ]*> (building|configuring|installing|compiling|linking|cmake|make\[)|building .*/nix/store|copying path|fetching)' >&2) >> "$QUEUE" | |
| BUILD_EXIT=$? | |
| set -e | |
| if [ $BUILD_EXIT -ne 0 ]; then | |
| echo "::error::Build failed with exit code $BUILD_EXIT" | |
| echo "--- Full build stderr (last 100 lines) ---" | |
| tail -100 /tmp/nix-build-stderr.log | |
| fi | |
| # Stop drainer and do final push of any remaining paths | |
| # NOTE(review): kill targets the subshell only; a push it had already started may keep running — confirm. | |
| kill $DRAINER_PID 2>/dev/null || true | |
| wait $DRAINER_PID 2>/dev/null || true | |
| # Merge any leftover queue + in-progress paths for final push | |
| cat "$QUEUE" /tmp/niks3-processing 2>/dev/null | sort -u > /tmp/niks3-final || true | |
| if [ -s /tmp/niks3-final ]; then | |
| FINAL_PATHS=$(wc -l < /tmp/niks3-final) | |
| echo "[final push] Pushing $FINAL_PATHS remaining path(s) to cache..." | |
| if cat /tmp/niks3-final | xargs -r nix path-info --recursive 2>/dev/null | sort -u | xargs -r niks3 push \ | |
| --server-url https://api.nixcache.org \ | |
| --max-concurrent-uploads 10 \ | |
| --auth-token "$NIKS3_TOKEN" 2>&1; then | |
| echo "[final push] Done" | |
| else | |
| echo "::error::[final push] Failed with exit code $?" | |
| fi | |
| else | |
| echo "[final push] No remaining paths to push" | |
| fi | |
| # The step result is nix build's exit status, regardless of how the pushes went. | |
| exit $BUILD_EXIT | |
| # Heavy x86_64 desktop/laptop builds — self-hosted runner with nix | |
| build-desktops: | |
| runs-on: aws-nix-builder-amd64 | |
| # 1440 minutes = 24 hours. | |
| timeout-minutes: 1440 | |
| # When triggered via workflow_run, only proceed if the triggering workflow succeeded. | |
| if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }} | |
| permissions: | |
| contents: read | |
| # Read access to Actions is used by the first step, which lists workflow runs via the REST API. | |
| actions: read | |
| env: | |
| # Use cluster-internal service URL — these runners are pods in the k3s cluster, | |
| # so the external URL (api.nixcache.org → public IP) causes hairpin NAT timeouts. | |
| NIKS3_SERVER_URL: http://niks3.niks3.svc.cluster.local | |
| # Host nix daemon post-build-hook writes to /var/tmp/niks3-queue | |
| # The build step below also appends nix build's printed out paths to this file. | |
| NIKS3_QUEUE: /var/tmp/niks3-queue | |
| steps: | |
| # Stops this job early when a run with a higher id is already queued for the same branch. | |
| # The runs endpoint covers every workflow in the repository, not only this one. | |
| # Note: the step exits 1, so the job is reported as failed rather than skipped. | |
| - name: Skip if newer run is queued | |
| run: | | |
| # Count numeric "id" fields in the response that are greater than this run's id. | |
| # The JSON may contain whitespace after the colon ("id": 123), so the pattern allows it | |
| # and requires at least one digit; otherwise nothing is extracted and NEWER is always 0. | |
| # If curl fails, the pipeline yields 0 and the job continues. | |
| NEWER=$(curl -sf -H "Authorization: Bearer ${{ github.token }}" \ | |
| "https://api.github.com/repos/${{ github.repository }}/actions/runs?status=queued&branch=${{ github.ref_name }}&per_page=10" \ | |
| | grep -oE '"id":[[:space:]]*[0-9]+' | grep -oE '[0-9]+' | awk '$1 > ${{ github.run_id }}' | wc -l) | |
| if [ "$NEWER" -gt 0 ]; then | |
| echo "::notice::Skipping this run — $NEWER newer run(s) queued" | |
| exit 1 | |
| fi | |
| # Check out the repository; Git LFS objects are not fetched. | |
| - uses: actions/checkout@v6 | |
| with: | |
| lfs: false | |
| # Repository-local composite action (defined under .github/actions/setup-host-nix, not shown here). | |
| - name: Configure host Nix | |
| uses: ./.github/actions/setup-host-nix | |
| # Dry-run every desktop configuration; needs_build becomes 'true' if any of them has | |
| # derivations to build or if its dry-run fails. | |
| - name: Check if any desktop build is needed | |
| id: dry-run | |
| run: | | |
| needs_build=false | |
| for cfg in ali-desktop ali-framework-laptop ali-work-laptop; do | |
| # The assignment is the if-condition, so a failing dry-run does not abort the step. | |
| if dry_output=$(nix build --dry-run ".#nixosConfigurations.${cfg}.config.system.build.toplevel" 2>&1); then | |
| if echo "$dry_output" | grep -q 'will be built:'; then | |
| needs_build=true | |
| echo "$cfg needs building" | |
| else | |
| echo "$cfg is fully cached" | |
| fi | |
| else | |
| # The captured dry-run output is not printed here; the real build step will surface the error. | |
| needs_build=true | |
| echo "$cfg dry-run failed (needs building)" | |
| fi | |
| done | |
| echo "needs_build=$needs_build" >> "$GITHUB_OUTPUT" | |
| # Only install the niks3 client when at least one configuration has to be built. | |
| - name: Install niks3 | |
| if: steps.dry-run.outputs.needs_build == 'true' | |
| run: .github/scripts/install-niks3.sh | |
| # Builds all desktop configurations one after another and pushes results to the cache. | |
| # The step fails (exit 1) if any configuration failed to build. | |
| - name: Build and push to cache | |
| if: steps.dry-run.outputs.needs_build == 'true' | |
| env: | |
| NIKS3_TOKEN: ${{ secrets.NIKS3_TOKEN }} | |
| run: | | |
| # niks3_start_drainer / niks3_stop_and_final_push come from this sourced script (not shown here); | |
| # presumably they run and stop a background push loop over $NIKS3_QUEUE — confirm in the script. | |
| source .github/scripts/niks3-background-push.sh | |
| niks3_start_drainer | |
| # Build each config sequentially, appending output paths to queue | |
| # errexit is off around the loop; failures are tracked in any_failed so later configs still build. | |
| set +e | |
| any_failed=false | |
| for cfg in ali-desktop ali-framework-laptop ali-work-laptop; do | |
| echo "=== Building $cfg ===" | |
| # stderr is appended in full to /tmp/nix-build-stderr.log; only lines matching the | |
| # progress pattern are echoed to the step log. | |
| # NOTE(review): the log is opened in append mode and never truncated in this step; if /tmp | |
| # persists between runs on this runner, the dump below may include older output — confirm. | |
| if nix build -L --keep-going --print-out-paths --no-link \ | |
| ".#nixosConfigurations.${cfg}.config.system.build.toplevel" \ | |
| 2> >(tee -a /tmp/nix-build-stderr.log | grep --line-buffered -iE '(^[^ ]*> (building|configuring|installing|compiling|linking|cmake|make\[)|building .*/nix/store|copying path|fetching)' >&2) >> "$NIKS3_QUEUE"; then | |
| echo "=== $cfg complete ===" | |
| else | |
| echo "::warning::$cfg build failed" | |
| any_failed=true | |
| fi | |
| done | |
| set -e | |
| # Runs before the failure check, so paths from successful builds are handled even when another config failed. | |
| niks3_stop_and_final_push | |
| if [ "$any_failed" = true ]; then | |
| echo "::group::Full nix build stderr log" | |
| cat /tmp/nix-build-stderr.log 2>/dev/null || true | |
| echo "::endgroup::" | |
| exit 1 | |
| fi | |
| # aarch64-linux builds — self-hosted ARM runner with nix | |
| build-arm64: | |
| runs-on: aws-nix-builder-arm64 | |
| # 1440 minutes = 24 hours. | |
| timeout-minutes: 1440 | |
| # When triggered via workflow_run, only proceed if the triggering workflow succeeded. | |
| if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }} | |
| permissions: | |
| contents: read | |
| # Read access to Actions is used by the first step, which lists workflow runs via the REST API. | |
| actions: read | |
| env: | |
| # Same cluster-internal cache URL as the build-desktops job. | |
| NIKS3_SERVER_URL: http://niks3.niks3.svc.cluster.local | |
| # Host nix daemon post-build-hook writes to /var/tmp/niks3-queue | |
| # The build step below also appends nix build's printed out paths to this file. | |
| NIKS3_QUEUE: /var/tmp/niks3-queue | |
| steps: | |
| # Stops this job early when a run with a higher id is already queued for the same branch. | |
| # The runs endpoint covers every workflow in the repository, not only this one. | |
| # Note: the step exits 1, so the job is reported as failed rather than skipped. | |
| - name: Skip if newer run is queued | |
| run: | | |
| # Count numeric "id" fields in the response that are greater than this run's id. | |
| # The JSON may contain whitespace after the colon ("id": 123), so the pattern allows it | |
| # and requires at least one digit; otherwise nothing is extracted and NEWER is always 0. | |
| # If curl fails, the pipeline yields 0 and the job continues. | |
| NEWER=$(curl -sf -H "Authorization: Bearer ${{ github.token }}" \ | |
| "https://api.github.com/repos/${{ github.repository }}/actions/runs?status=queued&branch=${{ github.ref_name }}&per_page=10" \ | |
| | grep -oE '"id":[[:space:]]*[0-9]+' | grep -oE '[0-9]+' | awk '$1 > ${{ github.run_id }}' | wc -l) | |
| if [ "$NEWER" -gt 0 ]; then | |
| echo "::notice::Skipping this run — $NEWER newer run(s) queued" | |
| exit 1 | |
| fi | |
| # Check out the repository; Git LFS objects are not fetched. | |
| - uses: actions/checkout@v6 | |
| with: | |
| lfs: false | |
| # Repository-local composite action (defined under .github/actions/setup-host-nix, not shown here). | |
| - name: Configure host Nix | |
| uses: ./.github/actions/setup-host-nix | |
| # Dry-run every arm64 configuration (currently only dev-vm); needs_build becomes 'true' if | |
| # any of them has derivations to build or if its dry-run fails. | |
| - name: Check if any arm64 build is needed | |
| id: dry-run | |
| run: | | |
| needs_build=false | |
| for cfg in dev-vm; do | |
| # The assignment is the if-condition, so a failing dry-run does not abort the step. | |
| if dry_output=$(nix build --dry-run ".#nixosConfigurations.${cfg}.config.system.build.toplevel" 2>&1); then | |
| if echo "$dry_output" | grep -q 'will be built:'; then | |
| needs_build=true | |
| echo "$cfg needs building" | |
| else | |
| echo "$cfg is fully cached" | |
| fi | |
| else | |
| # The captured dry-run output is not printed here; the real build step will surface the error. | |
| needs_build=true | |
| echo "$cfg dry-run failed (needs building)" | |
| fi | |
| done | |
| echo "needs_build=$needs_build" >> "$GITHUB_OUTPUT" | |
| # Only install the niks3 client when at least one configuration has to be built. | |
| - name: Install niks3 | |
| if: steps.dry-run.outputs.needs_build == 'true' | |
| run: .github/scripts/install-niks3.sh | |
| # Builds all arm64 configurations one after another and pushes results to the cache. | |
| # The step fails (exit 1) if any configuration failed to build. | |
| - name: Build and push to cache | |
| if: steps.dry-run.outputs.needs_build == 'true' | |
| env: | |
| NIKS3_TOKEN: ${{ secrets.NIKS3_TOKEN }} | |
| run: | | |
| # niks3_start_drainer / niks3_stop_and_final_push come from this sourced script (not shown here); | |
| # presumably they run and stop a background push loop over $NIKS3_QUEUE — confirm in the script. | |
| source .github/scripts/niks3-background-push.sh | |
| niks3_start_drainer | |
| # Build each config sequentially, appending output paths to queue | |
| # errexit is off around the loop; failures are tracked in any_failed so later configs still build. | |
| set +e | |
| any_failed=false | |
| for cfg in dev-vm; do | |
| echo "=== Building $cfg ===" | |
| # stderr is appended in full to /tmp/nix-build-stderr.log; only lines matching the | |
| # progress pattern are echoed to the step log. | |
| # NOTE(review): the log is opened in append mode and never truncated in this step; if /tmp | |
| # persists between runs on this runner, the dump below may include older output — confirm. | |
| if nix build -L --keep-going --print-out-paths --no-link \ | |
| ".#nixosConfigurations.${cfg}.config.system.build.toplevel" \ | |
| 2> >(tee -a /tmp/nix-build-stderr.log | grep --line-buffered -iE '(^[^ ]*> (building|configuring|installing|compiling|linking|cmake|make\[)|building .*/nix/store|copying path|fetching)' >&2) >> "$NIKS3_QUEUE"; then | |
| echo "=== $cfg complete ===" | |
| else | |
| echo "::warning::$cfg build failed" | |
| any_failed=true | |
| fi | |
| done | |
| set -e | |
| # Runs before the failure check, so paths from successful builds are handled even when another config failed. | |
| niks3_stop_and_final_push | |
| if [ "$any_failed" = true ]; then | |
| echo "::group::Full nix build stderr log" | |
| cat /tmp/nix-build-stderr.log 2>/dev/null || true | |
| echo "::endgroup::" | |
| exit 1 | |
| fi | |