tune otel loki catch-up ingestion #1814
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cluster CI: validation gates that run on pull requests and on pushes to
# main, but only when a file under one of the listed paths changes.
name: Cluster CI

on:
  pull_request:
    paths:
      - 'infrastructure/**'
      - 'monitoring/**'
      - 'my-apps/**'
      - 'scripts/**'
      # Already matched by scripts/** above; listed explicitly on purpose.
      - 'scripts/validate-argocd-apps.sh'
      - '.github/workflows/cluster-ci.yml'
      - '.github/renovate.json5'
  push:
    branches:
      - main
    # Same path filter as pull_request; keep the two lists in sync.
    paths:
      - 'infrastructure/**'
      - 'monitoring/**'
      - 'my-apps/**'
      - 'scripts/**'
      - 'scripts/validate-argocd-apps.sh'
      - '.github/workflows/cluster-ci.yml'
      - '.github/renovate.json5'

# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Runs the repository's ArgoCD application validation script against the
  # checked-out tree.
  argocd-structure:
    name: ArgoCD Structure Validation
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Validate ArgoCD app topology
        run: bash ./scripts/validate-argocd-apps.sh
| truenas-csi-contract: | |
| name: TrueNAS CSI Contract | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Setup Kustomize | |
| uses: imranismail/setup-kustomize@v2 | |
| with: | |
| kustomize-version: 5.4.2 | |
| - name: Validate official TrueNAS CSI deployment | |
| run: bash ./scripts/validate-truenas-csi.sh | |
| render-and-schema: | |
| name: Kustomize Render and Schema Validation | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Setup Kustomize | |
| uses: imranismail/setup-kustomize@v2 | |
| with: | |
| kustomize-version: 5.4.2 | |
| - name: Setup Helm | |
| uses: azure/setup-helm@v4 | |
| with: | |
| version: v3.16.2 | |
| - name: Install kubeconform | |
| run: | | |
| set -euo pipefail | |
| KUBECONFORM_VERSION="v0.6.7" | |
| curl -sSL -o /tmp/kubeconform.tar.gz "https://github.com/yannh/kubeconform/releases/download/${KUBECONFORM_VERSION}/kubeconform-linux-amd64.tar.gz" | |
| tar -xzf /tmp/kubeconform.tar.gz -C /tmp | |
| sudo mv /tmp/kubeconform /usr/local/bin/kubeconform | |
| kubeconform -v | |
| - name: Render all kustomizations | |
| run: | | |
| set -euo pipefail | |
| mapfile -t dirs < <(find infrastructure monitoring my-apps -type f -name kustomization.yaml -exec dirname {} \; | sort -u) | |
| if [ "${#dirs[@]}" -eq 0 ]; then | |
| echo "No kustomization directories found." | |
| exit 1 | |
| fi | |
| : > /tmp/all-manifests.yaml | |
| for dir in "${dirs[@]}"; do | |
| echo "Rendering ${dir}" | |
| kustomize build "${dir}" --enable-helm >> /tmp/all-manifests.yaml | |
| echo "---" >> /tmp/all-manifests.yaml | |
| done | |
| - name: Validate pvc-plumber restore contract (rendered) | |
| # A managed PVC without dataSourceRef -> <pvc>-dst backs up fine | |
| # but recreates EMPTY during DR. Runs against the rendered stream | |
| # (not raw YAML) so Helm-rendered PVCs are covered — e.g. | |
| # gitea/gitea-shared-storage, which a static grep cannot see. | |
| run: | | |
| set -euo pipefail | |
| python3 -c "import yaml" 2>/dev/null || pip3 install --quiet pyyaml | |
| bash ./scripts/validate-restore-contract.sh /tmp/all-manifests.yaml | |
| - name: Validate Kubernetes schemas | |
| run: | | |
| set -euo pipefail | |
| # Filter known kubeconform false positives: | |
| # - Gitea Helm-rendered Service gitea-http: targetPort triggers a | |
| # oneOf ambiguity in the Kubernetes Service schema. | |
| # - VolSync ReplicationSource and ReplicationDestination: | |
| # Datree's CRD catalog schema is stale for our installed | |
| # VolSync and rejects valid spec.kopia configs on both kinds. | |
| # Both must be skipped — the previous filter skipped only | |
| # ReplicationSource, which let the 28 inline RD resources | |
| # in my-apps/** fail kubeconform with "additionalProperties | |
| # 'kopia' not allowed" on every Cluster CI run. | |
| # Split multi-doc YAML into per-document files, skip those documents. | |
| csplit -z -f /tmp/doc- /tmp/all-manifests.yaml '/^---$/' '{*}' > /dev/null | |
| : > /tmp/filtered-manifests.yaml | |
| for f in /tmp/doc-*; do | |
| if grep -q 'kind: Service' "$f" && grep -q 'name: gitea-http' "$f"; then | |
| continue | |
| fi | |
| if grep -q 'apiVersion: volsync.backube/v1alpha1' "$f" \ | |
| && { grep -q 'kind: ReplicationSource' "$f" || grep -q 'kind: ReplicationDestination' "$f"; }; then | |
| continue | |
| fi | |
| cat "$f" >> /tmp/filtered-manifests.yaml | |
| done | |
| kubeconform \ | |
| -summary \ | |
| -ignore-missing-schemas \ | |
| -schema-location default \ | |
| -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \ | |
| /tmp/filtered-manifests.yaml | |
| kyverno-policy-safety: | |
| name: Kyverno Policy Safety Check | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Check for dangerous Kyverno generate policy settings | |
| run: bash ./scripts/validate-kyverno-policies.sh | |
| backup-exempt-contract: | |
| name: Backup-Exempt Annotation Contract | |
| # Catches the class of bug found 2026-05-19 (and again 2026-06-09 in | |
| # monitoring/ Helm values): a PVC labeled backup-exempt:"true" using | |
| # the bare `backup-exempt-reason` key instead of the fully-qualified | |
| # `storage.vanillax.dev/...` key. pvc-plumber v4 (permissive, no | |
| # admission webhook) classifies that as ExemptMissingReason and parks | |
| # the PVC in /audit as needs-human-review, where it masks real | |
| # findings; any future strict mode would deny it at admission. | |
| # Fail at PR time, not during DR. | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Validate backup-exempt annotation keys | |
| run: bash ./scripts/validate-backup-exempt-keys.sh | |
| otel-collector-validate: | |
| name: OpenTelemetry Collector Config Validation | |
| # Catches the class of bug that caused the 2026-04-20 9-hour | |
| # root-sync jam: a pipeline referencing a receiver that was removed. | |
| # Runs `otelcol validate` on each OpenTelemetryCollector's rendered | |
| # config so pipeline-receiver-exporter mismatches are rejected at | |
| # PR time instead of crashlooping in-cluster. | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Setup Kustomize | |
| uses: imranismail/setup-kustomize@v2 | |
| with: | |
| kustomize-version: 5.4.2 | |
| - name: Setup Helm | |
| uses: azure/setup-helm@v4 | |
| with: | |
| version: v3.16.2 | |
| - name: Validate all OpenTelemetryCollector configs | |
| run: bash ./scripts/validate-otel-configs.sh | |
| renovate-config-validate: | |
| name: Renovate Config Validation | |
| # Catches the class of bug that opened issue #1284 on 2026-05-10: | |
| # `packageRules` cannot combine `matchUpdateTypes` and `versioning`, | |
| # but the validator only flags it at runtime — Renovate stops opening | |
| # PRs cluster-wide until fixed. Run validator on every PR that touches | |
| # .github/renovate.json5 so the bad rule never reaches main. | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Setup Node | |
| uses: actions/setup-node@v4 | |
| with: | |
| # Renovate >=41 requires Node >=22 and dropped Node 20. On Node 20, | |
| # `npx renovate` silently resolves to an old renovate major that | |
| # still uses `fileMatch`, so it rejects this repo's modern | |
| # `managerFilePatterns` config and fails EVERY renovate PR. | |
| node-version: '22' | |
| - name: Validate Renovate config | |
| run: | | |
| set -euo pipefail | |
| # The validator looks at the filename to decide global vs repo | |
| # config; .github/renovate.json5 is treated as global, which | |
| # surfaces the same packageRules / managers errors we care about. | |
| npx --yes --package=renovate renovate-config-validator --strict .github/renovate.json5 | |
| shellcheck: | |
| name: Shell Script Lint (Informational) | |
| runs-on: ubuntu-latest | |
| continue-on-error: true | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Install shellcheck | |
| run: | | |
| set -euo pipefail | |
| sudo apt-get update | |
| sudo apt-get install -y shellcheck | |
| - name: Run shellcheck on scripts | |
| run: | | |
| set -euo pipefail | |
| shellcheck -S warning scripts/*.sh |