diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e2c119e5..038b2d3f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -523,9 +523,10 @@ jobs: - name: Generate versioned JSON Schema run: | mkdir -p release - node scripts/generate-schema.mjs --version ${{ needs.bump-version.outputs.version }} --print > release/awf-config.schema.json + node scripts/generate-schema.mjs --version ${{ needs.bump-version.outputs.version }} --print > release/awf-config.v1.schema.json + cp release/awf-config.v1.schema.json release/awf-config.schema.json echo "=== Schema preview (first 10 lines) ===" - head -10 release/awf-config.schema.json + head -10 release/awf-config.v1.schema.json - name: Generate checksums run: | @@ -653,6 +654,7 @@ jobs: release/awf.tgz release/containers.txt release/awf-config.schema.json + release/awf-config.v1.schema.json release/checksums.txt env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/containers/agent/Dockerfile b/containers/agent/Dockerfile index 70425684..e84746b9 100644 --- a/containers/agent/Dockerfile +++ b/containers/agent/Dockerfile @@ -9,29 +9,45 @@ ARG BASE_IMAGE=ubuntu:22.04 FROM ${BASE_IMAGE} -# Switch to Azure apt mirror for faster, more reliable package fetches in CI -# GitHub Actions runners are Azure-hosted; azure.archive.ubuntu.com is geographically closer -# Handles both traditional sources.list (jammy) and DEB822 format (noble+) -RUN if [ -f /etc/apt/sources.list ]; then \ - sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ - sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ - fi && \ - if [ -d /etc/apt/sources.list.d ]; then \ - find /etc/apt/sources.list.d -name '*.sources' -exec \ - sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ - find /etc/apt/sources.list.d -name '*.sources' -exec \ - sed -i 
's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ +# Optionally switch to Azure apt mirror for faster package fetches in CI +# Only rewrite if azure.archive.ubuntu.com is resolvable (BuildKit DNS can fail) +# Falls back to default archive.ubuntu.com which is universally reachable +RUN if getent hosts azure.archive.ubuntu.com >/dev/null 2>&1; then \ + echo "Using Azure apt mirror (DNS resolved successfully)"; \ + if [ -f /etc/apt/sources.list ]; then \ + sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ + sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ + fi; \ + if [ -d /etc/apt/sources.list.d ]; then \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + fi; \ + else \ + echo "Azure apt mirror not reachable, using default archive.ubuntu.com"; \ fi # Install required packages and Node.js 22 # Note: Some packages may already exist in runner-like base images, apt handles this gracefully -# apt_update_retry: retries up to 3 times with backoff to survive prolonged mirror syncs +# apt_update_retry: retries up to 3 times with backoff; if all fail, reverts to archive.ubuntu.com RUN set -eux; \ apt_update_retry() { \ local i; for i in 1 2 3; do \ - rm -rf /var/lib/apt/lists/* && apt-get update && return 0; \ - echo "apt-get update attempt $i/3 failed, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ - done; return 1; \ + rm -rf /var/lib/apt/lists/* && apt-get update 2>&1 | tee /tmp/apt-update.log && \ + if ! grep -q "Failed to fetch" /tmp/apt-update.log; then return 0; fi; \ + echo "apt-get update attempt $i/3 had fetch failures, retrying in $((i*10))s..." 
>&2; sleep $((i*10)); \ + done; \ + echo "All apt-get update retries failed, falling back to archive.ubuntu.com..." >&2; \ + if [ -f /etc/apt/sources.list ]; then \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' /etc/apt/sources.list; \ + sed -i 's|http://security.ubuntu.com|http://archive.ubuntu.com|g' /etc/apt/sources.list 2>/dev/null || true; \ + fi; \ + if [ -d /etc/apt/sources.list.d ]; then \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + fi; \ + rm -rf /var/lib/apt/lists/* && apt-get update; \ }; \ PKGS="iptables curl ca-certificates git gh gnupg dnsutils net-tools netcat-openbsd gosu libcap2-bin"; \ apt_update_retry && \ @@ -60,9 +76,19 @@ RUN set -eux; \ RUN set -eux; \ apt_update_retry() { \ local i; for i in 1 2 3; do \ - rm -rf /var/lib/apt/lists/* && apt-get update && return 0; \ - echo "apt-get update attempt $i/3 failed, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ - done; return 1; \ + rm -rf /var/lib/apt/lists/* && apt-get update 2>&1 | tee /tmp/apt-update.log && \ + if ! grep -q "Failed to fetch" /tmp/apt-update.log; then return 0; fi; \ + echo "apt-get update attempt $i/3 had fetch failures, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ + done; \ + echo "All apt-get update retries failed, falling back to archive.ubuntu.com..." 
>&2; \ + if [ -f /etc/apt/sources.list ]; then \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' /etc/apt/sources.list; \ + fi; \ + if [ -d /etc/apt/sources.list.d ]; then \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + fi; \ + rm -rf /var/lib/apt/lists/* && apt-get update; \ }; \ PARITY_PKGS="libgdiplus libev-dev libssl-dev php-intl php-gd"; \ apt_update_retry && \ @@ -77,9 +103,19 @@ RUN set -eux; \ # Retry logic handles transient mirror sync failures during apt-get update RUN apt_update_retry() { \ local i; for i in 1 2 3; do \ - rm -rf /var/lib/apt/lists/* && apt-get update && return 0; \ - echo "apt-get update attempt $i/3 failed, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ - done; return 1; \ + rm -rf /var/lib/apt/lists/* && apt-get update 2>&1 | tee /tmp/apt-update.log && \ + if ! grep -q "Failed to fetch" /tmp/apt-update.log; then return 0; fi; \ + echo "apt-get update attempt $i/3 had fetch failures, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ + done; \ + echo "All apt-get update retries failed, falling back to archive.ubuntu.com..." 
>&2; \ + if [ -f /etc/apt/sources.list ]; then \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' /etc/apt/sources.list; \ + fi; \ + if [ -d /etc/apt/sources.list.d ]; then \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + fi; \ + rm -rf /var/lib/apt/lists/* && apt-get update; \ }; \ apt_update_retry && \ apt-get upgrade -y && rm -rf /var/lib/apt/lists/* || \ diff --git a/containers/squid/Dockerfile b/containers/squid/Dockerfile index cbd80e5f..2ce30a67 100644 --- a/containers/squid/Dockerfile +++ b/containers/squid/Dockerfile @@ -1,27 +1,42 @@ FROM ubuntu/squid:latest -# Switch to Azure apt mirror for faster, more reliable package fetches in CI -# GitHub Actions runners are Azure-hosted; azure.archive.ubuntu.com is geographically closer -# Handles both traditional sources.list (jammy) and DEB822 format (noble+) -RUN if [ -f /etc/apt/sources.list ]; then \ - sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ - sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ - fi && \ - if [ -d /etc/apt/sources.list.d ]; then \ - find /etc/apt/sources.list.d -name '*.sources' -exec \ - sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ - find /etc/apt/sources.list.d -name '*.sources' -exec \ - sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ +# Optionally switch to Azure apt mirror for faster package fetches in CI +# Only rewrite if azure.archive.ubuntu.com is resolvable (BuildKit DNS can fail) +# Falls back to default archive.ubuntu.com which is universally reachable +RUN if getent hosts azure.archive.ubuntu.com >/dev/null 2>&1; then \ + echo "Using Azure apt mirror (DNS resolved successfully)"; \ + if [ -f /etc/apt/sources.list ]; then \ + sed -i 
's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ + sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' /etc/apt/sources.list; \ + fi; \ + if [ -d /etc/apt/sources.list.d ]; then \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://archive.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://security.ubuntu.com|http://azure.archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + fi; \ + else \ + echo "Azure apt mirror not reachable, using default archive.ubuntu.com"; \ fi # Install additional tools for debugging, healthcheck, and SSL Bump -# apt_update_retry: retries up to 3 times with backoff to survive prolonged mirror syncs +# apt_update_retry: retries up to 3 times with backoff; if all fail, reverts to archive.ubuntu.com RUN set -eux; \ apt_update_retry() { \ local i; for i in 1 2 3; do \ - rm -rf /var/lib/apt/lists/* && apt-get update && return 0; \ - echo "apt-get update attempt $i/3 failed, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ - done; return 1; \ + rm -rf /var/lib/apt/lists/* && apt-get update 2>&1 | tee /tmp/apt-update.log && \ + if ! grep -q "Failed to fetch" /tmp/apt-update.log; then return 0; fi; \ + echo "apt-get update attempt $i/3 had fetch failures, retrying in $((i*10))s..." >&2; sleep $((i*10)); \ + done; \ + echo "All apt-get update retries failed, falling back to archive.ubuntu.com..." 
>&2; \ + if [ -f /etc/apt/sources.list ]; then \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' /etc/apt/sources.list; \ + fi; \ + if [ -d /etc/apt/sources.list.d ]; then \ + find /etc/apt/sources.list.d -name '*.sources' -exec \ + sed -i 's|http://azure.archive.ubuntu.com|http://archive.ubuntu.com|g' {} + 2>/dev/null || true; \ + fi; \ + rm -rf /var/lib/apt/lists/* && apt-get update; \ }; \ PKGS="curl dnsutils net-tools netcat-openbsd openssl squid-openssl"; \ apt_update_retry && \ diff --git a/docs/awf-config.schema.json b/docs/awf-config.schema.json index b21cb502..5cd6a092 100644 --- a/docs/awf-config.schema.json +++ b/docs/awf-config.schema.json @@ -2,6 +2,7 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/github/gh-aw-firewall/main/docs/awf-config.schema.json", "title": "AWF Configuration", + "version": "1", "description": "JSON/YAML configuration for awf CLI. CLI flags override config file values. See https://github.com/github/gh-aw-firewall for documentation.", "type": "object", "additionalProperties": false, diff --git a/docs/awf-config.v1.schema.json b/docs/awf-config.v1.schema.json new file mode 100644 index 00000000..ed187c7f --- /dev/null +++ b/docs/awf-config.v1.schema.json @@ -0,0 +1,334 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/github/gh-aw-firewall/main/docs/awf-config.v1.schema.json", + "title": "AWF Configuration", + "version": "1", + "description": "JSON/YAML configuration for awf CLI. CLI flags override config file values. See https://github.com/github/gh-aw-firewall for documentation.", + "type": "object", + "additionalProperties": false, + "properties": { + "$schema": { + "type": "string", + "description": "JSON Schema URL for IDE validation and autocomplete." 
+ }, + "network": { + "type": "object", + "description": "Network egress configuration.", + "additionalProperties": false, + "properties": { + "allowDomains": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Domains that the agent is allowed to reach. Both the bare domain and all subdomains are permitted (e.g. \"github.com\" also allows \"api.github.com\")." + }, + "blockDomains": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Domains that are explicitly blocked, overriding allowDomains." + }, + "dnsServers": { + "type": "array", + "items": { + "type": "string" + }, + "description": "DNS servers to use inside the container. Defaults to Google DNS (8.8.8.8, 8.8.4.4). Accepts IPv4 and IPv6 addresses." + }, + "upstreamProxy": { + "type": "string", + "description": "Upstream HTTP proxy URL (e.g. \"http://proxy.corp.example.com:8080\"). When set, the AWF Squid proxy forwards traffic through this proxy." + } + } + }, + "apiProxy": { + "type": "object", + "description": "API proxy sidecar configuration. The sidecar injects real API credentials so the agent never has direct access to them.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the API proxy sidecar container." + }, + "enableOpenCode": { + "type": "boolean", + "description": "Enable the OpenCode API proxy endpoint (port 10004)." + }, + "anthropicAutoCache": { + "type": "boolean", + "description": "Automatically apply Anthropic prompt-cache optimizations on /v1/messages requests." + }, + "anthropicCacheTailTtl": { + "type": "string", + "enum": [ + "5m", + "1h" + ], + "description": "TTL for Anthropic cache tail optimization. Only applies when anthropicAutoCache is enabled. Allowed values: \"5m\" or \"1h\"." 
+ }, + "targets": { + "type": "object", + "description": "Override upstream API endpoints for each provider.", + "additionalProperties": false, + "properties": { + "openai": { + "$ref": "#/$defs/providerTarget", + "description": "OpenAI API target override." + }, + "anthropic": { + "$ref": "#/$defs/providerTarget", + "description": "Anthropic API target override." + }, + "copilot": { + "$ref": "#/$defs/providerHostOnlyTarget", + "description": "GitHub Copilot API target override (basePath not supported)." + }, + "gemini": { + "$ref": "#/$defs/providerTarget", + "description": "Google Gemini API target override." + } + } + }, + "models": { + "type": "object", + "description": "Model alias mapping. Keys are canonical model names; values are arrays of alternative names or patterns that should be rewritten to the canonical name.", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "security": { + "type": "object", + "description": "Security and isolation configuration.", + "additionalProperties": false, + "properties": { + "sslBump": { + "type": "boolean", + "description": "Enable SSL bumping (TLS interception) in the Squid proxy. Requires a custom CA certificate." + }, + "enableDlp": { + "type": "boolean", + "description": "Enable Data Loss Prevention (DLP) inspection of outbound traffic." + }, + "enableHostAccess": { + "type": "boolean", + "description": "Mount the host filesystem (read-only for system paths, read-write for the workspace). Enabled by default; set to false to run without host filesystem access." + }, + "allowHostPorts": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Host TCP ports the agent may connect to (e.g. local dev services). Accepts a single port string or an array of port strings." 
+ }, + "allowHostServicePorts": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Named service ports on the host that the agent may connect to. Accepts a single port string or an array of port strings." + }, + "difcProxy": { + "type": "object", + "description": "DIFC (Data-in-Flight Control) proxy configuration.", + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "DIFC proxy host." + }, + "caCert": { + "type": "string", + "description": "Path to the CA certificate for DIFC proxy TLS verification." + } + } + } + } + }, + "container": { + "type": "object", + "description": "Container and Docker configuration.", + "additionalProperties": false, + "properties": { + "memoryLimit": { + "type": "string", + "description": "Docker memory limit for the agent container (e.g. \"4g\", \"512m\"). Uses Docker memory limit syntax." + }, + "agentTimeout": { + "type": "integer", + "minimum": 1, + "description": "Maximum time (in minutes) the agent command is allowed to run." + }, + "enableDind": { + "type": "boolean", + "description": "Enable Docker-in-Docker support inside the agent container." + }, + "workDir": { + "type": "string", + "description": "Host path used as the AWF working directory for generated configs and logs. Defaults to a temporary directory." + }, + "containerWorkDir": { + "type": "string", + "description": "Working directory inside the agent container." + }, + "imageRegistry": { + "type": "string", + "description": "Container image registry to pull from. Defaults to \"ghcr.io/github/gh-aw-firewall\"." + }, + "imageTag": { + "type": "string", + "description": "Container image tag to use. Defaults to \"latest\"." + }, + "skipPull": { + "type": "boolean", + "description": "Skip pulling container images (use locally cached images)." 
+ }, + "buildLocal": { + "type": "boolean", + "description": "Build container images from source instead of pulling from the registry." + }, + "agentImage": { + "type": "string", + "description": "Override the agent container image (e.g. for a GitHub Actions parity image)." + }, + "tty": { + "type": "boolean", + "description": "Allocate a pseudo-TTY for the agent container." + }, + "dockerHost": { + "type": "string", + "description": "Docker daemon socket or host to connect to (e.g. \"unix:///var/run/docker.sock\")." + } + } + }, + "environment": { + "type": "object", + "description": "Environment variable propagation into the agent container.", + "additionalProperties": false, + "properties": { + "envFile": { + "type": "string", + "description": "Path to a .env file whose variables are injected into the agent container." + }, + "envAll": { + "type": "boolean", + "description": "Forward all host environment variables into the agent container. Use with caution — may expose secrets." + }, + "excludeEnv": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Environment variable names to exclude when envAll is true." + } + } + }, + "logging": { + "type": "object", + "description": "Logging and diagnostics configuration.", + "additionalProperties": false, + "properties": { + "logLevel": { + "type": "string", + "enum": [ + "debug", + "info", + "warn", + "error" + ], + "description": "Log verbosity level. Defaults to \"info\"." + }, + "diagnosticLogs": { + "type": "boolean", + "description": "Enable diagnostic logging (Squid access logs, iptables logs). Logs are written to the work directory." + }, + "auditDir": { + "type": "string", + "description": "Directory path for audit logs." + }, + "proxyLogsDir": { + "type": "string", + "description": "Directory path for Squid proxy access logs." + }, + "sessionStateDir": { + "type": "string", + "description": "Directory path for agent session state (e.g. conversation history). 
Set to \"/tmp/gh-aw/sandbox/agent/session-state\" for Copilot agent runs." + } + } + }, + "rateLimiting": { + "type": "object", + "description": "Egress rate limiting configuration.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable egress rate limiting." + }, + "requestsPerMinute": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of HTTP requests per minute." + }, + "requestsPerHour": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of HTTP requests per hour." + }, + "bytesPerMinute": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of bytes transferred per minute." + } + } + } + }, + "$defs": { + "providerTarget": { + "type": "object", + "description": "API provider target override.", + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "Override the provider API host." + }, + "basePath": { + "type": "string", + "description": "Override the provider API base path." + } + } + }, + "providerHostOnlyTarget": { + "type": "object", + "description": "API provider target override (host only; basePath not supported).", + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "Override the provider API host." + } + } + } + } +} diff --git a/docs/releasing.md b/docs/releasing.md index 6930b895..fee3fb8f 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -31,6 +31,7 @@ The workflow will: - Build and push Docker images to GHCR - Create Linux x64 and arm64 binaries - Create NPM tarball and checksums +- Generate versioned JSON Schema files with the release tag embedded in their `$id` URLs - Publish the GitHub Release with auto-generated changelog ### 2. 
Verify Release @@ -43,6 +44,7 @@ Once the workflow completes: - Linux arm64 binary (`awf-linux-arm64`) - NPM tarball (`awf.tgz`) - Checksums file (`checksums.txt`) + - JSON Schema files (`awf-config.schema.json`, `awf-config.v1.schema.json`) - Installation instructions with GHCR image references 3. Go to **Packages** page (in repository) 4. Verify Docker images are published: @@ -61,6 +63,25 @@ Each release includes: - `awf-linux-arm64` - Linux arm64 standalone executable - `awf.tgz` - NPM package tarball (alternative installation method) - `checksums.txt` - SHA256 checksums for all files +- `awf-config.schema.json` - AWF config JSON Schema (latest alias, same content as `awf-config.v1.schema.json`) +- `awf-config.v1.schema.json` - AWF config JSON Schema, version 1 (stable versioned copy) + +### JSON Schema versioning + +Each release generates the schema with a `$id` URL that includes the release tag, creating a stable, pinnable reference: + +``` +https://github.com/github/gh-aw-firewall/releases/download/v0.23.1/awf-config.v1.schema.json +``` + +The unversioned `awf-config.schema.json` asset is a copy of the v1 schema for convenience. External consumers (e.g. the gh-aw compiler) should pin to the versioned URL or the stable raw URL: + +| Reference | URL | +|-----------|-----| +| Pinned to a specific release tag | `https://github.com/github/gh-aw-firewall/releases/download/<tag>/awf-config.v1.schema.json` | +| Always-latest from `main` branch | `https://raw.githubusercontent.com/github/gh-aw-firewall/main/docs/awf-config.v1.schema.json` | + +**Schema version bumping:** The schema version (`"version": "1"` in the schema body) must be incremented whenever breaking changes are made to the config surface (removed fields, changed types, stricter constraints). Non-breaking additions do not require a version bump. When the version is bumped (e.g.
from `1` → `2`), a new file `awf-config.v2.schema.json` should be introduced in `docs/` and `scripts/generate-schema.mjs` updated accordingly. ### GitHub Container Registry (GHCR) Docker images are published to `ghcr.io/github/gh-aw-firewall`: diff --git a/package-lock.json b/package-lock.json index 35b4cab3..a49e9168 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "0.23.1", "license": "MIT", "dependencies": { + "ajv": "^8.18.0", "chalk": "^4.1.2", "commander": "^12.0.0", "execa": "^5.1.1", @@ -30,7 +31,6 @@ "@types/node": "^25.6.0", "@typescript-eslint/eslint-plugin": "^8.58.2", "@typescript-eslint/parser": "^8.58.2", - "ajv": "^8.18.0", "babel-jest": "^30.2.0", "esbuild": "^0.25.0", "eslint": "^10.2.1", @@ -4246,7 +4246,6 @@ "version": "8.18.0", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", - "dev": true, "license": "MIT", "dependencies": { "fast-deep-equal": "^3.1.3", @@ -5500,7 +5499,6 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true, "license": "MIT" }, "node_modules/fast-glob": { @@ -5551,7 +5549,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", - "dev": true, "funding": [ { "type": "github", @@ -7077,7 +7074,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", - "dev": true, "license": "MIT" }, "node_modules/json-stable-stringify-without-jsonify": { @@ -8581,7 +8577,6 @@ "version": 
"2.0.2", "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", - "dev": true, "license": "MIT", "engines": { "node": ">=0.10.0" diff --git a/package.json b/package.json index 1b3033d1..cc9e0c74 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "author": "GitHub", "license": "MIT", "dependencies": { + "ajv": "^8.18.0", "chalk": "^4.1.2", "commander": "^12.0.0", "execa": "^5.1.1", @@ -64,7 +65,6 @@ "@types/node": "^25.6.0", "@typescript-eslint/eslint-plugin": "^8.58.2", "@typescript-eslint/parser": "^8.58.2", - "ajv": "^8.18.0", "babel-jest": "^30.2.0", "esbuild": "^0.25.0", "eslint": "^10.2.1", diff --git a/scripts/generate-schema.mjs b/scripts/generate-schema.mjs index e4cea762..36f68ed9 100644 --- a/scripts/generate-schema.mjs +++ b/scripts/generate-schema.mjs @@ -1,13 +1,18 @@ #!/usr/bin/env node /** - * Generates the JSON Schema for the AWF config file (docs/awf-config.schema.json). + * Generates the JSON Schema for the AWF config file. * * Usage: - * node scripts/generate-schema.mjs # writes docs/awf-config.schema.json - * node scripts/generate-schema.mjs --version v0.23.1 # embeds a versioned $id + * node scripts/generate-schema.mjs # writes docs/awf-config.schema.json and docs/awf-config.v1.schema.json + * node scripts/generate-schema.mjs --version v0.23.1 # embeds a versioned $id in release output * node scripts/generate-schema.mjs --print # prints to stdout * + * Output files: + * docs/awf-config.v1.schema.json — stable versioned file (canonical source) + * docs/awf-config.schema.json — latest alias (always points to current version content) + * src/awf-config-schema.json — bundleable copy for runtime validation + * * The schema reflects the validated config surface defined in src/config-file.ts * (validateAwfFileConfig), not just the AwfFileConfig TypeScript interface. 
* When validation rules change (e.g. new fields, enum constraints), update this script to match. @@ -46,15 +51,20 @@ const version = versionIdx !== -1 ? args[versionIdx + 1] : null; const printOnly = args.includes('--print'); // --- Build the schema --- -const schemaId = version +// Versioned $id (stable reference for v1 of the config schema) +const schemaV1Id = version + ? `https://github.com/github/gh-aw-firewall/releases/download/${version}/awf-config.v1.schema.json` + : 'https://raw.githubusercontent.com/github/gh-aw-firewall/main/docs/awf-config.v1.schema.json'; + +// Unversioned "latest" $id (always points to the current schema) +const schemaLatestId = version ? `https://github.com/github/gh-aw-firewall/releases/download/${version}/awf-config.schema.json` : 'https://raw.githubusercontent.com/github/gh-aw-firewall/main/docs/awf-config.schema.json'; /** @type {object} */ -const schema = { - $schema: 'https://json-schema.org/draft/2020-12/schema', - $id: schemaId, +const schemaBody = { title: 'AWF Configuration', + version: '1', description: 'JSON/YAML configuration for awf CLI. CLI flags override config file values. 
' + 'See https://github.com/github/gh-aw-firewall for documentation.', @@ -392,14 +402,41 @@ const schema = { }, }; -const output = JSON.stringify(schema, null, 2) + '\n'; +// Compose the versioned schema (stable, canonical) and the latest alias +const schemaV1 = { + $schema: 'https://json-schema.org/draft/2020-12/schema', + $id: schemaV1Id, + ...schemaBody, +}; + +const schemaLatest = { + $schema: 'https://json-schema.org/draft/2020-12/schema', + $id: schemaLatestId, + ...schemaBody, +}; + +const outputV1 = JSON.stringify(schemaV1, null, 2) + '\n'; +const outputLatest = JSON.stringify(schemaLatest, null, 2) + '\n'; if (printOnly) { - process.stdout.write(output); + // --print emits the versioned (v1) schema to stdout + process.stdout.write(outputV1); } else { const docsDir = join(projectRoot, 'docs'); mkdirSync(docsDir, { recursive: true }); - const outPath = join(docsDir, 'awf-config.schema.json'); - writeFileSync(outPath, output); - console.log(`Schema written to ${outPath}`); + + // Stable versioned file (canonical) + const v1Path = join(docsDir, 'awf-config.v1.schema.json'); + writeFileSync(v1Path, outputV1); + console.log(`Schema written to ${v1Path}`); + + // Unversioned "latest" alias + const latestPath = join(docsDir, 'awf-config.schema.json'); + writeFileSync(latestPath, outputLatest); + console.log(`Schema written to ${latestPath}`); + + // Also write to src/ for runtime loading (loaded dynamically by schema-validator.ts at startup) + const srcPath = join(projectRoot, 'src', 'awf-config-schema.json'); + writeFileSync(srcPath, outputV1); + console.log(`Schema written to ${srcPath}`); } diff --git a/src/awf-config-schema.json b/src/awf-config-schema.json new file mode 100644 index 00000000..ed187c7f --- /dev/null +++ b/src/awf-config-schema.json @@ -0,0 +1,334 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/github/gh-aw-firewall/main/docs/awf-config.v1.schema.json", + "title": "AWF 
Configuration", + "version": "1", + "description": "JSON/YAML configuration for awf CLI. CLI flags override config file values. See https://github.com/github/gh-aw-firewall for documentation.", + "type": "object", + "additionalProperties": false, + "properties": { + "$schema": { + "type": "string", + "description": "JSON Schema URL for IDE validation and autocomplete." + }, + "network": { + "type": "object", + "description": "Network egress configuration.", + "additionalProperties": false, + "properties": { + "allowDomains": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Domains that the agent is allowed to reach. Both the bare domain and all subdomains are permitted (e.g. \"github.com\" also allows \"api.github.com\")." + }, + "blockDomains": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Domains that are explicitly blocked, overriding allowDomains." + }, + "dnsServers": { + "type": "array", + "items": { + "type": "string" + }, + "description": "DNS servers to use inside the container. Defaults to Google DNS (8.8.8.8, 8.8.4.4). Accepts IPv4 and IPv6 addresses." + }, + "upstreamProxy": { + "type": "string", + "description": "Upstream HTTP proxy URL (e.g. \"http://proxy.corp.example.com:8080\"). When set, the AWF Squid proxy forwards traffic through this proxy." + } + } + }, + "apiProxy": { + "type": "object", + "description": "API proxy sidecar configuration. The sidecar injects real API credentials so the agent never has direct access to them.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the API proxy sidecar container." + }, + "enableOpenCode": { + "type": "boolean", + "description": "Enable the OpenCode API proxy endpoint (port 10004)." + }, + "anthropicAutoCache": { + "type": "boolean", + "description": "Automatically apply Anthropic prompt-cache optimizations on /v1/messages requests." 
+ }, + "anthropicCacheTailTtl": { + "type": "string", + "enum": [ + "5m", + "1h" + ], + "description": "TTL for Anthropic cache tail optimization. Only applies when anthropicAutoCache is enabled. Allowed values: \"5m\" or \"1h\"." + }, + "targets": { + "type": "object", + "description": "Override upstream API endpoints for each provider.", + "additionalProperties": false, + "properties": { + "openai": { + "$ref": "#/$defs/providerTarget", + "description": "OpenAI API target override." + }, + "anthropic": { + "$ref": "#/$defs/providerTarget", + "description": "Anthropic API target override." + }, + "copilot": { + "$ref": "#/$defs/providerHostOnlyTarget", + "description": "GitHub Copilot API target override (basePath not supported)." + }, + "gemini": { + "$ref": "#/$defs/providerTarget", + "description": "Google Gemini API target override." + } + } + }, + "models": { + "type": "object", + "description": "Model alias mapping. Keys are canonical model names; values are arrays of alternative names or patterns that should be rewritten to the canonical name.", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "security": { + "type": "object", + "description": "Security and isolation configuration.", + "additionalProperties": false, + "properties": { + "sslBump": { + "type": "boolean", + "description": "Enable SSL bumping (TLS interception) in the Squid proxy. Requires a custom CA certificate." + }, + "enableDlp": { + "type": "boolean", + "description": "Enable Data Loss Prevention (DLP) inspection of outbound traffic." + }, + "enableHostAccess": { + "type": "boolean", + "description": "Mount the host filesystem (read-only for system paths, read-write for the workspace). Enabled by default; set to false to run without host filesystem access." 
+ }, + "allowHostPorts": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Host TCP ports the agent may connect to (e.g. local dev services). Accepts a single port string or an array of port strings." + }, + "allowHostServicePorts": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Named service ports on the host that the agent may connect to. Accepts a single port string or an array of port strings." + }, + "difcProxy": { + "type": "object", + "description": "DIFC (Data-in-Flight Control) proxy configuration.", + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "DIFC proxy host." + }, + "caCert": { + "type": "string", + "description": "Path to the CA certificate for DIFC proxy TLS verification." + } + } + } + } + }, + "container": { + "type": "object", + "description": "Container and Docker configuration.", + "additionalProperties": false, + "properties": { + "memoryLimit": { + "type": "string", + "description": "Docker memory limit for the agent container (e.g. \"4g\", \"512m\"). Uses Docker memory limit syntax." + }, + "agentTimeout": { + "type": "integer", + "minimum": 1, + "description": "Maximum time (in minutes) the agent command is allowed to run." + }, + "enableDind": { + "type": "boolean", + "description": "Enable Docker-in-Docker support inside the agent container." + }, + "workDir": { + "type": "string", + "description": "Host path used as the AWF working directory for generated configs and logs. Defaults to a temporary directory." + }, + "containerWorkDir": { + "type": "string", + "description": "Working directory inside the agent container." + }, + "imageRegistry": { + "type": "string", + "description": "Container image registry to pull from. Defaults to \"ghcr.io/github/gh-aw-firewall\"." 
+ }, + "imageTag": { + "type": "string", + "description": "Container image tag to use. Defaults to \"latest\"." + }, + "skipPull": { + "type": "boolean", + "description": "Skip pulling container images (use locally cached images)." + }, + "buildLocal": { + "type": "boolean", + "description": "Build container images from source instead of pulling from the registry." + }, + "agentImage": { + "type": "string", + "description": "Override the agent container image (e.g. for a GitHub Actions parity image)." + }, + "tty": { + "type": "boolean", + "description": "Allocate a pseudo-TTY for the agent container." + }, + "dockerHost": { + "type": "string", + "description": "Docker daemon socket or host to connect to (e.g. \"unix:///var/run/docker.sock\")." + } + } + }, + "environment": { + "type": "object", + "description": "Environment variable propagation into the agent container.", + "additionalProperties": false, + "properties": { + "envFile": { + "type": "string", + "description": "Path to a .env file whose variables are injected into the agent container." + }, + "envAll": { + "type": "boolean", + "description": "Forward all host environment variables into the agent container. Use with caution — may expose secrets." + }, + "excludeEnv": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Environment variable names to exclude when envAll is true." + } + } + }, + "logging": { + "type": "object", + "description": "Logging and diagnostics configuration.", + "additionalProperties": false, + "properties": { + "logLevel": { + "type": "string", + "enum": [ + "debug", + "info", + "warn", + "error" + ], + "description": "Log verbosity level. Defaults to \"info\"." + }, + "diagnosticLogs": { + "type": "boolean", + "description": "Enable diagnostic logging (Squid access logs, iptables logs). Logs are written to the work directory." + }, + "auditDir": { + "type": "string", + "description": "Directory path for audit logs." 
+ }, + "proxyLogsDir": { + "type": "string", + "description": "Directory path for Squid proxy access logs." + }, + "sessionStateDir": { + "type": "string", + "description": "Directory path for agent session state (e.g. conversation history). Set to \"/tmp/gh-aw/sandbox/agent/session-state\" for Copilot agent runs." + } + } + }, + "rateLimiting": { + "type": "object", + "description": "Egress rate limiting configuration.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable egress rate limiting." + }, + "requestsPerMinute": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of HTTP requests per minute." + }, + "requestsPerHour": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of HTTP requests per hour." + }, + "bytesPerMinute": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of bytes transferred per minute." + } + } + } + }, + "$defs": { + "providerTarget": { + "type": "object", + "description": "API provider target override.", + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "Override the provider API host." + }, + "basePath": { + "type": "string", + "description": "Override the provider API base path." + } + } + }, + "providerHostOnlyTarget": { + "type": "object", + "description": "API provider target override (host only; basePath not supported).", + "additionalProperties": false, + "properties": { + "host": { + "type": "string", + "description": "Override the provider API host." 
+ } + } + } + } +} diff --git a/src/config-file.test.ts b/src/config-file.test.ts index 59ea3f41..37e9b9e7 100644 --- a/src/config-file.test.ts +++ b/src/config-file.test.ts @@ -107,7 +107,7 @@ describe('config-file', () => { it('rejects invalid apiProxy.anthropicCacheTailTtl', () => { const errors = validateAwfFileConfig({ apiProxy: { anthropicCacheTailTtl: '10m' } }); - expect(errors).toContain('config.apiProxy.anthropicCacheTailTtl must be "5m" or "1h"'); + expect(errors).toContain('config.apiProxy.anthropicCacheTailTtl must be one of: 5m, 1h'); }); it('rejects non-object apiProxy.targets', () => { diff --git a/src/config-file.ts b/src/config-file.ts index c8e50465..7e02862b 100644 --- a/src/config-file.ts +++ b/src/config-file.ts @@ -1,6 +1,7 @@ import * as fs from 'fs'; import * as path from 'path'; import * as yaml from 'js-yaml'; +import { validateWithSchema } from './schema-validator'; export interface AwfFileConfig { $schema?: string; @@ -68,230 +69,16 @@ export interface AwfFileConfig { }; } -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null && !Array.isArray(value); -} - -function validateKnownKeys( - value: Record, - keys: string[], - location: string, - errors: string[] -): void { - const allowed = new Set(keys); - for (const key of Object.keys(value)) { - if (!allowed.has(key)) { - errors.push(`${location}.${key} is not supported`); - } - } -} - -function validateStringArray(value: unknown, location: string, errors: string[]): void { - if (!Array.isArray(value) || value.some(item => typeof item !== 'string')) { - errors.push(`${location} must be an array of strings`); - } -} - -function validateStringOrStringArray(value: unknown, location: string, errors: string[]): void { - const isValid = typeof value === 'string' || (Array.isArray(value) && value.every(item => typeof item === 'string')); - if (!isValid) { - errors.push(`${location} must be a string or array of strings`); - } -} - -function 
validateProviderTarget(value: unknown, location: string, errors: string[], allowBasePath = true): void { - if (!isRecord(value)) { - errors.push(`${location} must be an object`); - return; - } - validateKnownKeys(value, allowBasePath ? ['host', 'basePath'] : ['host'], location, errors); - if (value.host !== undefined && typeof value.host !== 'string') { - errors.push(`${location}.host must be a string`); - } - if (allowBasePath && value.basePath !== undefined && typeof value.basePath !== 'string') { - errors.push(`${location}.basePath must be a string`); - } -} - -function isPositiveInteger(value: unknown): value is number { - return typeof value === 'number' && Number.isInteger(value) && value > 0; -} - +/** + * Validate an unknown value against the AWF config schema. + * Returns an array of human-readable error strings (empty = valid). + * + * Uses the published JSON Schema (awf-config-schema.json) via ajv for + * validation, ensuring the schema is the single source of truth for both + * external consumers (gh-aw compiler) and internal validation. 
+ */ export function validateAwfFileConfig(config: unknown): string[] { - const errors: string[] = []; - - if (!isRecord(config)) { - return ['config root must be an object']; - } - - validateKnownKeys( - config, - ['$schema', 'network', 'apiProxy', 'security', 'container', 'environment', 'logging', 'rateLimiting'], - 'config', - errors - ); - - if (config.$schema !== undefined && typeof config.$schema !== 'string') { - errors.push('config.$schema must be a string'); - } - - if (config.network !== undefined) { - if (!isRecord(config.network)) { - errors.push('config.network must be an object'); - } else { - validateKnownKeys(config.network, ['allowDomains', 'blockDomains', 'dnsServers', 'upstreamProxy'], 'config.network', errors); - if (config.network.allowDomains !== undefined) validateStringArray(config.network.allowDomains, 'config.network.allowDomains', errors); - if (config.network.blockDomains !== undefined) validateStringArray(config.network.blockDomains, 'config.network.blockDomains', errors); - if (config.network.dnsServers !== undefined) validateStringArray(config.network.dnsServers, 'config.network.dnsServers', errors); - if (config.network.upstreamProxy !== undefined && typeof config.network.upstreamProxy !== 'string') { - errors.push('config.network.upstreamProxy must be a string'); - } - } - } - - if (config.apiProxy !== undefined) { - if (!isRecord(config.apiProxy)) { - errors.push('config.apiProxy must be an object'); - } else { - validateKnownKeys(config.apiProxy, ['enabled', 'enableOpenCode', 'anthropicAutoCache', 'anthropicCacheTailTtl', 'targets', 'models'], 'config.apiProxy', errors); - if (config.apiProxy.enabled !== undefined && typeof config.apiProxy.enabled !== 'boolean') { - errors.push('config.apiProxy.enabled must be a boolean'); - } - if (config.apiProxy.enableOpenCode !== undefined && typeof config.apiProxy.enableOpenCode !== 'boolean') { - errors.push('config.apiProxy.enableOpenCode must be a boolean'); - } - if 
(config.apiProxy.anthropicAutoCache !== undefined && typeof config.apiProxy.anthropicAutoCache !== 'boolean') { - errors.push('config.apiProxy.anthropicAutoCache must be a boolean'); - } - if (config.apiProxy.anthropicCacheTailTtl !== undefined) { - if (config.apiProxy.anthropicCacheTailTtl !== '5m' && config.apiProxy.anthropicCacheTailTtl !== '1h') { - errors.push('config.apiProxy.anthropicCacheTailTtl must be "5m" or "1h"'); - } - } - if (config.apiProxy.targets !== undefined) { - if (!isRecord(config.apiProxy.targets)) { - errors.push('config.apiProxy.targets must be an object'); - } else { - validateKnownKeys(config.apiProxy.targets, ['openai', 'anthropic', 'copilot', 'gemini'], 'config.apiProxy.targets', errors); - if (config.apiProxy.targets.openai !== undefined) validateProviderTarget(config.apiProxy.targets.openai, 'config.apiProxy.targets.openai', errors); - if (config.apiProxy.targets.anthropic !== undefined) validateProviderTarget(config.apiProxy.targets.anthropic, 'config.apiProxy.targets.anthropic', errors); - if (config.apiProxy.targets.copilot !== undefined) validateProviderTarget(config.apiProxy.targets.copilot, 'config.apiProxy.targets.copilot', errors, false); - if (config.apiProxy.targets.gemini !== undefined) validateProviderTarget(config.apiProxy.targets.gemini, 'config.apiProxy.targets.gemini', errors); - } - } - if (config.apiProxy.models !== undefined) { - if (!isRecord(config.apiProxy.models)) { - errors.push('config.apiProxy.models must be an object'); - } else { - for (const [key, value] of Object.entries(config.apiProxy.models)) { - if (!Array.isArray(value) || value.some(v => typeof v !== 'string')) { - errors.push(`config.apiProxy.models["${key}"] must be an array of strings`); - } - } - } - } - } - } - - if (config.security !== undefined) { - if (!isRecord(config.security)) { - errors.push('config.security must be an object'); - } else { - validateKnownKeys( - config.security, - ['sslBump', 'enableDlp', 'enableHostAccess', 
'allowHostPorts', 'allowHostServicePorts', 'difcProxy'], - 'config.security', - errors - ); - if (config.security.sslBump !== undefined && typeof config.security.sslBump !== 'boolean') errors.push('config.security.sslBump must be a boolean'); - if (config.security.enableDlp !== undefined && typeof config.security.enableDlp !== 'boolean') errors.push('config.security.enableDlp must be a boolean'); - if (config.security.enableHostAccess !== undefined && typeof config.security.enableHostAccess !== 'boolean') errors.push('config.security.enableHostAccess must be a boolean'); - if (config.security.allowHostPorts !== undefined) validateStringOrStringArray(config.security.allowHostPorts, 'config.security.allowHostPorts', errors); - if (config.security.allowHostServicePorts !== undefined) validateStringOrStringArray(config.security.allowHostServicePorts, 'config.security.allowHostServicePorts', errors); - if (config.security.difcProxy !== undefined) { - if (!isRecord(config.security.difcProxy)) { - errors.push('config.security.difcProxy must be an object'); - } else { - validateKnownKeys(config.security.difcProxy, ['host', 'caCert'], 'config.security.difcProxy', errors); - if (config.security.difcProxy.host !== undefined && typeof config.security.difcProxy.host !== 'string') errors.push('config.security.difcProxy.host must be a string'); - if (config.security.difcProxy.caCert !== undefined && typeof config.security.difcProxy.caCert !== 'string') errors.push('config.security.difcProxy.caCert must be a string'); - } - } - } - } - - if (config.container !== undefined) { - if (!isRecord(config.container)) { - errors.push('config.container must be an object'); - } else { - validateKnownKeys( - config.container, - ['memoryLimit', 'agentTimeout', 'enableDind', 'workDir', 'containerWorkDir', 'imageRegistry', 'imageTag', 'skipPull', 'buildLocal', 'agentImage', 'tty', 'dockerHost'], - 'config.container', - errors - ); - if (config.container.memoryLimit !== undefined && typeof 
config.container.memoryLimit !== 'string') errors.push('config.container.memoryLimit must be a string'); - if (config.container.agentTimeout !== undefined && !isPositiveInteger(config.container.agentTimeout)) { - errors.push('config.container.agentTimeout must be a positive integer'); - } - if (config.container.enableDind !== undefined && typeof config.container.enableDind !== 'boolean') errors.push('config.container.enableDind must be a boolean'); - if (config.container.workDir !== undefined && typeof config.container.workDir !== 'string') errors.push('config.container.workDir must be a string'); - if (config.container.containerWorkDir !== undefined && typeof config.container.containerWorkDir !== 'string') errors.push('config.container.containerWorkDir must be a string'); - if (config.container.imageRegistry !== undefined && typeof config.container.imageRegistry !== 'string') errors.push('config.container.imageRegistry must be a string'); - if (config.container.imageTag !== undefined && typeof config.container.imageTag !== 'string') errors.push('config.container.imageTag must be a string'); - if (config.container.skipPull !== undefined && typeof config.container.skipPull !== 'boolean') errors.push('config.container.skipPull must be a boolean'); - if (config.container.buildLocal !== undefined && typeof config.container.buildLocal !== 'boolean') errors.push('config.container.buildLocal must be a boolean'); - if (config.container.agentImage !== undefined && typeof config.container.agentImage !== 'string') errors.push('config.container.agentImage must be a string'); - if (config.container.tty !== undefined && typeof config.container.tty !== 'boolean') errors.push('config.container.tty must be a boolean'); - if (config.container.dockerHost !== undefined && typeof config.container.dockerHost !== 'string') errors.push('config.container.dockerHost must be a string'); - } - } - - if (config.environment !== undefined) { - if (!isRecord(config.environment)) { - 
errors.push('config.environment must be an object'); - } else { - validateKnownKeys(config.environment, ['envFile', 'envAll', 'excludeEnv'], 'config.environment', errors); - if (config.environment.envFile !== undefined && typeof config.environment.envFile !== 'string') errors.push('config.environment.envFile must be a string'); - if (config.environment.envAll !== undefined && typeof config.environment.envAll !== 'boolean') errors.push('config.environment.envAll must be a boolean'); - if (config.environment.excludeEnv !== undefined) validateStringArray(config.environment.excludeEnv, 'config.environment.excludeEnv', errors); - } - } - - if (config.logging !== undefined) { - if (!isRecord(config.logging)) { - errors.push('config.logging must be an object'); - } else { - validateKnownKeys(config.logging, ['logLevel', 'diagnosticLogs', 'auditDir', 'proxyLogsDir', 'sessionStateDir'], 'config.logging', errors); - if (config.logging.logLevel !== undefined && (typeof config.logging.logLevel !== 'string' || !['debug', 'info', 'warn', 'error'].includes(config.logging.logLevel))) { - errors.push('config.logging.logLevel must be one of: debug, info, warn, error'); - } - if (config.logging.diagnosticLogs !== undefined && typeof config.logging.diagnosticLogs !== 'boolean') errors.push('config.logging.diagnosticLogs must be a boolean'); - if (config.logging.auditDir !== undefined && typeof config.logging.auditDir !== 'string') errors.push('config.logging.auditDir must be a string'); - if (config.logging.proxyLogsDir !== undefined && typeof config.logging.proxyLogsDir !== 'string') errors.push('config.logging.proxyLogsDir must be a string'); - if (config.logging.sessionStateDir !== undefined && typeof config.logging.sessionStateDir !== 'string') errors.push('config.logging.sessionStateDir must be a string'); - } - } - - if (config.rateLimiting !== undefined) { - if (!isRecord(config.rateLimiting)) { - errors.push('config.rateLimiting must be an object'); - } else { - 
validateKnownKeys(config.rateLimiting, ['enabled', 'requestsPerMinute', 'requestsPerHour', 'bytesPerMinute'], 'config.rateLimiting', errors); - if (config.rateLimiting.enabled !== undefined && typeof config.rateLimiting.enabled !== 'boolean') errors.push('config.rateLimiting.enabled must be a boolean'); - if (config.rateLimiting.requestsPerMinute !== undefined && !isPositiveInteger(config.rateLimiting.requestsPerMinute)) { - errors.push('config.rateLimiting.requestsPerMinute must be a positive integer'); - } - if (config.rateLimiting.requestsPerHour !== undefined && !isPositiveInteger(config.rateLimiting.requestsPerHour)) { - errors.push('config.rateLimiting.requestsPerHour must be a positive integer'); - } - if (config.rateLimiting.bytesPerMinute !== undefined && !isPositiveInteger(config.rateLimiting.bytesPerMinute)) { - errors.push('config.rateLimiting.bytesPerMinute must be a positive integer'); - } - } - } - - return errors; + return validateWithSchema(config); } const readStdinSync = (): string => fs.readFileSync(process.stdin.fd, 'utf8'); diff --git a/src/schema-validator.test.ts b/src/schema-validator.test.ts new file mode 100644 index 00000000..2a47b273 --- /dev/null +++ b/src/schema-validator.test.ts @@ -0,0 +1,120 @@ +import { validateWithSchema } from './schema-validator'; + +describe('schema-validator', () => { + describe('validateWithSchema', () => { + it('returns empty array for valid config', () => { + expect(validateWithSchema({})).toEqual([]); + expect(validateWithSchema({ network: { allowDomains: ['github.com'] } })).toEqual([]); + }); + + it('rejects non-object roots', () => { + expect(validateWithSchema(null)).toEqual(['config root must be an object']); + expect(validateWithSchema('string')).toEqual(['config root must be an object']); + expect(validateWithSchema(42)).toEqual(['config root must be an object']); + expect(validateWithSchema([])).toEqual(['config root must be an object']); + expect(validateWithSchema(undefined)).toEqual(['config 
root must be an object']); + }); + + it('formats additionalProperties as "is not supported"', () => { + const errors = validateWithSchema({ unknownKey: true }); + expect(errors).toContain('config.unknownKey is not supported'); + }); + + it('formats nested additionalProperties', () => { + const errors = validateWithSchema({ network: { badField: true } }); + expect(errors).toContain('config.network.badField is not supported'); + }); + + it('formats type:object errors as "must be an object"', () => { + const errors = validateWithSchema({ network: 'not-object' }); + expect(errors).toContain('config.network must be an object'); + }); + + it('formats array-of-strings fields correctly when given non-array', () => { + const errors = validateWithSchema({ network: { allowDomains: 'github.com' } }); + expect(errors).toContain('config.network.allowDomains must be an array of strings'); + }); + + it('formats array-of-strings fields when items have wrong type', () => { + const errors = validateWithSchema({ network: { blockDomains: [1, 2] } }); + expect(errors).toContain('config.network.blockDomains must be an array of strings'); + }); + + it('formats integer with minimum:1 as "must be a positive integer"', () => { + // Non-integer value + expect(validateWithSchema({ container: { agentTimeout: 1.5 } })) + .toContain('config.container.agentTimeout must be a positive integer'); + // String value + expect(validateWithSchema({ container: { agentTimeout: 'five' } })) + .toContain('config.container.agentTimeout must be a positive integer'); + // Below minimum + expect(validateWithSchema({ container: { agentTimeout: 0 } })) + .toContain('config.container.agentTimeout must be a positive integer'); + expect(validateWithSchema({ container: { agentTimeout: -1 } })) + .toContain('config.container.agentTimeout must be a positive integer'); + }); + + it('formats enum errors as "must be one of"', () => { + const errors = validateWithSchema({ apiProxy: { anthropicCacheTailTtl: '10m' } }); + 
expect(errors).toContain('config.apiProxy.anthropicCacheTailTtl must be one of: 5m, 1h'); + }); + + it('formats logLevel enum correctly', () => { + const errors = validateWithSchema({ logging: { logLevel: 'verbose' } }); + expect(errors).toContain('config.logging.logLevel must be one of: debug, info, warn, error'); + }); + + it('formats oneOf (string-or-array) fields correctly', () => { + // Number is neither string nor array + const errors = validateWithSchema({ security: { allowHostPorts: 5432 } }); + expect(errors).toContain('config.security.allowHostPorts must be a string or array of strings'); + }); + + it('accepts string and array forms for oneOf fields', () => { + expect(validateWithSchema({ security: { allowHostPorts: '5432' } })).toEqual([]); + expect(validateWithSchema({ security: { allowHostPorts: ['5432', '6379'] } })).toEqual([]); + }); + + it('formats boolean type errors', () => { + const errors = validateWithSchema({ apiProxy: { enabled: 'yes' } }); + expect(errors).toContain('config.apiProxy.enabled must be a boolean'); + }); + + it('formats string type errors for non-array fields', () => { + const errors = validateWithSchema({ container: { memoryLimit: 512 } }); + expect(errors).toContain('config.container.memoryLimit must be a string'); + }); + + it('consolidates multiple item-level errors into one message', () => { + // Array with 3 non-string items should produce 1 error, not 3 + const errors = validateWithSchema({ network: { dnsServers: [1, 2, 3] } }); + const dnsErrors = errors.filter(e => e.includes('dnsServers')); + expect(dnsErrors).toHaveLength(1); + expect(dnsErrors[0]).toBe('config.network.dnsServers must be an array of strings'); + }); + + it('handles rateLimiting integer fields', () => { + expect(validateWithSchema({ rateLimiting: { requestsPerMinute: 0 } })) + .toContain('config.rateLimiting.requestsPerMinute must be a positive integer'); + expect(validateWithSchema({ rateLimiting: { requestsPerHour: -1 } })) + 
.toContain('config.rateLimiting.requestsPerHour must be a positive integer'); + expect(validateWithSchema({ rateLimiting: { bytesPerMinute: 'lots' } })) + .toContain('config.rateLimiting.bytesPerMinute must be a positive integer'); + }); + + it('returns multiple errors for multiple issues', () => { + const errors = validateWithSchema({ + unknownTop: true, + network: { allowDomains: 'not-array' }, + container: { agentTimeout: -5 }, + }); + expect(errors.length).toBeGreaterThanOrEqual(3); + }); + + it('validates models as object with string-array values', () => { + expect(validateWithSchema({ apiProxy: { models: { 'gpt-4o': ['alias1'] } } })).toEqual([]); + const errors = validateWithSchema({ apiProxy: { models: { 'gpt-4o': 'not-array' } } }); + expect(errors.length).toBeGreaterThan(0); + }); + }); +}); diff --git a/src/schema-validator.ts b/src/schema-validator.ts new file mode 100644 index 00000000..8ff64097 --- /dev/null +++ b/src/schema-validator.ts @@ -0,0 +1,158 @@ +/** + * Runtime config validation using the published JSON Schema. + * + * The schema JSON is loaded from disk so the emitted JS does not contain a + * fragile runtime `require('./awf-config-schema.json')`. The canonical source + * is generated by `scripts/generate-schema.mjs` and kept in sync via CI. + */ + +import Ajv2020, { ErrorObject } from 'ajv/dist/2020'; +import { existsSync, readFileSync } from 'fs'; +import { join } from 'path'; + +function loadSchema(): Record { + const candidatePaths = [ + join(__dirname, 'awf-config-schema.json'), + join(__dirname, '../src/awf-config-schema.json'), + ]; + + for (const candidatePath of candidatePaths) { + if (existsSync(candidatePath)) { + return JSON.parse(readFileSync(candidatePath, 'utf8')) as Record; + } + } + + throw new Error( + `Unable to locate awf-config-schema.json. Checked: ${candidatePaths.join(', ')}`, + ); +} + +// Compile once (module-level singleton). allErrors collects every violation. 
+// verbose=true provides parentSchema on errors for richer formatting. +const ajv = new Ajv2020({ allErrors: true, verbose: true }); +// 'version' is a metadata keyword (not a standard JSON Schema keyword); register +// it so Ajv strict mode does not reject the schema. +ajv.addKeyword({ keyword: 'version' }); +const validate = ajv.compile(loadSchema()); + +/** + * Check if a schema path points to an array-of-strings field by inspecting + * the parent schema context. + */ +function isArrayOfStringsField(err: ErrorObject): boolean { + // When type=array fails, check if the schema also specifies items.type=string + // All array fields in the AWF schema have items.type=string, so parentSchema is always present + const parentSchema = err.parentSchema as Record | undefined; + const items = parentSchema?.items as Record | undefined; + return err.keyword === 'type' && err.params.type === 'array' && items?.type === 'string'; +} + + +/** + * Format a single ajv error into a human-readable string matching the + * style of the previous hand-written validator (e.g. "config.network.allowDomains must be ..."). + */ +function formatError(err: ErrorObject): string { + const path = err.instancePath + ? 'config' + err.instancePath.replace(/\//g, '.') + : 'config'; + + switch (err.keyword) { + case 'type': + if (err.params.type === 'array' && isArrayOfStringsField(err)) { + return `${path} must be an array of strings`; + } + if (err.params.type === 'integer') { + // All integer fields in the AWF schema have minimum:1 + return `${path} must be a positive integer`; + } + // 'object' → "an object"; remaining types (boolean, string, number) → "a " + return `${path} must be ${err.params.type === 'object' ? 
'an object' : `a ${err.params.type}`}`; + + case 'additionalProperties': + return `${path}.${err.params.additionalProperty} is not supported`; + + case 'enum': + return `${path} must be one of: ${(err.params.allowedValues as string[]).join(', ')}`; + + case 'minimum': + return `${path} must be a positive integer`; + + case 'oneOf': + return `${path} must be a string or array of strings`; + + case 'items': + return `${path} must be an array of strings`; + + default: + /* istanbul ignore next -- defensive: all current schema keywords are handled above */ + return `${path} ${err.message || 'is invalid'}`; + } +} + +/** + * Deduplicate and simplify errors. Ajv often emits multiple errors for one + * conceptual issue (e.g. oneOf failures emit sub-schema failures + the oneOf itself). + * Also consolidates per-item errors into a single array-level message. + */ +function deduplicateErrors(errors: ErrorObject[]): ErrorObject[] { + const oneOfPaths = new Set(); + const result: ErrorObject[] = []; + + // First pass: collect oneOf paths + for (const err of errors) { + if (err.keyword === 'oneOf') { + oneOfPaths.add(err.instancePath); + } + } + + // Track which array paths we've already emitted a consolidated message for + const emittedArrayErrors = new Set(); + + // Second pass: filter and consolidate + for (const err of errors) { + // Skip sub-errors of oneOf (they are noise) + if (oneOfPaths.has(err.instancePath) && err.keyword !== 'oneOf') { + continue; + } + + // Consolidate per-item type errors into a single "must be an array of strings" + if (err.keyword === 'type' && /\/\d+$/.test(err.instancePath)) { + const parentPath = err.instancePath.replace(/\/\d+$/, ''); + if (!emittedArrayErrors.has(parentPath)) { + emittedArrayErrors.add(parentPath); + // Create a synthetic error for the parent path + result.push({ + ...err, + instancePath: parentPath, + keyword: 'items', + message: 'must be an array of strings', + }); + } + continue; + } + + result.push(err); + } + + return 
result; +} + +/** + * Validate an unknown value against the AWF config schema. + * Returns an array of human-readable error strings (empty = valid). + */ +export function validateWithSchema(config: unknown): string[] { + // Handle non-object root specially for a cleaner message + if (typeof config !== 'object' || config === null || Array.isArray(config)) { + return ['config root must be an object']; + } + + const valid = validate(config); + if (valid || !validate.errors) { + return []; + } + + const deduplicated = deduplicateErrors(validate.errors); + return deduplicated.map(formatError); +} diff --git a/src/schema.test.ts b/src/schema.test.ts index 2511824c..92d574eb 100644 --- a/src/schema.test.ts +++ b/src/schema.test.ts @@ -3,6 +3,7 @@ import * as path from 'path'; import Ajv2020 from 'ajv/dist/2020'; const schemaPath = path.join(__dirname, '..', 'docs', 'awf-config.schema.json'); +const schemaV1Path = path.join(__dirname, '..', 'docs', 'awf-config.v1.schema.json'); describe('awf-config.schema.json', () => { let schema: Record; @@ -12,6 +13,8 @@ describe('awf-config.schema.json', () => { const raw = fs.readFileSync(schemaPath, 'utf8'); schema = JSON.parse(raw) as Record; const ajv = new Ajv2020(); + // 'version' is a metadata keyword; register it so strict mode doesn't reject the schema. 
+ ajv.addKeyword({ keyword: 'version' }); validate = ajv.compile(schema); }); @@ -26,6 +29,10 @@ describe('awf-config.schema.json', () => { expect(schema.additionalProperties).toBe(false); }); + it('has a version field', () => { + expect(schema.version).toBe('1'); + }); + it('covers all AwfFileConfig top-level fields', () => { const properties = schema.properties as Record; expect(Object.keys(properties)).toEqual( @@ -173,4 +180,28 @@ describe('awf-config.schema.json', () => { expect(validate({ apiProxy: { models: { 'gpt-4o': ['gpt-4o-2024-11-20'] } } })).toBe(true); expect(validate({ apiProxy: { models: { 'gpt-4o': 'not-an-array' } } })).toBe(false); }); + + it('src/awf-config-schema.json stays in sync with docs/awf-config.schema.json', () => { + const srcSchemaPath = path.join(__dirname, 'awf-config-schema.json'); + const srcSchema = JSON.parse(fs.readFileSync(srcSchemaPath, 'utf8')); + // Compare all fields except $id (which differs for versioned releases) + const docsRest = { ...schema }; + delete docsRest.$id; + const srcRest = { ...srcSchema }; + delete srcRest.$id; + expect(srcRest).toEqual(docsRest); + }); + + it('docs/awf-config.v1.schema.json stays in sync with docs/awf-config.schema.json', () => { + const v1Schema = JSON.parse(fs.readFileSync(schemaV1Path, 'utf8')) as Record; + // v1 schema must have a versioned $id and match the latest schema (ignoring $id) + expect(v1Schema.version).toBe('1'); + expect(typeof v1Schema.$id).toBe('string'); + expect(v1Schema.$id as string).toContain('v1'); + const latestRest = { ...schema }; + delete latestRest.$id; + const v1Rest = { ...v1Schema }; + delete v1Rest.$id; + expect(v1Rest).toEqual(latestRest); + }); });