Live Scenarios #13
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Live Scenario Runner (nightly)
#
# Executes the executive-assistant and connector certification scenarios
# against a live LLM runtime with real connector credentials and uploads the
# JSON report. Scheduled runs and default manual dispatches are report-only
# while this catalog is still being hardened; a manual dispatch can opt in
# (via the `enforce_gate` input) to failing when any scenario fails or the
# aggregate LLM-judge score falls below LIFEOPS_JUDGE_THRESHOLD. Missing setup
# prerequisites still fail loudly.
#
# Required repo secrets (self-documented):
#   LLM provider (at least one):
#     OPENAI_API_KEY
#     OPENROUTER_API_KEY
#     ANTHROPIC_API_KEY
#     GOOGLE_GENERATIVE_AI_API_KEY or GOOGLE_API_KEY
#     GROQ_API_KEY
#   Connector credentials (presence gates the relevant scenarios):
#     GOOGLE_OAUTH_CLIENT_ID, GOOGLE_OAUTH_CLIENT_SECRET, GOOGLE_OAUTH_REFRESH_TOKEN
#     GMAIL_TEST_ACCOUNT_EMAIL, GMAIL_TEST_ACCOUNT_REFRESH_TOKEN
#     CALENDLY_API_TOKEN
#     TELEGRAM_BOT_TOKEN, TELEGRAM_TEST_CHAT_ID, TELEGRAM_API_ID, TELEGRAM_API_HASH
#     DISCORD_BOT_TOKEN, DISCORD_TEST_GUILD_ID, DISCORD_TEST_CHANNEL_ID
#     SIGNAL_CLI_URL, SIGNAL_TEST_NUMBER
#     IMESSAGE_BRIDGE_URL, IMESSAGE_TEST_HANDLE
#     WHATSAPP_TOKEN, WHATSAPP_PHONE_ID, WHATSAPP_TEST_CONTACT
#     TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_FROM_NUMBER, TWILIO_TEST_TO_NUMBER
#     X_API_KEY, X_API_SECRET, X_ACCESS_TOKEN, X_ACCESS_SECRET, X_TEST_DM_HANDLE
#     NOTIFICATION_RELAY_URL, NOTIFICATION_RELAY_TOKEN
#     TRAVEL_BOOKING_API_KEY
# Optional run controls:
#   LIFEOPS_JUDGE_THRESHOLD (workflow input `judge_threshold`, default 0.8)
#   SCENARIO_FILTER (workflow input `scenario_filter`; comma-separated scenario ids, default all)
#   SCENARIO_ENFORCE_GATE (workflow input `enforce_gate`, default false)
#   SKIP_REASON (workflow input `skip_reason`; required if SCENARIO_SKIP is set)
#
# 1Password vault: this workflow's plain `*_API_KEY` secrets are sourced from
# the `eliza-e2e` 1Password vault. The vault must exist in 1Password and the
# corresponding GitHub Actions secrets must be configured on the
# `elizaOS/eliza` repo before this workflow can run successfully. See
# scenario-matrix.yml for the structured `ELIZA_E2E_*` connector secrets.
# Display name of the workflow.
name: Live Scenarios

# Triggers: a daily schedule plus manual dispatch with run-control inputs.
# Scheduled runs carry no inputs, so every input falls back to its default.
on:
  schedule:
    # 08:30 UTC daily — after upstream nightly at 04:00 UTC settles.
    - cron: '30 8 * * *'
  workflow_dispatch:
    inputs:
      # Passed to the runner step as SCENARIO_FILTER.
      scenario_filter:
        description: 'Comma-separated scenario ids (empty = all)'
        required: false
        type: string
        default: ''
      # Passed to the runner step as LIFEOPS_JUDGE_THRESHOLD. Kept as a string
      # so the value reaches the runner exactly as typed.
      judge_threshold:
        description: 'LLM-judge minimum pass score (0.0 - 1.0)'
        required: false
        type: string
        default: '0.8'
      # Passed to the runner step as SKIP_REASON.
      skip_reason:
        description: 'If you plan to skip any scenario, document why'
        required: false
        type: string
        default: ''
      # Passed to the runner step as SCENARIO_ENFORCE_GATE ('1' when true).
      enforce_gate:
        description: 'Fail the workflow when live scenarios fail'
        required: false
        type: boolean
        default: false
# One concurrency group per ref; a newer run never cancels the one already
# in progress.
concurrency:
  group: 'live-scenarios-${{ github.ref }}'
  cancel-in-progress: false

# Workflow-level environment: toolchain version pins consumed by the setup
# steps, plus the ELIZA_LIVE_TEST flag exported to every step.
env:
  BUN_VERSION: '1.3.14'
  NODE_VERSION: '24.15.0'
  ELIZA_LIVE_TEST: '1'

# The workflow token is limited to read-only scopes.
permissions:
  contents: read
  actions: read
jobs:
  # Single job: check out the repo, install dependencies, build the packages
  # the live runner imports through dist/* exports, run the live scenarios,
  # and always upload whatever report artifacts were produced.
  live-scenarios:
    name: Live scenarios (EA + connectors)
    runs-on: ubuntu-24.04
    timeout-minutes: 120
    steps:
      # Full history with blobs fetched on demand (partial clone); submodules
      # are not checked out.
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5
        with:
          fetch-depth: 0
          filter: blob:none
          submodules: false

      - name: Setup Node.js
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e
        with:
          node-version: ${{ env.NODE_VERSION }}

      # Dependencies are installed with lifecycle scripts disabled; the build
      # step below compensates by building the required packages explicitly.
      - name: Setup workspace dependencies
        uses: ./.github/actions/setup-bun-workspace
        with:
          bun-version: ${{ env.BUN_VERSION }}
          install-command: bun install --ignore-scripts --no-frozen-lockfile

      # Fail fast with a clear message if the runner entry point is absent.
      - name: Verify scenario-runner CLI present
        run: |
          test -f packages/scenario-runner/src/cli.ts || \
            { echo "scenario-runner CLI missing"; exit 1; }

      - name: Build live scenario runtime packages
        # The live runner executes TypeScript sources directly, but several
        # workspace packages intentionally export dist/* entry points. Because
        # dependency installation ignores postinstall scripts, build only the
        # packages that the live scenario runtime imports through those exports.
        env:
          # Presence flags only ('true' / 'false'). The build scripts need to
          # know WHICH provider is configured, not the credential itself, so
          # the secret values are deliberately kept out of this step's
          # environment.
          HAS_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY != '' }}
          HAS_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY != '' }}
          HAS_ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY != '' }}
          HAS_GOOGLE_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY != '' || secrets.GOOGLE_API_KEY != '' }}
          HAS_GROQ_API_KEY: ${{ secrets.GROQ_API_KEY != '' }}
        run: |
          echo "::group::Build packages/core"
          bun run --cwd packages/core build
          echo "::endgroup::"

          echo "::group::Build packages/shared"
          # plugin-agent-skills (and others) consume @elizaos/shared via dist
          # exports (RouteRequestContext, ReadJsonBodyOptions, route schemas).
          # Must build before any plugin that imports from it.
          bun run --cwd packages/shared build
          echo "::endgroup::"

          echo "::group::Build packages/skills"
          # plugin-agent-skills imports @elizaos/skills via its dist exports
          # (getSkillsDir, loadSkills, formatSkillsForPrompt). Must build
          # before plugin-agent-skills typechecks its declaration emit.
          bun run --cwd packages/skills build
          echo "::endgroup::"

          # Pick exactly one provider plugin to build. The first configured
          # provider wins, in this priority order:
          # Groq, OpenAI, Anthropic, Google, OpenRouter.
          provider_package=""
          if [ "${HAS_GROQ_API_KEY:-}" = "true" ]; then
            provider_package="plugins/plugin-groq"
          elif [ "${HAS_OPENAI_API_KEY:-}" = "true" ]; then
            provider_package="plugins/plugin-openai"
          elif [ "${HAS_ANTHROPIC_API_KEY:-}" = "true" ]; then
            provider_package="plugins/plugin-anthropic"
          elif [ "${HAS_GOOGLE_API_KEY:-}" = "true" ]; then
            provider_package="plugins/plugin-google-genai"
          elif [ "${HAS_OPENROUTER_API_KEY:-}" = "true" ]; then
            provider_package="plugins/plugin-openrouter"
          fi

          package_dirs=(
            plugins/plugin-sql
            plugins/plugin-agent-skills
            plugins/plugin-pdf
            plugins/plugin-telegram
            plugins/plugin-whatsapp
            plugins/plugin-signal
            plugins/plugin-imessage
          )
          if [ -n "$provider_package" ]; then
            package_dirs+=("$provider_package")
          else
            # Surface the gap in the run summary instead of silently building
            # no provider plugin; the build itself still proceeds.
            echo "::warning::No LLM provider API key secret is configured; no provider plugin will be built."
          fi

          for package_dir in "${package_dirs[@]}"; do
            echo "::group::Build ${package_dir}"
            bun run --cwd "$package_dir" build
            echo "::endgroup::"
          done

      - name: Run EA + connector live scenarios
        id: run
        env:
          # LLM providers
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          # Google Workspace
          GOOGLE_OAUTH_CLIENT_ID: ${{ secrets.GOOGLE_OAUTH_CLIENT_ID }}
          GOOGLE_OAUTH_CLIENT_SECRET: ${{ secrets.GOOGLE_OAUTH_CLIENT_SECRET }}
          GOOGLE_OAUTH_REFRESH_TOKEN: ${{ secrets.GOOGLE_OAUTH_REFRESH_TOKEN }}
          GMAIL_TEST_ACCOUNT_EMAIL: ${{ secrets.GMAIL_TEST_ACCOUNT_EMAIL }}
          GMAIL_TEST_ACCOUNT_REFRESH_TOKEN: ${{ secrets.GMAIL_TEST_ACCOUNT_REFRESH_TOKEN }}
          # Calendly
          CALENDLY_API_TOKEN: ${{ secrets.CALENDLY_API_TOKEN }}
          # Messaging
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_TEST_CHAT_ID: ${{ secrets.TELEGRAM_TEST_CHAT_ID }}
          TELEGRAM_API_ID: ${{ secrets.TELEGRAM_API_ID }}
          TELEGRAM_API_HASH: ${{ secrets.TELEGRAM_API_HASH }}
          DISCORD_BOT_TOKEN: ${{ secrets.DISCORD_BOT_TOKEN }}
          DISCORD_TEST_GUILD_ID: ${{ secrets.DISCORD_TEST_GUILD_ID }}
          DISCORD_TEST_CHANNEL_ID: ${{ secrets.DISCORD_TEST_CHANNEL_ID }}
          SIGNAL_CLI_URL: ${{ secrets.SIGNAL_CLI_URL }}
          SIGNAL_TEST_NUMBER: ${{ secrets.SIGNAL_TEST_NUMBER }}
          IMESSAGE_BRIDGE_URL: ${{ secrets.IMESSAGE_BRIDGE_URL }}
          IMESSAGE_TEST_HANDLE: ${{ secrets.IMESSAGE_TEST_HANDLE }}
          WHATSAPP_TOKEN: ${{ secrets.WHATSAPP_TOKEN }}
          WHATSAPP_PHONE_ID: ${{ secrets.WHATSAPP_PHONE_ID }}
          WHATSAPP_TEST_CONTACT: ${{ secrets.WHATSAPP_TEST_CONTACT }}
          # Twilio SMS/Voice
          TWILIO_ACCOUNT_SID: ${{ secrets.TWILIO_ACCOUNT_SID }}
          TWILIO_AUTH_TOKEN: ${{ secrets.TWILIO_AUTH_TOKEN }}
          TWILIO_FROM_NUMBER: ${{ secrets.TWILIO_FROM_NUMBER }}
          TWILIO_TEST_TO_NUMBER: ${{ secrets.TWILIO_TEST_TO_NUMBER }}
          # X / DM
          X_API_KEY: ${{ secrets.X_API_KEY }}
          X_API_SECRET: ${{ secrets.X_API_SECRET }}
          X_ACCESS_TOKEN: ${{ secrets.X_ACCESS_TOKEN }}
          X_ACCESS_SECRET: ${{ secrets.X_ACCESS_SECRET }}
          X_TEST_DM_HANDLE: ${{ secrets.X_TEST_DM_HANDLE }}
          # Push notifications + travel
          NOTIFICATION_RELAY_URL: ${{ secrets.NOTIFICATION_RELAY_URL }}
          NOTIFICATION_RELAY_TOKEN: ${{ secrets.NOTIFICATION_RELAY_TOKEN }}
          TRAVEL_BOOKING_API_KEY: ${{ secrets.TRAVEL_BOOKING_API_KEY }}
          # Run controls. Inputs are empty on scheduled runs, so the
          # expression fallbacks ('0.8' threshold, gate '0') apply there.
          SCENARIO_FILTER: ${{ inputs.scenario_filter }}
          LIFEOPS_JUDGE_THRESHOLD: ${{ inputs.judge_threshold || '0.8' }}
          SCENARIO_ENFORCE_GATE: ${{ inputs.enforce_gate && '1' || '0' }}
          SKIP_REASON: ${{ inputs.skip_reason }}
          # Output locations; must match the paths uploaded by the next step.
          REPORT_PATH: artifacts/lifeops-scenario-report.json
          RUN_DIR: artifacts/scenario-runs/live
        run: node packages/scripts/run-live-scenarios.mjs

      # Runs even when the scenario step fails so the report is preserved;
      # a missing report only produces a warning.
      - name: Upload scenario report
        if: always()
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
        with:
          name: lifeops-scenario-report
          path: |
            artifacts/lifeops-scenario-report.json
            artifacts/scenario-runs/live/
          if-no-files-found: warn
          retention-days: 30