From 13997cb70924575e43f1b58a0a525a6893c63edd Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Wed, 18 Mar 2026 09:54:13 -0400 Subject: [PATCH 1/8] Provider tests in ts --- .../tests/integration/test_providers.test.ts | 110 +++++++ .../test_providers_code_exec.test.ts | 80 +++++ .../tests/integration/test_providers_lib.ts | 292 ++++++++++++++++++ ui/desktop/vitest.config.ts | 15 +- 4 files changed, 491 insertions(+), 6 deletions(-) create mode 100644 ui/desktop/tests/integration/test_providers.test.ts create mode 100644 ui/desktop/tests/integration/test_providers_code_exec.test.ts create mode 100644 ui/desktop/tests/integration/test_providers_lib.ts diff --git a/ui/desktop/tests/integration/test_providers.test.ts b/ui/desktop/tests/integration/test_providers.test.ts new file mode 100644 index 000000000000..c7fbb77b343f --- /dev/null +++ b/ui/desktop/tests/integration/test_providers.test.ts @@ -0,0 +1,110 @@ +/** + * Provider smoke tests — normal mode (direct tool calls). + * + * Ported from scripts/test_providers.sh. Each available provider/model pair + * gets its own test that spawns `goose run` with the developer builtin, asks + * the model to read files via the shell tool, and validates the output. + */ + +import { test, expect, beforeAll } from 'vitest'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { + buildGoose, + discoverTestCases, + runGoose, + isAgenticProvider, + isAllowedFailure, + type TestCase, +} from './test_providers_lib'; + +const BUILTINS = 'developer'; +const TEST_CONTENT = 'test-content-abc123'; + +let gooseBin: string; +let testFile: string; + +beforeAll(() => { + gooseBin = buildGoose(); + + const targetDir = path.resolve(process.cwd(), '..', '..', 'target'); + fs.mkdirSync(targetDir, { recursive: true }); + testFile = path.join(targetDir, 'test-content.txt'); + fs.writeFileSync(testFile, TEST_CONTENT + '\n'); +}); + +const allCases = discoverTestCases(); +const available = allCases.filter((tc) => tc.available && !isAllowedFailure(tc.provider, tc.model)); +const flaky = allCases.filter((tc) => tc.available && isAllowedFailure(tc.provider, tc.model)); +const skipped = allCases.filter((tc) => !tc.available); + +async function runNormalTest(tc: TestCase): Promise { + const testdir = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-test-')); + + try { + let prompt: string; + let tokenA: string | undefined; + let tokenB: string | undefined; + + if (isAgenticProvider(tc.provider)) { + fs.copyFileSync(testFile, path.join(testdir, 'test-content.txt')); + prompt = 'read ./test-content.txt and output its contents exactly'; + } else { + tokenA = `smoke-alpha-${Math.floor(Math.random() * 32768)}`; + tokenB = `smoke-bravo-${Math.floor(Math.random() * 32768)}`; + fs.writeFileSync(path.join(testdir, 'part-a.txt'), tokenA + '\n'); + fs.writeFileSync(path.join(testdir, 'part-b.txt'), tokenB + '\n'); + prompt = + 'Use the shell tool to cat ./part-a.txt and ./part-b.txt, then reply with ONLY the contents of both files, one per line, nothing else.'; + } + + const output = await runGoose(gooseBin, testdir, prompt, BUILTINS, { + GOOSE_PROVIDER: tc.provider, + GOOSE_MODEL: tc.model, + }); + + if (isAgenticProvider(tc.provider)) { + expect( + output.toLowerCase(), + `Expected model output to contain "${TEST_CONTENT}"\n\nFull output:\n${output}` + ).toContain(TEST_CONTENT.toLowerCase()); + } else { + const shellToolPattern = /(shell \| developer)|(▸.*shell)/; + expect( + shellToolPattern.test(output), + `Expected model to use shell tool\n\nFull output:\n${output}` + ).toBe(true); + expect( + output, + `Expected output to contain token from part-a.txt (${tokenA})\n\nFull output:\n${output}` + ).toContain(tokenA); + expect( + output, + `Expected output to contain token from part-b.txt (${tokenB})\n\nFull output:\n${output}` + ).toContain(tokenB); + } + } finally { + fs.rmSync(testdir, { recursive: true, force: true }); + } +} + +if (available.length > 0) { + test.each(available)('$provider / $model', async (tc) => { + await runNormalTest(tc); + }); +} + +if (flaky.length > 0) { + test.each(flaky)('$provider / $model (flaky — allowed to fail)', async (tc) => { + try { + await runNormalTest(tc); + } catch (err) { + console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); + } + }); +} + +if (skipped.length > 0) { + test.skip.each(skipped)('$provider / $model — $skippedReason', () => {}); +} diff --git a/ui/desktop/tests/integration/test_providers_code_exec.test.ts b/ui/desktop/tests/integration/test_providers_code_exec.test.ts new file mode 100644 index 000000000000..a3d17d8f0bf6 --- /dev/null +++ b/ui/desktop/tests/integration/test_providers_code_exec.test.ts @@ -0,0 +1,80 @@ +/** + * Provider smoke tests — code execution mode (JS batching). + * + * Ported from scripts/test_providers_code_exec.sh. Each available + * (non-agentic) provider/model pair gets its own test that spawns `goose run` + * with the memory + code_execution builtins and validates that the + * code_execution tool was invoked. + */ + +import { test, expect, beforeAll } from 'vitest'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { + buildGoose, + discoverTestCases, + runGoose, + isAllowedFailure, + type TestCase, +} from './test_providers_lib'; + +const BUILTINS = 'memory,code_execution'; + +let gooseBin: string; + +beforeAll(() => { + gooseBin = buildGoose(); +}); + +const allCases = discoverTestCases({ skipAgentic: true }); +const available = allCases.filter((tc) => tc.available && !isAllowedFailure(tc.provider, tc.model)); +const flaky = allCases.filter((tc) => tc.available && isAllowedFailure(tc.provider, tc.model)); +const skipped = allCases.filter((tc) => !tc.available); + +async function runCodeExecTest(tc: TestCase): Promise { + const testdir = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-codeexec-')); + + try { + const prompt = + "Store a memory with category 'test' and data 'hello world', then retrieve all memories from category 'test'."; + + const output = await runGoose(gooseBin, testdir, prompt, BUILTINS, { + GOOSE_PROVIDER: tc.provider, + GOOSE_MODEL: tc.model, + }); + + // Matches: "execute | code_execution", "get_function_details | code_execution", + // "tool call | execute", "tool calls | execute" (old format) + // "▸ execute N tool call" (new format with tool_graph) + const codeExecPattern = + /(execute \| code_execution)|(get_function_details \| code_execution)|(tool calls? \| execute)|(▸.*execute.*tool call)/; + + expect( + codeExecPattern.test(output), + `Expected code_execution tool to be called\n\nFull output:\n${output}` + ).toBe(true); + } finally { + fs.rmSync(testdir, { recursive: true, force: true }); + } +} + +if (available.length > 0) { + test.each(available)('$provider / $model', async (tc) => { + await runCodeExecTest(tc); + }); +} + +if (flaky.length > 0) { + test.each(flaky)('$provider / $model (flaky — allowed to fail)', async (tc) => { + try { + await runCodeExecTest(tc); + } catch (err) { + console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); + } + }); +} + +if (skipped.length > 0) { + test.skip.each(skipped)('$provider / $model — $skippedReason', () => {}); +} diff --git a/ui/desktop/tests/integration/test_providers_lib.ts b/ui/desktop/tests/integration/test_providers_lib.ts new file mode 100644 index 000000000000..3fe5e46441d5 --- /dev/null +++ b/ui/desktop/tests/integration/test_providers_lib.ts @@ -0,0 +1,292 @@ +/** + * Shared library for provider smoke tests. + * + * Ported from scripts/test_providers_lib.sh — keeps the same provider config, + * allowed-failure list, agentic-provider list, and environment detection. + */ + +import { execSync, spawn, type ChildProcess } from 'node:child_process'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +// --------------------------------------------------------------------------- +// Provider configuration +// --------------------------------------------------------------------------- + +const PROVIDER_CONFIG_RAW = ` +openrouter -> google/gemini-2.5-pro|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b +xai -> grok-3 +openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5 +anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-5-20251101 +google -> gemini-2.5-pro|gemini-2.5-flash|gemini-3-pro-preview|gemini-3-flash-preview +tetrate -> claude-sonnet-4-20250514 +databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o +azure_openai -> \${AZURE_OPENAI_DEPLOYMENT_NAME} +aws_bedrock -> us.anthropic.claude-sonnet-4-5-20250929-v1:0 +gcp_vertex_ai -> gemini-2.5-pro +snowflake -> claude-sonnet-4-5 +venice -> llama-3.3-70b +litellm -> gpt-4o-mini +sagemaker_tgi -> sagemaker-tgi-endpoint +github_copilot -> gpt-4.1 +chatgpt_codex -> gpt-5.1-codex +claude-code -> default +codex -> gpt-5.2-codex +gemini-cli -> gemini-2.5-pro +cursor-agent -> auto +ollama -> qwen3 +`; + +const ALLOWED_FAILURES = new Set([ + 'google:gemini-2.5-flash', + 'google:gemini-3-pro-preview', + 'openrouter:nvidia/nemotron-3-nano-30b-a3b', + 'openrouter:qwen/qwen3-coder:exacto', + 'openai:gpt-3.5-turbo', +]); + +const AGENTIC_PROVIDERS = new Set(['claude-code', 'codex', 'gemini-cli', 'cursor-agent']); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function loadDotenv(): void { + const envPath = path.resolve(process.cwd(), '.env'); + if (!fs.existsSync(envPath)) return; + const lines = fs.readFileSync(envPath, 'utf-8').split('\n'); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) continue; + const eqIdx = trimmed.indexOf('='); + if (eqIdx === -1) continue; + const key = trimmed.slice(0, eqIdx); + const value = trimmed.slice(eqIdx + 1); + if (!(key in process.env)) { + process.env[key] = value; + } + } +} + +function hasEnv(name: string): boolean { + return !!process.env[name]; +} + +function hasCmd(name: string): boolean { + try { + execSync(`command -v ${name}`, { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} + +function hasFile(p: string): boolean { + return fs.existsSync(p); +} + +export function isAgenticProvider(provider: string): boolean { + return AGENTIC_PROVIDERS.has(provider); +} + +function isProviderAvailable(provider: string): boolean { + switch (provider) { + case 'openrouter': + return hasEnv('OPENROUTER_API_KEY'); + case 'xai': + return hasEnv('XAI_API_KEY'); + case 'openai': + return hasEnv('OPENAI_API_KEY'); + case 'anthropic': + return hasEnv('ANTHROPIC_API_KEY'); + case 'google': + return hasEnv('GOOGLE_API_KEY'); + case 'tetrate': + return hasEnv('TETRATE_API_KEY'); + case 'databricks': + return hasEnv('DATABRICKS_HOST') && hasEnv('DATABRICKS_TOKEN'); + case 'azure_openai': + return hasEnv('AZURE_OPENAI_ENDPOINT') && hasEnv('AZURE_OPENAI_DEPLOYMENT_NAME'); + case 'aws_bedrock': + return hasEnv('AWS_REGION') && (hasEnv('AWS_PROFILE') || hasEnv('AWS_ACCESS_KEY_ID')); + case 'gcp_vertex_ai': + return hasEnv('GCP_PROJECT_ID'); + case 'snowflake': + return hasEnv('SNOWFLAKE_HOST') && hasEnv('SNOWFLAKE_TOKEN'); + case 'venice': + return hasEnv('VENICE_API_KEY'); + case 'litellm': + return hasEnv('LITELLM_API_KEY'); + case 'sagemaker_tgi': + return hasEnv('SAGEMAKER_ENDPOINT_NAME') && hasEnv('AWS_REGION'); + case 'github_copilot': + return ( + hasEnv('GITHUB_COPILOT_TOKEN') || + hasFile(path.join(os.homedir(), '.config/goose/github_copilot_token.json')) + ); + case 'chatgpt_codex': + return ( + hasEnv('CHATGPT_CODEX_TOKEN') || + hasFile(path.join(os.homedir(), '.config/goose/chatgpt_codex_token.json')) + ); + case 'ollama': + return hasEnv('OLLAMA_HOST') || hasCmd('ollama'); + case 'claude-code': + return hasCmd('claude'); + case 'codex': + return hasCmd('codex'); + case 'gemini-cli': + return hasCmd('gemini'); + case 'cursor-agent': + return hasCmd('cursor-agent'); + default: + return true; + } +} + +export function isAllowedFailure(provider: string, model: string): boolean { + return ALLOWED_FAILURES.has(`${provider}:${model}`); +} + +function shouldSkipProvider(provider: string): boolean { + const skip = process.env.SKIP_PROVIDERS; + if (!skip) return false; + return skip + .split(',') + .map((s) => s.trim()) + .includes(provider); +} + +// --------------------------------------------------------------------------- +// Parse provider config +// --------------------------------------------------------------------------- + +interface ProviderLine { + provider: string; + modelsStr: string; +} + +function parseProviderConfig(): ProviderLine[] { + const lines: ProviderLine[] = []; + for (const raw of PROVIDER_CONFIG_RAW.split('\n')) { + const line = raw.trim(); + if (!line || line.startsWith('#')) continue; + const arrowIdx = line.indexOf(' -> '); + if (arrowIdx === -1) continue; + const provider = line.slice(0, arrowIdx).trim(); + let modelsStr = line.slice(arrowIdx + 4).trim(); + modelsStr = modelsStr.replace(/\$\{(\w+)\}/g, (_, name) => process.env[name] ?? ''); + lines.push({ provider, modelsStr }); + } + return lines; +} + +// --------------------------------------------------------------------------- +// Build goose binary +// --------------------------------------------------------------------------- + +export function buildGoose(): string { + if (!process.env.SKIP_BUILD) { + console.error('Building goose...'); + execSync('cargo build --bin goose', { stdio: 'inherit' }); + console.error(''); + } else { + console.error('Skipping build (SKIP_BUILD is set)...'); + console.error(''); + } + return path.resolve(process.cwd(), '..', '..', 'target/debug/goose'); +} + +// --------------------------------------------------------------------------- +// Test case discovery +// --------------------------------------------------------------------------- + +export interface TestCase { + provider: string; + model: string; + available: boolean; + skippedReason?: string; +} + +export function discoverTestCases(options?: { skipAgentic?: boolean }): TestCase[] { + loadDotenv(); + const skipAgentic = options?.skipAgentic ?? false; + const providerLines = parseProviderConfig(); + + const testCases: TestCase[] = []; + + for (const { provider, modelsStr } of providerLines) { + const available = isProviderAvailable(provider); + const models = modelsStr.split('|'); + + for (const model of models) { + if (!available) { + testCases.push({ + provider, + model, + available: false, + skippedReason: 'prerequisites not met', + }); + } else if (shouldSkipProvider(provider)) { + testCases.push({ + provider, + model, + available: false, + skippedReason: 'SKIP_PROVIDERS', + }); + } else if (skipAgentic && isAgenticProvider(provider)) { + testCases.push({ + provider, + model, + available: false, + skippedReason: 'agentic provider skipped in this mode', + }); + } else { + testCases.push({ provider, model, available: true }); + } + } + } + + return testCases; +} + +// --------------------------------------------------------------------------- +// Utility: run goose binary and capture output +// --------------------------------------------------------------------------- + +export function runGoose( + gooseBin: string, + cwd: string, + prompt: string, + builtins: string, + env: Record +): Promise { + return new Promise((resolve) => { + const child: ChildProcess = spawn( + gooseBin, + ['run', '--text', prompt, '--with-builtin', builtins], + { + cwd, + env: { ...process.env, ...env }, + stdio: ['ignore', 'pipe', 'pipe'], + } + ); + + let output = ''; + child.stdout?.on('data', (d) => { + output += String(d); + }); + child.stderr?.on('data', (d) => { + output += String(d); + }); + + child.on('close', () => { + resolve(output); + }); + + child.on('error', (err) => { + resolve(`spawn error: ${err.message}`); + }); + }); +} diff --git a/ui/desktop/vitest.config.ts b/ui/desktop/vitest.config.ts index 7a2965c12f80..7a09ffc3c508 100644 --- a/ui/desktop/vitest.config.ts +++ b/ui/desktop/vitest.config.ts @@ -1,7 +1,7 @@ /// -import { defineConfig } from 'vitest/config' -import react from '@vitejs/plugin-react' -import { resolve } from 'node:path' +import { defineConfig } from 'vitest/config'; +import react from '@vitejs/plugin-react'; +import { resolve } from 'node:path'; const cfg = { plugins: [react()], @@ -15,8 +15,11 @@ const cfg = { environment: 'jsdom', setupFiles: ['./src/test/setup.ts'], css: true, - include: ['src/**/*.{test,spec}.{js,jsx,ts,tsx}'], + include: [ + 'src/**/*.{test,spec}.{js,jsx,ts,tsx}', + 'tests/integration/**/*.{test,spec}.{js,jsx,ts,tsx}', + ], }, -} satisfies Record +} satisfies Record; -export default defineConfig(cfg as any) +export default defineConfig(cfg as any); From 7727a0a40b0e479c3bf1c4c8288f04839f997576 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Tue, 31 Mar 2026 18:30:46 -0400 Subject: [PATCH 2/8] Switch over to it --- .github/workflows/pr-smoke-test.yml | 19 ++- RELEASE_CHECKLIST.md | 2 +- scripts/test_providers.sh | 71 -------- scripts/test_providers_code_exec.sh | 45 ----- scripts/test_providers_lib.sh | 244 ---------------------------- 5 files changed, 14 insertions(+), 367 deletions(-) delete mode 100755 scripts/test_providers.sh delete mode 100755 scripts/test_providers_code_exec.sh delete mode 100755 scripts/test_providers_lib.sh diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml index cdc0414ace61..e930b1788b7f 100644 --- a/.github/workflows/pr-smoke-test.yml +++ b/.github/workflows/pr-smoke-test.yml @@ -110,7 +110,11 @@ jobs: - name: Install agentic providers run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli @zed-industries/claude-agent-acp @zed-industries/codex-acp - - name: Run Smoke Tests with Provider Script + - name: Install Node.js Dependencies + run: source ../../bin/activate-hermit && pnpm install --frozen-lockfile + working-directory: ui/desktop + + - name: Run Smoke Tests (Normal Mode) env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -127,12 +131,10 @@ jobs: SKIP_BUILD: 1 SKIP_PROVIDERS: ${{ vars.SKIP_PROVIDERS || '' }} run: | - # Ensure the HOME directory structure exists mkdir -p $HOME/.local/share/goose/sessions mkdir -p $HOME/.config/goose - - # Run the provider test script (binary already built and downloaded) - bash scripts/test_providers.sh + source ../../bin/activate-hermit && pnpm run test:integration -- tests/integration/test_providers.test.ts + working-directory: ui/desktop - name: Set up Python uses: actions/setup-python@v5 @@ -188,6 +190,10 @@ jobs: - name: Make Binary Executable run: chmod +x target/debug/goose + - name: Install Node.js Dependencies + run: source ../../bin/activate-hermit && pnpm install --frozen-lockfile + working-directory: ui/desktop + - name: Run Provider Tests (Code Execution Mode) env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -205,7 +211,8 @@ jobs: run: | mkdir -p $HOME/.local/share/goose/sessions mkdir -p $HOME/.config/goose - bash scripts/test_providers_code_exec.sh + source ../../bin/activate-hermit && pnpm run test:integration -- tests/integration/test_providers_code_exec.test.ts + working-directory: ui/desktop compaction-tests: name: Compaction Tests diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md index 9a10dc6b5773..fdc0268790ce 100644 --- a/RELEASE_CHECKLIST.md +++ b/RELEASE_CHECKLIST.md @@ -17,7 +17,7 @@ Make a copy of this document for each version and check off as steps are verifie ### Provider Testing -- [ ] Run `./scripts/test_providers.sh` locally from the release branch and verify all providers/models work +- [ ] Run `cd ui/desktop && pnpm run test:integration -- tests/integration/test_providers.test.ts` locally from the release branch and verify all providers/models work - [ ] Launch goose, click reset providers, choose databricks and a model ### Starting Conversations diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh deleted file mode 100755 index b6c28b8d445e..000000000000 --- a/scripts/test_providers.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -LIB_DIR="$(cd "$(dirname "$0")" && pwd)" -source "$LIB_DIR/test_providers_lib.sh" - -echo "Mode: normal (direct tool calls)" -echo "" - -GOOSE_BIN=$(build_goose) -BUILTINS="developer" - -mkdir -p target -TEST_CONTENT="test-content-abc123" -TEST_FILE="./target/test-content.txt" -echo "$TEST_CONTENT" > "$TEST_FILE" - -run_test() { - local provider="$1" model="$2" result_file="$3" output_file="$4" - local testdir=$(mktemp -d) - - local prompt - if is_agentic_provider "$provider"; then - cp "$TEST_FILE" "$testdir/test-content.txt" - prompt="read ./test-content.txt and output its contents exactly" - else - # Write two files with unique random tokens. Validation checks that the shell - # tool was used and that both tokens appear in the output, proving the model - # actually read the files (random tokens can't be guessed or hallucinated). - local token_a="smoke-alpha-$RANDOM" - local token_b="smoke-bravo-$RANDOM" - echo "$token_a" > "$testdir/part-a.txt" - echo "$token_b" > "$testdir/part-b.txt" - # Store tokens so validation can check them - echo "$token_a" > "$testdir/.token_a" - echo "$token_b" > "$testdir/.token_b" - prompt="Use the shell tool to cat ./part-a.txt and ./part-b.txt, then reply with ONLY the contents of both files, one per line, nothing else." - fi - - ( - export GOOSE_PROVIDER="$provider" - export GOOSE_MODEL="$model" - cd "$testdir" && "$GOOSE_BIN" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1 - ) > "$output_file" 2>&1 - - if is_agentic_provider "$provider"; then - if grep -qi "$TEST_CONTENT" "$output_file"; then - echo "success|test content found by model" > "$result_file" - else - echo "failure|test content not found by model" > "$result_file" - fi - else - local token_a token_b - token_a=$(cat "$testdir/.token_a") - token_b=$(cat "$testdir/.token_b") - if ! grep -qE "(shell \| developer)|(▸.*shell)" "$output_file"; then - echo "failure|model did not use shell tool" > "$result_file" - elif ! grep -q "$token_a" "$output_file"; then - echo "failure|model did not return contents of part-a.txt ($token_a)" > "$result_file" - elif ! grep -q "$token_b" "$output_file"; then - echo "failure|model did not return contents of part-b.txt ($token_b)" > "$result_file" - else - echo "success|model read and returned both file contents" > "$result_file" - fi - fi - - rm -rf "$testdir" -} - -build_test_cases -run_test_cases run_test -report_results diff --git a/scripts/test_providers_code_exec.sh b/scripts/test_providers_code_exec.sh deleted file mode 100755 index c9d720d202a0..000000000000 --- a/scripts/test_providers_code_exec.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Provider smoke tests - code execution mode (JS batching) - -LIB_DIR="$(cd "$(dirname "$0")" && pwd)" -source "$LIB_DIR/test_providers_lib.sh" - -echo "Mode: code_execution (JS batching)" -echo "" - -# --- Setup --- - -GOOSE_BIN=$(build_goose) -BUILTINS="memory,code_execution" - -# --- Test case --- - -run_test() { - local provider="$1" model="$2" result_file="$3" output_file="$4" - local testdir=$(mktemp -d) - - local prompt="Store a memory with category 'test' and data 'hello world', then retrieve all memories from category 'test'." - - # Run goose - ( - export GOOSE_PROVIDER="$provider" - export GOOSE_MODEL="$model" - cd "$testdir" && "$GOOSE_BIN" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1 - ) > "$output_file" 2>&1 - - # Matches: "execute_typescript | code_execution", "get_function_details | code_execution", - # "tool call | execute", "tool calls | execute" (old format) - # "▸ execute N tool call" (new format with tool_graph) - # "▸ execute_typescript" (plain tool name in output) - if grep -qE "(execute_typescript \| code_execution)|(get_function_details \| code_execution)|(tool calls? \| execute)|(▸.*execute.*tool call)|(▸ execute_typescript)" "$output_file"; then - echo "success|code_execution tool called" > "$result_file" - else - echo "failure|no code_execution tool calls found" > "$result_file" - fi - - rm -rf "$testdir" -} - -build_test_cases --skip-agentic -run_test_cases run_test -report_results diff --git a/scripts/test_providers_lib.sh b/scripts/test_providers_lib.sh deleted file mode 100755 index 0ef52f12d11c..000000000000 --- a/scripts/test_providers_lib.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash - -PROVIDER_CONFIG=" -openrouter -> google/gemini-2.5-pro|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b -xai -> grok-3 -openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5 -anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-5-20251101 -google -> gemini-2.5-pro|gemini-2.5-flash|gemini-3-pro-preview|gemini-3-flash-preview -tetrate -> claude-sonnet-4-20250514 -databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o -azure_openai -> ${AZURE_OPENAI_DEPLOYMENT_NAME} -aws_bedrock -> us.anthropic.claude-sonnet-4-5-20250929-v1:0 -gcp_vertex_ai -> gemini-2.5-pro -snowflake -> claude-sonnet-4-5 -venice -> llama-3.3-70b -litellm -> gpt-4o-mini -sagemaker_tgi -> sagemaker-tgi-endpoint -github_copilot -> gpt-4.1 -chatgpt_codex -> gpt-5.1-codex -claude-code -> default -codex -> gpt-5.2-codex -gemini-cli -> gemini-2.5-pro -cursor-agent -> auto -ollama -> qwen3 -" - -# Flaky models allowed to fail without blocking PRs. -ALLOWED_FAILURES=( - "google:gemini-2.5-flash" - "google:gemini-3-pro-preview" - "openrouter:nvidia/nemotron-3-nano-30b-a3b" - "openrouter:qwen/qwen3-coder:exacto" - "openai:gpt-3.5-turbo" -) - -AGENTIC_PROVIDERS=("claude-code" "codex" "gemini-cli" "cursor-agent") - -if [ -f .env ]; then - export $(grep -v '^#' .env | xargs) -fi - -build_goose() { - if [ -z "$SKIP_BUILD" ]; then - echo "Building goose..." >&2 - cargo build --bin goose >&2 - echo "" >&2 - else - echo "Skipping build (SKIP_BUILD is set)..." >&2 - echo "" >&2 - fi - - echo "$(pwd)/target/debug/goose" -} - -has_env() { [ -n "${!1}" ]; } -has_cmd() { command -v "$1" &>/dev/null; } -has_file() { [ -f "$1" ]; } - -is_provider_available() { - case "$1" in - openrouter) has_env OPENROUTER_API_KEY ;; - xai) has_env XAI_API_KEY ;; - openai) has_env OPENAI_API_KEY ;; - anthropic) has_env ANTHROPIC_API_KEY ;; - google) has_env GOOGLE_API_KEY ;; - tetrate) has_env TETRATE_API_KEY ;; - databricks) has_env DATABRICKS_HOST && has_env DATABRICKS_TOKEN ;; - azure_openai) has_env AZURE_OPENAI_ENDPOINT && has_env AZURE_OPENAI_DEPLOYMENT_NAME ;; - aws_bedrock) has_env AWS_REGION && { has_env AWS_PROFILE || has_env AWS_ACCESS_KEY_ID; } ;; - gcp_vertex_ai) has_env GCP_PROJECT_ID ;; - snowflake) has_env SNOWFLAKE_HOST && has_env SNOWFLAKE_TOKEN ;; - venice) has_env VENICE_API_KEY ;; - litellm) has_env LITELLM_API_KEY ;; - sagemaker_tgi) has_env SAGEMAKER_ENDPOINT_NAME && has_env AWS_REGION ;; - github_copilot) has_env GITHUB_COPILOT_TOKEN || has_file "$HOME/.config/goose/github_copilot_token.json" ;; - chatgpt_codex) has_env CHATGPT_CODEX_TOKEN || has_file "$HOME/.config/goose/chatgpt_codex_token.json" ;; - ollama) has_env OLLAMA_HOST || has_cmd ollama ;; - claude-code) has_cmd claude ;; - codex) has_cmd codex ;; - gemini-cli) has_cmd gemini ;; - cursor-agent) has_cmd cursor-agent ;; - *) return 0 ;; - esac -} - -is_allowed_failure() { - local key="${1}:${2}" - for allowed in "${ALLOWED_FAILURES[@]}"; do - [ "$allowed" = "$key" ] && return 0 - done - return 1 -} - -should_skip_provider() { - [ -z "$SKIP_PROVIDERS" ] && return 1 - IFS=',' read -ra SKIP_LIST <<< "$SKIP_PROVIDERS" - for skip in "${SKIP_LIST[@]}"; do - skip=$(echo "$skip" | xargs) - [ "$skip" = "$1" ] && return 0 - done - return 1 -} - -is_agentic_provider() { - for agentic in "${AGENTIC_PROVIDERS[@]}"; do - [ "$agentic" = "$1" ] && return 0 - done - return 1 -} - -# build_test_cases [--skip-agentic] -build_test_cases() { - local skip_agentic=false - [ "$1" = "--skip-agentic" ] && skip_agentic=true - - local providers=() - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - local provider="${line%% -> *}" - if is_provider_available "$provider"; then - providers+=("$line") - echo "✓ Including $provider" - else - echo "⚠️ Skipping $provider (prerequisites not met)" - fi - done <<< "$PROVIDER_CONFIG" - echo "" - - TEST_CASES=() - local job_index=0 - for provider_config in "${providers[@]}"; do - local provider="${provider_config%% -> *}" - local models_str="${provider_config#* -> }" - - if should_skip_provider "$provider"; then - echo "⊘ Skipping provider: ${provider} (SKIP_PROVIDERS)" - continue - fi - - if [ "$skip_agentic" = true ] && is_agentic_provider "$provider"; then - echo "⊘ Skipping agentic provider: ${provider}" - continue - fi - - IFS='|' read -ra models <<< "$models_str" - for model in "${models[@]}"; do - TEST_CASES+=("$provider|$model|$job_index") - ((job_index++)) - done - done -} - -# run_test_cases -run_test_cases() { - local test_fn="$1" - - RESULTS_DIR=$(mktemp -d) - trap 'if [ -n "${RESULTS_DIR:-}" ]; then rm -rf -- "$RESULTS_DIR"; fi; if [ -n "${CLEANUP_DIR:-}" ]; then rm -rf -- "$CLEANUP_DIR"; fi' EXIT - MAX_PARALLEL=${MAX_PARALLEL:-$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 8)} - echo "Running ${#TEST_CASES[@]} tests (max $MAX_PARALLEL parallel)" - echo "" - - local running=0 - for ((i=0; i<${#TEST_CASES[@]}; i++)); do - IFS='|' read -r provider model idx <<< "${TEST_CASES[$i]}" - - if [ $i -eq 0 ]; then - # First test runs sequentially to catch early failures - "$test_fn" "$provider" "$model" "$RESULTS_DIR/result_$idx" "$RESULTS_DIR/output_$idx" - else - "$test_fn" "$provider" "$model" "$RESULTS_DIR/result_$idx" "$RESULTS_DIR/output_$idx" & - ((running++)) - if [ $running -ge $MAX_PARALLEL ]; then - wait -n 2>/dev/null || wait - ((running--)) - fi - fi - done - wait -} - -report_results() { - echo "" - echo "=== Test Results ===" - echo "" - - RESULTS=() - HARD_FAILURES=() - - for job in "${TEST_CASES[@]}"; do - IFS='|' read -r provider model idx <<< "$job" - - echo "Provider: $provider" - echo "Model: $model" - echo "" - cat "$RESULTS_DIR/output_$idx" - echo "" - - local result_line="" - [ -f "$RESULTS_DIR/result_$idx" ] && result_line=$(cat "$RESULTS_DIR/result_$idx") - local status="${result_line%%|*}" - local msg="${result_line#*|}" - - if [ "$status" = "success" ]; then - echo "✓ SUCCESS: $msg" - RESULTS+=("✓ ${provider}: ${model}") - else - if is_allowed_failure "$provider" "$model"; then - echo "⚠ FLAKY: $msg" - RESULTS+=("⚠ ${provider}: ${model} (flaky)") - else - echo "✗ FAILED: $msg" - RESULTS+=("✗ ${provider}: ${model}") - HARD_FAILURES+=("${provider}: ${model}") - fi - fi - echo "---" - done - - echo "" - echo "=== Test Summary ===" - for result in "${RESULTS[@]}"; do - echo "$result" - done - - if [ ${#HARD_FAILURES[@]} -gt 0 ]; then - echo "" - echo "Hard failures (${#HARD_FAILURES[@]}):" - for failure in "${HARD_FAILURES[@]}"; do - echo " - $failure" - done - echo "" - echo "Some tests failed!" - exit 1 - else - if echo "${RESULTS[@]}" | grep -q "⚠"; then - echo "" - echo "All required tests passed! (some flaky tests failed but are allowed)" - else - echo "" - echo "All tests passed!" - fi - fi -} From 3d5bea8b429dd6274555a4ca6883bc4c644bb0d8 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Wed, 1 Apr 2026 08:23:07 -0400 Subject: [PATCH 3/8] fix the regex --- .../tests/integration/test_providers_code_exec.test.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ui/desktop/tests/integration/test_providers_code_exec.test.ts b/ui/desktop/tests/integration/test_providers_code_exec.test.ts index a3d17d8f0bf6..53467c33741c 100644 --- a/ui/desktop/tests/integration/test_providers_code_exec.test.ts +++ b/ui/desktop/tests/integration/test_providers_code_exec.test.ts @@ -44,11 +44,12 @@ async function runCodeExecTest(tc: TestCase): Promise { GOOSE_MODEL: tc.model, }); - // Matches: "execute | code_execution", "get_function_details | code_execution", + // Matches: "execute_typescript | code_execution", "get_function_details | code_execution", // "tool call | execute", "tool calls | execute" (old format) // "▸ execute N tool call" (new format with tool_graph) + // "▸ execute_typescript" (plain tool name in output) const codeExecPattern = - /(execute \| code_execution)|(get_function_details \| code_execution)|(tool calls? \| execute)|(▸.*execute.*tool call)/; + /(execute_typescript \| code_execution)|(get_function_details \| code_execution)|(tool calls? \| execute)|(▸.*execute.*tool call)|(▸ execute_typescript)/; expect( codeExecPattern.test(output), From 4d1f328a8800d3b0b1bcaf5c6916535dfdd5cc22 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Wed, 1 Apr 2026 09:29:45 -0400 Subject: [PATCH 4/8] fix targets --- .github/workflows/pr-smoke-test.yml | 3 ++- ui/desktop/vitest.config.ts | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml index e930b1788b7f..e30ec8f1042f 100644 --- a/.github/workflows/pr-smoke-test.yml +++ b/.github/workflows/pr-smoke-test.yml @@ -284,7 +284,8 @@ jobs: GOOSE_PROVIDER: anthropic GOOSE_MODEL: claude-sonnet-4-5-20250929 SHELL: /bin/bash + SKIP_BUILD: 1 run: | echo 'export PATH=/some/fake/path:$PATH' >> $HOME/.bash_profile - source ../../bin/activate-hermit && pnpm run test:integration:debug + source ../../bin/activate-hermit && pnpm run test:integration -- tests/integration/goosed.test.ts working-directory: ui/desktop diff --git a/ui/desktop/vitest.config.ts b/ui/desktop/vitest.config.ts index 7a09ffc3c508..f745b9244dcc 100644 --- a/ui/desktop/vitest.config.ts +++ b/ui/desktop/vitest.config.ts @@ -15,10 +15,7 @@ const cfg = { environment: 'jsdom', setupFiles: ['./src/test/setup.ts'], css: true, - include: [ - 'src/**/*.{test,spec}.{js,jsx,ts,tsx}', - 'tests/integration/**/*.{test,spec}.{js,jsx,ts,tsx}', - ], + include: ['src/**/*.{test,spec}.{js,jsx,ts,tsx}'], }, } satisfies Record; From 3db9eb5fb5a8585af4b466524c1f4937154dc1c5 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Wed, 1 Apr 2026 10:02:31 -0400 Subject: [PATCH 5/8] provider types --- .../tests/integration/test_providers.test.ts | 17 +- .../test_providers_code_exec.test.ts | 12 +- .../tests/integration/test_providers_lib.ts | 329 ++++++++++-------- 3 files changed, 194 insertions(+), 164 deletions(-) diff --git a/ui/desktop/tests/integration/test_providers.test.ts b/ui/desktop/tests/integration/test_providers.test.ts index c7fbb77b343f..fccff70ad81b 100644 --- a/ui/desktop/tests/integration/test_providers.test.ts +++ b/ui/desktop/tests/integration/test_providers.test.ts @@ -10,14 +10,7 @@ import { test, expect, beforeAll } from 'vitest'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { - buildGoose, - discoverTestCases, - runGoose, - isAgenticProvider, - isAllowedFailure, - type TestCase, -} from './test_providers_lib'; +import { buildGoose, discoverTestCases, runGoose, type TestCase } from './test_providers_lib'; const BUILTINS = 'developer'; const TEST_CONTENT = 'test-content-abc123'; @@ -35,8 +28,8 @@ beforeAll(() => { }); const allCases = discoverTestCases(); -const available = allCases.filter((tc) => tc.available && !isAllowedFailure(tc.provider, tc.model)); -const flaky = allCases.filter((tc) => tc.available && isAllowedFailure(tc.provider, tc.model)); +const available = allCases.filter((tc) => tc.available && !tc.flaky); +const flaky = allCases.filter((tc) => tc.available && tc.flaky); const skipped = allCases.filter((tc) => !tc.available); async function runNormalTest(tc: TestCase): Promise { @@ -47,7 +40,7 @@ async function runNormalTest(tc: TestCase): Promise { let tokenA: string | undefined; let tokenB: string | undefined; - if (isAgenticProvider(tc.provider)) { + if (tc.agentic) { fs.copyFileSync(testFile, path.join(testdir, 'test-content.txt')); prompt = 'read ./test-content.txt and output its contents exactly'; } else { @@ -64,7 +57,7 @@ async function runNormalTest(tc: TestCase): Promise { GOOSE_MODEL: tc.model, }); - if (isAgenticProvider(tc.provider)) { + if (tc.agentic) { expect( output.toLowerCase(), `Expected model output to contain "${TEST_CONTENT}"\n\nFull output:\n${output}` diff --git a/ui/desktop/tests/integration/test_providers_code_exec.test.ts b/ui/desktop/tests/integration/test_providers_code_exec.test.ts index 53467c33741c..d1234b39b793 100644 --- a/ui/desktop/tests/integration/test_providers_code_exec.test.ts +++ b/ui/desktop/tests/integration/test_providers_code_exec.test.ts @@ -11,13 +11,7 @@ import { test, expect, beforeAll } from 'vitest'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { - buildGoose, - discoverTestCases, - runGoose, - isAllowedFailure, - type TestCase, -} from './test_providers_lib'; +import { buildGoose, discoverTestCases, runGoose, type TestCase } from './test_providers_lib'; const BUILTINS = 'memory,code_execution'; @@ -28,8 +22,8 @@ beforeAll(() => { }); const allCases = discoverTestCases({ skipAgentic: true }); -const available = allCases.filter((tc) => tc.available && !isAllowedFailure(tc.provider, tc.model)); -const flaky = allCases.filter((tc) => tc.available && isAllowedFailure(tc.provider, tc.model)); +const available = allCases.filter((tc) => tc.available && !tc.flaky); +const flaky = allCases.filter((tc) => tc.available && tc.flaky); const skipped = allCases.filter((tc) => !tc.available); async function runCodeExecTest(tc: TestCase): Promise { diff --git a/ui/desktop/tests/integration/test_providers_lib.ts b/ui/desktop/tests/integration/test_providers_lib.ts index 3fe5e46441d5..b4b790d109eb 100644 --- a/ui/desktop/tests/integration/test_providers_lib.ts +++ b/ui/desktop/tests/integration/test_providers_lib.ts @@ -14,59 +14,21 @@ import path from 'node:path'; // Provider configuration // --------------------------------------------------------------------------- -const PROVIDER_CONFIG_RAW = ` -openrouter -> google/gemini-2.5-pro|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b -xai -> grok-3 -openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5 -anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-5-20251101 -google -> gemini-2.5-pro|gemini-2.5-flash|gemini-3-pro-preview|gemini-3-flash-preview -tetrate -> claude-sonnet-4-20250514 -databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o -azure_openai -> \${AZURE_OPENAI_DEPLOYMENT_NAME} -aws_bedrock -> us.anthropic.claude-sonnet-4-5-20250929-v1:0 -gcp_vertex_ai -> gemini-2.5-pro -snowflake -> claude-sonnet-4-5 -venice -> llama-3.3-70b -litellm -> gpt-4o-mini -sagemaker_tgi -> sagemaker-tgi-endpoint -github_copilot -> gpt-4.1 -chatgpt_codex -> gpt-5.1-codex -claude-code -> default -codex -> gpt-5.2-codex -gemini-cli -> gemini-2.5-pro -cursor-agent -> auto -ollama -> qwen3 -`; +type ModelEntry = string | { name: string; flaky: true }; -const ALLOWED_FAILURES = new Set([ - 'google:gemini-2.5-flash', - 'google:gemini-3-pro-preview', - 'openrouter:nvidia/nemotron-3-nano-30b-a3b', - 'openrouter:qwen/qwen3-coder:exacto', - 'openai:gpt-3.5-turbo', -]); - -const AGENTIC_PROVIDERS = new Set(['claude-code', 'codex', 'gemini-cli', 'cursor-agent']); +interface ProviderConfig { + provider: string; + models: ModelEntry[]; + agentic?: boolean; + available: () => boolean; +} -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- +function modelName(entry: ModelEntry): string { + return typeof entry === 'string' ? entry : entry.name; +} -function loadDotenv(): void { - const envPath = path.resolve(process.cwd(), '.env'); - if (!fs.existsSync(envPath)) return; - const lines = fs.readFileSync(envPath, 'utf-8').split('\n'); - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) continue; - const eqIdx = trimmed.indexOf('='); - if (eqIdx === -1) continue; - const key = trimmed.slice(0, eqIdx); - const value = trimmed.slice(eqIdx + 1); - if (!(key in process.env)) { - process.env[key] = value; - } - } +function modelFlaky(entry: ModelEntry): boolean { + return typeof entry !== 'string' && entry.flaky; } function hasEnv(name: string): boolean { @@ -86,67 +48,155 @@ function hasFile(p: string): boolean { return fs.existsSync(p); } -export function isAgenticProvider(provider: string): boolean { - return AGENTIC_PROVIDERS.has(provider); -} - -function isProviderAvailable(provider: string): boolean { - switch (provider) { - case 'openrouter': - return hasEnv('OPENROUTER_API_KEY'); - case 'xai': - return hasEnv('XAI_API_KEY'); - case 'openai': - return hasEnv('OPENAI_API_KEY'); - case 'anthropic': - return hasEnv('ANTHROPIC_API_KEY'); - case 'google': - return hasEnv('GOOGLE_API_KEY'); - case 'tetrate': - return hasEnv('TETRATE_API_KEY'); - case 'databricks': - return hasEnv('DATABRICKS_HOST') && hasEnv('DATABRICKS_TOKEN'); - case 'azure_openai': - return hasEnv('AZURE_OPENAI_ENDPOINT') && hasEnv('AZURE_OPENAI_DEPLOYMENT_NAME'); - case 'aws_bedrock': - return hasEnv('AWS_REGION') && (hasEnv('AWS_PROFILE') || hasEnv('AWS_ACCESS_KEY_ID')); - case 'gcp_vertex_ai': - return hasEnv('GCP_PROJECT_ID'); - case 'snowflake': - return hasEnv('SNOWFLAKE_HOST') && hasEnv('SNOWFLAKE_TOKEN'); - case 'venice': - return hasEnv('VENICE_API_KEY'); - case 'litellm': - return hasEnv('LITELLM_API_KEY'); - case 'sagemaker_tgi': - return hasEnv('SAGEMAKER_ENDPOINT_NAME') && hasEnv('AWS_REGION'); - case 'github_copilot': - return ( +function getProviders(): ProviderConfig[] { + return [ + { + provider: 'openrouter', + models: [ + 'google/gemini-2.5-pro', + 'anthropic/claude-sonnet-4.5', + { name: 'qwen/qwen3-coder:exacto', flaky: true }, + 'z-ai/glm-4.6:exacto', + { name: 'nvidia/nemotron-3-nano-30b-a3b', flaky: true }, + ], + available: () => hasEnv('OPENROUTER_API_KEY'), + }, + { + provider: 'xai', + models: ['grok-3'], + available: () => hasEnv('XAI_API_KEY'), + }, + { + provider: 'openai', + models: ['gpt-4o', 'gpt-4o-mini', { name: 'gpt-3.5-turbo', flaky: true }, 'gpt-5'], + available: () => hasEnv('OPENAI_API_KEY'), + }, + { + provider: 'anthropic', + models: ['claude-sonnet-4-5-20250929', 'claude-opus-4-5-20251101'], + available: () => hasEnv('ANTHROPIC_API_KEY'), + }, + { + provider: 'google', + models: [ + 'gemini-2.5-pro', + { name: 'gemini-2.5-flash', flaky: true }, + { name: 'gemini-3-pro-preview', flaky: true }, + 'gemini-3-flash-preview', + ], + available: () => hasEnv('GOOGLE_API_KEY'), + }, + { + provider: 'tetrate', + models: ['claude-sonnet-4-20250514'], + available: () => hasEnv('TETRATE_API_KEY'), + }, + { + provider: 'databricks', + models: ['databricks-claude-sonnet-4', 'gemini-2-5-flash', 'gpt-4o'], + available: () => hasEnv('DATABRICKS_HOST') && hasEnv('DATABRICKS_TOKEN'), + }, + { + provider: 'azure_openai', + models: [process.env.AZURE_OPENAI_DEPLOYMENT_NAME ?? ''], + available: () => hasEnv('AZURE_OPENAI_ENDPOINT') && hasEnv('AZURE_OPENAI_DEPLOYMENT_NAME'), + }, + { + provider: 'aws_bedrock', + models: ['us.anthropic.claude-sonnet-4-5-20250929-v1:0'], + available: () => + hasEnv('AWS_REGION') && (hasEnv('AWS_PROFILE') || hasEnv('AWS_ACCESS_KEY_ID')), + }, + { + provider: 'gcp_vertex_ai', + models: ['gemini-2.5-pro'], + available: () => hasEnv('GCP_PROJECT_ID'), + }, + { + provider: 'snowflake', + models: ['claude-sonnet-4-5'], + available: () => hasEnv('SNOWFLAKE_HOST') && hasEnv('SNOWFLAKE_TOKEN'), + }, + { + provider: 'venice', + models: ['llama-3.3-70b'], + available: () => hasEnv('VENICE_API_KEY'), + }, + { + provider: 'litellm', + models: ['gpt-4o-mini'], + available: () => hasEnv('LITELLM_API_KEY'), + }, + { + provider: 'sagemaker_tgi', + models: ['sagemaker-tgi-endpoint'], + available: () => hasEnv('SAGEMAKER_ENDPOINT_NAME') && hasEnv('AWS_REGION'), + }, + { + provider: 'github_copilot', + models: ['gpt-4.1'], + available: () => hasEnv('GITHUB_COPILOT_TOKEN') || - hasFile(path.join(os.homedir(), '.config/goose/github_copilot_token.json')) - ); - case 'chatgpt_codex': - return ( + hasFile(path.join(os.homedir(), '.config/goose/github_copilot_token.json')), + }, + { + provider: 'chatgpt_codex', + models: ['gpt-5.1-codex'], + available: () => hasEnv('CHATGPT_CODEX_TOKEN') || - hasFile(path.join(os.homedir(), '.config/goose/chatgpt_codex_token.json')) - ); - case 'ollama': - return hasEnv('OLLAMA_HOST') || hasCmd('ollama'); - case 'claude-code': - return hasCmd('claude'); - case 'codex': - return hasCmd('codex'); - case 'gemini-cli': - return hasCmd('gemini'); - case 'cursor-agent': - return hasCmd('cursor-agent'); - default: - return true; - } + hasFile(path.join(os.homedir(), '.config/goose/chatgpt_codex_token.json')), + }, + { + provider: 'claude-code', + models: ['default'], + agentic: true, + available: () => hasCmd('claude'), + }, + { + provider: 'codex', + models: ['gpt-5.2-codex'], + agentic: true, + available: () => hasCmd('codex'), + }, + { + provider: 'gemini-cli', + models: ['gemini-2.5-pro'], + agentic: true, + available: () => hasCmd('gemini'), + }, + { + provider: 'cursor-agent', + models: ['auto'], + agentic: true, + available: () => hasCmd('cursor-agent'), + }, + { + provider: 'ollama', + models: ['qwen3'], + available: () => hasEnv('OLLAMA_HOST') || hasCmd('ollama'), + }, + ]; } -export function isAllowedFailure(provider: string, model: string): boolean { - return ALLOWED_FAILURES.has(`${provider}:${model}`); +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function loadDotenv(): void { + const envPath = path.resolve(process.cwd(), '.env'); + if (!fs.existsSync(envPath)) return; + const lines = fs.readFileSync(envPath, 'utf-8').split('\n'); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) continue; + const eqIdx = trimmed.indexOf('='); + if (eqIdx === -1) continue; + const key = trimmed.slice(0, eqIdx); + const value = trimmed.slice(eqIdx + 1); + if (!(key in process.env)) { + process.env[key] = value; + } + } } function shouldSkipProvider(provider: string): boolean { @@ -158,30 +208,6 @@ function shouldSkipProvider(provider: string): boolean { .includes(provider); } -// --------------------------------------------------------------------------- -// Parse provider config -// --------------------------------------------------------------------------- - -interface ProviderLine { - provider: string; - modelsStr: string; -} - -function parseProviderConfig(): ProviderLine[] { - const lines: ProviderLine[] = []; - for (const raw of PROVIDER_CONFIG_RAW.split('\n')) { - const line = raw.trim(); - if (!line || line.startsWith('#')) continue; - const arrowIdx = line.indexOf(' -> '); - if (arrowIdx === -1) continue; - const provider = line.slice(0, arrowIdx).trim(); - let modelsStr = line.slice(arrowIdx + 4).trim(); - modelsStr = modelsStr.replace(/\$\{(\w+)\}/g, (_, name) => process.env[name] ?? ''); - lines.push({ provider, modelsStr }); - } - return lines; -} - // --------------------------------------------------------------------------- // Build goose binary // --------------------------------------------------------------------------- @@ -206,44 +232,61 @@ export interface TestCase { provider: string; model: string; available: boolean; + flaky: boolean; + agentic: boolean; skippedReason?: string; } export function discoverTestCases(options?: { skipAgentic?: boolean }): TestCase[] { loadDotenv(); const skipAgentic = options?.skipAgentic ?? false; - const providerLines = parseProviderConfig(); + const providers = getProviders(); const testCases: TestCase[] = []; - for (const { provider, modelsStr } of providerLines) { - const available = isProviderAvailable(provider); - const models = modelsStr.split('|'); + for (const pc of providers) { + const providerAvailable = pc.available(); + const agentic = pc.agentic ?? false; + + for (const entry of pc.models) { + const model = modelName(entry); + const flaky = modelFlaky(entry); - for (const model of models) { - if (!available) { + if (!providerAvailable) { testCases.push({ - provider, + provider: pc.provider, model, available: false, + flaky, + agentic, skippedReason: 'prerequisites not met', }); - } else if (shouldSkipProvider(provider)) { + } else if (shouldSkipProvider(pc.provider)) { testCases.push({ - provider, + provider: pc.provider, model, available: false, + flaky, + agentic, skippedReason: 'SKIP_PROVIDERS', }); - } else if (skipAgentic && isAgenticProvider(provider)) { + } else if (skipAgentic && agentic) { testCases.push({ - provider, + provider: pc.provider, model, available: false, + flaky, + agentic, skippedReason: 'agentic provider skipped in this mode', }); } else { - testCases.push({ provider, model, available: true }); + testCases.push({ + provider: pc.provider, + model, + available: true, + flaky, + agentic, + }); } } } From 532468d825b18503a2978ac92342cfe06fba7d86 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Wed, 1 Apr 2026 11:27:00 -0400 Subject: [PATCH 6/8] split --- .../tests/integration/test_providers.test.ts | 119 ++++++++---------- .../test_providers_code_exec.test.ts | 55 +++----- .../tests/integration/test_providers_lib.ts | 54 ++++++++ 3 files changed, 120 insertions(+), 108 deletions(-) diff --git a/ui/desktop/tests/integration/test_providers.test.ts b/ui/desktop/tests/integration/test_providers.test.ts index fccff70ad81b..44feb2a7c00f 100644 --- a/ui/desktop/tests/integration/test_providers.test.ts +++ b/ui/desktop/tests/integration/test_providers.test.ts @@ -1,16 +1,16 @@ /** * Provider smoke tests — normal mode (direct tool calls). * - * Ported from scripts/test_providers.sh. Each available provider/model pair - * gets its own test that spawns `goose run` with the developer builtin, asks - * the model to read files via the shell tool, and validates the output. + * Each available provider/model pair gets its own test that spawns `goose run` + * with the developer builtin, asks the model to read files via the shell tool, + * and validates the output. */ -import { test, expect, beforeAll } from 'vitest'; +import { expect, beforeAll } from 'vitest'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { buildGoose, discoverTestCases, runGoose, type TestCase } from './test_providers_lib'; +import { buildGoose, discoverTestCases, runGoose, providerTest } from './test_providers_lib'; const BUILTINS = 'developer'; const TEST_CONTENT = 'test-content-abc123'; @@ -27,77 +27,60 @@ beforeAll(() => { fs.writeFileSync(testFile, TEST_CONTENT + '\n'); }); -const allCases = discoverTestCases(); -const available = allCases.filter((tc) => tc.available && !tc.flaky); -const flaky = allCases.filter((tc) => tc.available && tc.flaky); -const skipped = allCases.filter((tc) => !tc.available); +const { testAgentic, testNonAgentic } = providerTest(discoverTestCases()); -async function runNormalTest(tc: TestCase): Promise { +testNonAgentic('reads files via shell tool', async (tc) => { const testdir = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-test-')); - try { - let prompt: string; - let tokenA: string | undefined; - let tokenB: string | undefined; - - if (tc.agentic) { - fs.copyFileSync(testFile, path.join(testdir, 'test-content.txt')); - prompt = 'read ./test-content.txt and output its contents exactly'; - } else { - tokenA = `smoke-alpha-${Math.floor(Math.random() * 32768)}`; - tokenB = `smoke-bravo-${Math.floor(Math.random() * 32768)}`; - fs.writeFileSync(path.join(testdir, 'part-a.txt'), tokenA + '\n'); - fs.writeFileSync(path.join(testdir, 'part-b.txt'), tokenB + '\n'); - prompt = - 'Use the shell tool to cat ./part-a.txt and ./part-b.txt, then reply with ONLY the contents of both files, one per line, nothing else.'; - } + const tokenA = `smoke-alpha-${Math.floor(Math.random() * 32768)}`; + const tokenB = `smoke-bravo-${Math.floor(Math.random() * 32768)}`; + fs.writeFileSync(path.join(testdir, 'part-a.txt'), tokenA + '\n'); + fs.writeFileSync(path.join(testdir, 'part-b.txt'), tokenB + '\n'); - const output = await runGoose(gooseBin, testdir, prompt, BUILTINS, { - GOOSE_PROVIDER: tc.provider, - GOOSE_MODEL: tc.model, - }); + const output = await runGoose( + gooseBin, + testdir, + 'Use the shell tool to cat ./part-a.txt and ./part-b.txt, then reply with ONLY the contents of both files, one per line, nothing else.', + BUILTINS, + { GOOSE_PROVIDER: tc.provider, GOOSE_MODEL: tc.model } + ); - if (tc.agentic) { - expect( - output.toLowerCase(), - `Expected model output to contain "${TEST_CONTENT}"\n\nFull output:\n${output}` - ).toContain(TEST_CONTENT.toLowerCase()); - } else { - const shellToolPattern = /(shell \| developer)|(▸.*shell)/; - expect( - shellToolPattern.test(output), - `Expected model to use shell tool\n\nFull output:\n${output}` - ).toBe(true); - expect( - output, - `Expected output to contain token from part-a.txt (${tokenA})\n\nFull output:\n${output}` - ).toContain(tokenA); - expect( - output, - `Expected output to contain token from part-b.txt (${tokenB})\n\nFull output:\n${output}` - ).toContain(tokenB); - } + const shellToolPattern = /(shell \| developer)|(▸.*shell)/; + expect( + shellToolPattern.test(output), + `Expected model to use shell tool\n\nFull output:\n${output}` + ).toBe(true); + expect( + output, + `Expected output to contain token from part-a.txt (${tokenA})\n\nFull output:\n${output}` + ).toContain(tokenA); + expect( + output, + `Expected output to contain token from part-b.txt (${tokenB})\n\nFull output:\n${output}` + ).toContain(tokenB); } finally { fs.rmSync(testdir, { recursive: true, force: true }); } -} +}); -if (available.length > 0) { - test.each(available)('$provider / $model', async (tc) => { - await runNormalTest(tc); - }); -} +testAgentic('reads file contents', async (tc) => { + const testdir = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-test-')); + try { + fs.copyFileSync(testFile, path.join(testdir, 'test-content.txt')); -if (flaky.length > 0) { - test.each(flaky)('$provider / $model (flaky — allowed to fail)', async (tc) => { - try { - await runNormalTest(tc); - } catch (err) { - console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); - } - }); -} + const output = await runGoose( + gooseBin, + testdir, + 'read ./test-content.txt and output its contents exactly', + BUILTINS, + { GOOSE_PROVIDER: tc.provider, GOOSE_MODEL: tc.model } + ); -if (skipped.length > 0) { - test.skip.each(skipped)('$provider / $model — $skippedReason', () => {}); -} + expect( + output.toLowerCase(), + `Expected model output to contain "${TEST_CONTENT}"\n\nFull output:\n${output}` + ).toContain(TEST_CONTENT.toLowerCase()); + } finally { + fs.rmSync(testdir, { recursive: true, force: true }); + } +}); diff --git a/ui/desktop/tests/integration/test_providers_code_exec.test.ts b/ui/desktop/tests/integration/test_providers_code_exec.test.ts index d1234b39b793..d166c126cdc1 100644 --- a/ui/desktop/tests/integration/test_providers_code_exec.test.ts +++ b/ui/desktop/tests/integration/test_providers_code_exec.test.ts @@ -1,17 +1,16 @@ /** * Provider smoke tests — code execution mode (JS batching). * - * Ported from scripts/test_providers_code_exec.sh. Each available - * (non-agentic) provider/model pair gets its own test that spawns `goose run` - * with the memory + code_execution builtins and validates that the - * code_execution tool was invoked. + * Each available (non-agentic) provider/model pair gets its own test that + * spawns `goose run` with the memory + code_execution builtins and validates + * that the code_execution tool was invoked. */ -import { test, expect, beforeAll } from 'vitest'; +import { expect, beforeAll } from 'vitest'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { buildGoose, discoverTestCases, runGoose, type TestCase } from './test_providers_lib'; +import { buildGoose, discoverTestCases, runGoose, providerTest } from './test_providers_lib'; const BUILTINS = 'memory,code_execution'; @@ -21,22 +20,18 @@ beforeAll(() => { gooseBin = buildGoose(); }); -const allCases = discoverTestCases({ skipAgentic: true }); -const available = allCases.filter((tc) => tc.available && !tc.flaky); -const flaky = allCases.filter((tc) => tc.available && tc.flaky); -const skipped = allCases.filter((tc) => !tc.available); +const { testAll } = providerTest(discoverTestCases({ skipAgentic: true })); -async function runCodeExecTest(tc: TestCase): Promise { +testAll('invokes code_execution tool', async (tc) => { const testdir = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-codeexec-')); - try { - const prompt = - "Store a memory with category 'test' and data 'hello world', then retrieve all memories from category 'test'."; - - const output = await runGoose(gooseBin, testdir, prompt, BUILTINS, { - GOOSE_PROVIDER: tc.provider, - GOOSE_MODEL: tc.model, - }); + const output = await runGoose( + gooseBin, + testdir, + "Store a memory with category 'test' and data 'hello world', then retrieve all memories from category 'test'.", + BUILTINS, + { GOOSE_PROVIDER: tc.provider, GOOSE_MODEL: tc.model } + ); // Matches: "execute_typescript | code_execution", "get_function_details | code_execution", // "tool call | execute", "tool calls | execute" (old format) @@ -52,24 +47,4 @@ async function runCodeExecTest(tc: TestCase): Promise { } finally { fs.rmSync(testdir, { recursive: true, force: true }); } -} - -if (available.length > 0) { - test.each(available)('$provider / $model', async (tc) => { - await runCodeExecTest(tc); - }); -} - -if (flaky.length > 0) { - test.each(flaky)('$provider / $model (flaky — allowed to fail)', async (tc) => { - try { - await runCodeExecTest(tc); - } catch (err) { - console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); - } - }); -} - -if (skipped.length > 0) { - test.skip.each(skipped)('$provider / $model — $skippedReason', () => {}); -} +}); diff --git a/ui/desktop/tests/integration/test_providers_lib.ts b/ui/desktop/tests/integration/test_providers_lib.ts index b4b790d109eb..fe5bd418edc7 100644 --- a/ui/desktop/tests/integration/test_providers_lib.ts +++ b/ui/desktop/tests/integration/test_providers_lib.ts @@ -5,6 +5,7 @@ * allowed-failure list, agentic-provider list, and environment detection. */ +import { test } from 'vitest'; import { execSync, spawn, type ChildProcess } from 'node:child_process'; import fs from 'node:fs'; import os from 'node:os'; @@ -294,6 +295,59 @@ export function discoverTestCases(options?: { skipAgentic?: boolean }): TestCase return testCases; } +// --------------------------------------------------------------------------- +// Test registration helpers +// --------------------------------------------------------------------------- + +type ProviderTestFn = (tc: TestCase) => Promise; + +function registerTests(label: string, cases: TestCase[], fn: ProviderTestFn): void { + const available = cases.filter((tc) => tc.available && !tc.flaky); + const flaky = cases.filter((tc) => tc.available && tc.flaky); + const skipped = cases.filter((tc) => !tc.available); + + if (available.length > 0) { + test.each(available)(`${label} — $provider / $model`, async (tc) => { + await fn(tc); + }); + } + + if (flaky.length > 0) { + test.each(flaky)(`${label} — $provider / $model (flaky)`, async (tc) => { + try { + await fn(tc); + } catch (err) { + console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); + } + }); + } + + if (skipped.length > 0) { + test.skip.each(skipped)(`${label} — $provider / $model — $skippedReason`, () => {}); + } +} + +/** + * Build decorator-style test registrars from a set of discovered test cases. + * + * Usage: + * const { testAll, testAgentic, testNonAgentic } = providerTest(cases); + * + * testAll('reads a file', async (tc) => { ... }); + * testAgentic('delegates work', async (tc) => { ... }); + * testNonAgentic('uses shell tool', async (tc) => { ... }); + */ +export function providerTest(cases: TestCase[]) { + const agentic = cases.filter((tc) => tc.agentic); + const nonAgentic = cases.filter((tc) => !tc.agentic); + + return { + testAll: (label: string, fn: ProviderTestFn) => registerTests(label, cases, fn), + testAgentic: (label: string, fn: ProviderTestFn) => registerTests(label, agentic, fn), + testNonAgentic: (label: string, fn: ProviderTestFn) => registerTests(label, nonAgentic, fn), + }; +} + // --------------------------------------------------------------------------- // Utility: run goose binary and capture output // --------------------------------------------------------------------------- From 33aee214e2d42c8bbf99a2ed0ce01f2eabf7fcea Mon Sep 17 00:00:00 2001 From: Douwe Osinga Date: Wed, 1 Apr 2026 20:31:05 -0400 Subject: [PATCH 7/8] fix: resolve .env from repo root, strip quotes, extend flaky test timeout - loadDotenv() now resolves .env from the repository root via __dirname instead of process.cwd(), matching the old shell script behavior when run from ui/desktop - Strip surrounding quotes from dotenv values so KEY="value" works - Give flaky tests a 120s timeout so the try/catch handler runs before vitest kills the test Signed-off-by: Douwe Osinga --- ui/desktop/tests/integration/test_providers.test.ts | 2 +- .../integration/test_providers_code_exec.test.ts | 2 +- ui/desktop/tests/integration/test_providers_lib.ts | 13 +++++++++++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/ui/desktop/tests/integration/test_providers.test.ts b/ui/desktop/tests/integration/test_providers.test.ts index fccff70ad81b..1819f7dd3893 100644 --- a/ui/desktop/tests/integration/test_providers.test.ts +++ b/ui/desktop/tests/integration/test_providers.test.ts @@ -95,7 +95,7 @@ if (flaky.length > 0) { } catch (err) { console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); } - }); + }, 120_000); } if (skipped.length > 0) { diff --git a/ui/desktop/tests/integration/test_providers_code_exec.test.ts b/ui/desktop/tests/integration/test_providers_code_exec.test.ts index d1234b39b793..85ace41b2b78 100644 --- a/ui/desktop/tests/integration/test_providers_code_exec.test.ts +++ b/ui/desktop/tests/integration/test_providers_code_exec.test.ts @@ -67,7 +67,7 @@ if (flaky.length > 0) { } catch (err) { console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`); } - }); + }, 120_000); } if (skipped.length > 0) { diff --git a/ui/desktop/tests/integration/test_providers_lib.ts b/ui/desktop/tests/integration/test_providers_lib.ts index b4b790d109eb..6945843abb80 100644 --- a/ui/desktop/tests/integration/test_providers_lib.ts +++ b/ui/desktop/tests/integration/test_providers_lib.ts @@ -182,8 +182,17 @@ function getProviders(): ProviderConfig[] { // Helpers // --------------------------------------------------------------------------- +function stripQuotes(s: string): string { + if (s.length >= 2 && ((s.startsWith('"') && s.endsWith('"')) || (s.startsWith("'") && s.endsWith("'")))) { + return s.slice(1, -1); + } + return s; +} + function loadDotenv(): void { - const envPath = path.resolve(process.cwd(), '.env'); + // Resolve .env from the repository root (two levels up from ui/desktop). + const repoRoot = path.resolve(__dirname, '..', '..', '..', '..'); + const envPath = path.join(repoRoot, '.env'); if (!fs.existsSync(envPath)) return; const lines = fs.readFileSync(envPath, 'utf-8').split('\n'); for (const line of lines) { @@ -192,7 +201,7 @@ function loadDotenv(): void { const eqIdx = trimmed.indexOf('='); if (eqIdx === -1) continue; const key = trimmed.slice(0, eqIdx); - const value = trimmed.slice(eqIdx + 1); + const value = stripQuotes(trimmed.slice(eqIdx + 1)); if (!(key in process.env)) { process.env[key] = value; } From 4abfea7b98f18b6125af7ed35d12fc26ed9a46f5 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Fri, 24 Apr 2026 13:12:04 -0400 Subject: [PATCH 8/8] update test targets --- .github/workflows/pr-smoke-test.yml | 6 +++--- RELEASE_CHECKLIST.md | 2 +- ui/desktop/package.json | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml index 0f251e614837..f49d983dae5e 100644 --- a/.github/workflows/pr-smoke-test.yml +++ b/.github/workflows/pr-smoke-test.yml @@ -133,7 +133,7 @@ jobs: run: | mkdir -p $HOME/.local/share/goose/sessions mkdir -p $HOME/.config/goose - source ../../bin/activate-hermit && pnpm run test:integration -- tests/integration/test_providers.test.ts + source ../../bin/activate-hermit && pnpm run test:integration:providers working-directory: ui/desktop - name: Set up Python @@ -211,7 +211,7 @@ jobs: run: | mkdir -p $HOME/.local/share/goose/sessions mkdir -p $HOME/.config/goose - source ../../bin/activate-hermit && pnpm run test:integration -- tests/integration/test_providers_code_exec.test.ts + source ../../bin/activate-hermit && pnpm run test:integration:providers-code-exec working-directory: ui/desktop compaction-tests: @@ -287,5 +287,5 @@ jobs: SKIP_BUILD: 1 run: | echo 'export PATH=/some/fake/path:$PATH' >> $HOME/.bash_profile - source ../../bin/activate-hermit && pnpm run test:integration -- tests/integration/goosed.test.ts + source ../../bin/activate-hermit && pnpm run test:integration:goosed working-directory: ui/desktop diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md index fdc0268790ce..e031d00c0c4d 100644 --- a/RELEASE_CHECKLIST.md +++ b/RELEASE_CHECKLIST.md @@ -17,7 +17,7 @@ Make a copy of this document for each version and check off as steps are verifie ### Provider Testing -- [ ] Run `cd ui/desktop && pnpm run test:integration -- tests/integration/test_providers.test.ts` locally from the release branch and verify all providers/models work +- [ ] Run `cd ui/desktop && pnpm run test:integration:providers` locally from the release branch and verify all providers/models work - [ ] Launch goose, click reset providers, choose databricks and a model ### Starting Conversations diff --git a/ui/desktop/package.json b/ui/desktop/package.json index 643698f97b3e..0988f399e79f 100644 --- a/ui/desktop/package.json +++ b/ui/desktop/package.json @@ -35,6 +35,9 @@ "test:ui": "vitest --ui", "test:coverage": "vitest run --coverage", "test:integration": "vitest run --config vitest.integration.config.ts", + "test:integration:goosed": "vitest run --config vitest.integration.config.ts tests/integration/goosed.test.ts", + "test:integration:providers": "vitest run --config vitest.integration.config.ts tests/integration/test_providers.test.ts", + "test:integration:providers-code-exec": "vitest run --config vitest.integration.config.ts tests/integration/test_providers_code_exec.test.ts", "test:integration:watch": "vitest --config vitest.integration.config.ts", "test:integration:debug": "DEBUG=1 vitest run --config vitest.integration.config.ts", "i18n:extract": "formatjs extract 'src/**/*.{ts,tsx}' --out-file src/i18n/messages/en.json --flatten && pnpm run i18n:compile",