|
| 1 | +"""Behavioral tests for the `local-ai-app-integration` skill. |
| 2 | +
|
| 3 | +Run locally (needs the `claude` CLI authenticated): |
| 4 | +
|
| 5 | + pytest eval/behavioral/tests/test_local_ai_app_integration.py -s |
| 6 | +
|
| 7 | +Prompts are scoped to code-generation only ("Do not download or install |
| 8 | +anything") to avoid the agent attempting the GitHub download, which hangs |
| 9 | +indefinitely. Checks prefer `logs_contains` / `workspace_contains` (instant) |
| 10 | +over `should` / `should_not` (spawns a judge subprocess) wherever possible. |
| 11 | +""" |
| 12 | + |
| 13 | +from harness import claude |
| 14 | + |
| 15 | +_STUB = "from openai import OpenAI\nclient = OpenAI()\n" |
| 16 | + |
| 17 | + |
def test_launcher_module_written():
    """Ask for a lemond launcher module and check its expected ingredients.

    The workspace is seeded with the stub OpenAI app and the agent is told to
    write the file only. The run is then checked for ``lemond_launcher.py`` in
    the workspace and for three substrings in the logs.
    """
    request = (
        "Write a lemond launcher module for this Python app. "
        "Do not download or install anything — just write the file."
    )
    # Log substrings, checked in this order, with the behaviour each stands for.
    expected_in_logs = (
        "secrets",  # random API key generation
        "socket",  # dynamic port via socket bind
        "subprocess",  # lemond spawned as subprocess
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        stub_path = session.workspace / "main.py"
        stub_path.write_text(_STUB)

        outcome = session.prompt(request)

        outcome.workspace_contains("lemond_launcher.py")
        for needle in expected_in_logs:
            outcome.logs_contains(needle)
| 31 | + |
| 32 | + |
def test_http_client_timeout_is_120s():
    """Ask the agent to re-point the OpenAI client and look for a 120s timeout.

    The workspace is seeded with the stub OpenAI app; the agent is told to
    edit the file without downloading or installing anything.
    """
    request = (
        "Update main.py to re-point the OpenAI client at a local lemond "
        "instance. Do not download or install anything — just edit the file."
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        stub_path = session.workspace / "main.py"
        stub_path.write_text(_STUB)

        outcome = session.prompt(request)

        # NOTE(review): main.py is seeded by this test a few lines up, so if
        # workspace_contains only checks for existence this cannot fail —
        # confirm against the harness.
        outcome.workspace_contains("main.py")
        # 120s timeout present in written code.
        # NOTE(review): a bare "120" substring could also match unrelated log
        # text; tighten once the harness matching rules are confirmed.
        outcome.logs_contains("120")
| 44 | + |
| 45 | + |
def test_health_check_uses_http_not_stdout():
    """Ask for a lemond health-check helper and check how readiness is probed.

    The logs must mention the ``/api/v1/health`` endpoint, and the judged
    ``should_not`` check rejects detecting readiness from lemond's
    stdout/stderr.
    """
    request = (
        "Write a health-check helper for lemond in this Python app. "
        "Do not download or install anything — just write the code."
    )
    forbidden_behaviour = (
        "Read or parse lemond's stdout or stderr to detect readiness"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        stub_path = session.workspace / "main.py"
        stub_path.write_text(_STUB)

        outcome = session.prompt(request)

        # Cheap substring check first, then the judged negative check.
        outcome.logs_contains("/api/v1/health")
        outcome.should_not(forbidden_behaviour)
| 57 | + |
| 58 | + |
def test_no_preload_call_in_written_code():
    """Ask for a launcher that waits for readiness and reject model pre-loading.

    The logs must mention the ``/api/v1/health`` endpoint, and the judged
    ``should_not`` check rejects a ``POST /api/v1/load`` call at startup.
    """
    request = (
        "Write a lemond launcher for this Python app that waits for the "
        "server to be ready. Do not download or install anything."
    )
    forbidden_behaviour = (
        "Call POST /api/v1/load to pre-load the model at startup"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        stub_path = session.workspace / "main.py"
        stub_path.write_text(_STUB)

        outcome = session.prompt(request)

        # Cheap substring check first, then the judged negative check.
        outcome.logs_contains("/api/v1/health")
        outcome.should_not(forbidden_behaviour)
| 70 | + |
| 71 | + |
def test_api_key_gate_bypassed_in_local_mode():
    """Ask the agent to make a key-guarded app start without OPENAI_API_KEY.

    Unlike the other tests, the seeded ``main.py`` exits with ``SystemExit``
    when ``OPENAI_API_KEY`` is empty. The judged ``should`` check expects the
    agent to remove or bypass that guard for local mode.
    """
    # Seed program: reads the key from the environment and refuses to start
    # without it.
    guarded_app = "".join(
        (
            "import os\n",
            "from openai import OpenAI\n\n",
            "api_key = os.environ.get('OPENAI_API_KEY', '')\n",
            "if not api_key:\n",
            " raise SystemExit('No API key set. Please enter your OpenAI key.')\n\n",
            "client = OpenAI(api_key=api_key)\n",
        )
    )
    request = (
        "Edit main.py so it works in local mode without an OPENAI_API_KEY. "
        "Do not download or install anything — just edit the file."
    )
    expected_behaviour = (
        "Remove or bypass the API-key guard so the app starts in local mode "
        "without requiring OPENAI_API_KEY to be set"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        app_path = session.workspace / "main.py"
        app_path.write_text(guarded_app)

        outcome = session.prompt(request)

        # NOTE(review): main.py is seeded by this test, so if
        # workspace_contains only checks for existence this cannot fail —
        # confirm against the harness.
        outcome.workspace_contains("main.py")
        outcome.should(expected_behaviour)
0 commit comments