Skip to content

Commit 45e2723

Browse files
Merge branch 'main' into dholanda/behavioral_orchestr
2 parents 05324f2 + 0718ef0 commit 45e2723

4 files changed

Lines changed: 543 additions & 140 deletions

File tree

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Behavioral tests for the `local-ai-app-integration` skill.
2+
3+
Run locally (needs the `claude` CLI authenticated):
4+
5+
pytest eval/behavioral/tests/test_local_ai_app_integration.py -s
6+
7+
Prompts are scoped to code-generation only ("Do not download or install
8+
anything") to avoid the agent attempting the GitHub download, which hangs
9+
indefinitely. Checks prefer `logs_contains` / `workspace_contains` (instant)
10+
over `should` / `should_not` (spawns a judge subprocess) wherever possible.
11+
"""
12+
13+
from harness import claude
14+
15+
# Minimal two-line OpenAI-client script; several tests below write it to
# `main.py` in the agent workspace before sending their prompt.
_STUB = "from openai import OpenAI\nclient = OpenAI()\n"
16+
17+
18+
def test_launcher_module_written():
    """Prompt for a lemond launcher module and check the file and log contents.

    Seeds the workspace with the stub `main.py`, asks the agent to write a
    launcher without downloading anything, then checks for
    `lemond_launcher.py` in the workspace and for three substrings in the logs.
    """
    request = (
        "Write a lemond launcher module for this Python app. "
        "Do not download or install anything — just write the file."
    )
    # Substrings looked up in the run logs, in this order.
    expected_in_logs = (
        "secrets",  # random API key generation
        "socket",  # dynamic port via socket bind
        "subprocess",  # lemond spawned as subprocess
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        seed_file = session.workspace / "main.py"
        seed_file.write_text(_STUB)

        result = session.prompt(request)

        result.workspace_contains("lemond_launcher.py")
        for needle in expected_in_logs:
            result.logs_contains(needle)
31+
32+
33+
def test_http_client_timeout_is_120s():
    """Prompt for re-pointing the OpenAI client and look for "120" in the logs.

    Seeds the workspace with the stub `main.py`, asks the agent to edit it to
    target a local lemond instance, then checks that `main.py` is in the
    workspace and that the logs contain the substring "120".
    """
    request = (
        "Update main.py to re-point the OpenAI client at a local lemond "
        "instance. Do not download or install anything — just edit the file."
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        target = session.workspace / "main.py"
        target.write_text(_STUB)

        result = session.prompt(request)

        result.workspace_contains("main.py")
        # 120s timeout present in written code
        result.logs_contains("120")
44+
45+
46+
def test_health_check_uses_http_not_stdout():
    """Prompt for a lemond health-check helper and check how readiness is probed.

    Seeds the workspace with the stub `main.py`, asks for a health-check
    helper, then checks that the logs mention `/api/v1/health` and that the
    run did not rely on lemond's stdout/stderr for readiness.
    """
    request = (
        "Write a health-check helper for lemond in this Python app. "
        "Do not download or install anything — just write the code."
    )
    forbidden_behavior = (
        "Read or parse lemond's stdout or stderr to detect readiness"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        seed_file = session.workspace / "main.py"
        seed_file.write_text(_STUB)

        result = session.prompt(request)

        result.logs_contains("/api/v1/health")
        result.should_not(forbidden_behavior)
57+
58+
59+
def test_no_preload_call_in_written_code():
    """Prompt for a launcher that waits for readiness and check for no pre-load.

    Seeds the workspace with the stub `main.py`, asks for a launcher that
    waits until the server is ready, then checks that the logs mention
    `/api/v1/health` and that the run did not call POST /api/v1/load.
    """
    request = (
        "Write a lemond launcher for this Python app that waits for the "
        "server to be ready. Do not download or install anything."
    )
    forbidden_behavior = (
        "Call POST /api/v1/load to pre-load the model at startup"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        seed_file = session.workspace / "main.py"
        seed_file.write_text(_STUB)

        result = session.prompt(request)

        result.logs_contains("/api/v1/health")
        result.should_not(forbidden_behavior)
70+
71+
72+
def test_api_key_gate_bypassed_in_local_mode():
    """Prompt for a local-mode edit of an app that exits without an API key.

    Seeds `main.py` with a script that raises `SystemExit` when
    `OPENAI_API_KEY` is unset, asks the agent to make it work in local mode,
    then checks that `main.py` is in the workspace and that the API-key guard
    was removed or bypassed.
    """
    # App under edit: refuses to start unless OPENAI_API_KEY is set.
    guarded_app = (
        "import os\n"
        "from openai import OpenAI\n\n"
        "api_key = os.environ.get('OPENAI_API_KEY', '')\n"
        "if not api_key:\n"
        " raise SystemExit('No API key set. Please enter your OpenAI key.')\n\n"
        "client = OpenAI(api_key=api_key)\n"
    )
    request = (
        "Edit main.py so it works in local mode without an OPENAI_API_KEY. "
        "Do not download or install anything — just edit the file."
    )
    expected_behavior = (
        "Remove or bypass the API-key guard so the app starts in local mode "
        "without requiring OPENAI_API_KEY to be set"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        target = session.workspace / "main.py"
        target.write_text(guarded_app)

        result = session.prompt(request)

        result.workspace_contains("main.py")
        result.should(expected_behavior)

0 commit comments

Comments
 (0)