-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_local_ai_app_integration.py
More file actions
92 lines (67 loc) · 3.47 KB
/
Copy pathtest_local_ai_app_integration.py
File metadata and controls
92 lines (67 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""Behavioral tests for the `local-ai-app-integration` skill.
Run locally (requires an authenticated `claude` CLI):

    pytest eval/behavioral/tests/test_local_ai_app_integration.py -s

Prompts are scoped to code generation only ("Do not download or install
anything") so the agent does not attempt the GitHub download, which hangs
indefinitely. Checks prefer `logs_contains` / `workspace_contains` (instant)
over `should` / `should_not` (each spawns a judge subprocess) wherever
possible.
"""
from harness import claude
# Minimal app source that most tests in this module write to ``main.py`` in
# the agent workspace before prompting: it only imports the OpenAI client and
# constructs it with default arguments.
_STUB = "from openai import OpenAI\nclient = OpenAI()\n"
def test_launcher_module_written():
    """Ask the agent for a lemond launcher module and inspect the result.

    The workspace is seeded with the stub ``main.py``; the prompt forbids
    downloading or installing anything. Afterwards the run is checked with
    ``workspace_contains("lemond_launcher.py")`` and with three
    ``logs_contains`` markers naming the pieces the launcher should use.
    """
    with claude("sonnet", skill="local-ai-app-integration") as session:
        seed_file = session.workspace / "main.py"
        seed_file.write_text(_STUB)

        request = (
            "Write a lemond launcher module for this Python app. "
            "Do not download or install anything — just write the file."
        )
        outcome = session.prompt(request)

        outcome.workspace_contains("lemond_launcher.py")

        # Each marker stands for one expected ingredient of the launcher:
        #   "secrets"    -> randomly generated API key
        #   "socket"     -> port chosen dynamically through a socket bind
        #   "subprocess" -> lemond started as a child process
        for marker in ("secrets", "socket", "subprocess"):
            outcome.logs_contains(marker)
def test_http_client_timeout_is_120s():
    """Ask the agent to re-point the OpenAI client at lemond; look for "120".

    The workspace is seeded with the stub ``main.py`` and the agent is told
    to edit that file only. The run must then satisfy
    ``workspace_contains("main.py")`` and have ``"120"`` in its logs, which
    is used here as the signal that a 120-second timeout was written.
    """
    with claude("sonnet", skill="local-ai-app-integration") as session:
        target = session.workspace / "main.py"
        target.write_text(_STUB)

        request = (
            "Update main.py to re-point the OpenAI client at a local lemond "
            "instance. Do not download or install anything — just edit the file."
        )
        outcome = session.prompt(request)

        # NOTE(review): main.py is created by this test itself, so if
        # workspace_contains only checks for the file's presence this
        # assertion cannot fail — confirm against the harness.
        outcome.workspace_contains("main.py")
        # The literal "120" in the logs is taken as evidence of the 120s
        # timeout appearing in the code the agent wrote.
        outcome.logs_contains("120")
def test_health_check_uses_http_not_stdout():
    """Ask for a lemond health-check helper and require an HTTP-based probe.

    After seeding the stub ``main.py`` and prompting (no downloads or
    installs), the run logs must mention ``/api/v1/health`` and the run
    must not match the "parse stdout/stderr for readiness" behavior.
    """
    with claude("sonnet", skill="local-ai-app-integration") as session:
        seed_file = session.workspace / "main.py"
        seed_file.write_text(_STUB)

        request = (
            "Write a health-check helper for lemond in this Python app. "
            "Do not download or install anything — just write the code."
        )
        outcome = session.prompt(request)

        # Fast check: the health endpoint path has to appear in the logs.
        outcome.logs_contains("/api/v1/health")
        # Slower check (the module docstring notes should/should_not spawn a
        # judge subprocess): readiness must not come from process output.
        outcome.should_not(
            "Read or parse lemond's stdout or stderr to detect readiness"
        )
def test_no_preload_call_in_written_code():
    """Ask for a launcher that waits for readiness and forbid model pre-load.

    After seeding the stub ``main.py`` and prompting (no downloads or
    installs), the run logs must mention ``/api/v1/health`` and the run
    must not match the "POST /api/v1/load at startup" behavior.
    """
    with claude("sonnet", skill="local-ai-app-integration") as session:
        seed_file = session.workspace / "main.py"
        seed_file.write_text(_STUB)

        request = (
            "Write a lemond launcher for this Python app that waits for the "
            "server to be ready. Do not download or install anything."
        )
        outcome = session.prompt(request)

        # Readiness is expected to go through the health endpoint...
        outcome.logs_contains("/api/v1/health")
        # ...and not through an explicit model pre-load request.
        outcome.should_not(
            "Call POST /api/v1/load to pre-load the model at startup"
        )
def test_api_key_gate_bypassed_in_local_mode():
    """Ask the agent to make a key-gated app start without OPENAI_API_KEY.

    Unlike the other tests in this module, the seeded ``main.py`` is not the
    shared stub: it reads ``OPENAI_API_KEY`` from the environment and raises
    ``SystemExit`` when it is empty. The agent is asked to edit the file so
    it works in local mode, and the run is then checked for removing or
    bypassing that guard.
    """
    # App source with an API-key guard in front of the OpenAI client.
    gated_source = (
        "import os\n"
        "from openai import OpenAI\n\n"
        "api_key = os.environ.get('OPENAI_API_KEY', '')\n"
        "if not api_key:\n"
        " raise SystemExit('No API key set. Please enter your OpenAI key.')\n\n"
        "client = OpenAI(api_key=api_key)\n"
    )
    request = (
        "Edit main.py so it works in local mode without an OPENAI_API_KEY. "
        "Do not download or install anything — just edit the file."
    )
    expectation = (
        "Remove or bypass the API-key guard so the app starts in local mode "
        "without requiring OPENAI_API_KEY to be set"
    )

    with claude("sonnet", skill="local-ai-app-integration") as session:
        (session.workspace / "main.py").write_text(gated_source)
        outcome = session.prompt(request)

        # NOTE(review): main.py is created by this test itself, so if
        # workspace_contains only checks for the file's presence this
        # assertion cannot fail — confirm against the harness.
        outcome.workspace_contains("main.py")
        outcome.should(expectation)