Skip to content

Commit 1378daf

Browse files
committed
PL-135538: Replace real llama-server tests with in-process dummy engine
- Add src/skvaider/dummy_engine/ -- DummyModel with HTTP control API (POST /__control/set_response, GET /__control/last_request, POST /__control/reset)
- Add DummyModelConfig to inference config's AnyModelConfig discriminated union
- Wire DummyModel into inference server lifespan (lazy import, isinstance branch)
- Replace OpenAIServerMock with DummyModel-based fixture
- Rename gemma -> gemma_real, add gemma fixture using DummyModel (default)
- Rename embeddinggemma -> embeddinggemma_real for stability test
- Update 6 tests to use dummy engine (test_health, test_metrics, test_proxy, test_model_name_case_normalization, test_health_monitoring streaming)
- Add aiohttp dev dependency
1 parent 5ec3303 commit 1378daf

11 files changed

Lines changed: 1274 additions & 115 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ module-name = ["aramaki", "skvaider"]
6464
typeCheckingMode = "strict"
6565
venvPath = "."
6666
venv = ".devenv/state/venv"
67-
exclude = ["doc/**"]
67+
exclude = ["doc/**", "scripts/**"]
6868

6969
[[tool.basedpyright.executionEnvironments]]
7070
root = "src/skvaider/inference/tests"
@@ -74,11 +74,11 @@ reportPrivateUsage = "none"
7474
root = "src/skvaider/tests"
7575
reportPrivateUsage = "none"
7676

77-
7877
[dependency-groups]
7978
dev = [
8079
"pytest>=8.4.1",
8180
"pytest-asyncio>=1.1.0",
8281
"pytest-cov>=6.2.1",
8382
"pytest-timeout>=2.4.0",
83+
"aiohttp>=3.9.0",
8484
]

scripts/repro_fc55454.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#!/usr/bin/env python3
2+
"""Reproduction for FC-55454: tool calls not executed when stream=False.
3+
4+
Uses the WHQ endpoint (ai.whq.fcio.net) via the config-whq.toml from
5+
skvaider_api_client. Compares streaming vs non-streaming behavior.
6+
"""
7+
8+
import asyncio
9+
import os
10+
import sys
11+
12+
from openai import AsyncOpenAI
13+
14+
# ---------------------------------------------------------------------------
# Config
#
# The API key is taken from the environment (SKVAIDER_API_KEY) so that no
# credential is committed together with this script. Any key that was
# previously committed here must be treated as leaked and revoked.
# The endpoint can be overridden with SKVAIDER_BASE_URL.
#
# NOTE(review): the module docstring names the WHQ endpoint
# (ai.whq.fcio.net) while the default below targets ai.dev.fcio.net --
# confirm which one is intended.
# ---------------------------------------------------------------------------
BASE_URL = os.environ.get(
    "SKVAIDER_BASE_URL", "https://ai.dev.fcio.net/openai/v1"
)
# Empty when the variable is unset; requests will then be rejected by the
# server instead of silently using a baked-in secret.
API_KEY = os.environ.get("SKVAIDER_API_KEY", "")
MODEL = "gpt-oss:20b"

# Single user turn that should make the model call the weather tool.
MESSAGES = [
    {"role": "user", "content": "What's the weather like in Hamburg right now?"}
]

# One function tool in the OpenAI chat-completions "tools" format.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and country, e.g. 'Berlin, Germany'",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit",
                    },
                },
                "required": ["location"],
            },
        },
    }
]
48+
49+
50+
async def test_non_streaming():
    """TEST A: issue the request with stream=False and report tool calls.

    This is the path FC-55454 describes as broken. Prints the finish reason,
    content and tool calls of the first choice.

    Returns:
        True when the first choice carries tool calls, False otherwise.
    """
    rule = "=" * 70
    print(rule)
    print("TEST A: NON-STREAMING (stream=False)")
    print(rule)

    client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)
    request = {
        "model": MODEL,
        "messages": MESSAGES,
        "tools": TOOLS,
        "tool_choice": "auto",
        "stream": False,
    }
    resp = await client.chat.completions.create(**request)

    first = resp.choices[0]
    message = first.message
    print(f"finish_reason: {first.finish_reason}")
    print(f"message.content: {message.content!r}")
    print(f"message.tool_calls: {message.tool_calls}")

    # Failure path first: no tool calls came back.
    if not message.tool_calls:
        print("[FAIL] No tool_calls in non-streaming response")
        if message.content:
            print(f" Got content instead: {message.content!r}")
        return False

    print("[OK] Tool calls present in non-streaming response")
    for call in message.tool_calls:
        print(f" - {call.function.name}({call.function.arguments})")
    return True
81+
82+
83+
async def test_streaming():
    """TEST B: issue the request with stream=True and report tool calls.

    Consumes the chunk stream, concatenates content deltas and merges the
    incremental tool-call deltas (keyed by their ``index``) into complete
    entries, then prints the reconstructed result.

    Returns:
        True when at least one tool call was reconstructed, False otherwise.
    """
    rule = "=" * 70
    print()
    print(rule)
    print("TEST B: STREAMING (stream=True)")
    print(rule)

    client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)
    request = {
        "model": MODEL,
        "messages": MESSAGES,
        "tools": TOOLS,
        "tool_choice": "auto",
        "stream": True,
    }
    stream = await client.chat.completions.create(**request)

    # Rebuild the final message from the individual chunks.
    text_pieces = []
    tool_calls = []
    finish_reason = None
    async for chunk in stream:
        if not chunk.choices:
            continue
        first = chunk.choices[0]
        delta = first.delta

        if delta.content:
            text_pieces.append(delta.content)

        for part in delta.tool_calls or ():
            slot = part.index
            # Pad with empty entries so that position ``slot`` exists.
            tool_calls.extend(
                {"id": "", "function": {"name": "", "arguments": ""}}
                for _ in range(len(tool_calls), slot + 1)
            )
            entry = tool_calls[slot]
            if part.id:
                entry["id"] = part.id
            fn = part.function
            if not fn:
                continue
            if fn.name:
                entry["function"]["name"] = fn.name
            if fn.arguments:
                # Arguments arrive as string fragments; append in order.
                entry["function"]["arguments"] += fn.arguments

        # Keep the last non-empty finish reason seen on the stream.
        finish_reason = first.finish_reason or finish_reason

    content = "".join(text_pieces)
    print(f"finish_reason: {finish_reason}")
    print(f"message.content: {content!r}")
    print(f"tool_calls: {tool_calls}")

    if not tool_calls:
        print("[FAIL] No tool_calls in streaming response")
        if content:
            print(f" Got content instead: {content!r}")
        return False

    print("[OK] Tool calls present in streaming response")
    for call in tool_calls:
        name = call["function"]["name"]
        arguments = call["function"]["arguments"]
        print(f" - {name}({arguments})")
    return True
147+
148+
149+
async def main():
    """Run both variants, print a summary and return a process exit code.

    Returns:
        0 when both variants returned tool calls,
        1 when only the non-streaming variant is missing them (the
        FC-55454 symptom),
        2 for any other outcome (both failed, or only streaming failed).
    """
    a_pass = await test_non_streaming()
    b_pass = await test_streaming()

    print()
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f" Non-streaming: {'PASS' if a_pass else 'FAIL'}")
    print(f" Streaming: {'PASS' if b_pass else 'FAIL'}")

    print()
    if a_pass and b_pass:
        print("Both modes work — bug may be fixed or model-dependent.")
        return 0
    if b_pass:
        # Streaming works, non-streaming does not: exactly the reported bug.
        print(
            "CONFIRMED: Bug FC-55454 — tool calls missing in non-streaming mode."
        )
        return 1
    if a_pass:
        # Previously reported as "Both failed", which was wrong for this case.
        print(
            "Only streaming failed — not the FC-55454 pattern; "
            "result is inconclusive."
        )
        return 2
    print("Both failed — check model availability / API key.")
    return 2
174+
175+
176+
if __name__ == "__main__":
    # Propagate the outcome of main() as the process exit status.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)

0 commit comments

Comments
 (0)