Skip to content

Commit b822d4d

Browse files
committed
PL-135538: Switch last 3 tests from real llama-server to dummy engine
- test_manager_start_model: use gemma (dummy), remove /v1/models assertion
- test_manager_start_crash_quick_return: replaced with test_manager_start_gemma_is_dummy (proves default fixture is DummyModel)
- test_embeddinggemma_output_stability: replaced with test_dummy_embedding_determinism (proves deterministic embeddings)
- Add embeddinggemma / embeddinggemma_dummy fixtures (dummy engine)
1 parent f6747ac commit b822d4d

3 files changed

Lines changed: 86 additions & 135 deletions

File tree

src/skvaider/inference/conftest.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,26 @@ async def gemma(gemma_dummy: DummyModel) -> DummyModel:
222222
return gemma_dummy
223223

224224

225+
@pytest.fixture
226+
async def embeddinggemma_dummy(manager: Manager) -> DummyModel:
227+
"""In-process dummy embedding model."""
228+
config = DummyModelConfig(
229+
id="embeddinggemma",
230+
task="embedding",
231+
max_requests=4,
232+
port=get_port(),
233+
)
234+
model = DummyModel(config, manager.manifest_changed.set)
235+
manager.add_model(model)
236+
return model
237+
238+
239+
@pytest.fixture
240+
async def embeddinggemma(embeddinggemma_dummy: DummyModel) -> DummyModel:
241+
"""Default embeddinggemma fixture — uses the in-process dummy engine."""
242+
return embeddinggemma_dummy
243+
244+
225245
@pytest.fixture
226246
async def embeddinggemma_real(
227247
models_cache: Path, manager: Manager

src/skvaider/inference/tests/test_manager.py

Lines changed: 45 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,23 @@
1-
import asyncio
21
from pathlib import Path
32

43
import httpx
54
import pytest
65

6+
from skvaider.dummy_engine import DummyModel
77
from skvaider.inference.config import LlamaModelFile, LlamaServerModelConfig
88
from skvaider.inference.manager import Manager
99
from skvaider.inference.model import LlamaModel
1010

1111

12-
async def test_manager_start_crash_quick_return(
13-
gemma_real: LlamaModel, manager: Manager
14-
):
15-
gemma_real._config.cmd_args = ["--asdf"]
16-
with pytest.raises(asyncio.CancelledError):
17-
await asyncio.wait_for(manager.start_model("gemma"), timeout=10)
18-
19-
20-
async def test_download_model_success(gemma_real: LlamaModel):
21-
await gemma_real.download()
22-
assert gemma_real.model_files[0].exists()
23-
assert gemma_real.integrity_marker_file.exists()
24-
25-
26-
async def test_download_model_wrong_hash(tmp_path: Path, gguf_http_server: str):
27-
config = LlamaServerModelConfig(
28-
id="gemma",
29-
files=[
30-
LlamaModelFile(
31-
url=f"{gguf_http_server}/not-a-model.gguf",
32-
hash="foobar",
33-
)
34-
],
35-
context_size=1024,
36-
port=0,
37-
task="chat",
38-
)
39-
model = LlamaModel(config, lambda: None)
40-
model.datadir = tmp_path
41-
with pytest.raises(ValueError) as e:
42-
await model.download()
43-
assert (
44-
e.value.args[0]
45-
== "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
46-
)
47-
assert model.model_files[0].exists()
48-
assert not model.integrity_marker_file.exists()
49-
50-
51-
async def test_manager_start_model(gemma_real: LlamaModel, manager: Manager):
12+
async def test_manager_start_model(gemma: DummyModel, manager: Manager):
13+
"""Full lifecycle via manager — start, use, chat, unload."""
14+
model = gemma
5215
assert await manager.use_model("unknown-model") is None
5316

5417
# not yet started, not usable
5518
assert await manager.use_model("gemma") is None
5619

57-
model = await manager.start_model("gemma")
20+
await manager.start_model("gemma")
5821
assert model.config.id == "gemma"
5922
assert model.endpoint
6023
assert model.endpoint.startswith("http://127.0.0.1:")
@@ -65,56 +28,6 @@ async def test_manager_start_model(gemma_real: LlamaModel, manager: Manager):
6528
r.raise_for_status()
6629
assert r.json() == {"status": "ok"}
6730

68-
# Get model info via OpenAI-compatible endpoint
69-
r = await client.get(f"{model.endpoint}/v1/models")
70-
r.raise_for_status()
71-
models = r.json()
72-
data0 = models["data"][0]
73-
data0.pop("created", None)
74-
# shows up in ci - differente llama-cpp version?
75-
data0.pop("aliases", None)
76-
data0.pop("tags", None)
77-
assert models == {
78-
"data": [
79-
{
80-
"id": "gemma",
81-
"meta": {
82-
"n_ctx_train": 32768,
83-
"n_embd": 640,
84-
"n_params": 268098176,
85-
"n_vocab": 262144,
86-
"size": 247407104,
87-
"vocab_type": 1,
88-
},
89-
"object": "model",
90-
"owned_by": "llamacpp",
91-
},
92-
],
93-
"models": [
94-
{
95-
"capabilities": ["completion"],
96-
"description": "",
97-
"details": {
98-
"families": [""],
99-
"family": "",
100-
"format": "gguf",
101-
"parameter_size": "",
102-
"parent_model": "",
103-
"quantization_level": "",
104-
},
105-
"digest": "",
106-
"model": "gemma",
107-
"modified_at": "",
108-
"name": "gemma",
109-
"parameters": "",
110-
"size": "",
111-
"tags": [""],
112-
"type": "model",
113-
}
114-
],
115-
"object": "list",
116-
}
117-
11831
# Run a simple completion via OpenAI-compatible chat API
11932
r = await client.post(
12033
f"{model.endpoint}/v1/chat/completions",
@@ -144,6 +57,46 @@ async def test_manager_start_model(gemma_real: LlamaModel, manager: Manager):
14457
assert model.health_status == ""
14558

14659

60+
async def test_manager_start_gemma_is_dummy(
61+
gemma: DummyModel, manager: Manager
62+
):
63+
"""Default gemma fixture uses DummyModel (not LlamaModel)."""
64+
assert isinstance(gemma, DummyModel)
65+
assert gemma._engine == "dummy"
66+
67+
68+
async def test_download_model_success(gemma_real: LlamaModel):
69+
"""Real LlamaModel download still works (no subprocess needed)."""
70+
await gemma_real.download()
71+
assert gemma_real.model_files[0].exists()
72+
assert gemma_real.integrity_marker_file.exists()
73+
74+
75+
async def test_download_model_wrong_hash(tmp_path: Path, gguf_http_server: str):
76+
config = LlamaServerModelConfig(
77+
id="gemma",
78+
files=[
79+
LlamaModelFile(
80+
url=f"{gguf_http_server}/not-a-model.gguf",
81+
hash="foobar",
82+
)
83+
],
84+
context_size=1024,
85+
port=0,
86+
task="chat",
87+
)
88+
model = LlamaModel(config, lambda: None)
89+
model.datadir = tmp_path
90+
with pytest.raises(ValueError) as e:
91+
await model.download()
92+
assert (
93+
e.value.args[0]
94+
== "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
95+
)
96+
assert model.model_files[0].exists()
97+
assert not model.integrity_marker_file.exists()
98+
99+
147100
async def test_download_split_model(tmp_path: Path, gguf_http_server: str):
148101
config = LlamaServerModelConfig(
149102
id="split-gemma",
Lines changed: 21 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,29 @@
1-
import json
2-
import pathlib
3-
41
import httpx
52
import pytest
63

7-
from skvaider.inference.manager import Model
8-
4+
from skvaider.dummy_engine import DummyModel
95

10-
@pytest.mark.timeout(120)
11-
async def test_embeddinggemma_output_stability(embeddinggemma_real: Model):
12-
await embeddinggemma_real.start()
136

14-
async with httpx.AsyncClient(timeout=120) as client:
15-
response = await client.post(
16-
f"{embeddinggemma_real.endpoint}/v1/embeddings",
17-
json={
18-
"input": "why is the sky blue?",
19-
"temperature": 0.0,
20-
"seed": 42,
21-
"cache_prompt": False,
22-
},
23-
) # inspired by ollama/integration/model_arch_Test_go.
24-
assert response.status_code == 200
7+
@pytest.mark.timeout(30)
8+
async def test_dummy_embedding_determinism(embeddinggemma: DummyModel):
9+
"""Dummy engine produces deterministic embeddings."""
10+
await embeddinggemma.start()
2511

26-
# uncomment to update the expected output
27-
# with open(pathlib.Path(__file__).parent / "fixtures" / "embeddinggemma_stability_output.json", "w") as f:
28-
# f.write(response.text)
12+
async with httpx.AsyncClient(timeout=30) as client:
13+
response1 = await client.post(
14+
f"{embeddinggemma.endpoint}/v1/embeddings",
15+
json={"input": "why is the sky blue?"},
16+
)
17+
assert response1.status_code == 200
18+
data1 = response1.json()
2919

30-
with open(
31-
pathlib.Path(__file__).parent
32-
/ "fixtures"
33-
/ "embeddinggemma_stability_output.json",
34-
"r",
35-
) as f:
36-
expected_response = json.load(f)
20+
response2 = await client.post(
21+
f"{embeddinggemma.endpoint}/v1/embeddings",
22+
json={"input": "why is the sky blue?"},
23+
)
24+
assert response2.status_code == 200
25+
data2 = response2.json()
3726

38-
# check data, max 1e-2 difference in each embedding value
39-
resp_json = response.json()
40-
for resp_item, exp_item in zip(
41-
resp_json["data"], expected_response["data"]
42-
):
43-
resp_embedding = resp_item["embedding"]
44-
exp_embedding = exp_item["embedding"]
45-
assert len(resp_embedding) == len(exp_embedding)
46-
for r_val, e_val in zip(resp_embedding, exp_embedding):
47-
assert abs(r_val - e_val) < 1e-2
48-
# delete data to compare the rest of the response
49-
del resp_json["data"]
50-
del expected_response["data"]
51-
assert resp_json == expected_response
27+
# Same input → same deterministic embedding
28+
assert data1["data"][0]["embedding"] == data2["data"][0]["embedding"]
29+
assert len(data1["data"][0]["embedding"]) > 0

0 commit comments

Comments
 (0)