1- import asyncio
21from pathlib import Path
32
43import httpx
54import pytest
65
6+ from skvaider .dummy_engine import DummyModel
77from skvaider .inference .config import LlamaModelFile , LlamaServerModelConfig
88from skvaider .inference .manager import Manager
99from skvaider .inference .model import LlamaModel
1010
1111
# Old-side test: every line of this block carries the `-` gutter marker of the
# rendered diff.
# It replaces the model's command-line arguments with the single flag "--asdf"
# and then expects that awaiting manager.start_model("gemma"), wrapped in
# asyncio.wait_for with a 10 second timeout, raises asyncio.CancelledError.
# NOTE(review): "--asdf" is presumably an unsupported flag used to make the
# server process exit right away (the test name says "crash") - confirm.
12- async def test_manager_start_crash_quick_return (
13- gemma_real : LlamaModel , manager : Manager
14- ):
15- gemma_real ._config .cmd_args = ["--asdf" ]
16- with pytest .raises (asyncio .CancelledError ):
17- await asyncio .wait_for (manager .start_model ("gemma" ), timeout = 10 )
18-
19-
# Old-side test: every line of this block carries the `-` gutter marker of the
# rendered diff.
# Awaits gemma_real.download() and then asserts that the first entry of
# model_files exists and that integrity_marker_file exists.
# NOTE(review): both are presumably path-like objects on disk - confirm
# against the LlamaModel definition.
20- async def test_download_model_success (gemma_real : LlamaModel ):
21- await gemma_real .download ()
22- assert gemma_real .model_files [0 ].exists ()
23- assert gemma_real .integrity_marker_file .exists ()
24-
25-
# Old-side test: every line of this block carries the `-` gutter marker of the
# rendered diff.
# Builds a LlamaServerModelConfig with one file entry whose URL points at
# "not-a-model.gguf" on the gguf_http_server fixture and whose declared hash is
# the placeholder "foobar", then expects download() to raise ValueError.
26- async def test_download_model_wrong_hash (tmp_path : Path , gguf_http_server : str ):
27- config = LlamaServerModelConfig (
28- id = "gemma" ,
29- files = [
30- LlamaModelFile (
31- url = f"{ gguf_http_server } /not-a-model.gguf" ,
32- hash = "foobar" ,
33- )
34- ],
35- context_size = 1024 ,
36- port = 0 ,
37- task = "chat" ,
38- )
# The second constructor argument is a callable that returns None; its role is
# not visible in this file.
39- model = LlamaModel (config , lambda : None )
# Point the model at pytest's per-test temporary directory.
40- model .datadir = tmp_path
41- with pytest .raises (ValueError ) as e :
42- await model .download ()
# The first argument of the raised ValueError is compared with a hex digest.
# That string equals the SHA-256 digest of zero bytes of input.
# NOTE(review): presumably the fixture serves an empty file and download()
# reports the digest it computed - confirm.
43- assert (
44- e .value .args [0 ]
45- == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
46- )
# After the failure the first model file must exist, while the integrity
# marker file must not.
47- assert model .model_files [0 ].exists ()
48- assert not model .integrity_marker_file .exists ()
49-
50-
51- async def test_manager_start_model (gemma_real : LlamaModel , manager : Manager ):
12+ async def test_manager_start_model (gemma : DummyModel , manager : Manager ):
13+ """Full lifecycle via manager — start, use, chat, unload."""
14+ model = gemma
5215 assert await manager .use_model ("unknown-model" ) is None
5316
5417 # not yet started, not usable
5518 assert await manager .use_model ("gemma" ) is None
5619
57- model = await manager .start_model ("gemma" )
20+ await manager .start_model ("gemma" )
5821 assert model .config .id == "gemma"
5922 assert model .endpoint
6023 assert model .endpoint .startswith ("http://127.0.0.1:" )
@@ -65,56 +28,6 @@ async def test_manager_start_model(gemma_real: LlamaModel, manager: Manager):
6528 r .raise_for_status ()
6629 assert r .json () == {"status" : "ok" }
6730
68- # Get model info via OpenAI-compatible endpoint
69- r = await client .get (f"{ model .endpoint } /v1/models" )
70- r .raise_for_status ()
71- models = r .json ()
72- data0 = models ["data" ][0 ]
73- data0 .pop ("created" , None )
74- # shows up in ci - differente llama-cpp version?
75- data0 .pop ("aliases" , None )
76- data0 .pop ("tags" , None )
77- assert models == {
78- "data" : [
79- {
80- "id" : "gemma" ,
81- "meta" : {
82- "n_ctx_train" : 32768 ,
83- "n_embd" : 640 ,
84- "n_params" : 268098176 ,
85- "n_vocab" : 262144 ,
86- "size" : 247407104 ,
87- "vocab_type" : 1 ,
88- },
89- "object" : "model" ,
90- "owned_by" : "llamacpp" ,
91- },
92- ],
93- "models" : [
94- {
95- "capabilities" : ["completion" ],
96- "description" : "" ,
97- "details" : {
98- "families" : ["" ],
99- "family" : "" ,
100- "format" : "gguf" ,
101- "parameter_size" : "" ,
102- "parent_model" : "" ,
103- "quantization_level" : "" ,
104- },
105- "digest" : "" ,
106- "model" : "gemma" ,
107- "modified_at" : "" ,
108- "name" : "gemma" ,
109- "parameters" : "" ,
110- "size" : "" ,
111- "tags" : ["" ],
112- "type" : "model" ,
113- }
114- ],
115- "object" : "list" ,
116- }
117-
11831 # Run a simple completion via OpenAI-compatible chat API
11932 r = await client .post (
12033 f"{ model .endpoint } /v1/chat/completions" ,
@@ -144,6 +57,46 @@ async def test_manager_start_model(gemma_real: LlamaModel, manager: Manager):
14457 assert model .health_status == ""
14558
14659
# New-side test: every line of this block carries the `+` gutter marker of the
# rendered diff.
# Asserts that the object supplied by the `gemma` fixture is a DummyModel
# instance and that its private `_engine` attribute equals "dummy".
# The `manager` fixture is requested but never referenced in the body.
# NOTE(review): presumably it is requested only for its setup side effects -
# confirm against the fixture definitions.
60+ async def test_manager_start_gemma_is_dummy (
61+ gemma : DummyModel , manager : Manager
62+ ):
63+ """Default gemma fixture uses DummyModel (not LlamaModel)."""
64+ assert isinstance (gemma , DummyModel )
65+ assert gemma ._engine == "dummy"
66+
67+
# New-side test: every line of this block carries the `+` gutter marker of the
# rendered diff.
# Awaits gemma_real.download() and then asserts that the first entry of
# model_files exists and that integrity_marker_file exists.
# NOTE(review): both are presumably path-like objects on disk - confirm
# against the LlamaModel definition.
68+ async def test_download_model_success (gemma_real : LlamaModel ):
69+ """Real LlamaModel download still works (no subprocess needed)."""
70+ await gemma_real .download ()
71+ assert gemma_real .model_files [0 ].exists ()
72+ assert gemma_real .integrity_marker_file .exists ()
73+
74+
# New-side test: every line of this block carries the `+` gutter marker of the
# rendered diff.
# Builds a LlamaServerModelConfig with one file entry whose URL points at
# "not-a-model.gguf" on the gguf_http_server fixture and whose declared hash is
# the placeholder "foobar", then expects download() to raise ValueError.
75+ async def test_download_model_wrong_hash (tmp_path : Path , gguf_http_server : str ):
76+ config = LlamaServerModelConfig (
77+ id = "gemma" ,
78+ files = [
79+ LlamaModelFile (
80+ url = f"{ gguf_http_server } /not-a-model.gguf" ,
81+ hash = "foobar" ,
82+ )
83+ ],
84+ context_size = 1024 ,
85+ port = 0 ,
86+ task = "chat" ,
87+ )
# The second constructor argument is a callable that returns None; its role is
# not visible in this file.
88+ model = LlamaModel (config , lambda : None )
# Point the model at pytest's per-test temporary directory.
89+ model .datadir = tmp_path
90+ with pytest .raises (ValueError ) as e :
91+ await model .download ()
# The first argument of the raised ValueError is compared with a hex digest.
# That string equals the SHA-256 digest of zero bytes of input.
# NOTE(review): presumably the fixture serves an empty file and download()
# reports the digest it computed - confirm.
92+ assert (
93+ e .value .args [0 ]
94+ == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
95+ )
# After the failure the first model file must exist, while the integrity
# marker file must not.
96+ assert model .model_files [0 ].exists ()
97+ assert not model .integrity_marker_file .exists ()
98+
99+
147100async def test_download_split_model (tmp_path : Path , gguf_http_server : str ):
148101 config = LlamaServerModelConfig (
149102 id = "split-gemma" ,
0 commit comments