@@ -948,7 +948,7 @@ async def status():
948948 stats = _engine .get_stats ()
949949
950950 return {
951- "status" : "running " if stats .get ("running" ) else "stopped " ,
951+ "status" : "generating " if stats .get ("running" ) else "idle " ,
952952 "model" : _model_name ,
953953 "uptime_s" : round (stats .get ("uptime_seconds" , 0 ), 1 ),
954954 "steps_executed" : stats .get ("steps_executed" , 0 ),
@@ -985,7 +985,11 @@ async def cache_stats():
985985 "pil_image_cache" : get_pil_cache_stats (),
986986 }
987987 except ImportError :
988- return {"error" : "Cache stats not available (mlx_vlm not loaded)" }
988+ return {
989+ "message" : "Vision cache stats not available (text-only model loaded). "
990+ "Prompt cache is managed internally by the engine." ,
991+ "model_type" : "llm" ,
992+ }
989993
990994
991995@app .delete ("/v1/cache" )
@@ -1019,6 +1023,14 @@ async def list_models() -> ModelsResponse:
10191023 return ModelsResponse (data = models )
10201024
10211025
@app.get("/v1/models/{model_id}", dependencies=[Depends(verify_api_key)])
async def retrieve_model(model_id: str) -> ModelInfo:
    """Retrieve a specific model by ID (OpenAI-compatible).

    Only the single loaded model is served, so *model_id* must match
    either the full model name or its configured alias; anything else
    yields HTTP 404.
    """
    # Guard clause: reject unknown IDs up front, success path falls through.
    if model_id != _model_name and model_id != _model_alias:
        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")
    return ModelInfo(id=model_id)
1033+
10221034# =============================================================================
10231035# Embeddings Endpoint
10241036# =============================================================================
@@ -1570,6 +1582,13 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
15701582 """Create a text completion."""
15711583 engine = get_engine ()
15721584
1585+ # Validate model name matches loaded model
1586+ if request .model and request .model not in (_model_name , _model_alias ):
1587+ raise HTTPException (
1588+ status_code = 404 ,
1589+ detail = f"Model '{ request .model } ' not found. Available: { ', ' .join (filter (None , [_model_alias , _model_name ]))} " ,
1590+ )
1591+
15731592 # Handle single prompt or list of prompts
15741593 prompts = request .prompt if isinstance (request .prompt , list ) else [request .prompt ]
15751594
@@ -1701,6 +1720,13 @@ async def create_chat_completion(request: ChatCompletionRequest, raw_request: Re
17011720 detail = "messages must not be empty" ,
17021721 )
17031722
1723+ # Validate model name matches loaded model
1724+ if request .model and request .model not in (_model_name , _model_alias ):
1725+ raise HTTPException (
1726+ status_code = 404 ,
1727+ detail = f"Model '{ request .model } ' not found. Available: { ', ' .join (filter (None , [_model_alias , _model_name ]))} " ,
1728+ )
1729+
17041730 # Validate message roles
17051731 _valid_roles = {"system" , "user" , "assistant" , "tool" , "developer" }
17061732 for msg in request .messages :
@@ -2242,6 +2268,16 @@ async def create_anthropic_message(
22422268 )
22432269 logger .debug (f"[REQUEST] last user message preview: { last_user_preview !r} " )
22442270
2271+ # Validate model name matches loaded model
2272+ if anthropic_request .model and anthropic_request .model not in (
2273+ _model_name ,
2274+ _model_alias ,
2275+ ):
2276+ raise HTTPException (
2277+ status_code = 404 ,
2278+ detail = f"Model '{ anthropic_request .model } ' not found. Available: { ', ' .join (filter (None , [_model_alias , _model_name ]))} " ,
2279+ )
2280+
22452281 # Convert Anthropic request -> OpenAI request
22462282 openai_request = anthropic_to_openai (anthropic_request )
22472283
0 commit comments