"""Local cache for ``./bai deployment chat`` per-deployment endpoint metadata.

Stores the manager-resolved ``endpoint_url`` and the served model name
discovered from the inference endpoint. Auto-managed: refetched on cache
miss, never user-edited. The user-supplied API key lives in a separate
file managed by ``deployment_chat_config``.

Persisted as a JSON file at ``~/.backend.ai/deployment_chat.json``.
"""
1210
1311from __future__ import annotations
1614import os
1715import stat
1816import tempfile
19- from dataclasses import dataclass , field
2017from datetime import datetime
2118from pathlib import Path
22- from typing import Any
2319from uuid import UUID
2420
25- from pydantic import BaseModel , ConfigDict , ValidationError
21+ from pydantic import BaseModel , ConfigDict , Field , ValidationError
2622
2723from ai .backend .client .cli .v2 .helpers import CONFIG_DIR
2824
@@ -40,60 +36,28 @@ class DeploymentChatCacheEntry(BaseModel):
4036 last_synced_at : datetime
4137
4238
class DeploymentChatCache(BaseModel):
    """In-memory representation of the chat cache file.

    Holds the schema version together with a mapping from deployment ID to
    the cached endpoint entry for that deployment.
    """

    # Schema version stored with the cache; defaults to the version this
    # build supports.
    schema_version: int = Field(default=CHAT_CACHE_SCHEMA_VERSION)
    # Cached endpoint entries keyed by deployment ID.
    deployments: dict[UUID, DeploymentChatCacheEntry] = Field(default_factory=dict)

    def get(self, deployment_id: UUID) -> DeploymentChatCacheEntry | None:
        """Return the cached entry for ``deployment_id``, or ``None`` if absent."""
        return self.deployments.get(deployment_id)

    def upsert(self, deployment_id: UUID, entry: DeploymentChatCacheEntry) -> None:
        """Store ``entry`` under ``deployment_id``, replacing any existing entry."""
        self.deployments[deployment_id] = entry

    def remove(self, deployment_id: UUID) -> bool:
        """Drop the entry for ``deployment_id``.

        Returns ``True`` when an entry existed and was removed, ``False``
        otherwise.
        """
        return self.deployments.pop(deployment_id, None) is not None
8454
class IncompatibleChatCacheError(Exception):
    """Raised when the on-disk cache file uses a newer schema than this build."""
8757
8858
8959def load_chat_cache (path : Path = CHAT_CACHE_FILE ) -> DeploymentChatCache :
90- """Load the chat cache; return an empty cache when the file is absent or unreadable.
91-
92- A corrupted JSON file or unreadable file is treated as an empty cache —
93- individual malformed entries are skipped rather than aborting the whole
94- load. A schema version newer than this build raises
95- :class:`IncompatibleChatCacheError` so the caller can warn the user.
96- """
60+ """Load the chat cache; return an empty cache when the file is absent or unreadable."""
9761 if not path .exists ():
9862 return DeploymentChatCache ()
9963 try :
@@ -109,7 +73,7 @@ def load_chat_cache(path: Path = CHAT_CACHE_FILE) -> DeploymentChatCache:
10973 f"deployment_chat.json schema version { schema } is newer than supported "
11074 f"{ CHAT_CACHE_SCHEMA_VERSION } ; please upgrade the client."
11175 )
112- entries : dict [UUID , DeploymentChatCacheEntry ] = {}
76+ deployments : dict [UUID , DeploymentChatCacheEntry ] = {}
11377 deployments_raw = raw .get ("deployments" ) or {}
11478 if isinstance (deployments_raw , dict ):
11579 for key , value in deployments_raw .items ():
@@ -120,26 +84,16 @@ def load_chat_cache(path: Path = CHAT_CACHE_FILE) -> DeploymentChatCache:
12084 if not isinstance (value , dict ):
12185 continue
12286 try :
123- entries [dep_id ] = DeploymentChatCacheEntry .model_validate (value )
87+ deployments [dep_id ] = DeploymentChatCacheEntry .model_validate (value )
12488 except ValidationError :
12589 continue
126- tokens : dict [UUID , str ] = {}
127- tokens_raw = raw .get ("tokens" ) or {}
128- if isinstance (tokens_raw , dict ):
129- for key , value in tokens_raw .items ():
130- try :
131- dep_id = UUID (str (key ))
132- except ValueError :
133- continue
134- if isinstance (value , str ):
135- tokens [dep_id ] = value
136- return DeploymentChatCache (entries = entries , tokens = tokens )
90+ return DeploymentChatCache (deployments = deployments )
13791
13892
13993def save_chat_cache (cache : DeploymentChatCache , path : Path = CHAT_CACHE_FILE ) -> None :
140- """Atomically write the chat cache and enforce ``0600`` permissions ."""
94+ """Atomically write the chat cache."""
14195 path .parent .mkdir (parents = True , exist_ok = True )
142- payload = json . dumps ( cache .to_dict (), indent = 2 , ensure_ascii = False )
96+ payload = cache .model_dump_json ( indent = 2 )
14397 fd , tmp_path_str = tempfile .mkstemp (
14498 prefix = path .name + "." ,
14599 suffix = ".tmp" ,
@@ -155,12 +109,3 @@ def save_chat_cache(cache: DeploymentChatCache, path: Path = CHAT_CACHE_FILE) ->
155109 if tmp_path .exists ():
156110 tmp_path .unlink (missing_ok = True )
157111 raise
158-
159-
160- def mask_token (token : str | None ) -> str :
161- """Render a token as ``sk-***...***xxxx`` for diagnostic display."""
162- if token is None :
163- return "<unset>"
164- if len (token ) <= 8 :
165- return "***"
166- return f"{ token [:3 ]} ***...***{ token [- 4 :]} "
0 commit comments