refactor(BA-5528): apply review feedback for chat CLI

jopemachine · jopemachine · commit c5e25053da07 · 2026-04-28T11:17:16.000+09:00
Address review comments on PR #11344:

- chat.py:
  - Drop the auto-clear of the cached API key on inference 401/403 — it was deleting user-supplied config out from under them. Just raise the error and ask the user to re-register.
  - Use print() instead of sys.stdout.write() for the response payload.
- chat_config.py:
  - Remove --no-token; clearing is the dedicated chat-config clear command's job. Resolved-key handling collapses to a single expression.
  - Use print() instead of click.echo() for status output.
  - Parse the inference endpoint's /v1/models response with a typed Pydantic model (_ServedModelsResponse) instead of manual dict.get walking.
  - _print_entry now delegates the entry portion to DeploymentChatCacheEntry.format_summary() so the per-entry fields are owned by the cache type.
- deployment_chat_cache.py / deployment_chat_config.py:
  - Drop schema_version as a Pydantic field on the wrapper model. The version is metadata, not data — emit it manually around model_dump in save_*, and check it manually in load_* before validating individual records.
  - DeploymentChatCacheEntry gains a format_summary() method returning the endpoint/default_model/last_synced_at lines so consumers don't duplicate that formatting.
diff --git a/src/ai/backend/client/cli/v2/deployment/chat.py b/src/ai/backend/client/cli/v2/deployment/chat.py
@@ -21,7 +21,6 @@
 from ai.backend.client.cli.v2.deployment_chat_config import (
     IncompatibleChatConfigError,
     load_chat_config,
-    save_chat_config,
 )
 from ai.backend.client.cli.v2.helpers import create_v2_registry, load_v2_config
 
@@ -73,7 +72,6 @@ def chat(
     runtime variants. Pass them through ``--params`` instead.
     """
     import json
-    import sys
 
     from ai.backend.client.v2.deployment_chat import (
         DeploymentChatAuthError,
@@ -148,19 +146,16 @@ async def _run() -> None:
                     body,
                 )
             except DeploymentChatAuthError as e:
-                chat_config.clear_token(deployment_id)
-                save_chat_config(chat_config)
                 raise click.ClickException(
                     f"The inference endpoint rejected the configured API key for "
-                    f"deployment {deployment_id}. The cached key has been cleared.\n"
-                    "Register a new one with:\n"
+                    f"deployment {deployment_id}. Re-register with:\n"
                     f"  ./bai deployment chat-config set {deployment_id} --token <api_key>"
                 ) from e
             except BackendAPIError as e:
                 raise click.ClickException(
                     f"Inference endpoint error ({e.status} {e.reason}): {e.data}"
                 ) from e
-        sys.stdout.write(json.dumps(response, indent=2, ensure_ascii=False, default=str) + "\n")
+        print(json.dumps(response, indent=2, ensure_ascii=False, default=str))
 
     _run_async(_run)
 
diff --git a/src/ai/backend/client/cli/v2/deployment/chat_config.py b/src/ai/backend/client/cli/v2/deployment/chat_config.py
@@ -9,6 +9,7 @@
 from uuid import UUID
 
 import click
+from pydantic import BaseModel, ConfigDict, Field, ValidationError
 
 from ai.backend.client.cli.v2.deployment_chat_cache import (
     DeploymentChatCacheEntry,
@@ -25,6 +26,16 @@
 from ai.backend.client.cli.v2.helpers import create_v2_registry, load_v2_config
 
 
+class _ServedModelEntry(BaseModel):
+    model_config = ConfigDict(extra="allow")
+    id: str
+
+
+class _ServedModelsResponse(BaseModel):
+    model_config = ConfigDict(extra="allow")
+    data: list[_ServedModelEntry] = Field(default_factory=list)
+
+
 def _run_async(coro_fn: Callable[[], Awaitable[None]]) -> None:
     from ai.backend.client.exceptions import BackendAPIError
 
@@ -61,12 +72,6 @@ def chat_config() -> None:
         "Omit when the runtime was started without an API key."
     ),
 )
-@click.option(
-    "--no-token",
-    is_flag=True,
-    default=False,
-    help="Explicitly clear the cached API key (deployment exposes no auth).",
-)
 @click.option(
     "--default-model",
     default=None,
@@ -79,13 +84,9 @@ def chat_config() -> None:
 def set_(
     deployment_id: UUID,
     api_key: str | None,
-    no_token: bool,
     default_model: str | None,
 ) -> None:
     """Register or update the chat cache entry for a deployment."""
-    if api_key and no_token:
-        raise click.ClickException("--token and --no-token are mutually exclusive.")
-
     connection = load_v2_config()
     try:
         cache = load_chat_cache()
@@ -97,13 +98,7 @@ def set_(
         raise click.ClickException(str(e)) from e
 
     existing_entry = cache.get(deployment_id)
-    resolved_key: str | None
-    if no_token:
-        resolved_key = None
-    elif api_key is not None:
-        resolved_key = api_key
-    else:
-        resolved_key = chat_config_store.get_token(deployment_id)
+    resolved_key = api_key if api_key is not None else chat_config_store.get_token(deployment_id)
 
     async def _run() -> None:
         registry = await create_v2_registry(connection)
@@ -137,16 +132,14 @@ async def _run() -> None:
             ),
         )
         save_chat_cache(cache)
-        if resolved_key is None:
-            chat_config_store.clear_token(deployment_id)
-        else:
+        if resolved_key is not None:
             chat_config_store.set_token(deployment_id, resolved_key)
-        save_chat_config(chat_config_store)
+            save_chat_config(chat_config_store)
 
-        click.echo(f"Updated chat cache entry for deployment {deployment_id}.")
+        print(f"Updated chat cache entry for deployment {deployment_id}.")
         if served_model:
-            click.echo(f"  default_model: {served_model}")
-        click.echo(f"  api_key:       {mask_token(resolved_key)}")
+            print(f"  default_model: {served_model}")
+        print(f"  api_key:       {mask_token(resolved_key)}")
 
     _run_async(_run)
 
@@ -174,13 +167,11 @@ async def _discover_model(
             payload = await client.list_models(endpoint_url, api_key)
         except (DeploymentChatAuthError, BackendAPIError, BackendClientError):
             return fallback
-    data = payload.get("data") if isinstance(payload, dict) else None
-    if not isinstance(data, list):
+    try:
+        parsed = _ServedModelsResponse.model_validate(payload)
+    except ValidationError:
         return fallback
-    for entry in data:
-        if isinstance(entry, dict) and isinstance(entry.get("id"), str):
-            return str(entry["id"])
-    return fallback
+    return parsed.data[0].id if parsed.data else fallback
 
 
 @chat_config.command(name="show")
@@ -206,11 +197,11 @@ def show(deployment_id: UUID | None) -> None:
 
     dep_ids = set(cache.deployments) | set(chat_config_store.tokens)
     if not dep_ids:
-        click.echo("No chat cache entries.")
+        print("No chat cache entries.")
         return
     for dep_id in dep_ids:
         _print_entry(dep_id, cache.get(dep_id), chat_config_store.get_token(dep_id))
-        click.echo("")
+        print()
 
 
 @chat_config.command(name="clear")
@@ -233,21 +224,25 @@ def clear(deployment_id: UUID) -> None:
     if removed_token:
         save_chat_config(chat_config_store)
     if removed_entry or removed_token:
-        click.echo(f"Removed chat cache entry for deployment {deployment_id}.")
+        print(f"Removed chat cache entry for deployment {deployment_id}.")
     else:
-        click.echo(f"No chat cache entry for deployment {deployment_id}.")
+        print(f"No chat cache entry for deployment {deployment_id}.")
 
 
 def _print_entry(
     deployment_id: UUID,
     entry: DeploymentChatCacheEntry | None,
     token: str | None,
 ) -> None:
-    click.echo(f"deployment_id : {deployment_id}")
-    click.echo(f"endpoint_url  : {entry.endpoint_url if entry else '-'}")
-    click.echo(f"api_key       : {mask_token(token)}")
-    click.echo(f"default_model : {(entry.default_model if entry else None) or '-'}")
-    click.echo(f"last_synced_at: {entry.last_synced_at.isoformat() if entry else '-'}")
+    print(f"deployment_id : {deployment_id}")
+    if entry is not None:
+        for line in entry.format_summary():
+            print(line)
+    else:
+        print("endpoint_url  : -")
+        print("default_model : -")
+        print("last_synced_at: -")
+    print(f"api_key       : {mask_token(token)}")
 
 
 __all__ = ("chat_config",)
diff --git a/src/ai/backend/client/cli/v2/deployment_chat_cache.py b/src/ai/backend/client/cli/v2/deployment_chat_cache.py
@@ -35,11 +35,17 @@ class DeploymentChatCacheEntry(BaseModel):
     default_model: str | None = None
     last_synced_at: datetime
 
+    def format_summary(self) -> list[str]:
+        return [
+            f"endpoint_url  : {self.endpoint_url}",
+            f"default_model : {self.default_model or '-'}",
+            f"last_synced_at: {self.last_synced_at.isoformat()}",
+        ]
+
 
 class DeploymentChatCache(BaseModel):
     """In-memory representation of the chat cache file."""
 
-    schema_version: int = Field(default=CHAT_CACHE_SCHEMA_VERSION)
     deployments: dict[UUID, DeploymentChatCacheEntry] = Field(default_factory=dict)
 
     def get(self, deployment_id: UUID) -> DeploymentChatCacheEntry | None:
@@ -93,7 +99,8 @@ def load_chat_cache(path: Path = CHAT_CACHE_FILE) -> DeploymentChatCache:
 def save_chat_cache(cache: DeploymentChatCache, path: Path = CHAT_CACHE_FILE) -> None:
     """Atomically write the chat cache."""
     path.parent.mkdir(parents=True, exist_ok=True)
-    payload = cache.model_dump_json(indent=2)
+    body = {"schema_version": CHAT_CACHE_SCHEMA_VERSION, **cache.model_dump(mode="json")}
+    payload = json.dumps(body, indent=2, ensure_ascii=False)
     fd, tmp_path_str = tempfile.mkstemp(
         prefix=path.name + ".",
         suffix=".tmp",
diff --git a/src/ai/backend/client/cli/v2/deployment_chat_config.py b/src/ai/backend/client/cli/v2/deployment_chat_config.py
@@ -29,7 +29,6 @@
 class DeploymentChatConfig(BaseModel):
     """Per-deployment API key registry (user-managed)."""
 
-    schema_version: int = Field(default=CHAT_CONFIG_SCHEMA_VERSION)
     tokens: dict[UUID, str] = Field(default_factory=dict)
 
     def get_token(self, deployment_id: UUID) -> str | None:
@@ -79,7 +78,8 @@ def load_chat_config(path: Path = CHAT_CONFIG_FILE) -> DeploymentChatConfig:
 def save_chat_config(config: DeploymentChatConfig, path: Path = CHAT_CONFIG_FILE) -> None:
     """Atomically write the chat config and enforce ``0600`` permissions."""
     path.parent.mkdir(parents=True, exist_ok=True)
-    payload = config.model_dump_json(indent=2)
+    body = {"schema_version": CHAT_CONFIG_SCHEMA_VERSION, **config.model_dump(mode="json")}
+    payload = json.dumps(body, indent=2, ensure_ascii=False)
     fd, tmp_path_str = tempfile.mkstemp(
         prefix=path.name + ".",
         suffix=".tmp",
diff --git a/tests/unit/client/cli/test_deployment_chat_cache.py b/tests/unit/client/cli/test_deployment_chat_cache.py
@@ -140,6 +140,20 @@ def test_load_skips_malformed_entry_payload(self, tmp_path: Path) -> None:
         assert list(loaded.deployments.keys()) == [good_id]
 
 
+class TestEntryFormatSummary:
+    def test_format_summary_returns_lines(self) -> None:
+        entry = _entry(default_model="meta/test-model")
+        lines = entry.format_summary()
+        assert any("endpoint_url" in line for line in lines)
+        assert any("meta/test-model" in line for line in lines)
+        assert any("last_synced_at" in line for line in lines)
+
+    def test_format_summary_dash_for_missing_default_model(self) -> None:
+        entry = _entry(default_model=None)
+        lines = entry.format_summary()
+        assert any("default_model : -" in line for line in lines)
+
+
 class TestEntryMutations:
     def test_upsert_overwrites_existing_entry(self) -> None:
         cache = DeploymentChatCache()