Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/ha_mcp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from collections.abc import Coroutine # noqa: E402
from typing import TYPE_CHECKING, Any # noqa: E402

from fastmcp.exceptions import ToolError # noqa: E402
from pydantic import ValidationError as PydanticValidationError # noqa: E402
from starlette.requests import Request # noqa: E402
from starlette.responses import PlainTextResponse # noqa: E402

Expand Down Expand Up @@ -365,6 +367,36 @@ def filter(self, record: logging.LogRecord) -> bool:
return True


class ToolValidationLogFilter(logging.Filter):
    """Demote fastmcp tool-failure tracebacks to single-line warnings.

    Pydantic ValidationError and tool-raised ToolError aren't server bugs,
    so the traceback through fastmcp/pydantic internals is just noise. The
    structured error detail is preserved in the WARNING message; stack is
    intentionally dropped because these are user-input errors, not bugs.
    """

    # Only records emitted by fastmcp's own server logger are candidates.
    _TARGET_LOGGER = "fastmcp.server.server"

    def filter(self, record: logging.LogRecord) -> bool:
        # Records from other loggers, or without an attached exception,
        # pass through untouched.
        if record.name != self._TARGET_LOGGER:
            return True
        if not record.exc_info:
            return True

        message = record.getMessage()
        exc = record.exc_info[1]

        if isinstance(exc, PydanticValidationError) and "Error validating tool" in message:
            detail = exc.errors(include_url=False)
        elif isinstance(exc, ToolError) and "Error calling tool" in message:
            detail = exc
        else:
            # Anything else (e.g. a genuine server bug) keeps its traceback.
            return True

        self._demote(record, f"{message}: {detail}")
        return True

    @staticmethod
    def _demote(record: logging.LogRecord, message: str) -> None:
        """Rewrite *record* in place as a traceback-free WARNING."""
        record.msg = message
        record.args = ()
        record.levelno = logging.WARNING
        record.levelname = "WARNING"
        record.exc_info = None
        record.exc_text = None


def _setup_logging(log_level_str: str, force: bool = False) -> None:
"""Configure root logger with consistent timestamp format."""
logging.basicConfig(
Expand All @@ -376,6 +408,7 @@ def _setup_logging(log_level_str: str, force: bool = False) -> None:
logging.getLogger("mcp.server.streamable_http").addFilter(
StatelessSessionLogFilter()
)
logging.getLogger("fastmcp.server.server").addFilter(ToolValidationLogFilter())


def _get_timestamped_uvicorn_log_config() -> dict:
Expand Down
118 changes: 118 additions & 0 deletions tests/src/unit/test_tool_validation_log_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Unit tests for ToolValidationLogFilter."""

import logging

import pytest
from fastmcp.exceptions import FastMCPError, ToolError
from pydantic import BaseModel, ValidationError

from ha_mcp.__main__ import ToolValidationLogFilter


def _pydantic_validation_error() -> ValidationError:
    """Produce a genuine pydantic ValidationError (non-int fed to an int field)."""

    class _Probe(BaseModel):
        age: int

    with pytest.raises(ValidationError) as caught:
        _Probe(age="nope")
    return caught.value


class TestToolValidationLogFilter:
    """Verify the filter demotes fastmcp tool-failure tracebacks to WARNING."""

    def setup_method(self):
        # Fresh filter per test; the filter is stateless, but this keeps
        # tests independent by construction.
        self.log_filter = ToolValidationLogFilter()

    def _make_record(
        self,
        name: str,
        msg: str,
        exc: BaseException | None,
    ) -> logging.LogRecord:
        # A (type, value, None) triple is enough here: the filter never
        # reads the traceback slot.
        exc_info = None if exc is None else (type(exc), exc, None)
        return logging.LogRecord(
            name=name,
            level=logging.ERROR,
            pathname="",
            lineno=0,
            msg=msg,
            args=(),
            exc_info=exc_info,
        )

    def test_demotes_validation_error_to_warning(self):
        exc = _pydantic_validation_error()
        rec = self._make_record(
            "fastmcp.server.server",
            "Error validating tool 'ha_foo'",
            exc,
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.WARNING
        assert rec.levelname == "WARNING"
        assert rec.exc_info is None
        assert rec.exc_text is None
        # Structured error info folded into the message, no pydantic URL.
        rendered = rec.getMessage()
        assert "age" in rendered
        assert "errors.pydantic.dev" not in rendered

    def test_demotes_tool_error_to_warning(self):
        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            ToolError("bad input"),
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.WARNING
        assert rec.exc_info is None
        assert "bad input" in rec.getMessage()

    def test_passes_bare_exception_through_untouched(self):
        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            RuntimeError("server bug"),
        )
        before = rec.exc_info
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
        assert rec.exc_info is before

    def test_passes_non_tool_fastmcp_error_through(self):
        # A hypothetical future FastMCPError subclass that is NOT a ToolError
        # (e.g. AuthorizationError) should retain its traceback.
        class FutureAuthError(FastMCPError):
            pass

        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            FutureAuthError("unauthorized"),
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
        assert rec.exc_info is not None

    def test_leaves_other_loggers_unchanged(self):
        rec = self._make_record(
            "some.other.logger",
            "Error calling tool 'ha_foo'",
            ToolError("boom"),
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
        assert rec.exc_info is not None

    def test_passes_record_without_exc_info(self):
        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            None,
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
37 changes: 27 additions & 10 deletions tests/uat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@

Executes MCP test scenarios on real AI agent CLIs (Claude, Gemini, OpenAI-compatible) against a Home Assistant test instance. Designed to be driven by a calling agent that generates scenarios dynamically, runs them, and evaluates results.

## Quick Start

Two runners, two use cases:

```bash
# Run the pre-built story catalog (most common)
uv run python tests/uat/stories/run_story.py --all --agents gemini

# Run one ad-hoc scenario (must pipe JSON via stdin or use --scenario-file)
echo '{"test_prompt":"Search for light entities. Report how many you found."}' \
| uv run python tests/uat/run_uat.py --agents gemini
```

Commands must be prefixed with `uv run python` — the repo targets Python 3.13 via uv.

For OpenAI-compatible endpoints (LM Studio, Ollama, vLLM), the `--base-url` must include the `/v1` suffix, e.g. `http://172.19.0.1:1234/v1`. LM Studio requires the model to be loaded in its UI first; Ollama auto-loads on demand.

## Architecture

```
Expand Down Expand Up @@ -48,26 +65,26 @@ Each prompt runs in a separate CLI invocation (fresh context, no PR knowledge).
```bash
# Pipe scenario from stdin
echo '{"test_prompt":"Search for light entities. Report how many you found."}' | \
python tests/uat/run_uat.py --agents gemini
uv run python tests/uat/run_uat.py --agents gemini

# From file
python tests/uat/run_uat.py --scenario-file /tmp/scenario.json --agents claude,gemini
uv run python tests/uat/run_uat.py --scenario-file /tmp/scenario.json --agents claude,gemini

# Against already-running HA (skip container startup)
python tests/uat/run_uat.py --ha-url http://localhost:8123 --ha-token TOKEN --agents gemini
uv run python tests/uat/run_uat.py --ha-url http://localhost:8123 --ha-token TOKEN --agents gemini

# Test a specific branch
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini

# Local code (default) vs branch
python tests/uat/run_uat.py # uses: uv run --project . ha-mcp
python tests/uat/run_uat.py --branch pr-551 # uses: uvx --from git+...@pr-551 ha-mcp
uv run python tests/uat/run_uat.py # uses: uv run --project . ha-mcp
uv run python tests/uat/run_uat.py --branch pr-551 # uses: uvx --from git+...@pr-551 ha-mcp

# OpenAI-compatible local LLM (LM Studio, Ollama, vLLM, etc.)
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --agents openai --base-url http://localhost:1234/v1
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --agents openai --base-url http://localhost:1234/v1

# With a specific model and API key
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --agents openai \
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --agents openai \
--base-url http://localhost:1234/v1 --model my-model --api-key sk-xxx
```

Expand Down Expand Up @@ -176,10 +193,10 @@ To check if a failure is a regression vs pre-existing:

```bash
# Test the PR branch
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini

# Compare against master
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --branch master --agents gemini
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --branch master --agents gemini
```

## Dependencies
Expand Down
67 changes: 67 additions & 0 deletions tests/uat/_inprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Shared in-process FastMCP client context for UAT.

Used by the story runner (setup/verify/teardown) and pytest fixtures.
Constructing the server is ~1s; this lets callers share one instance
across many tool calls against the same HA instance.
"""

from __future__ import annotations

import contextlib
import os
from collections.abc import AsyncIterator
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from fastmcp import Client


@contextlib.asynccontextmanager
async def inprocess_mcp_client(
    ha_url: str, ha_token: str
) -> AsyncIterator[Client]:
    """Build one in-process FastMCP client for setup/verify/teardown.

    Clearing ``ha_mcp.config._settings`` forces the next ``get_global_settings()``
    call to re-read the env vars we just set. The WebSocket disconnect tears
    down any cached connection to the previous URL so the next tool call
    reconnects to ``ha_url``.

    One client is shared across many tool calls to amortize the ~1s server
    construction cost. The tradeoff: if the shared client's WebSocket gets into
    a bad state mid-run, every subsequent call through it inherits the problem.

    Not safe for concurrent use: ``os.environ`` and ``ha_mcp.config._settings``
    are process-global, so overlapping callers would race on both.
    """
    from fastmcp import Client

    import ha_mcp.config
    from ha_mcp.client import HomeAssistantClient
    from ha_mcp.client.websocket_client import websocket_manager
    from ha_mcp.server import HomeAssistantSmartMCPServer

    # Snapshot the process-global state we're about to override.
    prev_url = os.environ.get("HOMEASSISTANT_URL")
    prev_token = os.environ.get("HOMEASSISTANT_TOKEN")
    prev_settings = ha_mcp.config._settings
    try:
        os.environ["HOMEASSISTANT_URL"] = ha_url
        os.environ["HOMEASSISTANT_TOKEN"] = ha_token
        ha_mcp.config._settings = None
        await websocket_manager.disconnect()

        client = HomeAssistantClient(base_url=ha_url, token=ha_token)
        server = HomeAssistantSmartMCPServer(client=client)
        async with Client(server.mcp) as mcp_client:
            yield mcp_client
    finally:
        # Restore the globals even if the disconnect itself raises —
        # otherwise one bad teardown would leak the env overrides and the
        # cleared settings cache into the rest of the process.
        try:
            await websocket_manager.disconnect()
        finally:
            _restore_env("HOMEASSISTANT_URL", prev_url)
            _restore_env("HOMEASSISTANT_TOKEN", prev_token)
            ha_mcp.config._settings = prev_settings


def _restore_env(key: str, value: str | None) -> None:
    """Reset ``key`` to its pre-context value, removing it if it was unset."""
    if value is None:
        os.environ.pop(key, None)
    else:
        os.environ[key] = value
11 changes: 11 additions & 0 deletions tests/uat/_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Shared logging setup for UAT entry points."""

from __future__ import annotations

import logging


def configure_cli_logging() -> None:
    """Silence third-party INFO chatter; keep our uat.* trace visible."""
    # Root at WARNING mutes noisy libraries; the message-only format keeps
    # CLI output clean.
    logging.basicConfig(level=logging.WARNING, format="%(message)s")
    uat_logger = logging.getLogger("uat")
    uat_logger.setLevel(logging.INFO)
Loading
Loading