Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/ha_mcp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from collections.abc import Coroutine # noqa: E402
from typing import TYPE_CHECKING, Any # noqa: E402

from fastmcp.exceptions import ToolError # noqa: E402
from pydantic import ValidationError as PydanticValidationError # noqa: E402
from starlette.requests import Request # noqa: E402
from starlette.responses import PlainTextResponse # noqa: E402

Expand Down Expand Up @@ -365,6 +367,36 @@ def filter(self, record: logging.LogRecord) -> bool:
return True


class ToolValidationLogFilter(logging.Filter):
    """Demote fastmcp tool-failure tracebacks to single-line warnings.

    Pydantic ValidationError and tool-raised ToolError aren't server bugs,
    so the traceback through fastmcp/pydantic internals is just noise. The
    structured error detail is preserved in the WARNING message; stack is
    intentionally dropped because these are user-input errors, not bugs.
    """

    # Only records emitted by fastmcp's own server logger are candidates.
    _TARGET_LOGGER = "fastmcp.server.server"

    def filter(self, record: logging.LogRecord) -> bool:
        # Records from other loggers, or without an attached exception,
        # pass through untouched.
        if record.name != self._TARGET_LOGGER:
            return True
        if not record.exc_info:
            return True

        message = record.getMessage()
        exc = record.exc_info[1]

        if isinstance(exc, PydanticValidationError) and "Error validating tool" in message:
            detail = exc.errors(include_url=False)
        elif isinstance(exc, ToolError) and "Error calling tool" in message:
            detail = exc
        else:
            # Anything else (e.g. a genuine server bug) keeps its traceback.
            return True

        self._demote(record, f"{message}: {detail}")
        return True

    @staticmethod
    def _demote(record: logging.LogRecord, message: str) -> None:
        """Rewrite *record* in place as a traceback-free WARNING."""
        record.msg = message
        record.args = ()
        record.levelno = logging.WARNING
        record.levelname = "WARNING"
        record.exc_info = None
        record.exc_text = None


def _setup_logging(log_level_str: str, force: bool = False) -> None:
"""Configure root logger with consistent timestamp format."""
logging.basicConfig(
Expand All @@ -376,6 +408,7 @@ def _setup_logging(log_level_str: str, force: bool = False) -> None:
logging.getLogger("mcp.server.streamable_http").addFilter(
StatelessSessionLogFilter()
)
logging.getLogger("fastmcp.server.server").addFilter(ToolValidationLogFilter())


def _get_timestamped_uvicorn_log_config() -> dict:
Expand Down
118 changes: 118 additions & 0 deletions tests/src/unit/test_tool_validation_log_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Unit tests for ToolValidationLogFilter."""

import logging

import pytest
from fastmcp.exceptions import FastMCPError, ToolError
from pydantic import BaseModel, ValidationError

from ha_mcp.__main__ import ToolValidationLogFilter


def _pydantic_validation_error() -> ValidationError:
    """Produce a genuine pydantic ValidationError (non-int fed to an int field)."""

    class _Probe(BaseModel):
        age: int

    with pytest.raises(ValidationError) as caught:
        _Probe(age="nope")
    return caught.value


class TestToolValidationLogFilter:
    """Verify the filter demotes fastmcp tool-failure tracebacks to WARNING."""

    def setup_method(self):
        # Fresh filter per test; the filter is stateless, but this keeps
        # tests independent by construction.
        self.log_filter = ToolValidationLogFilter()

    def _make_record(
        self,
        name: str,
        msg: str,
        exc: BaseException | None,
    ) -> logging.LogRecord:
        # A (type, value, None) triple is enough here: the filter never
        # reads the traceback slot.
        exc_info = None if exc is None else (type(exc), exc, None)
        return logging.LogRecord(
            name=name,
            level=logging.ERROR,
            pathname="",
            lineno=0,
            msg=msg,
            args=(),
            exc_info=exc_info,
        )

    def test_demotes_validation_error_to_warning(self):
        exc = _pydantic_validation_error()
        rec = self._make_record(
            "fastmcp.server.server",
            "Error validating tool 'ha_foo'",
            exc,
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.WARNING
        assert rec.levelname == "WARNING"
        assert rec.exc_info is None
        assert rec.exc_text is None
        # Structured error info folded into the message, no pydantic URL.
        rendered = rec.getMessage()
        assert "age" in rendered
        assert "errors.pydantic.dev" not in rendered

    def test_demotes_tool_error_to_warning(self):
        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            ToolError("bad input"),
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.WARNING
        assert rec.exc_info is None
        assert "bad input" in rec.getMessage()

    def test_passes_bare_exception_through_untouched(self):
        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            RuntimeError("server bug"),
        )
        before = rec.exc_info
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
        assert rec.exc_info is before

    def test_passes_non_tool_fastmcp_error_through(self):
        # A hypothetical future FastMCPError subclass that is NOT a ToolError
        # (e.g. AuthorizationError) should retain its traceback.
        class FutureAuthError(FastMCPError):
            pass

        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            FutureAuthError("unauthorized"),
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
        assert rec.exc_info is not None

    def test_leaves_other_loggers_unchanged(self):
        rec = self._make_record(
            "some.other.logger",
            "Error calling tool 'ha_foo'",
            ToolError("boom"),
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
        assert rec.exc_info is not None

    def test_passes_record_without_exc_info(self):
        rec = self._make_record(
            "fastmcp.server.server",
            "Error calling tool 'ha_foo'",
            None,
        )
        assert self.log_filter.filter(rec) is True
        assert rec.levelno == logging.ERROR
37 changes: 27 additions & 10 deletions tests/uat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@

Executes MCP test scenarios on real AI agent CLIs (Claude, Gemini, OpenAI-compatible) against a Home Assistant test instance. Designed to be driven by a calling agent that generates scenarios dynamically, runs them, and evaluates results.

## Quick Start

Two runners, two use cases:

```bash
# Run the pre-built story catalog (most common)
uv run python tests/uat/stories/run_story.py --all --agents gemini

# Run one ad-hoc scenario (must pipe JSON via stdin or use --scenario-file)
echo '{"test_prompt":"Search for light entities. Report how many you found."}' \
| uv run python tests/uat/run_uat.py --agents gemini
```

Commands must be prefixed with `uv run python` — the repo targets Python 3.13 via uv.

For OpenAI-compatible endpoints (LM Studio, Ollama, vLLM), the `--base-url` must include the `/v1` suffix, e.g. `http://172.19.0.1:1234/v1`. LM Studio requires the model to be loaded in its UI first; Ollama auto-loads on demand.

## Architecture

```
Expand Down Expand Up @@ -48,26 +65,26 @@ Each prompt runs in a separate CLI invocation (fresh context, no PR knowledge).
```bash
# Pipe scenario from stdin
echo '{"test_prompt":"Search for light entities. Report how many you found."}' | \
python tests/uat/run_uat.py --agents gemini
uv run python tests/uat/run_uat.py --agents gemini

# From file
python tests/uat/run_uat.py --scenario-file /tmp/scenario.json --agents claude,gemini
uv run python tests/uat/run_uat.py --scenario-file /tmp/scenario.json --agents claude,gemini

# Against already-running HA (skip container startup)
python tests/uat/run_uat.py --ha-url http://localhost:8123 --ha-token TOKEN --agents gemini
uv run python tests/uat/run_uat.py --ha-url http://localhost:8123 --ha-token TOKEN --agents gemini

# Test a specific branch
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini

# Local code (default) vs branch
python tests/uat/run_uat.py # uses: uv run --project . ha-mcp
python tests/uat/run_uat.py --branch pr-551 # uses: uvx --from git+...@pr-551 ha-mcp
uv run python tests/uat/run_uat.py # uses: uv run --project . ha-mcp
uv run python tests/uat/run_uat.py --branch pr-551 # uses: uvx --from git+...@pr-551 ha-mcp

# OpenAI-compatible local LLM (LM Studio, Ollama, vLLM, etc.)
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --agents openai --base-url http://localhost:1234/v1
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --agents openai --base-url http://localhost:1234/v1

# With a specific model and API key
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --agents openai \
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --agents openai \
--base-url http://localhost:1234/v1 --model my-model --api-key sk-xxx
```

Expand Down Expand Up @@ -176,10 +193,10 @@ To check if a failure is a regression vs pre-existing:

```bash
# Test the PR branch
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --branch feat/tool-errors --agents gemini

# Compare against master
echo '{"test_prompt":"..."}' | python tests/uat/run_uat.py --branch master --agents gemini
echo '{"test_prompt":"..."}' | uv run python tests/uat/run_uat.py --branch master --agents gemini
```

## Dependencies
Expand Down
67 changes: 67 additions & 0 deletions tests/uat/_inprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Shared in-process FastMCP client context for UAT.

Used by the story runner (setup/verify/teardown) and pytest fixtures.
Constructing the server is ~1s; this lets callers share one instance
across many tool calls against the same HA instance.
"""

from __future__ import annotations

import contextlib
import os
from collections.abc import AsyncIterator
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from fastmcp import Client


@contextlib.asynccontextmanager
async def inprocess_mcp_client(
    ha_url: str, ha_token: str
) -> AsyncIterator[Client]:
    """Build one in-process FastMCP client for setup/verify/teardown.

    Clearing ``ha_mcp.config._settings`` forces the next ``get_global_settings()``
    call to re-read the env vars we just set. The WebSocket disconnect tears
    down any cached connection to the previous URL so the next tool call
    reconnects to ``ha_url``.

    One client is shared across many tool calls to amortize the ~1s server
    construction cost. The tradeoff: if the shared client's WebSocket gets into
    a bad state mid-run, every subsequent call through it inherits the problem.

    Not safe for concurrent use: ``os.environ`` and ``ha_mcp.config._settings``
    are process-global, so overlapping callers would race on both.
    """
    from fastmcp import Client

    import ha_mcp.config
    from ha_mcp.client import HomeAssistantClient
    from ha_mcp.client.websocket_client import websocket_manager
    from ha_mcp.server import HomeAssistantSmartMCPServer

    # Snapshot the process-global state we're about to override.
    prev_url = os.environ.get("HOMEASSISTANT_URL")
    prev_token = os.environ.get("HOMEASSISTANT_TOKEN")
    prev_settings = ha_mcp.config._settings
    try:
        os.environ["HOMEASSISTANT_URL"] = ha_url
        os.environ["HOMEASSISTANT_TOKEN"] = ha_token
        ha_mcp.config._settings = None
        await websocket_manager.disconnect()

        client = HomeAssistantClient(base_url=ha_url, token=ha_token)
        server = HomeAssistantSmartMCPServer(client=client)
        async with Client(server.mcp) as mcp_client:
            yield mcp_client
    finally:
        # Restore the globals even if the disconnect itself raises —
        # otherwise one bad teardown would leak the env overrides and the
        # cleared settings cache into the rest of the process.
        try:
            await websocket_manager.disconnect()
        finally:
            _restore_env("HOMEASSISTANT_URL", prev_url)
            _restore_env("HOMEASSISTANT_TOKEN", prev_token)
            ha_mcp.config._settings = prev_settings


def _restore_env(key: str, value: str | None) -> None:
    """Reset ``key`` to its pre-context value, removing it if it was unset."""
    if value is None:
        os.environ.pop(key, None)
    else:
        os.environ[key] = value
11 changes: 11 additions & 0 deletions tests/uat/_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Shared logging setup for UAT entry points."""

from __future__ import annotations

import logging


def configure_cli_logging() -> None:
    """Silence third-party INFO chatter; keep our uat.* trace visible."""
    # Root at WARNING mutes noisy libraries; the message-only format keeps
    # CLI output clean.
    logging.basicConfig(level=logging.WARNING, format="%(message)s")
    uat_logger = logging.getLogger("uat")
    uat_logger.setLevel(logging.INFO)
Loading
Loading