OpenHands
diff --git a/openhands-agent-server/openhands/agent_server/mcp_router.py b/openhands-agent-server/openhands/agent_server/mcp_router.py
Lines changed: 142 additions & 12 deletions
@@ -6,22 +6,29 @@
 conversation start (and there manifest as a noisy traceback that aborts
 agent initialization).
 
-The endpoint is intentionally side-effect-free: it spins up the MCP
-connection, lists the advertised tools, then tears the connection down.
-It never mutates server state or touches stored settings.
+The endpoint never mutates server state or touches stored settings: it
+spins up the MCP connection, lists the advertised tools, optionally invokes
+one caller-chosen tool (``tool_call``), then tears the connection down.
+The optional tool call exists because listing tools does not exercise the
+credentials many servers only use inside tool handlers (e.g. the Slack MCP
+server starts fine with a bogus token); callers must pick a read-only tool.
 """
 
 from __future__ import annotations
 
 import asyncio
 from typing import Annotated, Any, Literal
 
-from fastapi import APIRouter
+import mcp.types
+from fastapi import APIRouter, Request
 from pydantic import BaseModel, Field, model_validator
 
+from openhands.agent_server._secrets_exposure import get_cipher
 from openhands.sdk.logger import get_logger
 from openhands.sdk.mcp import create_mcp_tools
 from openhands.sdk.mcp.exceptions import MCPError, MCPTimeoutError
+from openhands.sdk.utils.cipher import Cipher
+from openhands.sdk.utils.pydantic_secrets import decrypt_str_with_cipher_or_keep
 
 
 logger = get_logger(__name__)
@@ -85,6 +92,22 @@ def to_fastmcp_dict(self) -> dict[str, Any]:
         return out
 
 
+class MCPToolCallSpec(BaseModel):
+    """One tool invocation to perform as part of the connection test.
+
+    Many servers only touch their credentials inside tool handlers, which a
+    plain tool listing never reaches, so callers may name one tool to call
+    once the listing has succeeded. The endpoint executes it verbatim;
+    choosing a read-only tool is the caller's responsibility.
+    """
+
+    name: str = Field(min_length=1, description="Name of the tool to invoke")
+    arguments: dict[str, Any] = Field(
+        description="Arguments passed to the tool unchanged.",
+        default_factory=dict,
+    )
+
+
 class MCPTestRequest(BaseModel):
     """Body for ``POST /api/mcp/test``."""
 
@@ -108,6 +131,15 @@ class MCPTestRequest(BaseModel):
         le=120,
         description="Seconds to wait for connection + tools/list to complete.",
     )
+    tool_call: MCPToolCallSpec | None = Field(
+        default=None,
+        description=(
+            "Optional read-only tool to invoke after listing succeeds, so "
+            "callers can verify credentials the server only exercises on "
+            "tool invocation. Its outcome is reported verbatim in "
+            "`tool_result` without affecting `ok`."
+        ),
+    )
 
     @model_validator(mode="after")
     def _strip_name(self) -> MCPTestRequest:
@@ -117,6 +149,19 @@ def _strip_name(self) -> MCPTestRequest:
         return self
 
 
+class MCPToolCallResult(BaseModel):
+    """Verbatim outcome of the requested ``tool_call``.
+
+    ``is_error`` is the MCP ``isError`` flag, also set when the tool is not
+    advertised or the call times out. The payload is never interpreted:
+    servers may report upstream failures (e.g. Slack's
+    ``{"ok": false, "error": "invalid_auth"}``) as plain text, ``isError`` unset.
+    """
+
+    is_error: bool = Field(description="The MCP-level isError flag of the result.")
+    text: str = Field(description="Concatenated text content of the result.")
+
+
 class MCPTestSuccess(BaseModel):
     """Response when the candidate server connects and lists its tools."""
 
@@ -125,6 +170,10 @@ class MCPTestSuccess(BaseModel):
         default_factory=list,
         description="Names of tools advertised by the MCP server.",
     )
+    tool_result: MCPToolCallResult | None = Field(
+        default=None,
+        description=("Outcome of the requested `tool_call`, when one was supplied."),
+    )
 
 
 class MCPTestFailure(BaseModel):
@@ -151,18 +200,81 @@ class MCPTestFailure(BaseModel):
 # ---------------------------------------------------------------------------
 
 
-def _server_to_fastmcp_dict(spec: _StdioMCPServerSpec | _RemoteMCPServerSpec) -> dict:
+def _decrypt_mapping(cipher: Cipher | None, mapping: dict[str, str]) -> dict[str, str]:
+    """Return a copy of ``mapping`` with Fernet-encrypted values decrypted.
+
+    Settings fetched by the GUI with ``X-Expose-Secrets: encrypted`` are sent
+    back as ciphertext, so the edit flow can test the *real* stored
+    credentials without ever seeing them. Freshly typed plaintext (the
+    common case) is kept as is; with no cipher, values are copied unchanged.
+    """
+    resolved = dict(mapping)
+    if cipher is not None:
+        # Values that are not ciphertext are kept unchanged by the helper.
+        for name, raw in mapping.items():
+            resolved[name] = decrypt_str_with_cipher_or_keep(
+                cipher, raw, description="MCP test env/headers"
+            )
+    return resolved
+
+
+def _server_to_fastmcp_dict(
+    spec: _StdioMCPServerSpec | _RemoteMCPServerSpec, cipher: Cipher | None
+) -> dict:
     if isinstance(spec, _StdioMCPServerSpec):
         out: dict[str, Any] = {"command": spec.command, "args": list(spec.args)}
         if spec.env:
-            out["env"] = dict(spec.env)
+            out["env"] = _decrypt_mapping(cipher, spec.env)  # decrypt stored secrets
         if spec.cwd:
             out["cwd"] = spec.cwd
         return out
-    return spec.to_fastmcp_dict()
+    remote = spec.to_fastmcp_dict()
+    if "headers" in remote:  # values may be ciphertext round-tripped from settings
+        remote["headers"] = _decrypt_mapping(cipher, remote["headers"])
+    return remote
+
+
+def _run_tool_call(
+    client: Any, spec: MCPToolCallSpec, tool_names: list[str], timeout: float
+) -> MCPToolCallResult:
+    """Call ``spec.name`` on the connected ``client`` and report the outcome.
+
+    The call goes through ``call_tool_mcp`` rather than ``call_tool`` (which
+    raises on ``isError``) so in-band failures come back as data, mirroring
+    ``MCPToolExecutor``. An unadvertised tool or a timeout yields an errored
+    result instead of failing the whole test: connect + list still succeeded.
+    """
+    requested = spec.name
+    if requested not in tool_names:
+        available = ", ".join(tool_names) or "none"
+        message = (
+            f"Tool {requested!r} not advertised by server "
+            f"(available: {available})"
+        )
+        return MCPToolCallResult(is_error=True, text=message)
+    try:
+        outcome: mcp.types.CallToolResult = client.call_async_from_sync(
+            client.call_tool_mcp,
+            name=requested,
+            arguments=spec.arguments,
+            timeout=timeout,
+        )
+    except TimeoutError:
+        # Only timeouts become a result; any other exception propagates.
+        message = f"Tool {requested!r} call timed out after {timeout} seconds"
+        return MCPToolCallResult(is_error=True, text=message)
+    # Only text blocks are reported; other content types are left out.
+    chunks = [
+        item.text
+        for item in outcome.content
+        if isinstance(item, mcp.types.TextContent)
+    ]
+    return MCPToolCallResult(is_error=bool(outcome.isError), text="\n".join(chunks))
 
 
-def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
+def _probe_mcp_server(
+    request: MCPTestRequest, cipher: Cipher | None
+) -> MCPTestResponse:
     """Synchronous probe -- safe to run inside ``run_in_executor``.
 
     ``create_mcp_tools`` already runs its own event loop in a background
@@ -171,14 +283,22 @@ def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
     threadpool first.
     """
 
-    config = {"mcpServers": {request.name: _server_to_fastmcp_dict(request.server)}}
+    config = {
+        "mcpServers": {request.name: _server_to_fastmcp_dict(request.server, cipher)}
+    }
 
     try:
         # ``create_mcp_tools`` returns a client that owns a background loop
         # and a (possibly long-lived) subprocess. Use the context-manager
         # form so we always tear it down, even when listing succeeded.
         with create_mcp_tools(config, timeout=request.timeout) as client:
-            return MCPTestSuccess(tools=[tool.name for tool in client.tools])
+            tool_names = [tool.name for tool in client.tools]
+            tool_result: MCPToolCallResult | None = None
+            if request.tool_call is not None:
+                tool_result = _run_tool_call(
+                    client, request.tool_call, tool_names, request.timeout
+                )
+            return MCPTestSuccess(tools=tool_names, tool_result=tool_result)
     except MCPTimeoutError as exc:
         logger.info("MCP test timed out for server %r: %s", request.name, exc)
         return MCPTestFailure(error=str(exc), error_kind="timeout")
@@ -215,11 +335,21 @@ def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
         "Attempt to connect to a candidate MCP server and list its tools, "
         "without persisting any settings. Useful for validating user input "
         "in 'add MCP server' flows before storing the config. "
+        "Optionally invokes one caller-chosen (read-only) tool via "
+        "`tool_call` and reports its outcome in `tool_result`, so callers "
+        "can verify credentials that are only exercised on tool invocation. "
+        "Encrypted `env`/`headers` values round-tripped from settings are "
+        "decrypted before the connection is attempted. "
         "Returns 200 with `ok=false` for connection / timeout failures "
         "(those are expected during validation, not server errors)."
     ),
 )
-async def test_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
+async def test_mcp_server(
+    request: MCPTestRequest, http_request: Request
+) -> MCPTestResponse:
     """Probe a single MCP server config and report whether it works."""
+    # Resolve the cipher here: the threadpool function below must not
+    # reach back into ``http_request.app.state``.
+    cipher = get_cipher(http_request)
     loop = asyncio.get_running_loop()
-    return await loop.run_in_executor(None, _probe_mcp_server, request)
+    return await loop.run_in_executor(None, _probe_mcp_server, request, cipher)