66conversation start (and there manifest as a noisy traceback that aborts
77agent initialization).
88
9- The endpoint is intentionally side-effect-free: it spins up the MCP
10- connection, lists the advertised tools, then tears the connection down.
11- It never mutates server state or touches stored settings.
9+ The endpoint never mutates server state or touches stored settings: it
10+ spins up the MCP connection, lists the advertised tools, optionally invokes
11+ one caller-chosen tool (``tool_call``), then tears the connection down.
12+ The optional tool call exists because listing tools does not exercise the
13+ credentials many servers only use inside tool handlers (e.g. the Slack MCP
14+ server starts fine with a bogus token); callers must pick a read-only tool.
1215"""
1316
1417from __future__ import annotations
1518
1619import asyncio
1720from typing import Annotated , Any , Literal
1821
19- from fastapi import APIRouter
22+ import mcp .types
23+ from fastapi import APIRouter , Request
2024from pydantic import BaseModel , Field , model_validator
2125
26+ from openhands .agent_server ._secrets_exposure import get_cipher
2227from openhands .sdk .logger import get_logger
2328from openhands .sdk .mcp import create_mcp_tools
2429from openhands .sdk .mcp .exceptions import MCPError , MCPTimeoutError
30+ from openhands .sdk .utils .cipher import Cipher
31+ from openhands .sdk .utils .pydantic_secrets import decrypt_str_with_cipher_or_keep
2532
2633
2734logger = get_logger (__name__ )
@@ -85,6 +92,22 @@ def to_fastmcp_dict(self) -> dict[str, Any]:
8592 return out
8693
8794
95+ class MCPToolCallSpec (BaseModel ):
96+ """A single tool invocation to run as part of the connection test.
97+
98+ Listing tools does not exercise the credentials many servers only use
99+ inside tool handlers, so callers can name one tool to invoke after the
100+ listing succeeds. Callers are responsible for choosing a read-only tool;
101+ the endpoint executes it verbatim.
102+ """
103+
104+ name : str = Field (..., min_length = 1 , description = "Name of the tool to invoke" )
105+ arguments : dict [str , Any ] = Field (
106+ default_factory = dict ,
107+ description = "Arguments passed to the tool unchanged." ,
108+ )
109+
110+
88111class MCPTestRequest (BaseModel ):
89112 """Body for ``POST /api/mcp/test``."""
90113
@@ -108,6 +131,15 @@ class MCPTestRequest(BaseModel):
108131 le = 120 ,
109132 description = "Seconds to wait for connection + tools/list to complete." ,
110133 )
134+ tool_call : MCPToolCallSpec | None = Field (
135+ default = None ,
136+ description = (
137+ "Optional read-only tool to invoke after listing succeeds, so "
138+ "callers can verify credentials the server only exercises on "
139+ "tool invocation. Its outcome is reported verbatim in "
140+ "`tool_result` without affecting `ok`."
141+ ),
142+ )
111143
112144 @model_validator (mode = "after" )
113145 def _strip_name (self ) -> MCPTestRequest :
@@ -117,6 +149,19 @@ def _strip_name(self) -> MCPTestRequest:
117149 return self
118150
119151
152+ class MCPToolCallResult (BaseModel ):
153+ """Verbatim outcome of the requested ``tool_call``.
154+
155+ The endpoint stays provider-neutral: many servers report upstream
156+ failures (e.g. Slack's ``{"ok": false, "error": "invalid_auth"}``)
157+ as ordinary text content with ``isError`` unset, so interpreting the
158+ payload is the caller's job.
159+ """
160+
161+ is_error : bool = Field (description = "The MCP-level isError flag of the result." )
162+ text : str = Field (description = "Concatenated text content of the result." )
163+
164+
120165class MCPTestSuccess (BaseModel ):
121166 """Response when the candidate server connects and lists its tools."""
122167
@@ -125,6 +170,10 @@ class MCPTestSuccess(BaseModel):
125170 default_factory = list ,
126171 description = "Names of tools advertised by the MCP server." ,
127172 )
173+ tool_result : MCPToolCallResult | None = Field (
174+ default = None ,
175+ description = ("Outcome of the requested `tool_call`, when one was supplied." ),
176+ )
128177
129178
130179class MCPTestFailure (BaseModel ):
@@ -151,18 +200,81 @@ class MCPTestFailure(BaseModel):
151200# ---------------------------------------------------------------------------
152201
153202
154- def _server_to_fastmcp_dict (spec : _StdioMCPServerSpec | _RemoteMCPServerSpec ) -> dict :
203+ def _decrypt_mapping (cipher : Cipher | None , mapping : dict [str , str ]) -> dict [str , str ]:
204+ """Decrypt Fernet-encrypted values round-tripped from settings.
205+
206+ The GUI fetches stored settings with ``X-Expose-Secrets: encrypted`` and
207+ forwards the ciphertext unchanged so the edit flow can test the *real*
208+ stored credentials without ever seeing them. Plaintext values (the
209+ common case: freshly typed input) pass through untouched.
210+ """
211+ if cipher is None :
212+ return dict (mapping )
213+ return {
214+ key : decrypt_str_with_cipher_or_keep (
215+ cipher , value , description = "MCP test env/headers"
216+ )
217+ for key , value in mapping .items ()
218+ }
219+
220+
221+ def _server_to_fastmcp_dict (
222+ spec : _StdioMCPServerSpec | _RemoteMCPServerSpec , cipher : Cipher | None
223+ ) -> dict :
155224 if isinstance (spec , _StdioMCPServerSpec ):
156225 out : dict [str , Any ] = {"command" : spec .command , "args" : list (spec .args )}
157226 if spec .env :
158- out ["env" ] = dict ( spec .env )
227+ out ["env" ] = _decrypt_mapping ( cipher , spec .env )
159228 if spec .cwd :
160229 out ["cwd" ] = spec .cwd
161230 return out
162- return spec .to_fastmcp_dict ()
231+ remote = spec .to_fastmcp_dict ()
232+ if "headers" in remote :
233+ remote ["headers" ] = _decrypt_mapping (cipher , remote ["headers" ])
234+ return remote
235+
236+
237+ def _run_tool_call (
238+ client : Any , spec : MCPToolCallSpec , tool_names : list [str ], timeout : float
239+ ) -> MCPToolCallResult :
240+ """Invoke the requested tool on the connected client.
241+
242+ Uses ``call_tool_mcp`` (not ``call_tool``, which raises on ``isError``)
243+ so in-band failures come back as data -- mirrors ``MCPToolExecutor``.
244+ A timeout is reported as an errored result rather than failing the
245+ whole test: the server did connect and list, which is still useful.
246+ """
247+ if spec .name not in tool_names :
248+ return MCPToolCallResult (
249+ is_error = True ,
250+ text = (
251+ f"Tool { spec .name !r} not advertised by server "
252+ f"(available: { ', ' .join (tool_names ) or 'none' } )"
253+ ),
254+ )
255+ try :
256+ result : mcp .types .CallToolResult = client .call_async_from_sync (
257+ client .call_tool_mcp ,
258+ name = spec .name ,
259+ arguments = spec .arguments ,
260+ timeout = timeout ,
261+ )
262+ except TimeoutError :
263+ return MCPToolCallResult (
264+ is_error = True ,
265+ text = f"Tool { spec .name !r} call timed out after { timeout } seconds" ,
266+ )
267+ text = "\n " .join (
268+ block .text
269+ for block in result .content
270+ if isinstance (block , mcp .types .TextContent )
271+ )
272+ return MCPToolCallResult (is_error = bool (result .isError ), text = text )
163273
164274
165- def _probe_mcp_server (request : MCPTestRequest ) -> MCPTestResponse :
275+ def _probe_mcp_server (
276+ request : MCPTestRequest , cipher : Cipher | None
277+ ) -> MCPTestResponse :
166278 """Synchronous probe -- safe to run inside ``run_in_executor``.
167279
168280 ``create_mcp_tools`` already runs its own event loop in a background
@@ -171,14 +283,22 @@ def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
171283 threadpool first.
172284 """
173285
174- config = {"mcpServers" : {request .name : _server_to_fastmcp_dict (request .server )}}
286+ config = {
287+ "mcpServers" : {request .name : _server_to_fastmcp_dict (request .server , cipher )}
288+ }
175289
176290 try :
177291 # ``create_mcp_tools`` returns a client that owns a background loop
178292 # and a (possibly long-lived) subprocess. Use the context-manager
179293 # form so we always tear it down, even when listing succeeded.
180294 with create_mcp_tools (config , timeout = request .timeout ) as client :
181- return MCPTestSuccess (tools = [tool .name for tool in client .tools ])
295+ tool_names = [tool .name for tool in client .tools ]
296+ tool_result : MCPToolCallResult | None = None
297+ if request .tool_call is not None :
298+ tool_result = _run_tool_call (
299+ client , request .tool_call , tool_names , request .timeout
300+ )
301+ return MCPTestSuccess (tools = tool_names , tool_result = tool_result )
182302 except MCPTimeoutError as exc :
183303 logger .info ("MCP test timed out for server %r: %s" , request .name , exc )
184304 return MCPTestFailure (error = str (exc ), error_kind = "timeout" )
@@ -215,11 +335,21 @@ def _probe_mcp_server(request: MCPTestRequest) -> MCPTestResponse:
215335 "Attempt to connect to a candidate MCP server and list its tools, "
216336 "without persisting any settings. Useful for validating user input "
217337 "in 'add MCP server' flows before storing the config. "
338+ "Optionally invokes one caller-chosen (read-only) tool via "
339+ "`tool_call` and reports its outcome in `tool_result`, so callers "
340+ "can verify credentials that are only exercised on tool invocation. "
341+ "Encrypted `env`/`headers` values round-tripped from settings are "
342+ "decrypted before the connection is attempted. "
218343 "Returns 200 with `ok=false` for connection / timeout failures "
219344 "(those are expected during validation, not server errors)."
220345 ),
221346)
222- async def test_mcp_server (request : MCPTestRequest ) -> MCPTestResponse :
347+ async def test_mcp_server (
348+ request : MCPTestRequest , http_request : Request
349+ ) -> MCPTestResponse :
223350 """Probe a single MCP server config and report whether it works."""
351+ # Resolve the cipher here: the threadpool function below must not
352+ # reach back into ``http_request.app.state``.
353+ cipher = get_cipher (http_request )
224354 loop = asyncio .get_running_loop ()
225- return await loop .run_in_executor (None , _probe_mcp_server , request )
355+ return await loop .run_in_executor (None , _probe_mcp_server , request , cipher )
0 commit comments