Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 109 additions & 19 deletions src/agentscope/mcp/_stateful_client_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""The base MCP stateful client class in AgentScope, that provides basic
functionality for stateful MCP clients."""
import asyncio
from abc import ABC
from contextlib import AsyncExitStack
from typing import List
Expand All @@ -19,6 +20,13 @@ class StatefulClientBase(MCPClientBase, ABC):

The developers should use `connect()` and `close()` methods to manage
the client lifecycle.

The context manager lifecycle (``AsyncExitStack`` enter/exit) is run
inside a dedicated background task so that ``connect()`` and ``close()``
can safely be called from different asyncio tasks — this avoids the
``anyio.CancelScope`` "exit in a different task" error that occurs in
frameworks like uvicorn/FastAPI where startup and shutdown may run in
separate tasks.
"""

is_connected: bool
Expand All @@ -43,17 +51,29 @@ def __init__(self, name: str) -> None:
# Cache the tools to avoid fetching them multiple times
self._cached_tools = None

async def connect(self) -> None:
"""Connect to MCP server."""
if self.is_connected:
raise RuntimeError(
"The MCP server is already connected. Call close() "
"before connecting again.",
)

self.stack = AsyncExitStack()

# Cross-task lifecycle management: a dedicated background task
# owns the AsyncExitStack so that __aenter__/__aexit__ always
# execute in the same task, satisfying anyio.CancelScope.
self._lifecycle_task: asyncio.Task | None = None
self._stop_event: asyncio.Event | None = None
self._ready_event: asyncio.Event | None = None
self._init_error: BaseException | None = None

# ------------------------------------------------------------------
# Lifecycle worker
# ------------------------------------------------------------------

async def _lifecycle_worker(self) -> None:
"""Run the full context-manager lifecycle in one task.

This method is spawned by ``connect()`` as a background task.
It enters the ``AsyncExitStack``, signals readiness, then blocks
until ``close()`` sets the stop event. On exit the stack is
closed **in the same task** that entered it, which is the key
requirement of ``anyio.CancelScope``.
"""
try:
self.stack = AsyncExitStack()
context = await self.stack.enter_async_context(
self.client,
)
Expand All @@ -63,36 +83,106 @@ async def connect(self) -> None:
await self.session.initialize()

self.is_connected = True
self._ready_event.set()
logger.info("MCP client connected.")
except Exception:
await self.stack.aclose()
self.stack = None

# Block until close() signals. The wait may also be
# interrupted by CancelledError if the session's internal
# anyio cancel scope is torn down; treat that as a stop.
try:
await self._stop_event.wait()
except (asyncio.CancelledError, Exception):
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_lifecycle_worker() swallows all exceptions from await self._stop_event.wait() via except (asyncio.CancelledError, Exception): pass. This can hide real programming errors (e.g., _stop_event being None) and make unexpected shutdowns silent. It’s safer to only treat cancellation as a stop signal and let unexpected exceptions propagate/log explicitly.

Suggested change
# Block until close() signals. The wait may also be
# interrupted by CancelledError if the session's internal
# anyio cancel scope is torn down; treat that as a stop.
try:
await self._stop_event.wait()
except (asyncio.CancelledError, Exception):
# Block until close() signals. The wait may also be
# interrupted by CancelledError if the session's internal
# anyio cancel scope is torn down; treat only cancellation
# as a stop signal and let unexpected errors propagate.
try:
await self._stop_event.wait()
except asyncio.CancelledError:
# Cancellation during shutdown is expected here.

Copilot uses AI. Check for mistakes.
pass

except Exception as e:
self._init_error = e
self._ready_event.set()
finally:
self.session = None
self.is_connected = False
self._cached_tools = None
if self.stack:
try:
await self.stack.aclose()
except Exception as e:
logger.warning(
"Error during MCP client cleanup: %s",
e,
)
finally:
self.stack = None

# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------

async def connect(self) -> None:
"""Connect to MCP server.

Spawns a background task that owns the full context-manager
lifecycle so that ``close()`` can be called from any task.
"""
if self.is_connected:
raise RuntimeError(
"The MCP server is already connected. Call close() "
"before connecting again.",
)

self._stop_event = asyncio.Event()
self._ready_event = asyncio.Event()
self._init_error = None

self._lifecycle_task = asyncio.create_task(
self._lifecycle_worker(),
)
Comment on lines +120 to +141
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change introduces new cross-task lifecycle semantics (background task ownership + event signaling) intended to prevent the anyio CancelScope “exit in a different task” issue. Existing MCP tests cover basic connect()/close() but don’t cover calling close() from a different task or overlapping connect()/close(). Please add unit tests to lock in the new behavior and prevent regressions (e.g., connect() then asyncio.create_task(client.close()) from another task, ensuring no hang and is_connected transitions correctly).

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

connect() only checks is_connected, so two concurrent connect() calls can both proceed before _lifecycle_worker flips is_connected=True. The second call overwrites _stop_event/_ready_event/_lifecycle_task, leaving the first lifecycle task orphaned and potentially leaking resources. Consider guarding with _lifecycle_task is not None and not _lifecycle_task.done() (or an asyncio.Lock) and raising a clear error (or awaiting the in-flight connect).

Copilot uses AI. Check for mistakes.

try:
await self._ready_event.wait()
except BaseException:
# If connect() is cancelled externally (e.g. asyncio.wait_for
# timeout), ensure the lifecycle worker is stopped. We must
# cancel the task (not just set _stop_event) because the worker
# may still be blocked inside enter_async_context().
self._lifecycle_task.cancel()
try:
await self._lifecycle_task
except (asyncio.CancelledError, Exception):
pass
self._lifecycle_task = None
raise

if self._init_error is not None:
await self._lifecycle_task
self._lifecycle_task = None
raise self._init_error

Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s a race where close() can be called before _ready_event is set: the worker will set _ready_event, immediately observe _stop_event already set, then exit and set is_connected=False. In that case connect() returns successfully even though the client is already closed. After waiting on _ready_event, consider validating that is_connected is still True (and/or _stop_event is not set / _lifecycle_task still running) and raise an error if shutdown happened during connect.

Suggested change
if (
not self.is_connected
or self._stop_event.is_set()
or self._lifecycle_task is None
or self._lifecycle_task.done()
):
if self._lifecycle_task is not None:
await self._lifecycle_task
self._lifecycle_task = None
raise RuntimeError(
"The MCP server was closed during connect().",
)

Copilot uses AI. Check for mistakes.
async def close(self, ignore_errors: bool = True) -> None:
"""Clean up the MCP client resources. You must call this method when
your application is done.

Signals the background lifecycle task to exit and waits for full
cleanup.

Args:
ignore_errors (`bool`):
Whether to ignore errors during cleanup. Defaults to `True`.
"""
if not self.is_connected:
if not self.is_connected and self._lifecycle_task is None:
raise RuntimeError(
"The MCP server is not connected. Call connect() before "
"closing.",
)
Comment on lines 66 to 178
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New/updated docstrings (_lifecycle_worker(), connect(), close()) don’t follow the repository’s docstring template (description + explicit Args/Returns sections with typed backticks). Please align these docstrings with the project’s standard so generated docs stay consistent.

Copilot uses AI. Check for mistakes.

try:
await self.stack.aclose()
if self._stop_event:
self._stop_event.set()
if self._lifecycle_task:
await self._lifecycle_task
self._lifecycle_task = None
except Exception as e:
if not ignore_errors:
raise e
logger.warning("Error during MCP client cleanup: %s", e)
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

close() doesn’t reset _lifecycle_task (and related lifecycle fields) if await self._lifecycle_task raises. That can leave the instance stuck in a state where future close() calls keep re-awaiting a failed task. Consider moving _lifecycle_task = None (and clearing _stop_event/_ready_event) into a finally block so state is consistent even on errors/cancellation.

Suggested change
self._lifecycle_task = None
except Exception as e:
if not ignore_errors:
raise e
logger.warning("Error during MCP client cleanup: %s", e)
except Exception as e:
if not ignore_errors:
raise e
logger.warning("Error during MCP client cleanup: %s", e)
finally:
self._lifecycle_task = None
self._stop_event = None
self._ready_event = None

Copilot uses AI. Check for mistakes.
finally:
self.stack = None
self.session = None
self.is_connected = False

async def list_tools(self) -> List[mcp.types.Tool]:
"""Get all available tools from the server.
Expand Down
Loading