
Commit 1779e04

Add sampling support example and add ask_user (#138)
* Add sampling support to chatbot example
* Use LLM in sampling callback with version check (#139)
* Use info logging in chatbot (#142)
* Add ask_user support (#143)
* Upgrade MCP and add ask_user
* Fix example lint issues
1 parent f13e027 commit 1779e04

File tree: 8 files changed (+150 -14 lines)


docs/api/context.md

Lines changed: 16 additions & 0 deletions
```diff
@@ -40,6 +40,22 @@ result = await ctx.ask_llm(
 print(result.content.text)
 ```
 
+## User Elicitation
+
+Call `ask_user()` when you need additional input from the client. It wraps the
+underlying MCP `elicit()` API:
+
+```python
+class BookingPreferences(BaseModel):
+    alternativeDate: str | None
+    checkAlternative: bool = False
+
+result = await ctx.ask_user(
+    message="No tables available. Try another date?",
+    schema=BookingPreferences,
+)
+```
+
 ## Extending Context
 
 For now, if you need context functionality, you can extend the base class:
```
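The documented snippet stops at the `ask_user()` call. As a rough sketch of consuming its return value — assuming the MCP Python SDK's elicitation result types, where `action` is "accept", "decline", or "cancel" and an accepted result carries the validated model in `data` — a hypothetical resolver might branch like this (`book_table` and the returned strings are made up for illustration):

```python
# A minimal sketch, not part of the commit. Assumes the MCP SDK's
# ElicitationResult: `.action` distinguishes accept/decline/cancel and an
# accepted result exposes the validated Pydantic model as `.data`.
from pydantic import BaseModel


class BookingPreferences(BaseModel):
    alternativeDate: str | None
    checkAlternative: bool = False


async def book_table(ctx) -> str:  # hypothetical resolver; ctx is an EnrichContext
    result = await ctx.ask_user(
        message="No tables available. Try another date?",
        schema=BookingPreferences,
    )
    if result.action == "accept":
        prefs = result.data  # a validated BookingPreferences instance
        if prefs.checkAlternative and prefs.alternativeDate:
            return f"Checking availability for {prefs.alternativeDate}"
        return "Booking cancelled: no alternative requested"
    # The user declined or dismissed the prompt.
    return "Booking cancelled"
```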

examples/openai_chat_agent/app.py

Lines changed: 78 additions & 4 deletions
```diff
@@ -7,17 +7,75 @@
 from __future__ import annotations
 
 import asyncio
+import logging
 import os
+from importlib import metadata
+from typing import TYPE_CHECKING
 
 import httpx
 from dotenv import load_dotenv
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_ollama import ChatOllama
 from langchain_openai import ChatOpenAI
-from mcp_use import MCPAgent, MCPClient
+from mcp.types import (
+    CreateMessageRequestParams,
+    CreateMessageResult,
+    ErrorData,
+    TextContent,
+)
+from mcp_use import MCPAgent, MCPClient, load_config_file
+from packaging.version import Version
+
+if TYPE_CHECKING:  # pragma: no cover - only for type hints
+    from mcp import ClientSession
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
 
 SYSTEM_MESSAGE = "You are a helpful assistant that talks to the user and uses tools via MCP."
 
 
+def make_sampling_callback(llm: ChatOpenAI | ChatOllama):
+    async def sampling_callback(
+        context: ClientSession, params: CreateMessageRequestParams
+    ) -> CreateMessageResult | ErrorData:
+        lc_messages = []
+        system_prompt = getattr(params, "systemPrompt", None)
+        if system_prompt:
+            lc_messages.append(SystemMessage(content=system_prompt))
+        for msg in params.messages:
+            content = msg.content.text
+            if msg.role == "assistant":
+                lc_messages.append(AIMessage(content=content))
+            else:
+                lc_messages.append(HumanMessage(content=content))
+
+        try:
+            logger.info(f"Sampling with messages: {lc_messages}")
+            max_tokens = getattr(params, "maxTokens", None)
+            stop_sequences = getattr(params, "stopSequences", None)
+            result_msg = await llm.ainvoke(
+                lc_messages,
+                temperature=params.temperature,
+                max_tokens=max_tokens,
+                stop=stop_sequences,
+            )
+        except Exception as exc:
+            logger.error(f"Failed to invoke llm for sampling: {exc}")
+            return ErrorData(code=400, message=str(exc))
+
+        text = getattr(result_msg, "content", str(result_msg))
+        model_name = getattr(llm, "model", "llm")
+        logger.info(f"Sampling result: {text}")
+        return CreateMessageResult(
+            content=TextContent(text=text, type="text"),
+            model=model_name,
+            role="assistant",
+        )
+
+    return sampling_callback
+
+
 async def ensure_ollama_running(model: str) -> None:
     """Check that an Ollama server is running."""
     try:
@@ -40,18 +98,34 @@ async def run_memory_chat() -> None:
     load_dotenv()
     config_file = os.path.join(os.path.dirname(__file__), "config.json")
 
-    print("Initializing chat...")
-    client = MCPClient.from_config_file(config_file)
-
     openai_key = os.getenv("OPENAI_API_KEY")
     ollama_model = os.getenv("OLLAMA_MODEL", "llama3.2")
 
+    print("Initializing chat...")
+
     if openai_key:
         llm = ChatOpenAI(model="gpt-4o")
     else:
         await ensure_ollama_running(ollama_model)
         llm = ChatOllama(model=ollama_model)
 
+    try:
+        mcp_use_version = metadata.version("mcp_use")
+    except metadata.PackageNotFoundError:  # pragma: no cover - dev env only
+        mcp_use_version = "0"
+
+    if Version(mcp_use_version) > Version("1.3.6"):
+        client = MCPClient(
+            load_config_file(config_file),
+            sampling_callback=make_sampling_callback(llm),
+        )
+    else:
+        logger.warning(
+            "mcp-use %s does not support sampling, install >1.3.6. Disabling sampling callback",
+            mcp_use_version,
+        )
+        client = MCPClient(load_config_file(config_file))
+
     agent = MCPAgent(
         llm=llm,
         client=client,
```
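For orientation: mcp-use invokes this callback whenever a connected server issues a sampling request (for example, the travel planner calling `ctx.ask_llm()`), and the callback must answer with a `CreateMessageResult` or an `ErrorData`. Below is a hypothetical smoke test that exercises `make_sampling_callback` directly with a stand-in LLM; it assumes the `mcp.types` constructors already imported in the diff plus `SamplingMessage`, and that `make_sampling_callback` from the example is in scope. No server or real model is involved.

```python
# Rough sketch only. FakeLLM stands in for ChatOpenAI/ChatOllama and returns a
# LangChain-style message object with a `content` attribute.
import asyncio

from mcp.types import CreateMessageRequestParams, SamplingMessage, TextContent


class FakeLLM:
    model = "fake-model"

    async def ainvoke(self, messages, **kwargs):
        class _Msg:
            content = "Lisbon"

        return _Msg()


async def main() -> None:
    callback = make_sampling_callback(FakeLLM())  # from the example above
    params = CreateMessageRequestParams(
        messages=[
            SamplingMessage(
                role="user",
                content=TextContent(type="text", text="Pick one city."),
            )
        ],
        maxTokens=50,
    )
    result = await callback(None, params)  # the session argument is unused here
    print(result.content.text)  # -> "Lisbon"


asyncio.run(main())
```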
Lines changed: 2 additions & 2 deletions
```diff
@@ -1,8 +1,8 @@
 {
   "mcpServers": {
-    "shop_api": {
+    "travel_agent": {
       "command": "python",
-      "args": ["../shop_api/app.py"]
+      "args": ["../server_side_llm_travel_planner/app.py"]
     }
   }
 }
```

examples/openai_chat_agent/requirements.txt

Lines changed: 1 addition & 0 deletions
```diff
@@ -4,3 +4,4 @@ langchain_ollama
 langchain_community
 mcp_use
 python-dotenv
+httpx
```

examples/server_side_llm_travel_planner/app.py

Lines changed: 6 additions & 7 deletions
```diff
@@ -63,22 +63,21 @@ async def plan_trip(
 ) -> list[Destination]:
     """Return three destinations that best match the given preferences."""
     ctx = app.get_context()
-
     bullet_list = "\n".join(f"- {d.name}: {d.summary}" for d in DESTINATIONS)
     prompt = (
         "Select the three best destinations from the list below based on the "
-        "given preferences. Reply with a JSON list of names only.\nPreferences: "
+        "given preferences. Reply with a JSON list of names only. "
+        "The text should be directly parsable with json.loads in Python. "
+        'Do NOT add ```json like markdown. Example response:\n["San Francisco"]'
+        "\n\n\nPreferences: "
         f"{preferences}\n\n{bullet_list}"
     )
-    result = await ctx.sampling(
+    result = await ctx.ask_llm(
         prompt,
         model_preferences=prefer_fast_model(),
         max_tokens=50,
     )
-    try:
-        names = json.loads(result.content.text)
-    except Exception:
-        return []
+    names = json.loads(result.content.text)
     return [d for d in DESTINATIONS if d.name in names]
 
 
```
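Two changes worth noting: the prompt now spells out the output contract (a bare JSON list that `json.loads` can parse, with no Markdown fences), and the old `try/except` fallback to an empty list is gone, so a malformed reply surfaces as an exception rather than being swallowed. A small, hypothetical illustration of that contract follows; `Destination` and `DESTINATIONS` here are simplified stand-ins, not the example's real definitions.

```python
# Illustration only: what a compliant sampled reply must look like for the
# fence-free json.loads parse above to succeed.
import json

from pydantic import BaseModel


class Destination(BaseModel):  # simplified stand-in
    name: str
    summary: str


DESTINATIONS = [
    Destination(name="San Francisco", summary="Fog, hills, sourdough."),
    Destination(name="Kyoto", summary="Temples and tea houses."),
]

reply = '["San Francisco"]'  # bare JSON list, no ```json wrapper
names = json.loads(reply)  # raises on malformed or fenced output
picked = [d for d in DESTINATIONS if d.name in names]
assert [d.name for d in picked] == ["San Francisco"]
```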

src/enrichmcp/context.py

Lines changed: 10 additions & 0 deletions
```diff
@@ -6,6 +6,7 @@
 
 from typing import Literal
 
+from mcp.server.elicitation import ElicitationResult, ElicitSchemaModelT
 from mcp.server.fastmcp import Context  # pyright: ignore[reportMissingTypeArgument]
 from mcp.types import (
     CreateMessageResult,
@@ -108,6 +109,15 @@ async def sampling(
 
         return await self.ask_llm(messages, **kwargs)
 
+    async def ask_user(
+        self,
+        message: str,
+        schema: type[ElicitSchemaModelT],
+    ) -> ElicitationResult:
+        """Interactively ask the client for input using MCP elicitation."""
+
+        return await super().elicit(message=message, schema=schema)
+
 
 def prefer_fast_model() -> ModelPreferences:
     """Model preferences optimized for speed and cost."""
```

tests/test_elicitation.py

Lines changed: 28 additions & 0 deletions
```diff
@@ -0,0 +1,28 @@
+from unittest.mock import AsyncMock, Mock, patch
+
+import pytest
+from pydantic import BaseModel
+
+from enrichmcp import EnrichContext
+
+
+class Prefs(BaseModel):
+    choice: bool
+
+
+@pytest.mark.asyncio
+async def test_ask_user_delegates_to_context_elicit():
+    ctx = EnrichContext.model_construct(_request_context=Mock())
+
+    with patch("enrichmcp.context.Context.elicit", AsyncMock(return_value="ok")) as mock:
+        got = await ctx.ask_user("hi", Prefs)
+    assert got == "ok"
+    mock.assert_awaited_once_with(message="hi", schema=Prefs)
+
+
+@pytest.mark.asyncio
+async def test_ask_user_requires_request_context():
+    ctx = EnrichContext()
+
+    with pytest.raises(ValueError, match="outside of a request"):
+        await ctx.ask_user("hi", Prefs)
```

uv.lock

Lines changed: 9 additions & 1 deletion
Generated file; diff not rendered.
