diff --git a/pyproject.toml b/pyproject.toml index ab90844..7c6d639 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "cachetools>=5.0.0", "fastmcp==2.10.5", "jmespath~=1.0.1", + "json-repair", "loguru", "pydantic>=2.0,<2.12", ] diff --git a/src/mcp_server_datahub/mcp_server.py b/src/mcp_server_datahub/mcp_server.py index cb72f33..1e2e307 100644 --- a/src/mcp_server_datahub/mcp_server.py +++ b/src/mcp_server_datahub/mcp_server.py @@ -17,6 +17,7 @@ import pathlib import re import string +import threading from typing import ( Any, Awaitable, @@ -43,6 +44,7 @@ from datahub.sdk.search_filters import Filter, FilterDsl, load_filters from datahub.utilities.ordered_set import OrderedSet from fastmcp import FastMCP +from json_repair import repair_json from loguru import logger from pydantic import BaseModel @@ -984,13 +986,34 @@ def get_entities(urns: List[str] | str) -> List[dict] | dict: """ client = get_datahub_client() - # Handle single URN for backward compatibility + # Handle JSON-stringified arrays (same issue as filters in search tool) + # Some MCP clients/LLMs pass arrays as JSON strings instead of proper lists if isinstance(urns, str): - urns = [urns] - return_single = True + urns_str = urns.strip() # Remove leading/trailing whitespace + + # Try to parse as JSON array first + if urns_str.startswith("["): + try: + # Use json_repair to handle malformed JSON from LLMs + urns = json.loads(repair_json(urns_str)) + return_single = False + except (json.JSONDecodeError, Exception) as e: + logger.warning( + f"Failed to parse URNs as JSON array: {e}. Treating as single URN." + ) + # Not valid JSON, treat as single URN string + urns = [urns_str] + return_single = True + else: + # Single URN string + urns = [urns_str] + return_single = True else: return_single = False + # Trim whitespace from each URN (defensive against string concatenation issues) + urns = [urn.strip() for urn in urns] + results = [] for urn in urns: try: @@ -2467,6 +2490,7 @@ def _find_upstream_lineage_path( # Track if tools have been registered to prevent duplicate registration _tools_registered = False +_tools_registration_lock = threading.Lock() def register_all_tools(is_oss: bool = False) -> None: @@ -2476,15 +2500,19 @@ def register_all_tools(is_oss: bool = False) -> None: is_oss: If True, use OSS-compatible tool descriptions (limited sorting fields). If False, use Cloud descriptions (full sorting features). - Note: Can only be called once. Subsequent calls are no-ops to prevent duplicate registration. + Note: Thread-safe. Can be called multiple times from different threads. + Only the first call will register tools, subsequent calls are no-ops. """ global _tools_registered - if _tools_registered: - logger.debug("Tools already registered, skipping duplicate registration") - return - _tools_registered = True - logger.info(f"Registering MCP tools (is_oss={is_oss})") + # Thread-safe check-and-set using lock + with _tools_registration_lock: + if _tools_registered: + logger.debug("Tools already registered, skipping duplicate registration") + return + + _tools_registered = True + logger.info(f"Registering MCP tools (is_oss={is_oss})") # Choose sorting documentation based on deployment type if not is_oss: diff --git a/uv.lock b/uv.lock index 9829b3f..e584c9e 100644 --- a/uv.lock +++ b/uv.lock @@ -848,6 +848,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, ] +[[package]] +name = "json-repair" +version = "0.54.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/46/d3a4d9a3dad39bb4a2ad16b8adb9fe2e8611b20b71197fe33daa6768e85d/json_repair-0.54.1.tar.gz", hash = "sha256:d010bc31f1fc66e7c36dc33bff5f8902674498ae5cb8e801ad455a53b455ad1d", size = 38555, upload-time = "2025-11-19T14:55:24.265Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/96/c9aad7ee949cc1bf15df91f347fbc2d3bd10b30b80c7df689ce6fe9332b5/json_repair-0.54.1-py3-none-any.whl", hash = "sha256:016160c5db5d5fe443164927bb58d2dfbba5f43ad85719fa9bc51c713a443ab1", size = 29311, upload-time = "2025-11-19T14:55:22.886Z" }, +] + [[package]] name = "jsonref" version = "1.1.0" @@ -939,6 +948,7 @@ dependencies = [ { name = "cachetools" }, { name = "fastmcp" }, { name = "jmespath" }, + { name = "json-repair" }, { name = "loguru" }, { name = "pydantic" }, ] @@ -960,6 +970,7 @@ requires-dist = [ { name = "cachetools", specifier = ">=5.0.0" }, { name = "fastmcp", specifier = "==2.10.5" }, { name = "jmespath", specifier = "~=1.0.1" }, + { name = "json-repair" }, { name = "loguru" }, { name = "pydantic", specifier = ">=2.0,<2.12" }, ]