Skip to content

Commit d01adf5

Browse files
Authored: Merge pull request #21484 from BerriAI/fix/mcp-test-isolation
fix(tests): resolve MCP test isolation failures in parallel execution
2 parents b29ee63 + 7b6ffbb commit d01adf5

File tree

6 files changed

+120
-86
lines changed

6 files changed

+120
-86
lines changed

tests/test_litellm/integrations/langfuse/test_langfuse_prompt_management.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,26 @@
11
import os
2-
from unittest.mock import patch
2+
from unittest.mock import MagicMock, patch
33

44
from litellm.integrations.langfuse.langfuse_prompt_management import (
55
LangfusePromptManagement,
66
)
77

88

99
class TestLangfusePromptManagement:
10+
def setup_method(self):
11+
# Mock langfuse package to avoid triggering real import.
12+
# The real langfuse import fails on Python 3.14 due to pydantic v1 incompatibility.
13+
# This also prevents test-ordering issues when earlier tests remove sys.modules["langfuse"].
14+
self._mock_langfuse = MagicMock()
15+
self._mock_langfuse.version.__version__ = "3.0.0"
16+
self._langfuse_patcher = patch.dict(
17+
"sys.modules", {"langfuse": self._mock_langfuse}
18+
)
19+
self._langfuse_patcher.start()
20+
21+
def teardown_method(self):
22+
self._langfuse_patcher.stop()
23+
1024
def test_get_prompt_from_id(self):
1125
langfuse_prompt_management = LangfusePromptManagement()
1226
with patch.object(

tests/test_litellm/integrations/test_langfuse.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,15 @@ def test_max_langfuse_clients_limit():
472472
"""
473473
Test that the max langfuse clients limit is respected when initializing multiple clients
474474
"""
475+
# Mock langfuse package to avoid triggering real import.
476+
# The real langfuse import fails on Python 3.14 due to pydantic v1 incompatibility,
477+
# and sys.modules["langfuse"] may be absent after other tests in the suite clean up.
478+
mock_langfuse = MagicMock()
479+
mock_langfuse.version.__version__ = "3.0.0"
475480
# Set max clients to 2 for testing
476-
with patch.object(langfuse_module, "MAX_LANGFUSE_INITIALIZED_CLIENTS", 2):
481+
with patch.dict("sys.modules", {"langfuse": mock_langfuse}), patch.object(
482+
langfuse_module, "MAX_LANGFUSE_INITIALIZED_CLIENTS", 2
483+
):
477484
# Reset the counter
478485
litellm.initialized_langfuse_clients = 0
479486

tests/test_litellm/integrations/test_langfuse_otel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def add_metadata_from_header(litellm_params, metadata):
161161
# Use monkeypatch so the real module is restored after the test runs,
162162
# preventing sys.modules corruption that would break patch() targets in
163163
# later tests (the patch would hit the stub while the real module's
164-
# globals remain unpatch-ed).
164+
# globals remain unpatched).
165165
monkeypatch.setitem(sys.modules, "litellm.integrations.langfuse.langfuse", stub_module) # type: ignore
166166

167167
kwargs = {"litellm_params": {"metadata": {"foo": "bar"}}}

tests/test_litellm/proxy/_experimental/mcp_server/test_mcp_debug.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ async def mock_send(message):
230230
)
231231

232232
message = {"type": "http.response.start", "status": 200, "headers": []}
233-
asyncio.get_event_loop().run_until_complete(wrapped(message))
233+
asyncio.run(wrapped(message))
234234

235235
assert len(captured) == 1
236236
headers = dict(captured[0]["headers"])
@@ -247,6 +247,6 @@ async def mock_send(message):
247247
)
248248

249249
body_msg = {"type": "http.response.body", "body": b"hello"}
250-
asyncio.get_event_loop().run_until_complete(wrapped(body_msg))
250+
asyncio.run(wrapped(body_msg))
251251

252252
assert captured[0] == body_msg

tests/test_litellm/proxy/_experimental/mcp_server/test_mcp_server_manager.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,16 @@ def _reload_mcp_manager_module():
3939
"litellm.proxy._experimental.mcp_server.mcp_server_manager"
4040
]
4141
importlib.reload(utils_module)
42-
return importlib.reload(manager_module)
42+
reloaded = importlib.reload(manager_module)
43+
# After reload, server.py still holds a stale reference to the old
44+
# global_mcp_server_manager. Update it so tests that exercise server.py
45+
# functions (e.g. _get_tools_from_mcp_servers) use the fresh instance.
46+
server_module = sys.modules.get(
47+
"litellm.proxy._experimental.mcp_server.server"
48+
)
49+
if server_module is not None and hasattr(server_module, "global_mcp_server_manager"):
50+
server_module.global_mcp_server_manager = reloaded.global_mcp_server_manager
51+
return reloaded
4352

4453

4554
class TestMCPServerManager:

tests/test_litellm/proxy/test_litellm_pre_call_utils.py

Lines changed: 84 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,91 +1014,95 @@ async def test_add_litellm_metadata_from_request_headers():
10141014
# Set up test logger
10151015
litellm._turn_on_debug()
10161016
test_logger = TestCustomLogger()
1017+
original_callbacks = litellm.callbacks
10171018
litellm.callbacks = [test_logger]
10181019

1019-
# Prepare test data (ensure no streaming, add mock_response and api_key to route to litellm.acompletion)
1020-
headers = {"x-litellm-spend-logs-metadata": '{"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion", "timestamp": "2025-09-02T10:30:00Z"}'}
1021-
data = {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}], "stream": False, "mock_response": "Hi", "api_key": "fake-key"}
1022-
1023-
# Create mock request with headers
1024-
mock_request = MagicMock(spec=Request)
1025-
mock_request.headers = headers
1026-
mock_request.url.path = "/chat/completions"
1027-
1028-
# Create mock response
1029-
mock_fastapi_response = MagicMock(spec=Response)
1030-
1031-
# Create mock user API key dict
1032-
mock_user_api_key_dict = UserAPIKeyAuth(
1033-
api_key="test-key",
1034-
user_id="test-user",
1035-
org_id="test-org"
1036-
)
1037-
1038-
# Create mock proxy logging object
1039-
mock_proxy_logging_obj = MagicMock(spec=ProxyLogging)
1040-
1041-
# Create async functions for the hooks
1042-
async def mock_during_call_hook(*args, **kwargs):
1043-
return None
1044-
1045-
async def mock_pre_call_hook(*args, **kwargs):
1046-
return data
1047-
1048-
async def mock_post_call_success_hook(*args, **kwargs):
1049-
# Return the response unchanged
1050-
return kwargs.get('response', args[2] if len(args) > 2 else None)
1051-
1052-
mock_proxy_logging_obj.during_call_hook = mock_during_call_hook
1053-
mock_proxy_logging_obj.pre_call_hook = mock_pre_call_hook
1054-
mock_proxy_logging_obj.post_call_success_hook = mock_post_call_success_hook
1055-
1056-
# Create mock proxy config
1057-
mock_proxy_config = MagicMock()
1058-
1059-
# Create mock general settings
1060-
general_settings = {}
1061-
1062-
# Create mock select_data_generator with correct signature
1063-
def mock_select_data_generator(response=None, user_api_key_dict=None, request_data=None):
1064-
async def mock_generator():
1065-
yield "data: " + json.dumps({"choices": [{"delta": {"content": "Hello"}}]}) + "\n\n"
1066-
yield "data: [DONE]\n\n"
1067-
return mock_generator()
1068-
1069-
# Create the processor
1070-
processor = ProxyBaseLLMRequestProcessing(data=data)
1071-
1072-
# Call base_process_llm_request (it will use the mock_response="Hi" parameter)
1073-
result = await processor.base_process_llm_request(
1074-
request=mock_request,
1075-
fastapi_response=mock_fastapi_response,
1076-
user_api_key_dict=mock_user_api_key_dict,
1077-
route_type="acompletion",
1078-
proxy_logging_obj=mock_proxy_logging_obj,
1079-
general_settings=general_settings,
1080-
proxy_config=mock_proxy_config,
1081-
select_data_generator=mock_select_data_generator,
1082-
llm_router=None,
1083-
model="gpt-4",
1084-
is_streaming_request=False
1085-
)
1086-
1087-
# Sleep for 3 seconds to allow logging to complete
1088-
await asyncio.sleep(3)
1089-
1090-
# Check if standard_logging_object was set
1091-
assert test_logger.standard_logging_object is not None, "standard_logging_object should be populated after LLM request"
1092-
1093-
# Verify the logging object contains expected metadata
1094-
standard_logging_obj = test_logger.standard_logging_object
1020+
try:
1021+
# Prepare test data (ensure no streaming, add mock_response and api_key to route to litellm.acompletion)
1022+
headers = {"x-litellm-spend-logs-metadata": '{"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion", "timestamp": "2025-09-02T10:30:00Z"}'}
1023+
data = {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}], "stream": False, "mock_response": "Hi", "api_key": "fake-key"}
1024+
1025+
# Create mock request with headers
1026+
mock_request = MagicMock(spec=Request)
1027+
mock_request.headers = headers
1028+
mock_request.url.path = "/chat/completions"
1029+
1030+
# Create mock response
1031+
mock_fastapi_response = MagicMock(spec=Response)
1032+
1033+
# Create mock user API key dict
1034+
mock_user_api_key_dict = UserAPIKeyAuth(
1035+
api_key="test-key",
1036+
user_id="test-user",
1037+
org_id="test-org"
1038+
)
10951039

1096-
print(f"Standard logging object captured: {json.dumps(standard_logging_obj, indent=4, default=str)}")
1040+
# Create mock proxy logging object
1041+
mock_proxy_logging_obj = MagicMock(spec=ProxyLogging)
1042+
1043+
# Create async functions for the hooks
1044+
async def mock_during_call_hook(*args, **kwargs):
1045+
return None
1046+
1047+
async def mock_pre_call_hook(*args, **kwargs):
1048+
return data
1049+
1050+
async def mock_post_call_success_hook(*args, **kwargs):
1051+
# Return the response unchanged
1052+
return kwargs.get('response', args[2] if len(args) > 2 else None)
1053+
1054+
mock_proxy_logging_obj.during_call_hook = mock_during_call_hook
1055+
mock_proxy_logging_obj.pre_call_hook = mock_pre_call_hook
1056+
mock_proxy_logging_obj.post_call_success_hook = mock_post_call_success_hook
1057+
1058+
# Create mock proxy config
1059+
mock_proxy_config = MagicMock()
1060+
1061+
# Create mock general settings
1062+
general_settings = {}
1063+
1064+
# Create mock select_data_generator with correct signature
1065+
def mock_select_data_generator(response=None, user_api_key_dict=None, request_data=None):
1066+
async def mock_generator():
1067+
yield "data: " + json.dumps({"choices": [{"delta": {"content": "Hello"}}]}) + "\n\n"
1068+
yield "data: [DONE]\n\n"
1069+
return mock_generator()
1070+
1071+
# Create the processor
1072+
processor = ProxyBaseLLMRequestProcessing(data=data)
1073+
1074+
# Call base_process_llm_request (it will use the mock_response="Hi" parameter)
1075+
result = await processor.base_process_llm_request(
1076+
request=mock_request,
1077+
fastapi_response=mock_fastapi_response,
1078+
user_api_key_dict=mock_user_api_key_dict,
1079+
route_type="acompletion",
1080+
proxy_logging_obj=mock_proxy_logging_obj,
1081+
general_settings=general_settings,
1082+
proxy_config=mock_proxy_config,
1083+
select_data_generator=mock_select_data_generator,
1084+
llm_router=None,
1085+
model="gpt-4",
1086+
is_streaming_request=False
1087+
)
1088+
1089+
# Sleep for 3 seconds to allow logging to complete
1090+
await asyncio.sleep(3)
1091+
1092+
# Check if standard_logging_object was set
1093+
assert test_logger.standard_logging_object is not None, "standard_logging_object should be populated after LLM request"
1094+
1095+
# Verify the logging object contains expected metadata
1096+
standard_logging_obj = test_logger.standard_logging_object
1097+
1098+
print(f"Standard logging object captured: {json.dumps(standard_logging_obj, indent=4, default=str)}")
1099+
1100+
SPEND_LOGS_METADATA = standard_logging_obj["metadata"]["spend_logs_metadata"]
1101+
assert SPEND_LOGS_METADATA == dict(json.loads(headers["x-litellm-spend-logs-metadata"])), "spend_logs_metadata should be the same as the headers"
1102+
finally:
1103+
litellm.callbacks = original_callbacks
10971104

1098-
SPEND_LOGS_METADATA = standard_logging_obj["metadata"]["spend_logs_metadata"]
1099-
assert SPEND_LOGS_METADATA == dict(json.loads(headers["x-litellm-spend-logs-metadata"])), "spend_logs_metadata should be the same as the headers"
11001105

1101-
11021106

11031107
def test_get_internal_user_header_from_mapping_returns_expected_header():
11041108
mappings = [

0 commit comments

Comments (0)