@@ -117,6 +117,7 @@ def _sync_wrapper(*args, _orig=original, **kwargs):
117117RATE_LIMIT_MAX_RETRIES = 10
118118RATE_LIMIT_BACKOFF_BASE = 2 # seconds
119119RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
120+ MINIMAX_API_BASE = "https://api.minimax.io/v1"
120121
121122# Empty-stream retries use a short fixed delay, not the rate-limit backoff.
122123# Conversation-structure issues are deterministic — long waits don't help.
@@ -324,11 +325,13 @@ def __init__(
324325 """
325326 self .model = model
326327 self .api_key = api_key
327- self .api_base = api_base
328+ self .api_base = api_base or self . _default_api_base_for_model ( model )
328329 self .extra_kwargs = kwargs
329330 # The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
330331 # several standard OpenAI params: max_output_tokens, stream_options.
331- self ._codex_backend = bool (api_base and "chatgpt.com/backend-api/codex" in api_base )
332+ self ._codex_backend = bool (
333+ self .api_base and "chatgpt.com/backend-api/codex" in self .api_base
334+ )
332335
333336 if litellm is None :
334337 raise ImportError (
@@ -341,6 +344,14 @@ def __init__(
341344 # override the mode. The responses_api_bridge in litellm handles
342345 # converting Chat Completions requests to Responses API format.
343346
@staticmethod
def _default_api_base_for_model(model: str) -> str | None:
    """Return a provider-specific default API base, or None.

    MiniMax models (``minimax/...`` LiteLLM-routed ids or bare
    ``minimax-...`` model names) need an explicit API base; every other
    provider falls through to LiteLLM's own defaults.

    Args:
        model: The configured model identifier; matched case-insensitively.

    Returns:
        ``MINIMAX_API_BASE`` for MiniMax models, otherwise ``None``.
    """
    # str.startswith accepts a tuple of prefixes — one call, no `or` chain.
    if model.lower().startswith(("minimax/", "minimax-")):
        return MINIMAX_API_BASE
    return None
354+
344355 def _completion_with_rate_limit_retry (
345356 self , max_retries : int | None = None , ** kwargs : Any
346357 ) -> Any :
@@ -735,6 +746,77 @@ def _tool_to_openai_format(self, tool: Tool) -> dict[str, Any]:
735746 },
736747 }
737748
749+ def _is_minimax_model (self ) -> bool :
750+ """Return True when the configured model targets MiniMax."""
751+ model = (self .model or "" ).lower ()
752+ return model .startswith ("minimax/" ) or model .startswith ("minimax-" )
753+
async def _stream_via_nonstream_completion(
    self,
    messages: list[dict[str, Any]],
    system: str,
    tools: list[Tool] | None,
    max_tokens: int,
    response_format: dict[str, Any] | None,
    json_mode: bool,
) -> AsyncIterator[StreamEvent]:
    """Emulate streaming by running a single non-stream completion.

    Some providers currently fail in LiteLLM's chunk parser for stream=True.
    For those providers we do a regular async completion and emit equivalent
    stream events so higher layers continue to work.

    Event order: on failure, one non-recoverable StreamErrorEvent; otherwise
    tool-call events, then the full text as one delta plus an end marker,
    then a FinishEvent carrying usage and stop reason.
    """
    from framework.llm.stream_events import (
        FinishEvent,
        StreamErrorEvent,
        TextDeltaEvent,
        TextEndEvent,
        ToolCallEvent,
    )

    # A failed completion surfaces as a single error event, not a raise,
    # so callers iterating the stream never see an exception escape.
    try:
        response = await self.acomplete(
            messages=messages,
            system=system,
            tools=tools,
            max_tokens=max_tokens,
            response_format=response_format,
            json_mode=json_mode,
        )
    except Exception as exc:
        yield StreamErrorEvent(error=str(exc), recoverable=False)
        return

    # Pull tool calls off the raw provider response, if it carries any.
    raw = response.raw_response
    calls = []
    if raw and getattr(raw, "choices", None):
        calls = raw.choices[0].message.tool_calls or []

    for call in calls:
        arg_text = call.function.arguments if call.function else ""
        decoded: Any
        if not arg_text:
            decoded = {}
        else:
            try:
                decoded = json.loads(arg_text)
            except json.JSONDecodeError:
                # Preserve malformed argument JSON for debugging rather
                # than silently dropping it.
                decoded = {"_raw": arg_text}
        yield ToolCallEvent(
            tool_use_id=getattr(call, "id", ""),
            tool_name=call.function.name if call.function else "",
            tool_input=decoded,
        )

    # The whole text arrives at once, so snapshot == delta content.
    if response.content:
        yield TextDeltaEvent(content=response.content, snapshot=response.content)
        yield TextEndEvent(full_text=response.content)

    yield FinishEvent(
        stop_reason=response.stop_reason or "stop",
        input_tokens=response.input_tokens,
        output_tokens=response.output_tokens,
        model=response.model,
    )
819+
738820 async def stream (
739821 self ,
740822 messages : list [dict [str , Any ]],
@@ -762,6 +844,20 @@ async def stream(
762844 ToolCallEvent ,
763845 )
764846
847+ # MiniMax currently fails in litellm's stream chunk parser for some
848+ # responses (missing "id" in stream chunks). Use non-stream fallback.
849+ if self ._is_minimax_model ():
850+ async for event in self ._stream_via_nonstream_completion (
851+ messages = messages ,
852+ system = system ,
853+ tools = tools ,
854+ max_tokens = max_tokens ,
855+ response_format = response_format ,
856+ json_mode = json_mode ,
857+ ):
858+ yield event
859+ return
860+
765861 full_messages : list [dict [str , Any ]] = []
766862 if system :
767863 full_messages .append ({"role" : "system" , "content" : system })
0 commit comments