@@ -117,6 +117,7 @@ def _sync_wrapper(*args, _orig=original, **kwargs):
117117RATE_LIMIT_MAX_RETRIES = 10
118118RATE_LIMIT_BACKOFF_BASE = 2 # seconds
119119RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
120+ MINIMAX_API_BASE = "https://api.minimax.io/v1"
120121
121122# Empty-stream retries use a short fixed delay, not the rate-limit backoff.
122123# Conversation-structure issues are deterministic — long waits don't help.
@@ -324,11 +325,13 @@ def __init__(
324325 """
325326 self .model = model
326327 self .api_key = api_key
327- self .api_base = api_base
328+ self .api_base = api_base or self . _default_api_base_for_model ( model )
328329 self .extra_kwargs = kwargs
329330 # The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
330331 # several standard OpenAI params: max_output_tokens, stream_options.
331- self ._codex_backend = bool (api_base and "chatgpt.com/backend-api/codex" in api_base )
332+ self ._codex_backend = bool (
333+ self .api_base and "chatgpt.com/backend-api/codex" in self .api_base
334+ )
332335
333336 if litellm is None :
334337 raise ImportError (
@@ -341,6 +344,14 @@ def __init__(
341344 # override the mode. The responses_api_bridge in litellm handles
342345 # converting Chat Completions requests to Responses API format.
343346
@staticmethod
def _default_api_base_for_model(model: str) -> str | None:
    """Return a provider-specific default API base, or None.

    MiniMax models (``minimax/...`` LiteLLM-routed ids or bare
    ``minimax-...`` model names) need an explicit API base; every other
    provider falls through to LiteLLM's own defaults.

    Args:
        model: The configured model identifier; matched case-insensitively.

    Returns:
        ``MINIMAX_API_BASE`` for MiniMax models, otherwise ``None``.
    """
    # str.startswith accepts a tuple of prefixes — one call, no `or` chain.
    if model.lower().startswith(("minimax/", "minimax-")):
        return MINIMAX_API_BASE
    return None
354+
344355 def _completion_with_rate_limit_retry (
345356 self , max_retries : int | None = None , ** kwargs : Any
346357 ) -> Any :
@@ -735,6 +746,77 @@ def _tool_to_openai_format(self, tool: Tool) -> dict[str, Any]:
735746 },
736747 }
737748
749+ def _is_minimax_model (self ) -> bool :
750+ """Return True when the configured model targets MiniMax."""
751+ model = (self .model or "" ).lower ()
752+ return model .startswith ("minimax/" ) or model .startswith ("minimax-" )
753+
async def _stream_via_nonstream_completion(
    self,
    messages: list[dict[str, Any]],
    system: str,
    tools: list[Tool] | None,
    max_tokens: int,
    response_format: dict[str, Any] | None,
    json_mode: bool,
) -> AsyncIterator[StreamEvent]:
    """Emulate streaming by running a single non-stream completion.

    Some providers currently fail in LiteLLM's chunk parser for stream=True.
    For those providers we do a regular async completion and emit equivalent
    stream events so higher layers continue to work.

    Event order: on failure, one non-recoverable StreamErrorEvent; otherwise
    tool-call events, then the full text as one delta plus an end marker,
    then a FinishEvent carrying usage and stop reason.
    """
    from framework.llm.stream_events import (
        FinishEvent,
        StreamErrorEvent,
        TextDeltaEvent,
        TextEndEvent,
        ToolCallEvent,
    )

    # A failed completion surfaces as a single error event, not a raise,
    # so callers iterating the stream never see an exception escape.
    try:
        response = await self.acomplete(
            messages=messages,
            system=system,
            tools=tools,
            max_tokens=max_tokens,
            response_format=response_format,
            json_mode=json_mode,
        )
    except Exception as exc:
        yield StreamErrorEvent(error=str(exc), recoverable=False)
        return

    # Pull tool calls off the raw provider response, if it carries any.
    raw = response.raw_response
    calls = []
    if raw and getattr(raw, "choices", None):
        calls = raw.choices[0].message.tool_calls or []

    for call in calls:
        arg_text = call.function.arguments if call.function else ""
        decoded: Any
        if not arg_text:
            decoded = {}
        else:
            try:
                decoded = json.loads(arg_text)
            except json.JSONDecodeError:
                # Preserve malformed argument JSON for debugging rather
                # than silently dropping it.
                decoded = {"_raw": arg_text}
        yield ToolCallEvent(
            tool_use_id=getattr(call, "id", ""),
            tool_name=call.function.name if call.function else "",
            tool_input=decoded,
        )

    # The whole text arrives at once, so snapshot == delta content.
    if response.content:
        yield TextDeltaEvent(content=response.content, snapshot=response.content)
        yield TextEndEvent(full_text=response.content)

    yield FinishEvent(
        stop_reason=response.stop_reason or "stop",
        input_tokens=response.input_tokens,
        output_tokens=response.output_tokens,
        model=response.model,
    )
819+
738820 async def stream (
739821 self ,
740822 messages : list [dict [str , Any ]],
@@ -762,6 +844,20 @@ async def stream(
762844 ToolCallEvent ,
763845 )
764846
847+ # MiniMax currently fails in litellm's stream chunk parser for some
848+ # responses (missing "id" in stream chunks). Use non-stream fallback.
849+ if self ._is_minimax_model ():
850+ async for event in self ._stream_via_nonstream_completion (
851+ messages = messages ,
852+ system = system ,
853+ tools = tools ,
854+ max_tokens = max_tokens ,
855+ response_format = response_format ,
856+ json_mode = json_mode ,
857+ ):
858+ yield event
859+ return
860+
765861 full_messages : list [dict [str , Any ]] = []
766862 if system :
767863 full_messages .append ({"role" : "system" , "content" : system })
0 commit comments