diff --git a/deeptutor/services/llm/factory.py b/deeptutor/services/llm/factory.py
index 4fe8a97d1..6345f5cf5 100644
--- a/deeptutor/services/llm/factory.py
+++ b/deeptutor/services/llm/factory.py
@@ -272,6 +272,8 @@ def _sanitize_call_kwargs(
 
     if not supports_response_format(binding, model):
         extra_kwargs.pop("response_format", None)
+    if binding == "azure_openai" and "max_completion_tokens" in extra_kwargs:
+        extra_kwargs["max_tokens"] = extra_kwargs.pop("max_completion_tokens")
 
     return extra_kwargs
 
diff --git a/tests/services/llm/test_factory_provider_exec.py b/tests/services/llm/test_factory_provider_exec.py
index 8f2aa95d8..a6ba1c5c5 100644
--- a/tests/services/llm/test_factory_provider_exec.py
+++ b/tests/services/llm/test_factory_provider_exec.py
@@ -201,6 +201,28 @@ async def test_complete_strips_unsupported_response_format(monkeypatch) -> None:
     assert "response_format" not in provider.complete_kwargs
 
 
+@pytest.mark.asyncio
+async def test_complete_normalizes_azure_max_completion_tokens(monkeypatch) -> None:
+    cfg = _make_cfg(
+        model="gpt-5.4",
+        binding="azure_openai",
+        provider_name="azure_openai",
+    )
+    provider = _FakeProvider()
+
+    monkeypatch.setattr("deeptutor.services.llm.factory.get_llm_config", lambda: cfg)
+    monkeypatch.setattr(
+        "deeptutor.services.llm.factory.get_runtime_provider",
+        lambda _config: provider,
+    )
+
+    result = await complete("hello", max_completion_tokens=200)
+
+    assert result == "ok"
+    assert provider.complete_kwargs["max_tokens"] == 200
+    assert "max_completion_tokens" not in provider.complete_kwargs
+
+
 @pytest.mark.asyncio
 async def test_complete_passes_retry_delays(monkeypatch) -> None:
     cfg = _make_cfg()