From 358dab52ce02944dfe479fa448cc0d2d3537413d Mon Sep 17 00:00:00 2001 From: Adavya Sharma Date: Fri, 13 Mar 2026 23:59:12 -0700 Subject: [PATCH 1/4] fix: sanitize chat payloads and provider precedence --- cli.py | 2 +- hermes_cli/main.py | 4 +-- hermes_cli/runtime_provider.py | 10 +++--- run_agent.py | 17 +++++++++- tests/test_batch_runner_checkpoint.py | 2 +- tests/test_cli_provider_resolution.py | 18 +++++++++- tests/test_provider_parity.py | 41 +++++++++++++++++++++++ tests/test_runtime_provider_resolution.py | 7 ++++ 8 files changed, 91 insertions(+), 10 deletions(-) diff --git a/cli.py b/cli.py index 3e91812ca..b01c28d31 100755 --- a/cli.py +++ b/cli.py @@ -1151,8 +1151,8 @@ def __init__( # Provider selection is resolved lazily at use-time via _ensure_runtime_credentials(). self.requested_provider = ( provider - or os.getenv("HERMES_INFERENCE_PROVIDER") or CLI_CONFIG["model"].get("provider") + or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto" ) self._provider_source: Optional[str] = None diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 4f83933d5..74759c87c 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -745,8 +745,8 @@ def cmd_model(args): config_provider = model_cfg.get("provider") effective_provider = ( - os.getenv("HERMES_INFERENCE_PROVIDER") - or config_provider + config_provider + or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto" ) try: diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 5a39c79cd..17f86f300 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -33,15 +33,17 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str: if requested and requested.strip(): return requested.strip().lower() - env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower() - if env_provider: - return env_provider - model_cfg = _get_model_config() cfg_provider = model_cfg.get("provider") if isinstance(cfg_provider, str) and cfg_provider.strip(): return cfg_provider.strip().lower() + # Prefer the persisted config selection over any stale shell/.env + # provider override so chat uses the endpoint the user last saved. + env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower() + if env_provider: + return env_provider + return "auto" diff --git a/run_agent.py b/run_agent.py index b9bacf7d6..b67b439f0 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2737,6 +2737,21 @@ def _build_api_kwargs(self, api_messages: list) -> dict: return kwargs + sanitized_messages = copy.deepcopy(api_messages) + for msg in sanitized_messages: + if not isinstance(msg, dict): + continue + + # Codex-only replay state must not leak into strict chat-completions APIs. + msg.pop("codex_reasoning_items", None) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call.pop("call_id", None) + tool_call.pop("response_item_id", None) + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed @@ -2753,7 +2768,7 @@ def _build_api_kwargs(self, api_messages: list) -> dict: api_kwargs = { "model": self.model, - "messages": api_messages, + "messages": sanitized_messages, "tools": self.tools if self.tools else None, "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)), } diff --git a/tests/test_batch_runner_checkpoint.py b/tests/test_batch_runner_checkpoint.py index ebf9bce7e..4ce105d75 100644 --- a/tests/test_batch_runner_checkpoint.py +++ b/tests/test_batch_runner_checkpoint.py @@ -3,7 +3,7 @@ import json import os from pathlib import Path -from multiprocessing import Lock +from threading import Lock from unittest.mock import patch, MagicMock import pytest diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 2a3dc43e0..8c22dd7ac 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -162,6 +162,22 @@ def _runtime_resolve(**kwargs): assert shell.api_mode == "codex_responses" +def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch): + cli = _import_cli() + + monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter") + config_copy = dict(cli.CLI_CONFIG) + model_copy = dict(config_copy.get("model", {})) + model_copy["provider"] = "custom" + model_copy["base_url"] = "https://api.fireworks.ai/inference/v1" + config_copy["model"] = model_copy + monkeypatch.setattr(cli, "CLI_CONFIG", config_copy) + + shell = cli.HermesCLI(model="fireworks/minimax-m2p5", compact=True, max_turns=1) + + assert shell.requested_provider == "custom" + + def test_codex_provider_replaces_incompatible_default_model(monkeypatch): """When provider resolves to openai-codex and no model was explicitly chosen, the global config default (e.g. anthropic/claude-opus-4.6) must @@ -310,4 +326,4 @@ def _resolve_provider(requested, **kwargs): assert "Warning:" in output assert "falling back to auto provider detection" in output.lower() - assert "No change." in output + assert "No change." in output \ No newline at end of file diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index 2ee313144..124fcf630 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -95,6 +95,47 @@ def test_no_responses_api_fields(self, monkeypatch): assert "instructions" not in kwargs assert "store" not in kwargs + def test_strips_codex_only_tool_call_fields_from_chat_messages(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [ + {"role": "user", "content": "hi"}, + { + "role": "assistant", + "content": "Checking now.", + "codex_reasoning_items": [ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "blob"}, + ], + "tool_calls": [ + { + "id": "call_123", + "call_id": "call_123", + "response_item_id": "fc_123", + "type": "function", + "function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"}, + "extra_content": {"thought_signature": "opaque"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_123", "content": "/tmp"}, + ] + + kwargs = agent._build_api_kwargs(messages) + + assistant_msg = kwargs["messages"][1] + tool_call = assistant_msg["tool_calls"][0] + + assert "codex_reasoning_items" not in assistant_msg + assert tool_call["id"] == "call_123" + assert tool_call["function"]["name"] == "terminal" + assert tool_call["extra_content"] == {"thought_signature": "opaque"} + assert "call_id" not in tool_call + assert "response_item_id" not in tool_call + + # Original stored history must remain unchanged for Responses replay mode. + assert messages[1]["tool_calls"][0]["call_id"] == "call_123" + assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123" + assert "codex_reasoning_items" in messages[1] + class TestBuildApiKwargsNousPortal: def test_includes_nous_product_tags(self, monkeypatch): diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index 9631591b8..520205df0 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -181,3 +181,10 @@ def test_resolve_requested_provider_precedence(monkeypatch): monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous") monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"}) assert rp.resolve_requested_provider("openrouter") == "openrouter" + assert rp.resolve_requested_provider() == "openai-codex" + + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + assert rp.resolve_requested_provider() == "nous" + + monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False) + assert rp.resolve_requested_provider() == "auto" From 08208323f294772df15996e3408cd11605bb545c Mon Sep 17 00:00:00 2001 From: Adavya Sharma Date: Wed, 11 Mar 2026 08:07:56 +0000 Subject: [PATCH 2/4] test: cover fireworks tool-call payload sanitization --- tests/test_provider_parity.py | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index 124fcf630..bb91b055c 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -168,6 +168,52 @@ def test_no_openrouter_extra_body(self, monkeypatch): extra = kwargs.get("extra_body", {}) assert "reasoning" not in extra + def test_fireworks_tool_call_payload_strips_codex_only_fields(self, monkeypatch): + agent = _make_agent( + monkeypatch, + "custom", + base_url="https://api.fireworks.ai/inference/v1", + ) + messages = [ + {"role": "user", "content": "hi"}, + { + "role": "assistant", + "content": "Checking now.", + "codex_reasoning_items": [ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "blob"}, + ], + "tool_calls": [ + { + "id": "call_fw_123", + "call_id": "call_fw_123", + "response_item_id": "fc_fw_123", + "type": "function", + "function": { + "name": "terminal", + "arguments": "{\"command\":\"pwd\"}", + }, + } + ], + }, + {"role": "tool", "tool_call_id": "call_fw_123", "content": "/tmp"}, + ] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["tools"][0]["function"]["name"] == "web_search" + assert "input" not in kwargs + assert kwargs.get("extra_body", {}) == {} + + assistant_msg = kwargs["messages"][1] + tool_call = assistant_msg["tool_calls"][0] + + assert "codex_reasoning_items" not in assistant_msg + assert tool_call["id"] == "call_fw_123" + assert tool_call["type"] == "function" + assert tool_call["function"]["name"] == "terminal" + assert "call_id" not in tool_call + assert "response_item_id" not in tool_call + class TestBuildApiKwargsCodex: def test_uses_responses_api_format(self, monkeypatch): From a628c607f0abf6ecad444f256a2ec705df6af395 Mon Sep 17 00:00:00 2001 From: Adavya Sharma Date: Wed, 11 Mar 2026 08:35:27 +0000 Subject: [PATCH 3/4] fix: preserve chat kwargs identity when no sanitization is needed --- run_agent.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/run_agent.py b/run_agent.py index b67b439f0..19cc626e3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2737,20 +2737,41 @@ def _build_api_kwargs(self, api_messages: list) -> dict: return kwargs - sanitized_messages = copy.deepcopy(api_messages) - for msg in sanitized_messages: + sanitized_messages = api_messages + needs_sanitization = False + for msg in api_messages: if not isinstance(msg, dict): continue - - # Codex-only replay state must not leak into strict chat-completions APIs. - msg.pop("codex_reasoning_items", None) + if "codex_reasoning_items" in msg: + needs_sanitization = True + break tool_calls = msg.get("tool_calls") if isinstance(tool_calls, list): for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("call_id", None) - tool_call.pop("response_item_id", None) + if not isinstance(tool_call, dict): + continue + if "call_id" in tool_call or "response_item_id" in tool_call: + needs_sanitization = True + break + if needs_sanitization: + break + + if needs_sanitization: + sanitized_messages = copy.deepcopy(api_messages) + for msg in sanitized_messages: + if not isinstance(msg, dict): + continue + + # Codex-only replay state must not leak into strict chat-completions APIs. + msg.pop("codex_reasoning_items", None) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call.pop("call_id", None) + tool_call.pop("response_item_id", None) provider_preferences = {} if self.providers_allowed: From 2166292157a5163cad744090e505c74cfd679bac Mon Sep 17 00:00:00 2001 From: teknium1 Date: Fri, 13 Mar 2026 23:59:47 -0700 Subject: [PATCH 4/4] fix: clarify provider precedence docstring --- hermes_cli/runtime_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 17f86f300..6cd57f95d 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -29,7 +29,7 @@ def _get_model_config() -> Dict[str, Any]: def resolve_requested_provider(requested: Optional[str] = None) -> str: - """Resolve provider request from explicit arg, env, then config.""" + """Resolve provider request from explicit arg, config, then env.""" if requested and requested.strip(): return requested.strip().lower()