From 358dab52ce02944dfe479fa448cc0d2d3537413d Mon Sep 17 00:00:00 2001
From: Adavya Sharma <adavyasharma@gmail.com>
Date: Fri, 13 Mar 2026 23:59:12 -0700
Subject: [PATCH 1/4] fix: sanitize chat payloads and fix provider precedence

---
 cli.py                                    |  2 +-
 hermes_cli/main.py                        |  4 +--
 hermes_cli/runtime_provider.py            | 10 +++---
 run_agent.py                              | 17 +++++++++-
 tests/test_batch_runner_checkpoint.py     |  2 +-
 tests/test_cli_provider_resolution.py     | 18 +++++++++-
 tests/test_provider_parity.py             | 41 +++++++++++++++++++++++
 tests/test_runtime_provider_resolution.py |  7 ++++
 8 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/cli.py b/cli.py
index 3e91812ca..b01c28d31 100755
--- a/cli.py
+++ b/cli.py
@@ -1151,8 +1151,8 @@ def __init__(
         # Provider selection is resolved lazily at use-time via _ensure_runtime_credentials().
         self.requested_provider = (
             provider
-            or os.getenv("HERMES_INFERENCE_PROVIDER")
             or CLI_CONFIG["model"].get("provider")
+            or os.getenv("HERMES_INFERENCE_PROVIDER")
             or "auto"
         )
         self._provider_source: Optional[str] = None
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 4f83933d5..74759c87c 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -745,8 +745,8 @@ def cmd_model(args):
         config_provider = model_cfg.get("provider")
 
     effective_provider = (
-        os.getenv("HERMES_INFERENCE_PROVIDER")
-        or config_provider
+        config_provider
+        or os.getenv("HERMES_INFERENCE_PROVIDER")
         or "auto"
     )
     try:
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 5a39c79cd..17f86f300 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -33,15 +33,17 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
     if requested and requested.strip():
         return requested.strip().lower()
 
-    env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
-    if env_provider:
-        return env_provider
-
     model_cfg = _get_model_config()
     cfg_provider = model_cfg.get("provider")
     if isinstance(cfg_provider, str) and cfg_provider.strip():
         return cfg_provider.strip().lower()
 
+    # Prefer the persisted config selection over any stale shell/.env
+    # provider override so chat uses the endpoint the user last saved.
+    env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+    if env_provider:
+        return env_provider
+
     return "auto"
 
 
diff --git a/run_agent.py b/run_agent.py
index b9bacf7d6..b67b439f0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2737,6 +2737,21 @@ def _build_api_kwargs(self, api_messages: list) -> dict:
 
             return kwargs
 
+        sanitized_messages = copy.deepcopy(api_messages)
+        for msg in sanitized_messages:
+            if not isinstance(msg, dict):
+                continue
+
+            # Codex-only replay state must not leak into strict chat-completions APIs.
+            msg.pop("codex_reasoning_items", None)
+
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tool_call in tool_calls:
+                    if isinstance(tool_call, dict):
+                        tool_call.pop("call_id", None)
+                        tool_call.pop("response_item_id", None)
+
         provider_preferences = {}
         if self.providers_allowed:
             provider_preferences["only"] = self.providers_allowed
@@ -2753,7 +2768,7 @@ def _build_api_kwargs(self, api_messages: list) -> dict:
 
         api_kwargs = {
             "model": self.model,
-            "messages": api_messages,
+            "messages": sanitized_messages,
             "tools": self.tools if self.tools else None,
             "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)),
         }
diff --git a/tests/test_batch_runner_checkpoint.py b/tests/test_batch_runner_checkpoint.py
index ebf9bce7e..4ce105d75 100644
--- a/tests/test_batch_runner_checkpoint.py
+++ b/tests/test_batch_runner_checkpoint.py
@@ -3,7 +3,7 @@
 import json
 import os
 from pathlib import Path
-from multiprocessing import Lock
+from threading import Lock
 from unittest.mock import patch, MagicMock
 
 import pytest
diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py
index 2a3dc43e0..8c22dd7ac 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -162,6 +162,22 @@ def _runtime_resolve(**kwargs):
     assert shell.api_mode == "codex_responses"
 
 
+def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch):
+    cli = _import_cli()
+
+    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
+    config_copy = dict(cli.CLI_CONFIG)
+    model_copy = dict(config_copy.get("model", {}))
+    model_copy["provider"] = "custom"
+    model_copy["base_url"] = "https://api.fireworks.ai/inference/v1"
+    config_copy["model"] = model_copy
+    monkeypatch.setattr(cli, "CLI_CONFIG", config_copy)
+
+    shell = cli.HermesCLI(model="fireworks/minimax-m2p5", compact=True, max_turns=1)
+
+    assert shell.requested_provider == "custom"
+
+
 def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
     """When provider resolves to openai-codex and no model was explicitly
     chosen, the global config default (e.g. anthropic/claude-opus-4.6) must
@@ -310,4 +326,4 @@ def _resolve_provider(requested, **kwargs):
 
     assert "Warning:" in output
     assert "falling back to auto provider detection" in output.lower()
-    assert "No change." in output
+    assert "No change." in output
\ No newline at end of file
diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py
index 2ee313144..124fcf630 100644
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -95,6 +95,47 @@ def test_no_responses_api_fields(self, monkeypatch):
         assert "instructions" not in kwargs
         assert "store" not in kwargs
 
+    def test_strips_codex_only_tool_call_fields_from_chat_messages(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "content": "Checking now.",
+                "codex_reasoning_items": [
+                    {"type": "reasoning", "id": "rs_1", "encrypted_content": "blob"},
+                ],
+                "tool_calls": [
+                    {
+                        "id": "call_123",
+                        "call_id": "call_123",
+                        "response_item_id": "fc_123",
+                        "type": "function",
+                        "function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"},
+                        "extra_content": {"thought_signature": "opaque"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_123", "content": "/tmp"},
+        ]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assistant_msg = kwargs["messages"][1]
+        tool_call = assistant_msg["tool_calls"][0]
+
+        assert "codex_reasoning_items" not in assistant_msg
+        assert tool_call["id"] == "call_123"
+        assert tool_call["function"]["name"] == "terminal"
+        assert tool_call["extra_content"] == {"thought_signature": "opaque"}
+        assert "call_id" not in tool_call
+        assert "response_item_id" not in tool_call
+
+        # Original stored history must remain unchanged for Responses replay mode.
+        assert messages[1]["tool_calls"][0]["call_id"] == "call_123"
+        assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123"
+        assert "codex_reasoning_items" in messages[1]
+
 
 class TestBuildApiKwargsNousPortal:
     def test_includes_nous_product_tags(self, monkeypatch):
diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py
index 9631591b8..520205df0 100644
--- a/tests/test_runtime_provider_resolution.py
+++ b/tests/test_runtime_provider_resolution.py
@@ -181,3 +181,10 @@ def test_resolve_requested_provider_precedence(monkeypatch):
     monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
     monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})
     assert rp.resolve_requested_provider("openrouter") == "openrouter"
+    assert rp.resolve_requested_provider() == "openai-codex"
+
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    assert rp.resolve_requested_provider() == "nous"
+
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    assert rp.resolve_requested_provider() == "auto"

From 08208323f294772df15996e3408cd11605bb545c Mon Sep 17 00:00:00 2001
From: Adavya Sharma <adavyasharma@gmail.com>
Date: Wed, 11 Mar 2026 08:07:56 +0000
Subject: [PATCH 2/4] test: cover fireworks tool-call payload sanitization

---
 tests/test_provider_parity.py | 46 +++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py
index 124fcf630..bb91b055c 100644
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -168,6 +168,52 @@ def test_no_openrouter_extra_body(self, monkeypatch):
         extra = kwargs.get("extra_body", {})
         assert "reasoning" not in extra
 
+    def test_fireworks_tool_call_payload_strips_codex_only_fields(self, monkeypatch):
+        agent = _make_agent(
+            monkeypatch,
+            "custom",
+            base_url="https://api.fireworks.ai/inference/v1",
+        )
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "content": "Checking now.",
+                "codex_reasoning_items": [
+                    {"type": "reasoning", "id": "rs_1", "encrypted_content": "blob"},
+                ],
+                "tool_calls": [
+                    {
+                        "id": "call_fw_123",
+                        "call_id": "call_fw_123",
+                        "response_item_id": "fc_fw_123",
+                        "type": "function",
+                        "function": {
+                            "name": "terminal",
+                            "arguments": "{\"command\":\"pwd\"}",
+                        },
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_fw_123", "content": "/tmp"},
+        ]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["tools"][0]["function"]["name"] == "web_search"
+        assert "input" not in kwargs
+        assert kwargs.get("extra_body", {}) == {}
+
+        assistant_msg = kwargs["messages"][1]
+        tool_call = assistant_msg["tool_calls"][0]
+
+        assert "codex_reasoning_items" not in assistant_msg
+        assert tool_call["id"] == "call_fw_123"
+        assert tool_call["type"] == "function"
+        assert tool_call["function"]["name"] == "terminal"
+        assert "call_id" not in tool_call
+        assert "response_item_id" not in tool_call
+
 
 class TestBuildApiKwargsCodex:
     def test_uses_responses_api_format(self, monkeypatch):

From a628c607f0abf6ecad444f256a2ec705df6af395 Mon Sep 17 00:00:00 2001
From: Adavya Sharma <adavyasharma@gmail.com>
Date: Wed, 11 Mar 2026 08:35:27 +0000
Subject: [PATCH 3/4] fix: preserve chat kwargs identity when no sanitization
 is needed

---
 run_agent.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index b67b439f0..19cc626e3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2737,20 +2737,41 @@ def _build_api_kwargs(self, api_messages: list) -> dict:
 
             return kwargs
 
-        sanitized_messages = copy.deepcopy(api_messages)
-        for msg in sanitized_messages:
+        sanitized_messages = api_messages
+        needs_sanitization = False
+        for msg in api_messages:
             if not isinstance(msg, dict):
                 continue
-
-            # Codex-only replay state must not leak into strict chat-completions APIs.
-            msg.pop("codex_reasoning_items", None)
+            if "codex_reasoning_items" in msg:
+                needs_sanitization = True
+                break
 
             tool_calls = msg.get("tool_calls")
             if isinstance(tool_calls, list):
                 for tool_call in tool_calls:
-                    if isinstance(tool_call, dict):
-                        tool_call.pop("call_id", None)
-                        tool_call.pop("response_item_id", None)
+                    if not isinstance(tool_call, dict):
+                        continue
+                    if "call_id" in tool_call or "response_item_id" in tool_call:
+                        needs_sanitization = True
+                        break
+                if needs_sanitization:
+                    break
+
+        if needs_sanitization:
+            sanitized_messages = copy.deepcopy(api_messages)
+            for msg in sanitized_messages:
+                if not isinstance(msg, dict):
+                    continue
+
+                # Codex-only replay state must not leak into strict chat-completions APIs.
+                msg.pop("codex_reasoning_items", None)
+
+                tool_calls = msg.get("tool_calls")
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if isinstance(tool_call, dict):
+                            tool_call.pop("call_id", None)
+                            tool_call.pop("response_item_id", None)
 
         provider_preferences = {}
         if self.providers_allowed:

From 2166292157a5163cad744090e505c74cfd679bac Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Fri, 13 Mar 2026 23:59:47 -0700
Subject: [PATCH 4/4] fix: clarify provider precedence docstring

---
 hermes_cli/runtime_provider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 17f86f300..6cd57f95d 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -29,7 +29,7 @@ def _get_model_config() -> Dict[str, Any]:
 
 
 def resolve_requested_provider(requested: Optional[str] = None) -> str:
-    """Resolve provider request from explicit arg, env, then config."""
+    """Resolve provider request from explicit arg, config, env, then "auto"."""
     if requested and requested.strip():
         return requested.strip().lower()