Merge pull request NousResearch#2110 from NousResearch/hermes/hermes-5d6932ba

teknium1 · web-flow · commit b7b585656bb2 · 2026-03-20T06:01:44.000-07:00
fix: session reset + custom provider model switch + honcho base_url
diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
@@ -206,11 +206,11 @@ def _strip_yaml_frontmatter(content: str) -> str:
         "contextually appropriate."
     ),
     "cron": (
-        "You are running as a scheduled cron job. Your final response is automatically "
-        "delivered to the job's configured destination, so do not use send_message to "
-        "send to that same target again. If you want the user to receive something in "
-        "the scheduled destination, put it directly in your final response. Use "
-        "send_message only for additional or different targets."
+        "You are running as a scheduled cron job. There is no user present — you "
+        "cannot ask questions, request clarification, or wait for follow-up. Execute "
+        "the task fully and autonomously, making reasonable decisions where needed. "
+        "Your final response is automatically delivered to the job's configured "
+        "destination — put the primary content directly in your response."
     ),
     "cli": (
         "You are a CLI AI Agent. Try not to use markdown but simple text "
diff --git a/cli.py b/cli.py
@@ -3517,8 +3517,17 @@ def process_command(self, command: str) -> bool:
                 # Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
                 current_provider = self.provider or self.requested_provider or "openrouter"
                 target_provider, new_model = parse_model_input(raw_input, current_provider)
-                # Auto-detect provider when no explicit provider:model syntax was used
-                if target_provider == current_provider:
+                # Auto-detect provider when no explicit provider:model syntax was used.
+                # Skip auto-detection for custom providers — the model name might
+                # coincidentally match a known provider's catalog, but the user
+                # intends to use it on their custom endpoint.  Require explicit
+                # provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
+                # to switch away from a custom endpoint.
+                _base = self.base_url or ""
+                is_custom = current_provider == "custom" or (
+                    "localhost" in _base or "127.0.0.1" in _base
+                )
+                if target_provider == current_provider and not is_custom:
                     from hermes_cli.models import detect_provider_for_model
                     detected = detect_provider_for_model(new_model, current_provider)
                     if detected:
@@ -3586,6 +3595,13 @@ def process_command(self, command: str) -> bool:
                         if message:
                             print(f"  Reason: {message}")
                         print("  Note: Model will revert on restart. Use a verified model to save to config.")
+
+                    # Helpful hint when staying on a custom endpoint
+                    if is_custom and not provider_changed:
+                        endpoint = self.base_url or "custom endpoint"
+                        print(f"  Endpoint: {endpoint}")
+                        print(f"  Tip: To switch providers, use /model provider:model")
+                        print(f"       e.g. /model openai-codex:gpt-5.2-codex")
             else:
                 self._show_model_and_providers()
         elif canonical == "provider":
diff --git a/cron/scheduler.py b/cron/scheduler.py
@@ -391,7 +391,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
             providers_ignored=pr.get("ignore"),
             providers_order=pr.get("order"),
             provider_sort=pr.get("sort"),
-            disabled_toolsets=["cronjob"],
+            disabled_toolsets=["cronjob", "messaging", "clarify"],
             quiet_mode=True,
             platform="cron",
             session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
@@ -670,6 +670,11 @@ def ensure_hermes_home():
         "password": True,
         "category": "tool",
     },
+    "HONCHO_BASE_URL": {
+        "description": "Base URL for self-hosted Honcho instances (no API key needed)",
+        "prompt": "Honcho base URL (e.g. http://localhost:8000)",
+        "category": "tool",
+    },
 
     # ── Messaging platforms ──
     "TELEGRAM_BOT_TOKEN": {
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
@@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str:
     return value.strip().lower().replace(" ", "-")
 
 
+def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
+    """Auto-detect api_mode from the resolved base URL.
+
+    Direct api.openai.com endpoints need the Responses API for GPT-5.x
+    tool calls with reasoning (chat/completions returns 400).
+    """
+    normalized = (base_url or "").strip().lower().rstrip("/")
+    if "api.openai.com" in normalized and "openrouter" not in normalized:
+        return "codex_responses"
+    return None
+
+
 def _auto_detect_local_model(base_url: str) -> str:
     """Query a local server for its model name when only one model is loaded."""
     if not base_url:
@@ -185,7 +197,9 @@ def _resolve_named_custom_runtime(
 
     return {
         "provider": "openrouter",
-        "api_mode": custom_provider.get("api_mode", "chat_completions"),
+        "api_mode": custom_provider.get("api_mode")
+        or _detect_api_mode_for_url(base_url)
+        or "chat_completions",
         "base_url": base_url,
         "api_key": api_key,
         "source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
@@ -263,7 +277,9 @@ def _resolve_openrouter_runtime(
 
     return {
         "provider": "openrouter",
-        "api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
+        "api_mode": _parse_api_mode(model_cfg.get("api_mode"))
+        or _detect_api_mode_for_url(base_url)
+        or "chat_completions",
         "base_url": base_url,
         "api_key": api_key,
         "source": source,
diff --git a/honcho_integration/client.py b/honcho_integration/client.py
@@ -117,11 +117,13 @@ def peer_memory_mode(self, peer_name: str) -> str:
     def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
         """Create config from environment variables (fallback)."""
         api_key = os.environ.get("HONCHO_API_KEY")
+        base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
         return cls(
             workspace_id=workspace_id,
             api_key=api_key,
             environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
-            enabled=bool(api_key),
+            base_url=base_url,
+            enabled=bool(api_key or base_url),
         )
 
     @classmethod
@@ -171,17 +173,23 @@ def from_global_config(
             or raw.get("environment", "production")
         )
 
-        # Auto-enable when API key is present (unless explicitly disabled)
-        # Host-level enabled wins, then root-level, then auto-enable if key exists.
+        base_url = (
+            raw.get("baseUrl")
+            or os.environ.get("HONCHO_BASE_URL", "").strip()
+            or None
+        )
+
+        # Auto-enable when API key or base_url is present (unless explicitly disabled)
+        # Host-level enabled wins, then root-level, then auto-enable if key/url exists.
         host_enabled = host_block.get("enabled")
         root_enabled = raw.get("enabled")
         if host_enabled is not None:
             enabled = host_enabled
         elif root_enabled is not None:
             enabled = root_enabled
         else:
-            # Not explicitly set anywhere -> auto-enable if API key exists
-            enabled = bool(api_key)
+            # Not explicitly set anywhere -> auto-enable if API key or base_url exists
+            enabled = bool(api_key or base_url)
 
         # write_frequency: accept int or string
         raw_wf = (
@@ -214,6 +222,7 @@ def from_global_config(
             workspace_id=workspace,
             api_key=api_key,
             environment=environment,
+            base_url=base_url,
             peer_name=host_block.get("peerName") or raw.get("peerName"),
             ai_peer=ai_peer,
             linked_hosts=linked_hosts,
@@ -348,11 +357,12 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
     if config is None:
         config = HonchoClientConfig.from_global_config()
 
-    if not config.api_key:
+    if not config.api_key and not config.base_url:
         raise ValueError(
             "Honcho API key not found. "
             "Get your API key at https://app.honcho.dev, "
-            "then run 'hermes honcho setup' or set HONCHO_API_KEY."
+            "then run 'hermes honcho setup' or set HONCHO_API_KEY. "
+            "For local instances, set HONCHO_BASE_URL instead."
         )
 
     try:
diff --git a/run_agent.py b/run_agent.py
@@ -501,6 +501,12 @@ def __init__(
         else:
             self.api_mode = "chat_completions"
 
+        # Direct OpenAI sessions use the Responses API path.  GPT-5.x tool
+        # calls with reasoning are rejected on /v1/chat/completions, and
+        # Hermes is a tool-using client by default.
+        if self.api_mode == "chat_completions" and self._is_direct_openai_url():
+            self.api_mode = "codex_responses"
+
         # Pre-warm OpenRouter model metadata cache in a background thread.
         # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
         # HTTP request on the first API response when pricing is estimated.
@@ -1057,6 +1063,9 @@ def reset_session_state(self):
         if hasattr(self, "context_compressor") and self.context_compressor:
             self.context_compressor.last_prompt_tokens = 0
             self.context_compressor.last_completion_tokens = 0
+            self.context_compressor.last_total_tokens = 0
+            self.context_compressor.compression_count = 0
+            self.context_compressor._context_probed = False
     
     @staticmethod
     def _safe_print(*args, **kwargs):
@@ -1085,18 +1094,19 @@ def _vprint(self, *args, force: bool = False, **kwargs):
             return
         self._safe_print(*args, **kwargs)
 
+    def _is_direct_openai_url(self, base_url: str = None) -> bool:
+        """Return True when a base URL targets OpenAI's native API."""
+        url = (base_url or self._base_url_lower).lower()
+        return "api.openai.com" in url and "openrouter" not in url
+
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
         
         OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
         'max_completion_tokens'. OpenRouter, local models, and older
         OpenAI models use 'max_tokens'.
         """
-        _is_direct_openai = (
-            "api.openai.com" in self._base_url_lower
-            and "openrouter" not in self._base_url_lower
-        )
-        if _is_direct_openai:
+        if self._is_direct_openai_url():
             return {"max_completion_tokens": value}
         return {"max_tokens": value}
 
@@ -3558,13 +3568,15 @@ def _try_activate_fallback(self) -> bool:
                     fb_provider)
                 return False
 
-            # Determine api_mode from provider
+            # Determine api_mode from provider / base URL
             fb_api_mode = "chat_completions"
             fb_base_url = str(fb_client.base_url)
             if fb_provider == "openai-codex":
                 fb_api_mode = "codex_responses"
             elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
                 fb_api_mode = "anthropic_messages"
+            elif self._is_direct_openai_url(fb_base_url):
+                fb_api_mode = "codex_responses"
 
             old_model = self.model
             self.model = fb_model
diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py
@@ -60,6 +60,21 @@ def test_custom_workspace(self):
         config = HonchoClientConfig.from_env(workspace_id="custom")
         assert config.workspace_id == "custom"
 
+    def test_reads_base_url_from_env(self):
+        with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
+            config = HonchoClientConfig.from_env()
+        assert config.base_url == "http://localhost:8000"
+        assert config.enabled is True
+
+    def test_enabled_without_api_key_when_base_url_set(self):
+        """base_url alone (no API key) is sufficient to enable a local instance."""
+        with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
+            os.environ.pop("HONCHO_API_KEY", None)
+            config = HonchoClientConfig.from_env()
+        assert config.api_key is None
+        assert config.base_url == "http://localhost:8000"
+        assert config.enabled is True
+
 
 class TestFromGlobalConfig:
     def test_missing_config_falls_back_to_env(self, tmp_path):
@@ -188,6 +203,36 @@ def test_api_key_env_fallback(self, tmp_path):
             config = HonchoClientConfig.from_global_config(config_path=config_file)
         assert config.api_key == "env-key"
 
+    def test_base_url_env_fallback(self, tmp_path):
+        """HONCHO_BASE_URL env var is used when no baseUrl in config JSON."""
+        config_file = tmp_path / "config.json"
+        config_file.write_text(json.dumps({"workspace": "local"}))
+
+        with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
+            config = HonchoClientConfig.from_global_config(config_path=config_file)
+        assert config.base_url == "http://localhost:8000"
+        assert config.enabled is True
+
+    def test_base_url_from_config_root(self, tmp_path):
+        """baseUrl in config root is read and takes precedence over env var."""
+        config_file = tmp_path / "config.json"
+        config_file.write_text(json.dumps({"baseUrl": "http://config-host:9000"}))
+
+        with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
+            config = HonchoClientConfig.from_global_config(config_path=config_file)
+        assert config.base_url == "http://config-host:9000"
+
+    def test_base_url_not_read_from_host_block(self, tmp_path):
+        """baseUrl is a root-level connection setting, not overridable per-host (consistent with apiKey)."""
+        config_file = tmp_path / "config.json"
+        config_file.write_text(json.dumps({
+            "baseUrl": "http://root:9000",
+            "hosts": {"hermes": {"baseUrl": "http://host-block:9001"}},
+        }))
+
+        config = HonchoClientConfig.from_global_config(config_path=config_file)
+        assert config.base_url == "http://root:9000"
+
 
 class TestResolveSessionName:
     def test_manual_override(self):
diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py
@@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
         "prompt_toolkit.key_binding": MagicMock(),
         "prompt_toolkit.completion": MagicMock(),
         "prompt_toolkit.formatted_text": MagicMock(),
+        "prompt_toolkit.auto_suggest": MagicMock(),
     }
     with patch.dict(sys.modules, prompt_toolkit_stubs), \
          patch.dict("os.environ", clean_env, clear=False):
diff --git a/tests/test_cli_new_session.py b/tests/test_cli_new_session.py
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py

Original file line number	Diff line number	Diff line change
`@@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):`
`42`	`42`	`"prompt_toolkit.key_binding": MagicMock(),`
`43`	`43`	`"prompt_toolkit.completion": MagicMock(),`
`44`	`44`	`"prompt_toolkit.formatted_text": MagicMock(),`
	`45`	`+ "prompt_toolkit.auto_suggest": MagicMock(),`
`45`	`46`	`}`
`46`	`47`	`with patch.dict(sys.modules, prompt_toolkit_stubs), \`
`47`	`48`	`patch.dict("os.environ", clean_env, clear=False):`