Skip to content

Commit b7b5856

Browse files
authored
Merge pull request NousResearch#2110 from NousResearch/hermes/hermes-5d6932ba
fix: session reset + custom provider model switch + honcho base_url
2 parents aa64163 + 4494c0b commit b7b5856

File tree

11 files changed (+214 -28 lines changed)

agent/prompt_builder.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -206,11 +206,11 @@ def _strip_yaml_frontmatter(content: str) -> str:
206206
"contextually appropriate."
207207
),
208208
"cron": (
209-
"You are running as a scheduled cron job. Your final response is automatically "
210-
"delivered to the job's configured destination, so do not use send_message to "
211-
"send to that same target again. If you want the user to receive something in "
212-
"the scheduled destination, put it directly in your final response. Use "
213-
"send_message only for additional or different targets."
209+
"You are running as a scheduled cron job. There is no user present — you "
210+
"cannot ask questions, request clarification, or wait for follow-up. Execute "
211+
"the task fully and autonomously, making reasonable decisions where needed. "
212+
"Your final response is automatically delivered to the job's configured "
213+
"destination — put the primary content directly in your response."
214214
),
215215
"cli": (
216216
"You are a CLI AI Agent. Try not to use markdown but simple text "

cli.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3517,8 +3517,17 @@ def process_command(self, command: str) -> bool:
35173517
# Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
35183518
current_provider = self.provider or self.requested_provider or "openrouter"
35193519
target_provider, new_model = parse_model_input(raw_input, current_provider)
3520-
# Auto-detect provider when no explicit provider:model syntax was used
3521-
if target_provider == current_provider:
3520+
# Auto-detect provider when no explicit provider:model syntax was used.
3521+
# Skip auto-detection for custom providers — the model name might
3522+
# coincidentally match a known provider's catalog, but the user
3523+
# intends to use it on their custom endpoint. Require explicit
3524+
# provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
3525+
# to switch away from a custom endpoint.
3526+
_base = self.base_url or ""
3527+
is_custom = current_provider == "custom" or (
3528+
"localhost" in _base or "127.0.0.1" in _base
3529+
)
3530+
if target_provider == current_provider and not is_custom:
35223531
from hermes_cli.models import detect_provider_for_model
35233532
detected = detect_provider_for_model(new_model, current_provider)
35243533
if detected:
@@ -3586,6 +3595,13 @@ def process_command(self, command: str) -> bool:
35863595
if message:
35873596
print(f" Reason: {message}")
35883597
print(" Note: Model will revert on restart. Use a verified model to save to config.")
3598+
3599+
# Helpful hint when staying on a custom endpoint
3600+
if is_custom and not provider_changed:
3601+
endpoint = self.base_url or "custom endpoint"
3602+
print(f" Endpoint: {endpoint}")
3603+
print(f" Tip: To switch providers, use /model provider:model")
3604+
print(f" e.g. /model openai-codex:gpt-5.2-codex")
35893605
else:
35903606
self._show_model_and_providers()
35913607
elif canonical == "provider":

cron/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -391,7 +391,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
391391
providers_ignored=pr.get("ignore"),
392392
providers_order=pr.get("order"),
393393
provider_sort=pr.get("sort"),
394-
disabled_toolsets=["cronjob"],
394+
disabled_toolsets=["cronjob", "messaging", "clarify"],
395395
quiet_mode=True,
396396
platform="cron",
397397
session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",

hermes_cli/config.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -670,6 +670,11 @@ def ensure_hermes_home():
670670
"password": True,
671671
"category": "tool",
672672
},
673+
"HONCHO_BASE_URL": {
674+
"description": "Base URL for self-hosted Honcho instances (no API key needed)",
675+
"prompt": "Honcho base URL (e.g. http://localhost:8000)",
676+
"category": "tool",
677+
},
673678

674679
# ── Messaging platforms ──
675680
"TELEGRAM_BOT_TOKEN": {

hermes_cli/runtime_provider.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str:
2424
return value.strip().lower().replace(" ", "-")
2525

2626

27+
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
28+
"""Auto-detect api_mode from the resolved base URL.
29+
30+
Direct api.openai.com endpoints need the Responses API for GPT-5.x
31+
tool calls with reasoning (chat/completions returns 400).
32+
"""
33+
normalized = (base_url or "").strip().lower().rstrip("/")
34+
if "api.openai.com" in normalized and "openrouter" not in normalized:
35+
return "codex_responses"
36+
return None
37+
38+
2739
def _auto_detect_local_model(base_url: str) -> str:
2840
"""Query a local server for its model name when only one model is loaded."""
2941
if not base_url:
@@ -185,7 +197,9 @@ def _resolve_named_custom_runtime(
185197

186198
return {
187199
"provider": "openrouter",
188-
"api_mode": custom_provider.get("api_mode", "chat_completions"),
200+
"api_mode": custom_provider.get("api_mode")
201+
or _detect_api_mode_for_url(base_url)
202+
or "chat_completions",
189203
"base_url": base_url,
190204
"api_key": api_key,
191205
"source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
@@ -263,7 +277,9 @@ def _resolve_openrouter_runtime(
263277

264278
return {
265279
"provider": "openrouter",
266-
"api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
280+
"api_mode": _parse_api_mode(model_cfg.get("api_mode"))
281+
or _detect_api_mode_for_url(base_url)
282+
or "chat_completions",
267283
"base_url": base_url,
268284
"api_key": api_key,
269285
"source": source,

honcho_integration/client.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -117,11 +117,13 @@ def peer_memory_mode(self, peer_name: str) -> str:
117117
def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
118118
"""Create config from environment variables (fallback)."""
119119
api_key = os.environ.get("HONCHO_API_KEY")
120+
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
120121
return cls(
121122
workspace_id=workspace_id,
122123
api_key=api_key,
123124
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
124-
enabled=bool(api_key),
125+
base_url=base_url,
126+
enabled=bool(api_key or base_url),
125127
)
126128

127129
@classmethod
@@ -171,17 +173,23 @@ def from_global_config(
171173
or raw.get("environment", "production")
172174
)
173175

174-
# Auto-enable when API key is present (unless explicitly disabled)
175-
# Host-level enabled wins, then root-level, then auto-enable if key exists.
176+
base_url = (
177+
raw.get("baseUrl")
178+
or os.environ.get("HONCHO_BASE_URL", "").strip()
179+
or None
180+
)
181+
182+
# Auto-enable when API key or base_url is present (unless explicitly disabled)
183+
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
176184
host_enabled = host_block.get("enabled")
177185
root_enabled = raw.get("enabled")
178186
if host_enabled is not None:
179187
enabled = host_enabled
180188
elif root_enabled is not None:
181189
enabled = root_enabled
182190
else:
183-
# Not explicitly set anywhere -> auto-enable if API key exists
184-
enabled = bool(api_key)
191+
# Not explicitly set anywhere -> auto-enable if API key or base_url exists
192+
enabled = bool(api_key or base_url)
185193

186194
# write_frequency: accept int or string
187195
raw_wf = (
@@ -214,6 +222,7 @@ def from_global_config(
214222
workspace_id=workspace,
215223
api_key=api_key,
216224
environment=environment,
225+
base_url=base_url,
217226
peer_name=host_block.get("peerName") or raw.get("peerName"),
218227
ai_peer=ai_peer,
219228
linked_hosts=linked_hosts,
@@ -348,11 +357,12 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
348357
if config is None:
349358
config = HonchoClientConfig.from_global_config()
350359

351-
if not config.api_key:
360+
if not config.api_key and not config.base_url:
352361
raise ValueError(
353362
"Honcho API key not found. "
354363
"Get your API key at https://app.honcho.dev, "
355-
"then run 'hermes honcho setup' or set HONCHO_API_KEY."
364+
"then run 'hermes honcho setup' or set HONCHO_API_KEY. "
365+
"For local instances, set HONCHO_BASE_URL instead."
356366
)
357367

358368
try:

run_agent.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -501,6 +501,12 @@ def __init__(
501501
else:
502502
self.api_mode = "chat_completions"
503503

504+
# Direct OpenAI sessions use the Responses API path. GPT-5.x tool
505+
# calls with reasoning are rejected on /v1/chat/completions, and
506+
# Hermes is a tool-using client by default.
507+
if self.api_mode == "chat_completions" and self._is_direct_openai_url():
508+
self.api_mode = "codex_responses"
509+
504510
# Pre-warm OpenRouter model metadata cache in a background thread.
505511
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking
506512
# HTTP request on the first API response when pricing is estimated.
@@ -1057,6 +1063,9 @@ def reset_session_state(self):
10571063
if hasattr(self, "context_compressor") and self.context_compressor:
10581064
self.context_compressor.last_prompt_tokens = 0
10591065
self.context_compressor.last_completion_tokens = 0
1066+
self.context_compressor.last_total_tokens = 0
1067+
self.context_compressor.compression_count = 0
1068+
self.context_compressor._context_probed = False
10601069

10611070
@staticmethod
10621071
def _safe_print(*args, **kwargs):
@@ -1085,18 +1094,19 @@ def _vprint(self, *args, force: bool = False, **kwargs):
10851094
return
10861095
self._safe_print(*args, **kwargs)
10871096

1097+
def _is_direct_openai_url(self, base_url: str = None) -> bool:
1098+
"""Return True when a base URL targets OpenAI's native API."""
1099+
url = (base_url or self._base_url_lower).lower()
1100+
return "api.openai.com" in url and "openrouter" not in url
1101+
10881102
def _max_tokens_param(self, value: int) -> dict:
10891103
"""Return the correct max tokens kwarg for the current provider.
10901104
10911105
OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
10921106
'max_completion_tokens'. OpenRouter, local models, and older
10931107
OpenAI models use 'max_tokens'.
10941108
"""
1095-
_is_direct_openai = (
1096-
"api.openai.com" in self._base_url_lower
1097-
and "openrouter" not in self._base_url_lower
1098-
)
1099-
if _is_direct_openai:
1109+
if self._is_direct_openai_url():
11001110
return {"max_completion_tokens": value}
11011111
return {"max_tokens": value}
11021112

@@ -3558,13 +3568,15 @@ def _try_activate_fallback(self) -> bool:
35583568
fb_provider)
35593569
return False
35603570

3561-
# Determine api_mode from provider
3571+
# Determine api_mode from provider / base URL
35623572
fb_api_mode = "chat_completions"
35633573
fb_base_url = str(fb_client.base_url)
35643574
if fb_provider == "openai-codex":
35653575
fb_api_mode = "codex_responses"
35663576
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
35673577
fb_api_mode = "anthropic_messages"
3578+
elif self._is_direct_openai_url(fb_base_url):
3579+
fb_api_mode = "codex_responses"
35683580

35693581
old_model = self.model
35703582
self.model = fb_model

tests/honcho_integration/test_client.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,21 @@ def test_custom_workspace(self):
6060
config = HonchoClientConfig.from_env(workspace_id="custom")
6161
assert config.workspace_id == "custom"
6262

63+
def test_reads_base_url_from_env(self):
64+
with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
65+
config = HonchoClientConfig.from_env()
66+
assert config.base_url == "http://localhost:8000"
67+
assert config.enabled is True
68+
69+
def test_enabled_without_api_key_when_base_url_set(self):
70+
"""base_url alone (no API key) is sufficient to enable a local instance."""
71+
with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
72+
os.environ.pop("HONCHO_API_KEY", None)
73+
config = HonchoClientConfig.from_env()
74+
assert config.api_key is None
75+
assert config.base_url == "http://localhost:8000"
76+
assert config.enabled is True
77+
6378

6479
class TestFromGlobalConfig:
6580
def test_missing_config_falls_back_to_env(self, tmp_path):
@@ -188,6 +203,36 @@ def test_api_key_env_fallback(self, tmp_path):
188203
config = HonchoClientConfig.from_global_config(config_path=config_file)
189204
assert config.api_key == "env-key"
190205

206+
def test_base_url_env_fallback(self, tmp_path):
207+
"""HONCHO_BASE_URL env var is used when no baseUrl in config JSON."""
208+
config_file = tmp_path / "config.json"
209+
config_file.write_text(json.dumps({"workspace": "local"}))
210+
211+
with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
212+
config = HonchoClientConfig.from_global_config(config_path=config_file)
213+
assert config.base_url == "http://localhost:8000"
214+
assert config.enabled is True
215+
216+
def test_base_url_from_config_root(self, tmp_path):
217+
"""baseUrl in config root is read and takes precedence over env var."""
218+
config_file = tmp_path / "config.json"
219+
config_file.write_text(json.dumps({"baseUrl": "http://config-host:9000"}))
220+
221+
with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
222+
config = HonchoClientConfig.from_global_config(config_path=config_file)
223+
assert config.base_url == "http://config-host:9000"
224+
225+
def test_base_url_not_read_from_host_block(self, tmp_path):
226+
"""baseUrl is a root-level connection setting, not overridable per-host (consistent with apiKey)."""
227+
config_file = tmp_path / "config.json"
228+
config_file.write_text(json.dumps({
229+
"baseUrl": "http://root:9000",
230+
"hosts": {"hermes": {"baseUrl": "http://host-block:9001"}},
231+
}))
232+
233+
config = HonchoClientConfig.from_global_config(config_path=config_file)
234+
assert config.base_url == "http://root:9000"
235+
191236

192237
class TestResolveSessionName:
193238
def test_manual_override(self):

tests/test_cli_init.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
4242
"prompt_toolkit.key_binding": MagicMock(),
4343
"prompt_toolkit.completion": MagicMock(),
4444
"prompt_toolkit.formatted_text": MagicMock(),
45+
"prompt_toolkit.auto_suggest": MagicMock(),
4546
}
4647
with patch.dict(sys.modules, prompt_toolkit_stubs), \
4748
patch.dict("os.environ", clean_env, clear=False):

0 commit comments

Comments
 (0)