Skip to content

Commit 5528e2c

Browse files
Merge pull request nesquena#3041 from nesquena/release/stage-batch34
stage-batch34: v0.51.152 / Release DX — optional gateway-backed browser chat
2 parents 4d6269e + bd7576b commit 5528e2c

5 files changed

Lines changed: 478 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33

44
## [Unreleased]
55

6+
## [v0.51.152] — 2026-05-28 — Release DX (stage-batch34 — single-PR optional gateway-backed browser chat)
7+
8+
### Added
9+
10+
- Browser chat can now opt into a default-off `HERMES_WEBUI_CHAT_BACKEND=gateway` bridge that routes new WebUI turns through a running Hermes Gateway API server while preserving the existing WebUI chat start/stream contract. Strict enable: only the literal values `gateway`, `api_server`, or `api-server` activate the bridge — generic truthy strings like `1` or `true` keep the legacy in-process WebUI runtime. Configurable via `HERMES_WEBUI_GATEWAY_BASE_URL` (default `http://127.0.0.1:8642`) and `HERMES_WEBUI_GATEWAY_API_KEY` (falls back to `API_SERVER_KEY`). New `api/gateway_chat.py` module isolates the bridge logic; existing direct WebUI chat path unchanged when the env/config is not set. (#3021)
11+
612
## [v0.51.151] — 2026-05-28 — Release DW (stage-batch33 — 3-PR mid-risk batch: SSE reattach + title-lang + composer cap)
713

814
### Fixed

README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,32 @@ at 256 KiB before parsing. The browser only receives a compact status event
162162
(`source`, `label`, message count, and redacted errors), never the prefill
163163
message bodies.
164164

165+
### Optional Gateway-backed browser chat
166+
167+
By default, browser chat runs through WebUI's in-process legacy runtime. Advanced
168+
self-hosted deployments can opt into routing new browser turns through a running
169+
Hermes Gateway API server while preserving the existing WebUI `/api/chat/start`
170+
and `/api/chat/stream` browser contract:
171+
172+
```bash
173+
HERMES_WEBUI_CHAT_BACKEND=gateway \
174+
HERMES_WEBUI_GATEWAY_BASE_URL=http://127.0.0.1:8642 \
175+
HERMES_WEBUI_GATEWAY_API_KEY=... \
176+
./ctl.sh restart
177+
```
178+
179+
`HERMES_WEBUI_CHAT_BACKEND` is intentionally strict: only `gateway`,
180+
`api_server`, or `api-server` enable the bridge. Generic truthy values such as
181+
`1` or `true` are ignored so existing deployments do not change execution
182+
ownership accidentally. If `HERMES_WEBUI_GATEWAY_API_KEY` is omitted, WebUI falls
183+
back to `API_SERVER_KEY` when present.
184+
185+
The bridge is best used by operators who already run Hermes Gateway/API Server
186+
locally and want browser-originated chat to use the same runtime/tool path as
187+
messaging surfaces. Attachments, cancellation, approvals, and clarify prompts
188+
still follow WebUI's current compatibility path and may not match every messaging
189+
surface until the runtime-adapter migration is complete.
190+
165191
The bootstrap will:
166192

167193
1. Detect Hermes Agent and, if missing, attempt the official installer (`curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash`).

api/gateway_chat.py

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
"""Default-off Hermes Gateway bridge for browser-originated chat turns."""
2+
from __future__ import annotations
3+
4+
import json
5+
import logging
6+
import os
7+
import threading
8+
import time
9+
import urllib.error
10+
import urllib.request
11+
from typing import Any
12+
13+
from api.config import (
14+
CANCEL_FLAGS,
15+
STREAMS,
16+
STREAMS_LOCK,
17+
STREAM_LAST_EVENT_ID,
18+
STREAM_LIVE_TOOL_CALLS,
19+
STREAM_PARTIAL_TEXT,
20+
STREAM_REASONING_TEXT,
21+
_get_session_agent_lock,
22+
register_active_run,
23+
unregister_active_run,
24+
update_active_run,
25+
)
26+
from api.helpers import _redact_text, redact_session_data
27+
from api.models import get_session
28+
from api.run_journal import RunJournalWriter
29+
30+
logger = logging.getLogger(__name__)
31+
32+
_WEBUI_CHAT_BACKEND_ENV = "HERMES_WEBUI_CHAT_BACKEND"
33+
_WEBUI_GATEWAY_BASE_URL_ENV = "HERMES_WEBUI_GATEWAY_BASE_URL"
34+
_WEBUI_GATEWAY_API_KEY_ENV = "HERMES_WEBUI_GATEWAY_API_KEY"
35+
_GATEWAY_CHAT_BACKENDS = {"gateway", "api_server", "api-server"}
36+
37+
38+
def webui_chat_backend_mode(config_data=None, environ: dict[str, str] | None = None) -> str:
    """Resolve which backend browser chat should use: ``"gateway"`` or ``"legacy"``.

    The environment variable is consulted first; when it is unset or empty the
    ``webui_chat_backend`` key of ``config_data`` (if that is a dict) is used.
    The chosen value is stripped and lower-cased, and only values listed in
    ``_GATEWAY_CHAT_BACKENDS`` select the gateway bridge. Everything else,
    including generic truthy strings, keeps the in-process runtime so that
    deployments never switch execution ownership by accident.

    Args:
        config_data: Optional config mapping; non-dict values are ignored.
        environ: Optional environment mapping; defaults to ``os.environ``.

    Returns:
        ``"gateway"`` when explicitly opted in, otherwise ``"legacy"``.
    """
    env = environ if environ is not None else os.environ
    selected = env.get(_WEBUI_CHAT_BACKEND_ENV)
    if not selected and isinstance(config_data, dict):
        selected = config_data.get("webui_chat_backend")
    normalized = str(selected or "").strip().lower()
    return "gateway" if normalized in _GATEWAY_CHAT_BACKENDS else "legacy"
56+
57+
58+
def webui_gateway_chat_enabled(config_data=None, environ: dict[str, str] | None = None) -> bool:
    """Return True when the resolved chat backend mode is ``"gateway"``.

    Both arguments are forwarded unchanged to ``webui_chat_backend_mode``.
    """
    mode = webui_chat_backend_mode(config_data, environ)
    return mode == "gateway"
60+
61+
62+
def _gateway_base_url(config_data=None, environ: dict[str, str] | None = None) -> str:
    """Return the Gateway API server base URL without a trailing slash.

    Lookup order: the base-URL environment variable, then the
    ``webui_gateway_base_url`` key of ``config_data`` (when it is a dict),
    then the built-in loopback default. If stripping whitespace and trailing
    slashes leaves nothing, the default is returned as well.

    Args:
        config_data: Optional config mapping; non-dict values are ignored.
        environ: Optional environment mapping; defaults to ``os.environ``.
    """
    default_url = "http://127.0.0.1:8642"
    env = environ if environ is not None else os.environ
    candidate = env.get(_WEBUI_GATEWAY_BASE_URL_ENV)
    if not candidate and isinstance(config_data, dict):
        candidate = config_data.get("webui_gateway_base_url")
    trimmed = str(candidate or default_url).strip().rstrip("/")
    return trimmed if trimmed else default_url
71+
72+
73+
def _gateway_api_key(environ: dict[str, str] | None = None) -> str:
    """Return the API key to present to the Gateway, or ``""`` if none is set.

    The WebUI-specific variable wins; ``API_SERVER_KEY`` is the fallback. The
    first non-empty value found is returned with surrounding whitespace
    stripped.

    Args:
        environ: Optional environment mapping; defaults to ``os.environ``.
    """
    env = environ if environ is not None else os.environ
    for name in (_WEBUI_GATEWAY_API_KEY_ENV, "API_SERVER_KEY"):
        value = env.get(name)
        if value:
            return str(value).strip()
    return ""
80+
81+
82+
def _gateway_sse_delta(payload: dict) -> str:
83+
"""Extract assistant text from an OpenAI-compatible streaming chunk."""
84+
try:
85+
choices = payload.get("choices") or []
86+
if not choices:
87+
return ""
88+
choice = choices[0] or {}
89+
delta = choice.get("delta") or {}
90+
content = delta.get("content")
91+
if isinstance(content, str):
92+
return content
93+
message = choice.get("message") or {}
94+
content = message.get("content")
95+
return content if isinstance(content, str) else ""
96+
except Exception:
97+
return ""
98+
99+
100+
def _gateway_stream_usage(payload: dict) -> dict:
101+
usage = payload.get("usage") if isinstance(payload, dict) else None
102+
if not isinstance(usage, dict):
103+
return {}
104+
return {
105+
"input_tokens": int(usage.get("prompt_tokens") or usage.get("input_tokens") or 0),
106+
"output_tokens": int(usage.get("completion_tokens") or usage.get("output_tokens") or 0),
107+
"estimated_cost": usage.get("estimated_cost") or usage.get("estimated_cost_usd") or 0,
108+
}
109+
110+
111+
def _stream_writeback_is_current(session: Any, stream_id: str) -> bool:
112+
return bool(stream_id and getattr(session, "active_stream_id", None) == stream_id)
113+
114+
115+
def _clear_gateway_pending_state(session: Any, stream_id: str) -> None:
    """Reset the session's in-flight turn markers if this stream still owns them.

    When ``stream_id`` is no longer the session's active stream nothing is
    touched. Otherwise the active-stream and pending-turn fields are set to
    None and the session is saved.
    """
    if _stream_writeback_is_current(session, stream_id):
        for field in (
            "active_stream_id",
            "pending_user_message",
            "pending_attachments",
            "pending_started_at",
        ):
            setattr(session, field, None)
        session.save()
123+
124+
125+
def _run_gateway_chat_streaming(
    session_id,
    msg_text,
    model,
    workspace,
    stream_id,
    attachments=None,
    *,
    model_provider=None,
):
    """Bridge a WebUI chat turn through Hermes Gateway's API server.

    This default-off path keeps the browser contract unchanged: /api/chat/start
    still returns a local stream_id and /api/chat/stream still receives WebUI SSE
    event names. The worker translates OpenAI-compatible streaming chunks from
    the configured Gateway API server into those local events and persists the
    final user/assistant turn back into the WebUI session.

    Events placed on the stream queue: ``token`` for each text delta,
    ``cancel`` when the cancel flag is observed, ``apperror`` for an empty
    response or a failed request, and ``done`` followed by ``stream_end`` on
    success.

    Args:
        session_id: WebUI session the turn belongs to.
        msg_text: User message text; sent as the single ``user`` message.
        model: Model name forwarded to the Gateway (``"default"`` when falsy).
        workspace: Workspace recorded on the active run and on the session.
        stream_id: Local stream id; its queue must already be in ``STREAMS``,
            otherwise the function returns immediately.
        attachments: Optional attachments stored on the persisted user
            message. They are not included in the Gateway request body.
        model_provider: Optional provider forwarded as ``provider``.

    Returns:
        None. Failures inside the request/persist phase are reported to the
        browser as ``apperror`` events instead of being raised.
    """
    q = STREAMS.get(stream_id)
    if q is None:
        # No queue registered for this stream, so nobody could receive events.
        return
    register_active_run(
        stream_id,
        session_id=session_id,
        started_at=time.time(),
        phase="gateway-starting",
        workspace=str(workspace),
        model=model,
        provider=model_provider,
        backend="gateway",
    )
    try:
        run_journal = RunJournalWriter(session_id, stream_id)
    except Exception:
        # Journaling is best-effort; the turn still streams without it.
        run_journal = None
        logger.debug("Failed to initialize gateway run journal for stream %s", stream_id, exc_info=True)
    cancel_event = threading.Event()
    with STREAMS_LOCK:
        CANCEL_FLAGS[stream_id] = cancel_event
        STREAM_PARTIAL_TEXT[stream_id] = ""
        STREAM_REASONING_TEXT[stream_id] = ""
        STREAM_LIVE_TOOL_CALLS[stream_id] = []

    def put_gateway_event(event, data):
        """Journal (best-effort) and enqueue one WebUI SSE event."""
        # After cancellation only terminal/diagnostic events may still go out.
        if cancel_event.is_set() and event not in ("cancel", "error", "apperror"):
            return
        if run_journal is not None:
            try:
                journaled = run_journal.append_sse_event(event, data)
                event_id = (journaled or {}).get("event_id") if isinstance(journaled, dict) else None
                if event_id:
                    STREAM_LAST_EVENT_ID[stream_id] = event_id
            except Exception:
                logger.debug("Failed to append gateway event %s for stream %s", event, stream_id, exc_info=True)
        try:
            q.put_nowait((event, data))
        except Exception:
            logger.debug("Failed to put gateway event to queue", exc_info=True)

    s = None
    text_chunks = []
    usage = {"input_tokens": 0, "output_tokens": 0, "estimated_cost": 0}
    try:
        s = get_session(session_id)
        from api.config import get_config  # imported lazily to avoid config-cycle churn

        cfg = get_config()
        base_url = _gateway_base_url(cfg)
        api_key = _gateway_api_key()
        url = f"{base_url}/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Accept": "text/event-stream",
            "X-Hermes-Session-Id": session_id,
        }
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
            # Scope Gateway long-term continuity to this WebUI conversation
            # without exposing the browser's auth cookie or CSRF material.
            headers["X-Hermes-Session-Key"] = f"webui:{session_id}"
        body = {
            "model": model or "default",
            "stream": True,
            "messages": [{"role": "user", "content": str(msg_text or "")}],
        }
        if model_provider:
            body["provider"] = model_provider
        req = urllib.request.Request(
            url,
            data=json.dumps(body).encode("utf-8"),
            headers=headers,
            method="POST",
        )
        update_active_run(stream_id, phase="gateway-request")
        with urllib.request.urlopen(req, timeout=600) as resp:
            for raw_line in resp:
                # Cancellation is only observed when a new line arrives.
                if cancel_event.is_set():
                    put_gateway_event("cancel", {"message": "Cancelled by user"})
                    return
                line = raw_line.decode("utf-8", errors="replace").strip()
                # Only SSE "data:" lines carry chunks; skip blanks and others.
                if not line.startswith("data:"):
                    continue
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    payload = json.loads(data)
                except json.JSONDecodeError:
                    continue
                delta = _gateway_sse_delta(payload)
                if delta:
                    text_chunks.append(delta)
                    if stream_id in STREAM_PARTIAL_TEXT:
                        STREAM_PARTIAL_TEXT[stream_id] += delta
                    put_gateway_event("token", {"text": delta})
                # Keep the latest non-zero usage figures seen on any chunk.
                usage.update({k: v for k, v in _gateway_stream_usage(payload).items() if v})
        assistant_text = "".join(text_chunks).strip()
        if not assistant_text:
            put_gateway_event("apperror", {
                "label": "Gateway returned no response",
                "type": "gateway_empty_response",
                "message": "Gateway returned no assistant message for this turn.",
                "hint": "Check that Hermes Gateway API server is running and reachable.",
            })
            return
        with _get_session_agent_lock(session_id):
            # Re-read under the lock; another stream may have taken over.
            s = get_session(session_id)
            if not _stream_writeback_is_current(s, stream_id):
                return
            now = int(time.time())
            user_msg = {"role": "user", "content": str(msg_text or ""), "timestamp": now}
            if attachments:
                user_msg["attachments"] = list(attachments)
            assistant_msg = {"role": "assistant", "content": assistant_text, "timestamp": now}
            # NOTE(review): when context_messages is empty this falls back to
            # messages, which may already end with the checkpointed user turn;
            # the de-duplication below is applied to the display list only.
            # Confirm whether the context list needs the same treatment.
            previous_context = list(getattr(s, "context_messages", None) or getattr(s, "messages", None) or [])
            s.context_messages = previous_context + [user_msg, assistant_msg]
            display = list(getattr(s, "messages", None) or [])
            # Avoid duplicating the eager-save checkpointed user message.
            if display:
                latest = display[-1]
                if isinstance(latest, dict) and latest.get("role") == "user":
                    latest_text = " ".join(str(latest.get("content") or "").split())
                    msg_norm = " ".join(str(msg_text or "").split())
                    if latest_text == msg_norm:
                        display = display[:-1]
            s.messages = display + [user_msg, assistant_msg]
            s.active_stream_id = None
            s.pending_user_message = None
            s.pending_attachments = None
            s.pending_started_at = None
            s.workspace = str(workspace)
            s.model = model
            s.model_provider = model_provider
            s.save()
            gateway_session_payload = s.compact() | {"messages": s.messages, "tool_calls": []}
            put_gateway_event("done", {"session": redact_session_data(gateway_session_payload), "usage": usage})
            put_gateway_event("stream_end", {"session_id": session_id})
    except urllib.error.HTTPError as exc:
        try:
            err_body = exc.read(2048).decode("utf-8", errors="replace")
        except Exception:
            err_body = ""
        try:
            # HTTPError doubles as the response object; release its connection.
            exc.close()
        except Exception:
            logger.debug("Failed to close gateway error response", exc_info=True)
        safe = _redact_text(err_body or str(exc))[:500]
        put_gateway_event("apperror", {
            "label": "Gateway request failed",
            "type": "gateway_http_error",
            "message": f"Gateway returned HTTP {exc.code}.",
            "hint": safe or "Check the configured Gateway API server.",
        })
    except Exception as exc:
        safe = _redact_text(str(exc))[:500]
        put_gateway_event("apperror", {
            "label": "Gateway request failed",
            "type": "gateway_error",
            "message": safe or "Gateway request failed.",
            "hint": "Check HERMES_WEBUI_GATEWAY_BASE_URL and Gateway API server health.",
        })
    finally:
        if s is not None:
            try:
                with _get_session_agent_lock(session_id):
                    _clear_gateway_pending_state(get_session(session_id), stream_id)
            except Exception:
                logger.debug("Failed to clear gateway stream state", exc_info=True)
        # Drop every per-stream registry entry created for this run.
        with STREAMS_LOCK:
            CANCEL_FLAGS.pop(stream_id, None)
            STREAM_PARTIAL_TEXT.pop(stream_id, None)
            STREAM_REASONING_TEXT.pop(stream_id, None)
            STREAM_LIVE_TOOL_CALLS.pop(stream_id, None)
            STREAM_LAST_EVENT_ID.pop(stream_id, None)
            STREAMS.pop(stream_id, None)
        unregister_active_run(stream_id)

api/routes.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2579,6 +2579,7 @@ def _keep_latest_messaging_session_per_source(
25792579
cancel_stream,
25802580
_materialize_pending_user_turn_before_error,
25812581
)
2582+
from api.gateway_chat import _run_gateway_chat_streaming, webui_gateway_chat_enabled
25822583
from api.run_journal import (
25832584
find_run_summary,
25842585
read_run_events,
@@ -8987,10 +8988,15 @@ def _start_chat_stream_for_session(
89878988
if goal_related:
89888989
STREAM_GOAL_RELATED[stream_id] = True
89898990
diag.stage("worker_thread_start") if diag else None
8991+
backend_is_gateway = webui_gateway_chat_enabled(get_config())
8992+
worker_target = _run_gateway_chat_streaming if backend_is_gateway else _run_agent_streaming
8993+
worker_kwargs = {"model_provider": model_provider}
8994+
if not backend_is_gateway:
8995+
worker_kwargs["goal_related"] = goal_related
89908996
thr = threading.Thread(
8991-
target=_run_agent_streaming,
8997+
target=worker_target,
89928998
args=(s.session_id, msg, model, workspace, stream_id, attachments),
8993-
kwargs={"model_provider": model_provider, "goal_related": goal_related},
8999+
kwargs=worker_kwargs,
89949000
daemon=True,
89959001
)
89969002
thr.start()

0 commit comments

Comments
 (0)