|
| 1 | +"""OpenAI-compatible bridge from Hermes-native envs to benchmark harnesses. |
| 2 | +
|
| 3 | +The Hermes-native terminal/simulation envs drive their rollout loop through an |
| 4 | +OpenAI chat-completions endpoint. For cross-harness runs we keep that real env |
| 5 | +and scorer, but point the model endpoint at this local bridge so model turns |
| 6 | +are answered by the selected Eliza/OpenClaw adapter instead of bypassing the |
| 7 | +harness label. |
| 8 | +""" |
| 9 | + |
| 10 | +from __future__ import annotations |
| 11 | + |
| 12 | +import json |
| 13 | +import os |
| 14 | +import socket |
| 15 | +import threading |
| 16 | +import time |
| 17 | +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer |
| 18 | +from typing import Any, Mapping |
| 19 | + |
| 20 | + |
class HarnessOpenAIProxy:
    """Small local ``/chat/completions`` server backed by a harness client.

    The proxy listens on ``127.0.0.1`` and answers OpenAI-style chat-completion
    requests by forwarding the latest user turn (plus the full request context)
    to the selected harness client's ``send_message``.

    Lifecycle: ``start()`` builds the harness client and begins serving in a
    daemon thread; ``stop()`` shuts the HTTP server down and stops any harness
    server that ``start()`` launched. ``base_url`` holds the ``/v1`` endpoint
    once the server is bound.
    """

    def __init__(
        self,
        *,
        harness: str,
        provider: str,
        model: str,
        upstream_base_url: str | None = None,
    ) -> None:
        """Validate the configuration; nothing is started here.

        Args:
            harness: ``"eliza"`` or ``"openclaw"`` (case/whitespace-insensitive).
            provider: Provider name; an empty value falls back to ``"cerebras"``.
            model: Model name echoed back in every completion response.
            upstream_base_url: Optional base URL handed to the harness client
                factory.

        Raises:
            ValueError: If ``harness`` is not one of the supported names.
        """
        harness = harness.strip().lower()
        if harness not in {"eliza", "openclaw"}:
            raise ValueError(f"unsupported proxy harness: {harness!r}")
        self.harness = harness
        self.provider = provider or "cerebras"
        self.model = model
        self.upstream_base_url = upstream_base_url
        self._client: Any | None = None
        self._server_handle: Any | None = None
        self._httpd: ThreadingHTTPServer | None = None
        self._thread: threading.Thread | None = None
        self.base_url: str | None = None

    def start(self) -> "HarnessOpenAIProxy":
        """Build the harness client and start serving on a loopback port.

        Calling ``start()`` on an already running proxy is a no-op, so a
        repeated call cannot leak a second server. If the HTTP server cannot
        be brought up, everything created so far is torn down before the
        error propagates.

        Returns:
            ``self``, so the call can be chained.
        """
        if self._httpd is not None:
            return self

        self._client, self._server_handle = _build_client(
            harness=self.harness,
            provider=self.provider,
            model=self.model,
            upstream_base_url=self.upstream_base_url,
        )

        proxy = self

        class Handler(BaseHTTPRequestHandler):
            protocol_version = "HTTP/1.1"

            def do_GET(self) -> None:  # noqa: N802
                if self.path.rstrip("/") in {"", "/v1", "/health"}:
                    self._write_json({"status": "ok", "harness": proxy.harness})
                    return
                self.send_error(404)

            def do_POST(self) -> None:  # noqa: N802
                path = self.path.rstrip("/")
                if path not in {"/chat/completions", "/v1/chat/completions"}:
                    self.send_error(404)
                    return
                try:
                    payload = self._read_json()
                except ValueError as exc:
                    # Bad Content-Length, undecodable bytes, invalid JSON or a
                    # non-object body: the client's fault, so answer 400. The
                    # body may not have been consumed, so the connection is
                    # closed rather than reused.
                    self._write_error(
                        exc,
                        error_type="invalid_request_error",
                        status=400,
                        close=True,
                    )
                    return
                try:
                    # Encode inside the try so serialisation problems are still
                    # reported as a JSON 500, but keep the socket write outside
                    # so a failed write is never followed by a second one.
                    body = self._encode(proxy.complete(payload))
                except Exception as exc:  # noqa: BLE001
                    self._write_error(
                        exc,
                        error_type="harness_proxy_error",
                        status=500,
                    )
                    return
                self._write_body(body)

            def log_message(self, format: str, *args: Any) -> None:
                # Silence the default per-request stderr logging.
                return

            def _read_json(self) -> dict[str, Any]:
                length = int(self.headers.get("Content-Length") or "0")
                if length < 0:
                    # ``rfile.read`` with a negative size would block until EOF.
                    raise ValueError("Content-Length must not be negative")
                raw = self.rfile.read(length) if length else b"{}"
                data = json.loads(raw.decode("utf-8"))
                if not isinstance(data, dict):
                    raise ValueError("request body must be a JSON object")
                return data

            @staticmethod
            def _encode(payload: Mapping[str, Any]) -> bytes:
                return json.dumps(payload, ensure_ascii=True).encode("utf-8")

            def _write_body(
                self,
                body: bytes,
                *,
                status: int = 200,
                close: bool = False,
            ) -> None:
                self.send_response(status)
                self.send_header("Content-Type", "application/json")
                self.send_header("Content-Length", str(len(body)))
                if close:
                    self.send_header("Connection", "close")
                self.end_headers()
                self.wfile.write(body)

            def _write_json(self, payload: Mapping[str, Any], *, status: int = 200) -> None:
                self._write_body(self._encode(payload), status=status)

            def _write_error(
                self,
                exc: BaseException,
                *,
                error_type: str,
                status: int,
                close: bool = False,
            ) -> None:
                body = self._encode(
                    {
                        "error": {
                            "message": f"{exc.__class__.__name__}: {exc}",
                            "type": error_type,
                        }
                    }
                )
                self._write_body(body, status=status, close=close)

        host = "127.0.0.1"
        try:
            # Bind to port 0 so the OS assigns the port atomically; probing for
            # a free port first and re-binding leaves a window in which another
            # process can claim it.
            self._httpd = ThreadingHTTPServer((host, 0), Handler)
            self.base_url = f"http://{host}:{self._httpd.server_port}/v1"
            self._thread = threading.Thread(
                target=self._httpd.serve_forever,
                name=f"{self.harness}-openai-proxy",
                daemon=True,
            )
            self._thread.start()
        except BaseException:
            # Do not leak the harness server (or a half-built HTTP server).
            self.stop()
            raise
        return self

    def stop(self) -> None:
        """Stop the HTTP server and any harness server started by ``start()``.

        Safe to call repeatedly and on a proxy that never started. The harness
        server handle is stopped even if shutting the HTTP server down fails.
        """
        httpd, thread, handle = self._httpd, self._thread, self._server_handle
        self._httpd = None
        self._thread = None
        # ``shutdown()`` waits for ``serve_forever`` to finish and ``join()``
        # rejects a thread that was never started, so only use them when the
        # serve thread is actually running.
        serving = thread is not None and thread.is_alive()
        try:
            if httpd is not None:
                try:
                    if serving:
                        httpd.shutdown()
                finally:
                    httpd.server_close()
            if serving:
                thread.join(timeout=5)
        finally:
            self._server_handle = None
            self._client = None
            stop = getattr(handle, "stop", None)
            if callable(stop):
                stop()

    def complete(self, payload: Mapping[str, Any]) -> dict[str, Any]:
        """Answer one chat-completion request through the harness client.

        Args:
            payload: Decoded request body. ``messages``, ``tools``,
                ``tool_choice``, ``temperature`` and ``max_tokens`` are
                forwarded to the client in the ``context`` dict.

        Returns:
            A ``chat.completion`` response dict with a single choice.

        Raises:
            RuntimeError: If the proxy has not been started.
        """
        if self._client is None:
            raise RuntimeError("proxy client is not started")
        messages = _messages(payload.get("messages"))
        text = _last_user_text(messages)
        tools = payload.get("tools")
        context: dict[str, Any] = {
            "benchmark": "hermes_native_env",
            "source_benchmark": "hermes_native_env",
            "harness_proxy": self.harness,
            "messages": messages,
            "tools": tools if isinstance(tools, list) else [],
            "tool_choice": payload.get("tool_choice"),
            "temperature": payload.get("temperature"),
            "max_tokens": payload.get("max_tokens"),
        }
        response = self._client.send_message(text, context=context)
        content = str(getattr(response, "text", "") or "")
        params = getattr(response, "params", None) or {}
        if not callable(getattr(params, "get", None)):
            # A response whose ``params`` is not dict-like carries no tool
            # calls or usage we can read; treat it as empty instead of failing.
            params = {}
        tool_calls = _normalize_tool_calls(params.get("tool_calls"))
        message: dict[str, Any] = {"role": "assistant", "content": content}
        if tool_calls:
            message["tool_calls"] = tool_calls
            message["content"] = content or None
        # Nanosecond stamp keeps ids distinct for requests handled within the
        # same second; ``created`` stays in whole seconds.
        stamp = time.time_ns()
        return {
            "id": f"chatcmpl-{self.harness}-{stamp}",
            "object": "chat.completion",
            "created": stamp // 1_000_000_000,
            "model": self.model,
            "choices": [
                {
                    "index": 0,
                    "message": message,
                    "finish_reason": "tool_calls" if tool_calls else "stop",
                }
            ],
            "usage": _usage(params.get("usage")),
        }
| 160 | + |
| 161 | + |
| 162 | +def _build_client( |
| 163 | + *, |
| 164 | + harness: str, |
| 165 | + provider: str, |
| 166 | + model: str, |
| 167 | + upstream_base_url: str | None, |
| 168 | +) -> tuple[Any, Any | None]: |
| 169 | + if harness == "eliza": |
| 170 | + from eliza_adapter import ElizaClient, ElizaServerManager # noqa: WPS433 |
| 171 | + |
| 172 | + if not os.environ.get("ELIZA_BENCH_URL"): |
| 173 | + server = ElizaServerManager() |
| 174 | + server.start() |
| 175 | + return server.client, server |
| 176 | + client = ElizaClient() |
| 177 | + client.wait_until_ready(timeout=180) |
| 178 | + return client, None |
| 179 | + if harness == "openclaw": |
| 180 | + from openclaw_adapter.client import OpenClawClient # noqa: WPS433 |
| 181 | + |
| 182 | + return ( |
| 183 | + OpenClawClient( |
| 184 | + provider=provider or "cerebras", |
| 185 | + model=model, |
| 186 | + base_url=upstream_base_url, |
| 187 | + direct_openai_compatible=True, |
| 188 | + ), |
| 189 | + None, |
| 190 | + ) |
| 191 | + raise ValueError(f"unsupported proxy harness: {harness!r}") |
| 192 | + |
| 193 | + |
| 194 | +def _free_port(host: str) -> int: |
| 195 | + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: |
| 196 | + sock.bind((host, 0)) |
| 197 | + return int(sock.getsockname()[1]) |
| 198 | + |
| 199 | + |
| 200 | +def _messages(value: object) -> list[dict[str, Any]]: |
| 201 | + if not isinstance(value, list): |
| 202 | + return [] |
| 203 | + rows: list[dict[str, Any]] = [] |
| 204 | + for item in value: |
| 205 | + if isinstance(item, dict): |
| 206 | + rows.append(dict(item)) |
| 207 | + return rows |
| 208 | + |
| 209 | + |
| 210 | +def _last_user_text(messages: list[dict[str, Any]]) -> str: |
| 211 | + for message in reversed(messages): |
| 212 | + if message.get("role") == "user": |
| 213 | + content = message.get("content") |
| 214 | + if isinstance(content, str): |
| 215 | + return content |
| 216 | + if isinstance(content, list): |
| 217 | + parts: list[str] = [] |
| 218 | + for part in content: |
| 219 | + if isinstance(part, dict) and isinstance(part.get("text"), str): |
| 220 | + parts.append(part["text"]) |
| 221 | + return "\n".join(parts) |
| 222 | + return json.dumps(messages, ensure_ascii=True) |
| 223 | + |
| 224 | + |
| 225 | +def _normalize_tool_calls(value: object) -> list[dict[str, Any]]: |
| 226 | + if not isinstance(value, list): |
| 227 | + return [] |
| 228 | + calls: list[dict[str, Any]] = [] |
| 229 | + for index, item in enumerate(value): |
| 230 | + if not isinstance(item, dict): |
| 231 | + continue |
| 232 | + function = item.get("function") |
| 233 | + if not isinstance(function, dict): |
| 234 | + name = item.get("name") |
| 235 | + arguments = item.get("arguments") |
| 236 | + if isinstance(name, str): |
| 237 | + function = { |
| 238 | + "name": name, |
| 239 | + "arguments": arguments if isinstance(arguments, str) else json.dumps(arguments or {}), |
| 240 | + } |
| 241 | + if not isinstance(function, dict) or not isinstance(function.get("name"), str): |
| 242 | + continue |
| 243 | + arguments = function.get("arguments") |
| 244 | + calls.append( |
| 245 | + { |
| 246 | + "id": str(item.get("id") or f"call_{index}"), |
| 247 | + "type": "function", |
| 248 | + "function": { |
| 249 | + "name": function["name"], |
| 250 | + "arguments": arguments if isinstance(arguments, str) else json.dumps(arguments or {}), |
| 251 | + }, |
| 252 | + } |
| 253 | + ) |
| 254 | + return calls |
| 255 | + |
| 256 | + |
| 257 | +def _usage(value: object) -> dict[str, int]: |
| 258 | + if not isinstance(value, dict): |
| 259 | + return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} |
| 260 | + usage: dict[str, int] = {} |
| 261 | + for key in ("prompt_tokens", "completion_tokens", "total_tokens"): |
| 262 | + raw = value.get(key) |
| 263 | + usage[key] = int(raw) if isinstance(raw, (int, float)) else 0 |
| 264 | + return usage |
| 265 | + |
| 266 | + |
| 267 | +__all__ = ["HarnessOpenAIProxy"] |
0 commit comments