feat(training): Groq + OpenAI-compat teacher providers w/ retry-backoff

lalalune · claude · lalalune · commit 50e940e14577 · 2026-05-05T02:43:59.000-07:00
Adds two teacher providers to both synthesizers: - groq: https://api.groq.com/openai/v1 (GROQ_API_KEY) - openai: configurable base via OPENAI_BASE_URL (OPENAI_API_KEY) Both share a single call_openai_compat helper that: - Sends the OpenAI-compatible /v1/chat/completions request - Sets a non-default User-Agent so Cloudflare's edge doesn't 403 with the default Python-urllib UA (cf-error 1010) - Sets reasoning_effort=low so reasoning models (gpt-oss, deepseek-r1) don't burn the entire token budget on hidden reasoning before emitting content - Falls back to the reasoning field if content is empty (e.g. tight max_tokens; rare with the higher 2048 default) - Retries with exponential backoff on 429 / 5xx, honoring the Retry-After header when present Default max_tokens raised from 1024 to 2048 to fit reasoning + output for evaluator templates that emit longer JSON / TOON. Tested: --teacher-provider groq --teacher-model openai/gpt-oss-120b generates valid Phase-4 reflection records that classify into Phase 4 and pass audit_pipeline_shapes.py. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/packages/training/scripts/synthesize_evaluator_prompts.py b/packages/training/scripts/synthesize_evaluator_prompts.py
@@ -71,7 +71,7 @@
 class TeacherCfg:
     provider: str
     model: str
-    max_tokens: int = 1024
+    max_tokens: int = 2048
     temperature: float = 0.7
 
 
@@ -98,9 +98,107 @@ def call_anthropic(cfg: TeacherCfg, system: str, user: str) -> str:
     return "".join(parts).strip()
 
 
+def call_openai_compat(cfg: TeacherCfg, system: str, user: str, *,
+                       base_url: str, api_key_env: str) -> str:
+    """OpenAI-compatible /v1/chat/completions caller. Used for Groq, Together,
+    Fireworks, vLLM, LM Studio, Ollama — anything that speaks the OpenAI
+    chat API. ``cfg.model`` is sent verbatim, e.g. ``openai/gpt-oss-120b``.
+
+    Reasoning models (gpt-oss, deepseek-r1, qwen-3-thinking) split their output
+    between a `reasoning` field and `content`. We use `reasoning_effort=low`
+    so most of the budget goes to `content`, then fall back to `reasoning` when
+    `content` is empty (rare, but happens on tight max_tokens)."""
+    import json as _json
+    import urllib.request
+    api_key = os.environ.get(api_key_env)
+    if not api_key:
+        raise RuntimeError(
+            f"{api_key_env} not set. Export it before running this script "
+            f"(or use --dry-run for stubbed output)."
+        )
+    payload = {
+        "model": cfg.model,
+        "messages": [
+            {"role": "system", "content": system},
+            {"role": "user", "content": user},
+        ],
+        "temperature": cfg.temperature,
+        "max_tokens": cfg.max_tokens,
+        "reasoning_effort": "low",
+    }
+    body = _json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(
+        base_url.rstrip("/") + "/chat/completions",
+        data=body,
+        headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}",
+            # Cloudflare on Groq's edge rejects the default `Python-urllib/x.y`
+            # User-Agent with HTTP 403 (cf-error 1010). Any non-default UA works.
+            "User-Agent": "milady-synth/1.0 (+https://github.com/elizaOS/eliza)",
+        },
+        method="POST",
+    )
+    # Retry with exponential backoff on 429 (rate limit) and 5xx (transient
+    # upstream errors). The backoff respects the `Retry-After` header when
+    # the server provides one (Groq does on 429).
+    import urllib.error, time, random as _random
+    last_exc: Exception | None = None
+    for attempt in range(6):
+        try:
+            with urllib.request.urlopen(req, timeout=180) as resp:
+                result = _json.loads(resp.read().decode("utf-8"))
+            break
+        except urllib.error.HTTPError as e:
+            last_exc = e
+            if e.code in (429, 500, 502, 503, 504):
+                retry_after = e.headers.get("Retry-After")
+                if retry_after and retry_after.isdigit():
+                    delay = float(retry_after)
+                else:
+                    delay = (2 ** attempt) + _random.uniform(0, 0.5)
+                time.sleep(min(delay, 60.0))
+                continue
+            raise
+        except (urllib.error.URLError, TimeoutError) as e:
+            last_exc = e
+            time.sleep((2 ** attempt) + _random.uniform(0, 0.5))
+            continue
+    else:
+        raise RuntimeError(f"teacher request failed after retries: {last_exc}")
+
+    msg = result["choices"][0]["message"]
+    content = (msg.get("content") or "").strip()
+    if not content:
+        # Reasoning model exhausted max_tokens before producing content.
+        # Surface the reasoning instead so the caller can detect & retry.
+        content = (msg.get("reasoning") or "").strip()
+    return content
+
+
+def call_groq(cfg: TeacherCfg, system: str, user: str) -> str:
+    return call_openai_compat(
+        cfg, system, user,
+        base_url="https://api.groq.com/openai/v1",
+        api_key_env="GROQ_API_KEY",
+    )
+
+
+def call_openai(cfg: TeacherCfg, system: str, user: str) -> str:
+    return call_openai_compat(
+        cfg, system, user,
+        base_url=os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+        api_key_env="OPENAI_API_KEY",
+    )
+
+
 def call_teacher(cfg: TeacherCfg, system: str, user: str) -> str:
     if cfg.provider == "anthropic":
         return call_anthropic(cfg, system, user)
+    if cfg.provider == "groq":
+        return call_groq(cfg, system, user)
+    if cfg.provider == "openai":
+        return call_openai(cfg, system, user)
     raise ValueError(f"unknown teacher provider: {cfg.provider}")
 
 
diff --git a/packages/training/scripts/synthesize_phase3_actions.py b/packages/training/scripts/synthesize_phase3_actions.py
@@ -73,7 +73,7 @@
 class TeacherCfg:
     provider: str
     model: str
-    max_tokens: int = 1024
+    max_tokens: int = 2048
     temperature: float = 0.7
 
 
@@ -100,9 +100,95 @@ def call_anthropic(cfg: TeacherCfg, system: str, user: str) -> str:
     return "".join(parts).strip()
 
 
+def call_openai_compat(cfg: TeacherCfg, system: str, user: str, *,
+                       base_url: str, api_key_env: str) -> str:
+    """OpenAI-compatible /v1/chat/completions caller. See evaluator
+    synthesizer for the same code; kept duplicated to keep each
+    synthesizer file standalone."""
+    import json as _json
+    import urllib.request
+    api_key = os.environ.get(api_key_env)
+    if not api_key:
+        raise RuntimeError(
+            f"{api_key_env} not set. Export it before running this script "
+            f"(or use --dry-run for stubbed output)."
+        )
+    payload = {
+        "model": cfg.model,
+        "messages": [
+            {"role": "system", "content": system},
+            {"role": "user", "content": user},
+        ],
+        "temperature": cfg.temperature,
+        "max_tokens": cfg.max_tokens,
+        "reasoning_effort": "low",
+    }
+    body = _json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(
+        base_url.rstrip("/") + "/chat/completions",
+        data=body,
+        headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}",
+            "User-Agent": "milady-synth/1.0 (+https://github.com/elizaOS/eliza)",
+        },
+        method="POST",
+    )
+    import urllib.error, time, random as _random
+    last_exc: Exception | None = None
+    for attempt in range(6):
+        try:
+            with urllib.request.urlopen(req, timeout=180) as resp:
+                result = _json.loads(resp.read().decode("utf-8"))
+            break
+        except urllib.error.HTTPError as e:
+            last_exc = e
+            if e.code in (429, 500, 502, 503, 504):
+                retry_after = e.headers.get("Retry-After")
+                if retry_after and retry_after.isdigit():
+                    delay = float(retry_after)
+                else:
+                    delay = (2 ** attempt) + _random.uniform(0, 0.5)
+                time.sleep(min(delay, 60.0))
+                continue
+            raise
+        except (urllib.error.URLError, TimeoutError) as e:
+            last_exc = e
+            time.sleep((2 ** attempt) + _random.uniform(0, 0.5))
+            continue
+    else:
+        raise RuntimeError(f"teacher request failed after retries: {last_exc}")
+
+    msg = result["choices"][0]["message"]
+    content = (msg.get("content") or "").strip()
+    if not content:
+        content = (msg.get("reasoning") or "").strip()
+    return content
+
+
+def call_groq(cfg: TeacherCfg, system: str, user: str) -> str:
+    return call_openai_compat(
+        cfg, system, user,
+        base_url="https://api.groq.com/openai/v1",
+        api_key_env="GROQ_API_KEY",
+    )
+
+
+def call_openai(cfg: TeacherCfg, system: str, user: str) -> str:
+    return call_openai_compat(
+        cfg, system, user,
+        base_url=os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+        api_key_env="OPENAI_API_KEY",
+    )
+
+
 def call_teacher(cfg: TeacherCfg, system: str, user: str) -> str:
     if cfg.provider == "anthropic":
         return call_anthropic(cfg, system, user)
+    if cfg.provider == "groq":
+        return call_groq(cfg, system, user)
+    if cfg.provider == "openai":
+        return call_openai(cfg, system, user)
     raise ValueError(f"unknown teacher provider: {cfg.provider}")