Skip to content

Commit 50e940e

Browse files
lalaluneclaude
andcommitted
feat(training): Groq + OpenAI-compat teacher providers w/ retry-backoff
Adds two teacher providers to both synthesizers:
- groq: https://api.groq.com/openai/v1 (GROQ_API_KEY)
- openai: configurable base via OPENAI_BASE_URL (OPENAI_API_KEY)

Both go through a call_openai_compat helper (duplicated in each synthesizer so that each file stays standalone) that:
- Sends the OpenAI-compatible /v1/chat/completions request
- Sets a non-default User-Agent so Cloudflare's edge doesn't 403 with the default Python-urllib UA (cf-error 1010)
- Sets reasoning_effort=low so reasoning models (gpt-oss, deepseek-r1) don't burn the entire token budget on hidden reasoning before emitting content
- Falls back to the reasoning field if content is empty (e.g. tight max_tokens; rare with the higher 2048 default)
- Retries with exponential backoff on 429 / 5xx, honoring the Retry-After header when present

Default max_tokens raised from 1024 to 2048 to fit reasoning + output for evaluator templates that emit longer JSON / TOON.

Tested: --teacher-provider groq --teacher-model openai/gpt-oss-120b generates valid Phase-4 reflection records that classify into Phase 4 and pass audit_pipeline_shapes.py.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a5c4525 commit 50e940e

2 files changed

Lines changed: 186 additions & 2 deletions

File tree

packages/training/scripts/synthesize_evaluator_prompts.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
class TeacherCfg:
7272
provider: str
7373
model: str
74-
max_tokens: int = 1024
74+
max_tokens: int = 2048
7575
temperature: float = 0.7
7676

7777

@@ -98,9 +98,107 @@ def call_anthropic(cfg: TeacherCfg, system: str, user: str) -> str:
9898
return "".join(parts).strip()
9999

100100

101+
def call_openai_compat(cfg: TeacherCfg, system: str, user: str, *,
                       base_url: str, api_key_env: str) -> str:
    """OpenAI-compatible /v1/chat/completions caller. Used for Groq, Together,
    Fireworks, vLLM, LM Studio, Ollama — anything that speaks the OpenAI
    chat API. ``cfg.model`` is sent verbatim, e.g. ``openai/gpt-oss-120b``.

    Reasoning models (gpt-oss, deepseek-r1, qwen-3-thinking) split their output
    between a `reasoning` field and `content`. We use `reasoning_effort=low`
    so most of the budget goes to `content`, then fall back to `reasoning` when
    `content` is empty (rare, but happens on tight max_tokens).

    Args:
        cfg: Teacher settings; ``model``, ``temperature`` and ``max_tokens``
            are copied into the request payload.
        system: Text of the ``system`` message.
        user: Text of the ``user`` message.
        base_url: API root; a trailing ``/`` is stripped and
            ``/chat/completions`` is appended.
        api_key_env: Name of the environment variable holding the bearer
            token.

    Returns:
        The stripped ``content`` of the first choice's message, or the
        stripped ``reasoning`` field when ``content`` is empty. May be an
        empty string when both are missing or blank.

    Raises:
        RuntimeError: If ``api_key_env`` is unset or empty, or if every
            attempt failed with a retryable error (429 / 5xx / network
            error / timeout).
        urllib.error.HTTPError: Immediately, for any other HTTP status.
        KeyError, IndexError: If the decoded response has no
            ``choices[0].message`` entry.
    """
    import json as _json
    import random as _random
    import time
    import urllib.error
    import urllib.request

    api_key = os.environ.get(api_key_env)
    if not api_key:
        raise RuntimeError(
            f"{api_key_env} not set. Export it before running this script "
            f"(or use --dry-run for stubbed output)."
        )
    payload = {
        "model": cfg.model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": cfg.temperature,
        "max_tokens": cfg.max_tokens,
        "reasoning_effort": "low",
    }
    body = _json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        base_url.rstrip("/") + "/chat/completions",
        data=body,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
            # Cloudflare on Groq's edge rejects the default `Python-urllib/x.y`
            # User-Agent with HTTP 403 (cf-error 1010). Any non-default UA works.
            "User-Agent": "milady-synth/1.0 (+https://github.com/elizaOS/eliza)",
        },
        method="POST",
    )
    # Retry with exponential backoff on 429 (rate limit) and 5xx (transient
    # upstream errors). The backoff respects the `Retry-After` header when
    # the server provides one as whole seconds (Groq does on 429).
    max_attempts = 6
    last_exc: Exception | None = None
    for attempt in range(max_attempts):
        try:
            with urllib.request.urlopen(req, timeout=180) as resp:
                result = _json.loads(resp.read().decode("utf-8"))
            break
        except urllib.error.HTTPError as e:
            # HTTPError subclasses URLError, so it must be handled first.
            if e.code not in (429, 500, 502, 503, 504):
                raise
            last_exc = e
            retry_after = e.headers.get("Retry-After")
            if retry_after and retry_after.isdigit():
                delay = float(retry_after)
            else:
                delay = (2 ** attempt) + _random.uniform(0, 0.5)
        except (urllib.error.URLError, TimeoutError) as e:
            last_exc = e
            delay = (2 ** attempt) + _random.uniform(0, 0.5)
        # Only wait when another attempt will follow; sleeping after the
        # final failure would just postpone the error below.
        if attempt + 1 < max_attempts:
            time.sleep(min(delay, 60.0))
    else:
        raise RuntimeError(
            f"teacher request failed after retries: {last_exc}"
        ) from last_exc

    msg = result["choices"][0]["message"]
    content = (msg.get("content") or "").strip()
    if not content:
        # Reasoning model exhausted max_tokens before producing content.
        # Surface the reasoning instead so the caller can detect & retry.
        content = (msg.get("reasoning") or "").strip()
    return content
177+
178+
179+
def call_groq(cfg: TeacherCfg, system: str, user: str) -> str:
    """Send one system + user exchange to Groq's OpenAI-compatible endpoint.

    Thin wrapper over ``call_openai_compat`` that pins Groq's base URL and
    names ``GROQ_API_KEY`` as the environment variable holding the API key.
    Returns whatever ``call_openai_compat`` returns.
    """
    endpoint = {
        "base_url": "https://api.groq.com/openai/v1",
        "api_key_env": "GROQ_API_KEY",
    }
    return call_openai_compat(cfg, system, user, **endpoint)
185+
186+
187+
def call_openai(cfg: TeacherCfg, system: str, user: str) -> str:
    """Send one system + user exchange to an OpenAI-style endpoint.

    The API root is read from the ``OPENAI_BASE_URL`` environment variable
    and defaults to ``https://api.openai.com/v1`` when that variable is not
    set. ``OPENAI_API_KEY`` is named as the variable holding the API key.
    The request itself is delegated to ``call_openai_compat``.
    """
    default_root = "https://api.openai.com/v1"
    api_root = os.environ.get("OPENAI_BASE_URL", default_root)
    return call_openai_compat(
        cfg,
        system,
        user,
        base_url=api_root,
        api_key_env="OPENAI_API_KEY",
    )
193+
194+
101195
def call_teacher(cfg: TeacherCfg, system: str, user: str) -> str:
    """Route a teacher request to the backend selected by ``cfg.provider``.

    Recognised providers are ``"anthropic"``, ``"groq"`` and ``"openai"``;
    the matching ``call_*`` function receives ``cfg``, ``system`` and
    ``user`` unchanged and its result is returned.

    Raises:
        ValueError: If ``cfg.provider`` matches none of the known names.
    """
    backends = (
        ("anthropic", call_anthropic),
        ("groq", call_groq),
        ("openai", call_openai),
    )
    # Compare with == (not a dict lookup) so matching behaves the same for
    # any provider value, hashable or not.
    for provider_name, backend in backends:
        if cfg.provider == provider_name:
            return backend(cfg, system, user)
    raise ValueError(f"unknown teacher provider: {cfg.provider}")
105203

106204

packages/training/scripts/synthesize_phase3_actions.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
class TeacherCfg:
7474
provider: str
7575
model: str
76-
max_tokens: int = 1024
76+
max_tokens: int = 2048
7777
temperature: float = 0.7
7878

7979

@@ -100,9 +100,95 @@ def call_anthropic(cfg: TeacherCfg, system: str, user: str) -> str:
100100
return "".join(parts).strip()
101101

102102

103+
def call_openai_compat(cfg: TeacherCfg, system: str, user: str, *,
                       base_url: str, api_key_env: str) -> str:
    """OpenAI-compatible /v1/chat/completions caller. See evaluator
    synthesizer for the same code; kept duplicated to keep each
    synthesizer file standalone.

    Builds a two-message (system + user) chat request with
    ``reasoning_effort`` set to ``"low"``, POSTs it to
    ``<base_url>/chat/completions`` and retries retryable failures with
    exponential backoff.

    Args:
        cfg: Teacher settings; ``model``, ``temperature`` and ``max_tokens``
            are copied into the request payload.
        system: Text of the ``system`` message.
        user: Text of the ``user`` message.
        base_url: API root; a trailing ``/`` is stripped before
            ``/chat/completions`` is appended.
        api_key_env: Name of the environment variable holding the bearer
            token.

    Returns:
        The stripped ``content`` of the first choice's message, or the
        stripped ``reasoning`` field when ``content`` is empty. May be an
        empty string when both are missing or blank.

    Raises:
        RuntimeError: If ``api_key_env`` is unset or empty, or if every
            attempt failed with a retryable error (429 / 5xx / network
            error / timeout).
        urllib.error.HTTPError: Immediately, for any other HTTP status.
        KeyError, IndexError: If the decoded response has no
            ``choices[0].message`` entry.
    """
    import json as _json
    import random as _random
    import time
    import urllib.error
    import urllib.request

    api_key = os.environ.get(api_key_env)
    if not api_key:
        raise RuntimeError(
            f"{api_key_env} not set. Export it before running this script "
            f"(or use --dry-run for stubbed output)."
        )
    payload = {
        "model": cfg.model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": cfg.temperature,
        "max_tokens": cfg.max_tokens,
        "reasoning_effort": "low",
    }
    body = _json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        base_url.rstrip("/") + "/chat/completions",
        data=body,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
            # Explicit User-Agent instead of urllib's default one.
            "User-Agent": "milady-synth/1.0 (+https://github.com/elizaOS/eliza)",
        },
        method="POST",
    )
    # Up to six attempts. 429 and 5xx responses, network errors and timeouts
    # are retried; a whole-second `Retry-After` header takes precedence over
    # the exponential backoff, and every wait is capped at 60 seconds.
    max_attempts = 6
    last_exc: Exception | None = None
    for attempt in range(max_attempts):
        try:
            with urllib.request.urlopen(req, timeout=180) as resp:
                result = _json.loads(resp.read().decode("utf-8"))
            break
        except urllib.error.HTTPError as e:
            # HTTPError subclasses URLError, so it must be handled first.
            if e.code not in (429, 500, 502, 503, 504):
                raise
            last_exc = e
            retry_after = e.headers.get("Retry-After")
            if retry_after and retry_after.isdigit():
                delay = float(retry_after)
            else:
                delay = (2 ** attempt) + _random.uniform(0, 0.5)
        except (urllib.error.URLError, TimeoutError) as e:
            last_exc = e
            delay = (2 ** attempt) + _random.uniform(0, 0.5)
        # Only wait when another attempt will follow; sleeping after the
        # final failure would just postpone the error below.
        if attempt + 1 < max_attempts:
            time.sleep(min(delay, 60.0))
    else:
        raise RuntimeError(
            f"teacher request failed after retries: {last_exc}"
        ) from last_exc

    msg = result["choices"][0]["message"]
    content = (msg.get("content") or "").strip()
    if not content:
        # No usable `content`: hand back the `reasoning` text instead.
        content = (msg.get("reasoning") or "").strip()
    return content
167+
168+
169+
def call_groq(cfg: TeacherCfg, system: str, user: str) -> str:
    """Query Groq through its OpenAI-compatible chat endpoint.

    Forwards ``cfg``, ``system`` and ``user`` to ``call_openai_compat`` with
    Groq's base URL and ``GROQ_API_KEY`` as the name of the environment
    variable that holds the API key, and returns that call's result.
    """
    groq_base_url = "https://api.groq.com/openai/v1"
    groq_key_var = "GROQ_API_KEY"
    return call_openai_compat(
        cfg,
        system,
        user,
        base_url=groq_base_url,
        api_key_env=groq_key_var,
    )
175+
176+
177+
def call_openai(cfg: TeacherCfg, system: str, user: str) -> str:
    """Query an OpenAI-style chat endpoint.

    Uses the ``OPENAI_BASE_URL`` environment variable as the API root,
    falling back to ``https://api.openai.com/v1`` when it is not set, and
    names ``OPENAI_API_KEY`` as the variable holding the API key. The
    request is carried out by ``call_openai_compat``.
    """
    target = {
        "base_url": os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1"),
        "api_key_env": "OPENAI_API_KEY",
    }
    return call_openai_compat(cfg, system, user, **target)
183+
184+
103185
def call_teacher(cfg: TeacherCfg, system: str, user: str) -> str:
    """Dispatch a teacher call according to ``cfg.provider``.

    ``"anthropic"``, ``"groq"`` and ``"openai"`` map to ``call_anthropic``,
    ``call_groq`` and ``call_openai`` respectively; the chosen function is
    invoked with the same three arguments and its result is returned.

    Raises:
        ValueError: If ``cfg.provider`` is not one of the names above.
    """
    routes = (
        ("anthropic", call_anthropic),
        ("groq", call_groq),
        ("openai", call_openai),
    )
    # Equality scan rather than a dict lookup, so matching does not depend
    # on the provider value being hashable.
    for label, handler in routes:
        if cfg.provider == label:
            return handler(cfg, system, user)
    raise ValueError(f"unknown teacher provider: {cfg.provider}")
107193

108194

0 commit comments

Comments
 (0)