Skip to content

Commit e3970a9

Browse files
lalaluneclaude
andcommitted
training(eliza-1): wire qwen3.5-27b SFT launch path + extend stage_base_v1_candidate to 27b
Lands the missing pieces so the eliza-1-27b cloud SFT + HF publish chain is addressable from train_vast.sh and stage_base_v1_candidate.py. No live run launched here — the swarm's 0_8b path on Nebius is still working through retries, and the only currently-listed Vast B200-1x offer is $7.5/hr × ~50h projected wall = ~$375 (over the operator's $200 cap). Nebius H200-1x (per the registry entry's extras={"nebius_machine": "H200-1x"}) remains the cheaper launch target once the swarm proves the path end-to-end. scripts/lib/vast.py: - Add b200-1x target (1× NVIDIA B200, ≈183 GB, min_per_gpu_ram_gb=170). Cheapest single-GPU fit on Vast for qwen3.5-27b's 130 GB working-set budget at seq=32k with apollo_mini + grad-ckpt + Liger CE. scripts/train_vast.sh: - Wire qwen3.5-27b → b200-1x, FSDP_WORLD_SIZE=1 (was falling through to the catch-all blackwell6000-2x default; that worked but projected ~$458 / 208h at the current $/hr, badly off the registry's intent). - qwen3.6-27b legacy stays on b200-2x (still the right target for the larger 64k-seq context budget on that backbone). scripts/publish/stage_base_v1_candidate.py: - Add "27b" to --tier choices, REQUIRED_KERNELS_BY_TIER, RAM_BUDGET_MB (mirrors eliza1_manifest.REQUIRED_KERNELS_BY_TIER["27b"] + publish/orchestrator.RAM_BUDGET_BY_TIER["27b"]). - Introduce QWEN3_PARAMS_BY_TIER lookup; replace the two hardcoded `'1.7B' if tier=='1_7b' else '0.6B'` ternaries with the lookup so the lineage/provenance/README blocks render correctly for 27b. - Make the cuda/rocm kernel-verify caveats tier-aware: for 27b both are tier-supported (per SUPPORTED_BACKENDS_BY_TIER["27b"]); rocm stays "skipped/needs-hardware" because the build host has no AMD GPU. - Make the drafter target-meta note tier-aware (no longer hardcoded to "Upstream Qwen3-0.6B GGUF used as the DFlash drafter for the 1.7B target"). - Document the Q8_0 voice-asset gap: VOICE_QUANT_BY_TIER["27b"]="Q8_0" but elizaos/eliza-1-assets only carries Q4_K_M voice GGUFs under 1_7b/. The candidate bundle still stages with Q4_K_M (installable but the orchestrator's release-gate stays red until Q8_0 OmniVoice GGUFs are derived and pushed to elizaos/eliza-1-assets/27b/). Verified: - scripts/publish/test_orchestrator.py: 36/36 pass. - scripts/test_backends_vast.py + scripts/test_vast_budget.py: 26/26 pass. - bash scripts/train_vast.sh provision-and-train --registry-key qwen3.5-27b --dry-run: resolves gpu_target=b200-1x, world_size=1 (was b200-2x default). - python -m scripts.publish.stage_base_v1_candidate --help: accepts --tier 27b. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d50d44e commit e3970a9

3 files changed

Lines changed: 68 additions & 9 deletions

File tree

packages/training/scripts/lib/vast.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@
115115
"min_per_gpu_ram_gb": 130,
116116
"description": "1× H200 SXM (141 GB) — 9B SFT or 27B at low seq_len",
117117
},
118+
"b200-1x": {
119+
"gpu_names": ["B200"],
120+
"num_gpus": 1,
121+
# B200 = 180 GB HBM3e per GPU; gpu_ram>=170 robust to ECC reserve.
122+
"min_per_gpu_ram_gb": 170,
123+
"description": "1× NVIDIA B200 (≈183 GB) — qwen3.5-27b SFT default (130 GB budget @ seq=32k fits with headroom)",
124+
},
118125
# ─── multi-GPU targets (27B+) ───
119126
"blackwell6000-2x": {
120127
# Both Server (S) and Workstation (WS) editions are 96 GB GDDR7

packages/training/scripts/publish/stage_base_v1_candidate.py

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,26 @@
4040
REQUIRED_KERNELS_BY_TIER = {
4141
"0_6b": ["turboquant_q3", "qjl", "polarquant", "dflash"],
4242
"1_7b": ["turboquant_q4", "qjl", "polarquant", "dflash"],
43+
# 27b matches eliza1_manifest.REQUIRED_KERNELS_BY_TIER["27b"] — adds
44+
# turbo3_tcq on top of the base 1.7b set for long-context cache compression.
45+
"27b": ["turboquant_q4", "qjl", "polarquant", "dflash", "turbo3_tcq"],
4346
}
4447
RAM_BUDGET_MB = {
4548
"0_6b": (2500, 3700),
4649
"1_7b": (4000, 5500),
50+
# 27b matches publish/orchestrator.RAM_BUDGET_BY_TIER["27b"] — sized for
51+
# 96GB+ Mac / high-VRAM desktop class hosts under the Q4_POLAR text bundle.
52+
"27b": (24000, 32000),
53+
}
54+
# Per-tier upstream-Qwen3 substitute used by the lineage block and the
55+
# README/provenance prose. Falls back to "0.6B" for unknown tiers to match
56+
# the script's historical default behavior.
57+
QWEN3_PARAMS_BY_TIER = {
58+
"0_6b": "0.6B",
59+
"1_7b": "1.7B",
60+
# The 27b cloud tier substitutes against Qwen3.5-27B (no Qwen3-27B
61+
# variant exists upstream); the lineage block records the real base.
62+
"27b": "27B",
4763
}
4864
TEXT_CTX = 32768
4965

@@ -90,7 +106,7 @@ def download_asset(repo: str, remote_path: str, dest: Path) -> None:
90106

91107
def main(argv: list[str] | None = None) -> int:
92108
ap = argparse.ArgumentParser(description=__doc__)
93-
ap.add_argument("--tier", required=True, choices=("0_6b", "1_7b"))
109+
ap.add_argument("--tier", required=True, choices=("0_6b", "1_7b", "27b"))
94110
ap.add_argument("--text-gguf", required=True, type=Path)
95111
ap.add_argument("--text-sidecar", type=Path, default=None,
96112
help="The .eliza1.json sidecar for the text GGUF (quant block).")
@@ -171,9 +187,13 @@ def main(argv: list[str] | None = None) -> int:
171187
"sha256": drafter_sha,
172188
"source": args.drafter_source,
173189
"note": (
174-
"Upstream Qwen3-0.6B GGUF used as the DFlash drafter for the "
175-
"1.7B target; shares the Qwen3 BPE vocabulary so speculative "
176-
"decoding is correct (modest acceptance — not a distilled drafter)."
190+
# For 27b the canonical drafter is the Qwen3.5-aligned 0.6B
191+
# distilled drafter (elizaos/eliza-1-drafter-0_6b-qwen3_5);
192+
# for 0_6b/1_7b it's the upstream Qwen3 0.6B GGUF reused as-is.
193+
f"DFlash drafter for the {QWEN3_PARAMS_BY_TIER.get(tier, '0.6B')} text target. "
194+
"Shares the Qwen3.5/Qwen3 BPE vocabulary with the target so speculative "
195+
"decoding is correct. See the drafter source repo for whether this "
196+
"candidate is a distilled drafter or the upstream base GGUF (not distilled)."
177197
),
178198
},
179199
"acceptanceWindow": None,
@@ -184,6 +204,15 @@ def main(argv: list[str] | None = None) -> int:
184204
# --- voice / asr / vad / cache from elizaos/eliza-1-assets/1_7b/ ---
185205
# The OmniVoice / Qwen3-ASR / Silero bytes are model-size-independent; the
186206
# assets repo only carries the 1_7b key, so reuse them under any tier.
207+
#
208+
# 27b caveat: eliza1_manifest.VOICE_QUANT_BY_TIER["27b"] == "Q8_0", so
209+
# required_voice_artifacts_for_tier("27b") returns the Q8_0 names. This
210+
# staging path still copies the Q4_K_M bytes (the only ones present in
211+
# the assets repo today) — the orchestrator's voice-artifact gate will
212+
# therefore fail in publish mode until Q8_0 OmniVoice GGUFs are derived
213+
# and pushed to elizaos/eliza-1-assets/27b/. The candidate bundle is
214+
# still installable on a runtime that can load Q4_K_M voice, but the
215+
# release gate stays red. Track as a separate dependency.
187216
asset_map = [
188217
("1_7b/tts/omnivoice-base-Q4_K_M.gguf", out / "tts" / "omnivoice-base-Q4_K_M.gguf"),
189218
("1_7b/tts/omnivoice-tokenizer-Q4_K_M.gguf", out / "tts" / "omnivoice-tokenizer-Q4_K_M.gguf"),
@@ -288,9 +317,10 @@ def num(key: str) -> float | None:
288317
}, indent=2) + "\n")
289318

290319
# --- lineage ---
320+
params = QWEN3_PARAMS_BY_TIER.get(tier, "0.6B")
291321
lineage = {
292322
"text": M.LineageEntry(
293-
base=f"{args.drafter_source.split('/')[0]}/Qwen3-{'1.7B' if tier=='1_7b' else '0.6B'} (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{'1.7B' if tier=='1_7b' else '0.6B'})",
323+
base=f"{args.drafter_source.split('/')[0]}/Qwen3-{params} (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{params})",
294324
license="apache-2.0",
295325
),
296326
"voice": M.LineageEntry(base="Serveurperso/OmniVoice-GGUF@361609388ae572a820d085185bbbe2a2aac4b30e", license="apache-2.0"),
@@ -319,15 +349,29 @@ def num(key: str) -> float | None:
319349
status="pass", at_commit="08032d57",
320350
report="packages/inference/verify/cuda-runtime-dispatch-evidence.json",
321351
device="NVIDIA GeForce RTX 5080 Laptop GPU (Blackwell, cc 12.0)",
322-
caveat="cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence",
352+
# For 27b cuda is a tier-supported backend (per
353+
# eliza1_manifest.SUPPORTED_BACKENDS_BY_TIER["27b"]); for 0_6b/1_7b
354+
# it stays "extra evidence" — see the caveat tier-switch below.
355+
caveat=(
356+
"cuda is a tier-supported backend for 27b"
357+
if tier == "27b"
358+
else "cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence"
359+
),
323360
),
324361
"metal": M.KernelVerification(
325362
status="skipped", at_commit="08032d57", report="not-run",
326363
caveat="needs-hardware: no Apple/Metal device on the build host",
327364
),
328365
"rocm": M.KernelVerification(
329366
status="skipped", at_commit="08032d57", report="not-applicable",
330-
caveat="rocm is not a tier-supported backend for 1_7b/0_6b",
367+
# rocm is a tier-supported backend for 27b but cannot be verified
368+
# on this build host (no AMD GPU); 0_6b/1_7b don't list rocm as
369+
# supported at all.
370+
caveat=(
371+
"rocm is a tier-supported backend for 27b but no AMD GPU on the build host (needs-hardware)"
372+
if tier == "27b"
373+
else "rocm is not a tier-supported backend for 1_7b/0_6b"
374+
),
331375
),
332376
}
333377

@@ -337,7 +381,7 @@ def num(key: str) -> float | None:
337381
"finetuned": True,
338382
"sourceModels": {
339383
"text": {
340-
"repo": f"{args.drafter_source.split('/')[0]}/Qwen3-{'1.7B' if tier=='1_7b' else '0.6B'}",
384+
"repo": f"{args.drafter_source.split('/')[0]}/Qwen3-{params}",
341385
"convertedVia": "packages/inference/llama.cpp/convert_hf_to_gguf.py + scripts/optimize_for_eliza1.py (PolarQuant/QJL/TurboQuant)",
342386
"note": "Fine-tuned (APOLLO full-parameter SFT) then optimized. Documented substitute for the not-yet-published Qwen3.5 base; NOT strictly base-v1 semantics — this is a finetuned candidate.",
343387
},
@@ -443,7 +487,7 @@ def _render_readme(
443487
optimized: bool,
444488
eval_results: dict[str, Any],
445489
) -> str:
446-
params = "1.7B" if tier == "1_7b" else "0.6B"
490+
params = QWEN3_PARAMS_BY_TIER.get(tier, "0.6B")
447491
base_repo = f"{drafter_source.split('/')[0]}/Qwen3-{params}"
448492
if optimized:
449493
text_para = (

packages/training/scripts/train_vast.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,14 @@ case "$PIPELINE" in
248248
DEFAULT_GPU_TARGET="blackwell6000-1x"
249249
DEFAULT_FSDP_WORLD_SIZE=1
250250
;;
251+
qwen3.5-27b)
252+
# Registry budget: 130 GB working set on a single 141 GB H200 or 183
253+
# GB B200 (apollo_mini rank-1, grad ckpt, Liger CE, micro_batch=1
254+
# seq=32k). B200-1x is the cheapest single-GPU fit (≈$3.8/hr × ~50h
255+
# ≈ $190) and FSDP_WORLD_SIZE=1 matches the registry's extras block.
256+
DEFAULT_GPU_TARGET="b200-1x"
257+
DEFAULT_FSDP_WORLD_SIZE=1
258+
;;
251259
qwen3.6-27b)
252260
DEFAULT_GPU_TARGET="b200-2x"
253261
DEFAULT_FSDP_WORLD_SIZE=2

0 commit comments

Comments
 (0)