# Per-tier configuration tables. Every table is keyed by the same tier strings
# that the --tier CLI option accepts ("0_6b", "1_7b", "27b").

# Kernel names listed for each tier.
REQUIRED_KERNELS_BY_TIER = {
    "0_6b": ["turboquant_q3", "qjl", "polarquant", "dflash"],
    "1_7b": ["turboquant_q4", "qjl", "polarquant", "dflash"],
    # 27b matches eliza1_manifest.REQUIRED_KERNELS_BY_TIER["27b"] — adds
    # turbo3_tcq on top of the base 1.7b set for long-context cache compression.
    "27b": ["turboquant_q4", "qjl", "polarquant", "dflash", "turbo3_tcq"],
}

# RAM budget per tier, in MB, as a two-element tuple.
# NOTE(review): presumably (minimum, recommended) — confirm against the consumer.
RAM_BUDGET_MB = {
    "0_6b": (2500, 3700),
    "1_7b": (4000, 5500),
    # 27b matches publish/orchestrator.RAM_BUDGET_BY_TIER["27b"] — sized for
    # 96GB+ Mac / high-VRAM desktop class hosts under the Q4_POLAR text bundle.
    "27b": (24000, 32000),
}

# Per-tier parameter-count label used by the lineage block and the
# README/provenance prose. The table itself has no default: callers look it up
# with .get(tier, "0.6B") so unknown tiers keep the script's historical
# default behavior.
QWEN3_PARAMS_BY_TIER = {
    "0_6b": "0.6B",
    "1_7b": "1.7B",
    # The 27b cloud tier substitutes against Qwen3.5-27B (no Qwen3-27B
    # variant exists upstream).
    # NOTE(review): the lineage/README code formats the source repo as
    # "<org>/Qwen3-<params>", which yields "Qwen3-27B" for this tier — confirm
    # that those strings are meant to name the Qwen3.5-27B base instead.
    "27b": "27B",
}

# NOTE(review): presumably the text context length in tokens — confirm.
TEXT_CTX = 32768
4965
@@ -90,7 +106,7 @@ def download_asset(repo: str, remote_path: str, dest: Path) -> None:
90106
91107def main (argv : list [str ] | None = None ) -> int :
92108 ap = argparse .ArgumentParser (description = __doc__ )
93- ap .add_argument ("--tier" , required = True , choices = ("0_6b" , "1_7b" ))
109+ ap .add_argument ("--tier" , required = True , choices = ("0_6b" , "1_7b" , "27b" ))
94110 ap .add_argument ("--text-gguf" , required = True , type = Path )
95111 ap .add_argument ("--text-sidecar" , type = Path , default = None ,
96112 help = "The .eliza1.json sidecar for the text GGUF (quant block)." )
@@ -171,9 +187,13 @@ def main(argv: list[str] | None = None) -> int:
171187 "sha256" : drafter_sha ,
172188 "source" : args .drafter_source ,
173189 "note" : (
174- "Upstream Qwen3-0.6B GGUF used as the DFlash drafter for the "
175- "1.7B target; shares the Qwen3 BPE vocabulary so speculative "
176- "decoding is correct (modest acceptance — not a distilled drafter)."
190+ # For 27b the canonical drafter is the Qwen3.5-aligned 0.6B
191+ # distilled drafter (elizaos/eliza-1-drafter-0_6b-qwen3_5);
192+ # for 0_6b/1_7b it's the upstream Qwen3 0.6B GGUF reused as-is.
193+ f"DFlash drafter for the { QWEN3_PARAMS_BY_TIER .get (tier , '0.6B' )} text target. "
194+ "Shares the Qwen3.5/Qwen3 BPE vocabulary with the target so speculative "
195+ "decoding is correct. See the drafter source repo for whether this "
196+ "candidate is a distilled drafter or the upstream base GGUF (not distilled)."
177197 ),
178198 },
179199 "acceptanceWindow" : None ,
@@ -184,6 +204,15 @@ def main(argv: list[str] | None = None) -> int:
184204 # --- voice / asr / vad / cache from elizaos/eliza-1-assets/1_7b/ ---
185205 # The OmniVoice / Qwen3-ASR / Silero bytes are model-size-independent; the
186206 # assets repo only carries the 1_7b key, so reuse them under any tier.
207+ #
208+ # 27b caveat: eliza1_manifest.VOICE_QUANT_BY_TIER["27b"] == "Q8_0", so
209+ # required_voice_artifacts_for_tier("27b") returns the Q8_0 names. This
210+ # staging path still copies the Q4_K_M bytes (the only ones present in
211+ # the assets repo today) — the orchestrator's voice-artifact gate will
212+ # therefore fail in publish mode until Q8_0 OmniVoice GGUFs are derived
213+ # and pushed to elizaos/eliza-1-assets/27b/. The candidate bundle is
214+ # still installable on a runtime that can load Q4_K_M voice, but the
215+ # release gate stays red. Track as a separate dependency.
187216 asset_map = [
188217 ("1_7b/tts/omnivoice-base-Q4_K_M.gguf" , out / "tts" / "omnivoice-base-Q4_K_M.gguf" ),
189218 ("1_7b/tts/omnivoice-tokenizer-Q4_K_M.gguf" , out / "tts" / "omnivoice-tokenizer-Q4_K_M.gguf" ),
@@ -288,9 +317,10 @@ def num(key: str) -> float | None:
288317 }, indent = 2 ) + "\n " )
289318
290319 # --- lineage ---
320+ params = QWEN3_PARAMS_BY_TIER .get (tier , "0.6B" )
291321 lineage = {
292322 "text" : M .LineageEntry (
293- base = f"{ args .drafter_source .split ('/' )[0 ]} /Qwen3-{ '1.7B' if tier == '1_7b' else '0.6B' } (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{ '1.7B' if tier == '1_7b' else '0.6B' } )" ,
323+ base = f"{ args .drafter_source .split ('/' )[0 ]} /Qwen3-{ params } (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{ params } )" ,
294324 license = "apache-2.0" ,
295325 ),
296326 "voice" : M .LineageEntry (base = "Serveurperso/OmniVoice-GGUF@361609388ae572a820d085185bbbe2a2aac4b30e" , license = "apache-2.0" ),
@@ -319,15 +349,29 @@ def num(key: str) -> float | None:
319349 status = "pass" , at_commit = "08032d57" ,
320350 report = "packages/inference/verify/cuda-runtime-dispatch-evidence.json" ,
321351 device = "NVIDIA GeForce RTX 5080 Laptop GPU (Blackwell, cc 12.0)" ,
322- caveat = "cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence" ,
352+ # For 27b cuda is a tier-supported backend (per
353+ # eliza1_manifest.SUPPORTED_BACKENDS_BY_TIER["27b"]); for 0_6b/1_7b
354+ # it stays "extra evidence" — see the caveat tier-switch below.
355+ caveat = (
356+ "cuda is a tier-supported backend for 27b"
357+ if tier == "27b"
358+ else "cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence"
359+ ),
323360 ),
324361 "metal" : M .KernelVerification (
325362 status = "skipped" , at_commit = "08032d57" , report = "not-run" ,
326363 caveat = "needs-hardware: no Apple/Metal device on the build host" ,
327364 ),
328365 "rocm" : M .KernelVerification (
329366 status = "skipped" , at_commit = "08032d57" , report = "not-applicable" ,
330- caveat = "rocm is not a tier-supported backend for 1_7b/0_6b" ,
367+ # rocm is a tier-supported backend for 27b but cannot be verified
368+ # on this build host (no AMD GPU); 0_6b/1_7b don't list rocm as
369+ # supported at all.
370+ caveat = (
371+ "rocm is a tier-supported backend for 27b but no AMD GPU on the build host (needs-hardware)"
372+ if tier == "27b"
373+ else "rocm is not a tier-supported backend for 1_7b/0_6b"
374+ ),
331375 ),
332376 }
333377
@@ -337,7 +381,7 @@ def num(key: str) -> float | None:
337381 "finetuned" : True ,
338382 "sourceModels" : {
339383 "text" : {
340- "repo" : f"{ args .drafter_source .split ('/' )[0 ]} /Qwen3-{ '1.7B' if tier == '1_7b' else '0.6B' } " ,
384+ "repo" : f"{ args .drafter_source .split ('/' )[0 ]} /Qwen3-{ params } " ,
341385 "convertedVia" : "packages/inference/llama.cpp/convert_hf_to_gguf.py + scripts/optimize_for_eliza1.py (PolarQuant/QJL/TurboQuant)" ,
342386 "note" : "Fine-tuned (APOLLO full-parameter SFT) then optimized. Documented substitute for the not-yet-published Qwen3.5 base; NOT strictly base-v1 semantics — this is a finetuned candidate." ,
343387 },
@@ -443,7 +487,7 @@ def _render_readme(
443487 optimized : bool ,
444488 eval_results : dict [str , Any ],
445489) -> str :
446- params = "1.7B" if tier == "1_7b" else " 0.6B"
490+ params = QWEN3_PARAMS_BY_TIER . get ( tier , " 0.6B")
447491 base_repo = f"{ drafter_source .split ('/' )[0 ]} /Qwen3-{ params } "
448492 if optimized :
449493 text_para = (
0 commit comments