Skip to content

Commit 5db02d7

Browse files
committed
Merge remote-tracking branch 'origin/develop' into develop
2 parents e9e723a + e3970a9 commit 5db02d7

6 files changed

Lines changed: 171 additions & 35 deletions

File tree

packages/app-core/src/services/local-inference/manifest/schema.ts

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,20 +371,41 @@ export const Eliza1RamBudgetSchema = z
371371
// Release-state vocabulary. `base-v1` is the v1 product: the upstream BASE
372372
// models — GGUF-converted via the elizaOS/llama.cpp fork and fully
373373
// Eliza-optimized (every quant/kernel trick in inference/AGENTS.md §3) —
374-
// but NOT fine-tuned (fine-tuning ships in v2). `finetuned-v2` is the v2
375-
// state; `local-standin` is a non-publishable staging shape;
376-
// `upload-candidate` / `final` are the historical fine-tuned-v1 publish
377-
// states retained for forward-compat. Mirrors `ELIZA_1_RELEASE_STATES` in
374+
// but NOT fine-tuned (fine-tuning ships in v2). `base-v1-candidate` is the
375+
// in-progress state of a base-v1 bundle before every release-blocking
376+
// gate (real fork-built bytes, every supported-backend kernel verify,
377+
// every required platform-dispatch report, the runnable-on-base evals)
378+
// has gone green. It is publishable to HuggingFace as a download target
379+
// and is installable on a device whose backend it verified, but is not
380+
// the strict release — its `defaultEligible` stays `false` at publish
381+
// time. `finetuned-v2` is the v2 state; `local-standin` is a non-publishable
382+
// staging shape; `upload-candidate` / `final` are the historical
383+
// fine-tuned-v1 publish states retained for forward-compat. Mirrors
384+
// `ELIZA_1_RELEASE_STATES` in
378385
// `packages/training/scripts/manifest/eliza1_manifest.py`.
379386
export const ELIZA_1_RELEASE_STATES = [
380387
"local-standin",
388+
"base-v1-candidate",
381389
"base-v1",
382390
"finetuned-v2",
383391
"upload-candidate",
384392
"final",
385393
] as const;
386394
export type Eliza1ReleaseState = (typeof ELIZA_1_RELEASE_STATES)[number];
387395

396+
// Release-channel vocabulary recorded on a published manifest.
397+
// `recommended` is the fine-tuned Eliza-1 (ships in v2) — the channel a
398+
// device may auto-promote to the strict default. `base-v1` is the
399+
// upstream-base + kernel-optimized release: every quant/kernel trick
400+
// applied, but the text weights are the upstream base GGUFs (not the
401+
// fine-tuned Eliza-1). A `base-v1`-channel manifest MUST be
402+
// `defaultEligible: false` at publish time. The on-device gate
403+
// (`canSetAsDefault`) still allows a contract-valid `base-v1` bundle to
404+
// serve as the fallback default when no `recommended` channel bundle is installed —
405+
// see `validator.ts`. Mirrors `ELIZA_1_RELEASE_CHANNELS` (Python side).
406+
export const ELIZA_1_RELEASE_CHANNELS = ["recommended", "base-v1"] as const;
407+
export type Eliza1ReleaseChannel = (typeof ELIZA_1_RELEASE_CHANNELS)[number];
408+
388409
// Provenance slots — the bundle components whose upstream source repo a
389410
// `base-v1` manifest must record. Mirrors `ELIZA_1_PROVENANCE_SLOTS`
390411
// (Python side).
@@ -451,7 +472,23 @@ export const Eliza1ManifestSchema = z
451472
// per shipped component. The contract validator requires per-component
452473
// coverage when `releaseState === "base-v1"`.
453474
provenance: Eliza1ProvenanceSchema.optional(),
475+
// Optional. Defaults to `"recommended"` semantically when unset (the
476+
// fine-tuned Eliza-1 — the channel allowed to auto-promote to the
477+
// strict device default). A `"base-v1"`-channel manifest is the
478+
// upstream-base + kernel-optimized release; it MUST be
479+
// `defaultEligible: false` at publish time. The on-device gate
480+
// (`canSetAsDefault`) still allows a contract-valid `base-v1` bundle
481+
// to fill an empty default slot when no `recommended` channel bundle
482+
// is installed; the recommender prefers `defaultEligible: true` over
483+
// candidates whenever both are available.
484+
releaseChannel: z.enum(ELIZA_1_RELEASE_CHANNELS).optional(),
454485
defaultEligible: z.boolean(),
486+
// Optional. Free-text quant tag emitted by the publish-side manifest
487+
// builder (e.g. `"Q3_K_S"`, `"Q4_K_M"`). Not consumed by the runtime
488+
// validator — declared here so that a manifest carrying it keeps the
489+
// field after parsing, instead of Zod's default strip behaviour silently
490+
// dropping a real publish field. Accepted as any non-empty string.
491+
textQuant: z.string().min(1).optional(),
455492
})
456493
// The id MUST encode the tier so catalogs can derive tier from id without
457494
// re-reading the manifest. Example: `id: "eliza-1-9b"`.
@@ -462,4 +499,16 @@ export const Eliza1ManifestSchema = z
462499
message: "id must start with `eliza-1-<tier>`",
463500
path: ["id"],
464501
},
502+
)
503+
// A `base-v1`-channel manifest is the upstream-base release. At publish
504+
// time it MUST be `defaultEligible: false` — the on-device gate
505+
// (`canSetAsDefault`) is the one that allows it to fill an empty default
506+
// slot when no `recommended` bundle is installed. Mirrors
507+
// inference/AGENTS.md §6 and the Python manifest builder.
508+
.refine(
509+
(m) => m.releaseChannel !== "base-v1" || m.defaultEligible === false,
510+
{
511+
message: "releaseChannel=base-v1 requires defaultEligible: false",
512+
path: ["defaultEligible"],
513+
},
465514
);

packages/app-core/src/services/local-inference/manifest/validator.ts

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,19 +91,36 @@ export function parseManifestOrThrow(input: unknown): Eliza1Manifest {
9191

9292
/**
9393
* `canSetAsDefault` is the recommendation-engine gate. A manifest that
94-
* passes this is allowed to be picked as the default bundle for the
95-
* device — it is `defaultEligible`, contract-valid, AND every backend
96-
* it claims to verify is one the device exposes.
94+
* passes this is allowed to fill an empty default slot for the device:
95+
*
96+
* - the manifest is contract-valid (every required kernel declared, every
97+
* required eval green for a strict release, lineage/files consistent),
98+
* - the device RAM meets the manifest's `ramBudgetMb.min` floor,
99+
* - the device exposes at least one backend the manifest verified `pass`
100+
* on out of the tier's supported set.
101+
*
102+
* A `defaultEligible: true` manifest is the strict release: every supported
103+
* backend kernel-verified `pass`, every required eval green. A
104+
* `defaultEligible: false` manifest with `releaseState` in the candidate /
105+
* staging vocabulary (`base-v1-candidate`, `local-standin`,
106+
* `upload-candidate`) is still permitted to fill an empty default slot
107+
* **when this device can run it** — the recommender prefers a strict
108+
* release over a candidate when both are installed (see
109+
* `isStrictReleaseManifest`). This mirrors the install gate
110+
* (`downloader.assertBundleInstallable`): if the device can install + run
111+
* the bundle, it can also fall back to running it as the default. The
112+
* historic "candidate bundles must never be a default" rule produced the
113+
* worse outcome of installing a bundle but leaving the model slot empty,
114+
* forcing the user to manually pick the only model they had downloaded.
97115
*
98116
* The device-caps check rejects "this device has Vulkan only but the
99-
* manifest only verified Metal/CUDA" — a manifest may be globally
100-
* default-eligible but not on this device.
117+
* manifest only verified Metal/CUDA" — a manifest may be contract-valid
118+
* but not runnable on this device.
101119
*/
102120
export function canSetAsDefault(
103121
manifest: Eliza1Manifest,
104122
device: Eliza1DeviceCaps,
105123
): boolean {
106-
if (!manifest.defaultEligible) return false;
107124
if (collectContractErrors(manifest).length > 0) return false;
108125
if (manifest.ramBudgetMb.min > device.ramMb) return false;
109126

@@ -121,6 +138,16 @@ export function canSetAsDefault(
121138
return overlapping.length > 0;
122139
}
123140

141+
/**
142+
* Strict release identifier: a `defaultEligible: true` manifest. The
143+
* recommender uses this to prefer a strict release over a candidate
144+
* bundle when both are installed and contract-valid. Mirrors the
145+
* publish-side `eliza1_gates.yaml` strict bar.
146+
*/
147+
export function isStrictReleaseManifest(manifest: Eliza1Manifest): boolean {
148+
return manifest.defaultEligible === true;
149+
}
150+
124151
// ---------------------------------------------------------------------------
125152
// Internal: contract rules from AGENTS.md §3 + §6
126153
// ---------------------------------------------------------------------------

packages/app-core/src/services/local-inference/recommendation.ts

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,13 @@ export type BundleDefaultEligibility =
460460
canBeDefault: false;
461461
/** Distinct, machine-readable reason — surfaced to the UI alongside
462462
* the `BundleIncompatibleError` the downloader raises for the same
463-
* conditions. */
463+
* conditions. `contract-invalid` covers both the historic
464+
* "not-default-eligible" case (eval gate not passed for a strict
465+
* release) and any other manifest-contract failure caught by
466+
* `collectContractErrors`. */
464467
reason:
465468
| "no-manifest"
466-
| "not-default-eligible"
469+
| "contract-invalid"
467470
| "ram-below-floor"
468471
| "kernels-unverified-on-device"
469472
| "not-verified-on-device";
@@ -476,15 +479,20 @@ export type BundleDefaultEligibility =
476479
* not default):
477480
*
478481
* - the bundle ships a validated `eliza-1.manifest.json`,
479-
* - the manifest is `defaultEligible` AND contract-valid (which in turn
480-
* means every required kernel is verified AND every required eval passed
482+
* - the manifest is contract-valid (every required kernel declared, every
483+
* required eval green for a strict release, lineage/files consistent —
481484
* enforced by `canSetAsDefault` → `collectContractErrors`),
482485
* - the device exposes at least one backend the manifest verified `pass` on
483486
* out of the tier's supported set,
484487
* - the device RAM meets the manifest's `ramBudgetMb.min` floor,
485488
* - the bundle has passed the one-time on-device verify pass
486489
* (`InstalledModel.bundleVerifiedAt` is set) — a materialized-but-unverified
487490
* bundle is never auto-selected, per AGENTS.md §7.
491+
*
492+
* `manifest.defaultEligible: true` is NOT required at the gate level — a
493+
* `base-v1-candidate` bundle that passes every above condition is allowed
494+
* to fill an empty default slot. The recommender prefers a strict release
495+
* (`defaultEligible: true`) over a candidate when both are installed.
488496
*/
489497
export function canBundleBeDefaultOnDevice(
490498
installed: InstalledModel,
@@ -511,13 +519,6 @@ export function canBundleBeDefaultOnDevice(
511519
if (canSetAsDefault(manifest, caps)) return { canBeDefault: true };
512520

513521
// canSetAsDefault returned false — disambiguate why so the UI/log is precise.
514-
if (!manifest.defaultEligible) {
515-
return {
516-
canBeDefault: false,
517-
reason: "not-default-eligible",
518-
detail: `${installed.id}: manifest defaultEligible is false (evals/kernels not all green at publish time)`,
519-
};
520-
}
521522
if (manifest.ramBudgetMb.min > caps.ramMb) {
522523
return {
523524
canBeDefault: false,
@@ -541,13 +542,13 @@ export function canBundleBeDefaultOnDevice(
541542
detail: `${installed.id}: no backend the device exposes (${deviceBackends}) has a 'pass' kernel-verify report in the manifest`,
542543
};
543544
}
544-
// Contract-valid manifest, RAM ok, backend ok — but canSetAsDefault still
545-
// said no. That can only be a contract-error path (e.g. an eval gate not
546-
// passed) the manifest validator caught; surface it as not-default-eligible.
545+
// RAM ok, backend ok — the failure must be a manifest-contract path the
546+
// validator caught (e.g. a required-eval gate not passed for a strict
547+
// release, a lineage/files mismatch, an inconsistent provenance block).
547548
return {
548549
canBeDefault: false,
549-
reason: "not-default-eligible",
550-
detail: `${installed.id}: manifest failed the default-eligibility contract check (an eval gate or kernel-coverage rule)`,
550+
reason: "contract-invalid",
551+
detail: `${installed.id}: manifest failed the contract check (an eval gate, kernel-coverage rule, or lineage/files consistency rule)`,
551552
};
552553
}
553554

packages/training/scripts/lib/vast.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@
115115
"min_per_gpu_ram_gb": 130,
116116
"description": "1× H200 SXM (141 GB) — 9B SFT or 27B at low seq_len",
117117
},
118+
"b200-1x": {
119+
"gpu_names": ["B200"],
120+
"num_gpus": 1,
121+
# B200 = 180 GB HBM3e per GPU; gpu_ram>=170 robust to ECC reserve.
122+
"min_per_gpu_ram_gb": 170,
123+
"description": "1× NVIDIA B200 (≈183 GB) — qwen3.5-27b SFT default (130 GB budget @ seq=32k fits with headroom)",
124+
},
118125
# ─── multi-GPU targets (27B+) ───
119126
"blackwell6000-2x": {
120127
# Both Server (S) and Workstation (WS) editions are 96 GB GDDR7

packages/training/scripts/publish/stage_base_v1_candidate.py

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,26 @@
4040
REQUIRED_KERNELS_BY_TIER = {
4141
"0_6b": ["turboquant_q3", "qjl", "polarquant", "dflash"],
4242
"1_7b": ["turboquant_q4", "qjl", "polarquant", "dflash"],
43+
# 27b matches eliza1_manifest.REQUIRED_KERNELS_BY_TIER["27b"] — adds
44+
# turbo3_tcq on top of the base 1.7b set for long-context cache compression.
45+
"27b": ["turboquant_q4", "qjl", "polarquant", "dflash", "turbo3_tcq"],
4346
}
4447
RAM_BUDGET_MB = {
4548
"0_6b": (2500, 3700),
4649
"1_7b": (4000, 5500),
50+
# 27b matches publish/orchestrator.RAM_BUDGET_BY_TIER["27b"] — sized for
51+
# 96GB+ Mac / high-VRAM desktop class hosts under the Q4_POLAR text bundle.
52+
"27b": (24000, 32000),
53+
}
54+
# Per-tier upstream-Qwen3 substitute used by the lineage block and the
55+
# README/provenance prose. Falls back to "0.6B" for unknown tiers to match
56+
# the script's historical default behavior.
57+
QWEN3_PARAMS_BY_TIER = {
58+
"0_6b": "0.6B",
59+
"1_7b": "1.7B",
60+
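# The 27b cloud tier substitutes against Qwen3.5-27B (no Qwen3-27B
61+
# variant exists upstream); the lineage block is meant to record the real base. NOTE(review): lineage/provenance currently render `Qwen3-{params}` ("Qwen3-27B") for this tier — confirm.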
62+
"27b": "27B",
4763
}
4864
TEXT_CTX = 32768
4965

@@ -90,7 +106,7 @@ def download_asset(repo: str, remote_path: str, dest: Path) -> None:
90106

91107
def main(argv: list[str] | None = None) -> int:
92108
ap = argparse.ArgumentParser(description=__doc__)
93-
ap.add_argument("--tier", required=True, choices=("0_6b", "1_7b"))
109+
ap.add_argument("--tier", required=True, choices=("0_6b", "1_7b", "27b"))
94110
ap.add_argument("--text-gguf", required=True, type=Path)
95111
ap.add_argument("--text-sidecar", type=Path, default=None,
96112
help="The .eliza1.json sidecar for the text GGUF (quant block).")
@@ -171,9 +187,13 @@ def main(argv: list[str] | None = None) -> int:
171187
"sha256": drafter_sha,
172188
"source": args.drafter_source,
173189
"note": (
174-
"Upstream Qwen3-0.6B GGUF used as the DFlash drafter for the "
175-
"1.7B target; shares the Qwen3 BPE vocabulary so speculative "
176-
"decoding is correct (modest acceptance — not a distilled drafter)."
190+
# For 27b the canonical drafter is the Qwen3.5-aligned 0.6B
191+
# distilled drafter (elizaos/eliza-1-drafter-0_6b-qwen3_5);
192+
# for 0_6b/1_7b it's the upstream Qwen3 0.6B GGUF reused as-is.
193+
f"DFlash drafter for the {QWEN3_PARAMS_BY_TIER.get(tier, '0.6B')} text target. "
194+
"Shares the Qwen3.5/Qwen3 BPE vocabulary with the target so speculative "
195+
"decoding is correct. See the drafter source repo for whether this "
196+
"candidate is a distilled drafter or the upstream base GGUF (not distilled)."
177197
),
178198
},
179199
"acceptanceWindow": None,
@@ -184,6 +204,15 @@ def main(argv: list[str] | None = None) -> int:
184204
# --- voice / asr / vad / cache from elizaos/eliza-1-assets/1_7b/ ---
185205
# The OmniVoice / Qwen3-ASR / Silero bytes are model-size-independent; the
186206
# assets repo only carries the 1_7b key, so reuse them under any tier.
207+
#
208+
# 27b caveat: eliza1_manifest.VOICE_QUANT_BY_TIER["27b"] == "Q8_0", so
209+
# required_voice_artifacts_for_tier("27b") returns the Q8_0 names. This
210+
# staging path still copies the Q4_K_M bytes (the only ones present in
211+
# the assets repo today) — the orchestrator's voice-artifact gate will
212+
# therefore fail in publish mode until Q8_0 OmniVoice GGUFs are derived
213+
# and pushed to elizaos/eliza-1-assets/27b/. The candidate bundle is
214+
# still installable on a runtime that can load Q4_K_M voice, but the
215+
# release gate stays red. Track as a separate dependency.
187216
asset_map = [
188217
("1_7b/tts/omnivoice-base-Q4_K_M.gguf", out / "tts" / "omnivoice-base-Q4_K_M.gguf"),
189218
("1_7b/tts/omnivoice-tokenizer-Q4_K_M.gguf", out / "tts" / "omnivoice-tokenizer-Q4_K_M.gguf"),
@@ -288,9 +317,10 @@ def num(key: str) -> float | None:
288317
}, indent=2) + "\n")
289318

290319
# --- lineage ---
320+
params = QWEN3_PARAMS_BY_TIER.get(tier, "0.6B")
291321
lineage = {
292322
"text": M.LineageEntry(
293-
base=f"{args.drafter_source.split('/')[0]}/Qwen3-{'1.7B' if tier=='1_7b' else '0.6B'} (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{'1.7B' if tier=='1_7b' else '0.6B'})",
323+
base=f"{args.drafter_source.split('/')[0]}/Qwen3-{params} (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{params})",
294324
license="apache-2.0",
295325
),
296326
"voice": M.LineageEntry(base="Serveurperso/OmniVoice-GGUF@361609388ae572a820d085185bbbe2a2aac4b30e", license="apache-2.0"),
@@ -319,15 +349,29 @@ def num(key: str) -> float | None:
319349
status="pass", at_commit="08032d57",
320350
report="packages/inference/verify/cuda-runtime-dispatch-evidence.json",
321351
device="NVIDIA GeForce RTX 5080 Laptop GPU (Blackwell, cc 12.0)",
322-
caveat="cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence",
352+
# For 27b cuda is a tier-supported backend (per
353+
# eliza1_manifest.SUPPORTED_BACKENDS_BY_TIER["27b"]); for 0_6b/1_7b
354+
# it stays "extra evidence" — see the caveat tier-switch below.
355+
caveat=(
356+
"cuda is a tier-supported backend for 27b"
357+
if tier == "27b"
358+
else "cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence"
359+
),
323360
),
324361
"metal": M.KernelVerification(
325362
status="skipped", at_commit="08032d57", report="not-run",
326363
caveat="needs-hardware: no Apple/Metal device on the build host",
327364
),
328365
"rocm": M.KernelVerification(
329366
status="skipped", at_commit="08032d57", report="not-applicable",
330-
caveat="rocm is not a tier-supported backend for 1_7b/0_6b",
367+
# rocm is a tier-supported backend for 27b but cannot be verified
368+
# on this build host (no AMD GPU); 0_6b/1_7b don't list rocm as
369+
# supported at all.
370+
caveat=(
371+
"rocm is a tier-supported backend for 27b but no AMD GPU on the build host (needs-hardware)"
372+
if tier == "27b"
373+
else "rocm is not a tier-supported backend for 1_7b/0_6b"
374+
),
331375
),
332376
}
333377

@@ -337,7 +381,7 @@ def num(key: str) -> float | None:
337381
"finetuned": True,
338382
"sourceModels": {
339383
"text": {
340-
"repo": f"{args.drafter_source.split('/')[0]}/Qwen3-{'1.7B' if tier=='1_7b' else '0.6B'}",
384+
"repo": f"{args.drafter_source.split('/')[0]}/Qwen3-{params}",
341385
"convertedVia": "packages/inference/llama.cpp/convert_hf_to_gguf.py + scripts/optimize_for_eliza1.py (PolarQuant/QJL/TurboQuant)",
342386
"note": "Fine-tuned (APOLLO full-parameter SFT) then optimized. Documented substitute for the not-yet-published Qwen3.5 base; NOT strictly base-v1 semantics — this is a finetuned candidate.",
343387
},
@@ -443,7 +487,7 @@ def _render_readme(
443487
optimized: bool,
444488
eval_results: dict[str, Any],
445489
) -> str:
446-
params = "1.7B" if tier == "1_7b" else "0.6B"
490+
params = QWEN3_PARAMS_BY_TIER.get(tier, "0.6B")
447491
base_repo = f"{drafter_source.split('/')[0]}/Qwen3-{params}"
448492
if optimized:
449493
text_para = (

packages/training/scripts/train_vast.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,14 @@ case "$PIPELINE" in
248248
DEFAULT_GPU_TARGET="blackwell6000-1x"
249249
DEFAULT_FSDP_WORLD_SIZE=1
250250
;;
251+
qwen3.5-27b)
252+
# Registry budget: 130 GB working set on a single 141 GB H200 or 183
253+
# GB B200 (apollo_mini rank-1, grad ckpt, Liger CE, micro_batch=1
254+
# seq=32k). B200-1x is the cheapest single-GPU fit (≈$3.8/hr × ~50h
255+
# ≈ $190) and FSDP_WORLD_SIZE=1 matches the registry's extras block.
256+
DEFAULT_GPU_TARGET="b200-1x"
257+
DEFAULT_FSDP_WORLD_SIZE=1
258+
;;
251259
qwen3.6-27b)
252260
DEFAULT_GPU_TARGET="b200-2x"
253261
DEFAULT_FSDP_WORLD_SIZE=2

0 commit comments

Comments
 (0)