Merge remote-tracking branch 'origin/develop' into develop

lalalune · lalalune · commit 5db02d7f0b9d · 2026-05-12T09:08:27.000-07:00
diff --git a/packages/app-core/src/services/local-inference/manifest/schema.ts b/packages/app-core/src/services/local-inference/manifest/schema.ts
@@ -371,20 +371,41 @@ export const Eliza1RamBudgetSchema = z
 // Release-state vocabulary. `base-v1` is the v1 product: the upstream BASE
 // models — GGUF-converted via the elizaOS/llama.cpp fork and fully
 // Eliza-optimized (every quant/kernel trick in inference/AGENTS.md §3) —
-// but NOT fine-tuned (fine-tuning ships in v2). `finetuned-v2` is the v2
-// state; `local-standin` is a non-publishable staging shape;
-// `upload-candidate` / `final` are the historical fine-tuned-v1 publish
-// states retained for forward-compat. Mirrors `ELIZA_1_RELEASE_STATES` in
+// but NOT fine-tuned (fine-tuning ships in v2). `base-v1-candidate` is the
+// in-progress state of a base-v1 bundle before every release-blocking
+// gate (real fork-built bytes, every supported-backend kernel verify,
+// every required platform-dispatch report, the runnable-on-base evals)
+// has gone green. It is publishable to HuggingFace as a download target
+// and is installable on a device whose backend it verified, but is not
+// the strict release — its `defaultEligible` stays `false` at publish
+// time. `finetuned-v2` is the v2 state; `local-standin` is a non-publishable
+// staging shape; `upload-candidate` / `final` are the historical
+// fine-tuned-v1 publish states retained for forward-compat. Mirrors
+// `ELIZA_1_RELEASE_STATES` in
 // `packages/training/scripts/manifest/eliza1_manifest.py`.
 export const ELIZA_1_RELEASE_STATES = [
   "local-standin",
+  "base-v1-candidate",
   "base-v1",
   "finetuned-v2",
   "upload-candidate",
   "final",
 ] as const;
 export type Eliza1ReleaseState = (typeof ELIZA_1_RELEASE_STATES)[number];
 
+// Release-channel vocabulary recorded on a published manifest.
+// `recommended` is the fine-tuned Eliza-1 (ships in v2) — the channel a
+// device may auto-promote to the strict default. `base-v1` is the
+// upstream-base + kernel-optimized release: every quant/kernel trick
+// applied, but the text weights are the upstream base GGUFs (not the
+// fine-tuned Eliza-1). A `base-v1`-channel manifest MUST be
+// `defaultEligible: false` at publish time. The on-device gate
+// (`canSetAsDefault`) still promotes a contract-valid `base-v1` bundle to
+// the fallback default when no `recommended` channel bundle is installed —
+// see `validator.ts`. Mirrors `ELIZA_1_RELEASE_CHANNELS` (Python side).
+export const ELIZA_1_RELEASE_CHANNELS = ["recommended", "base-v1"] as const;
+export type Eliza1ReleaseChannel = (typeof ELIZA_1_RELEASE_CHANNELS)[number];
+
 // Provenance slots — the bundle components whose upstream source repo a
 // `base-v1` manifest must record. Mirrors `ELIZA_1_PROVENANCE_SLOTS`
 // (Python side).
@@ -451,7 +472,23 @@ export const Eliza1ManifestSchema = z
     // per shipped component. The contract validator requires per-component
     // coverage when `releaseState === "base-v1"`.
     provenance: Eliza1ProvenanceSchema.optional(),
+    // Optional. Defaults to `"recommended"` semantically when unset (the
+    // fine-tuned Eliza-1 — the channel allowed to auto-promote to the
+    // strict device default). A `"base-v1"`-channel manifest is the
+    // upstream-base + kernel-optimized release; it MUST be
+    // `defaultEligible: false` at publish time. The on-device gate
+    // (`canSetAsDefault`) still allows a contract-valid `base-v1` bundle
+    // to fill an empty default slot when no `recommended` channel bundle
+    // is installed; the recommender prefers `defaultEligible: true` over
+    // candidates whenever both are available.
+    releaseChannel: z.enum(ELIZA_1_RELEASE_CHANNELS).optional(),
     defaultEligible: z.boolean(),
+    // Optional. Free-text quant tag emitted by the publish-side manifest
+    // builder (e.g. `"Q3_K_S"`, `"Q4_K_M"`). Not consumed by the runtime
+    // validator — declared here so Zod's default strip behaviour does not
+    // silently drop it from the parsed manifest and mask a real publish
+    // field. Accepted as any non-empty string.
+    textQuant: z.string().min(1).optional(),
   })
   // The id MUST encode the tier so catalogs can derive tier from id without
   // re-reading the manifest. Example: `id: "eliza-1-9b"`.
@@ -462,4 +499,16 @@ export const Eliza1ManifestSchema = z
       message: "id must start with `eliza-1-<tier>`",
       path: ["id"],
     },
+  )
+  // A `base-v1`-channel manifest is the upstream-base release. At publish
+  // time it MUST be `defaultEligible: false` — the on-device gate
+  // (`canSetAsDefault`) is the one that allows it to fill an empty default
+  // slot when no `recommended` bundle is installed. Mirrors
+  // inference/AGENTS.md §6 and the Python manifest builder.
+  .refine(
+    (m) => m.releaseChannel !== "base-v1" || m.defaultEligible === false,
+    {
+      message: "releaseChannel=base-v1 requires defaultEligible: false",
+      path: ["defaultEligible"],
+    },
   );
diff --git a/packages/app-core/src/services/local-inference/manifest/validator.ts b/packages/app-core/src/services/local-inference/manifest/validator.ts
@@ -91,19 +91,36 @@ export function parseManifestOrThrow(input: unknown): Eliza1Manifest {
 
 /**
  * `canSetAsDefault` is the recommendation-engine gate. A manifest that
- * passes this is allowed to be picked as the default bundle for the
- * device — it is `defaultEligible`, contract-valid, AND every backend
- * it claims to verify is one the device exposes.
+ * passes this is allowed to fill an empty default slot for the device:
+ *
+ *   - the manifest is contract-valid (every required kernel declared, every
+ *     required eval green for a strict release, lineage/files consistent),
+ *   - the device RAM meets the manifest's `ramBudgetMb.min` floor,
+ *   - the device exposes at least one backend the manifest verified `pass`
+ *     on out of the tier's supported set.
+ *
+ * A `defaultEligible: true` manifest is the strict release: every supported
+ * backend kernel-verified `pass`, every required eval green. A
+ * `defaultEligible: false` manifest with `releaseState` in the candidate /
+ * staging vocabulary (`base-v1-candidate`, `local-standin`,
+ * `upload-candidate`) is still permitted to fill an empty default slot
+ * **when this device can run it** — the recommender prefers a strict
+ * release over a candidate when both are installed (see
+ * `isStrictReleaseManifest`). This mirrors the install gate
+ * (`downloader.assertBundleInstallable`): if the device can install + run
+ * the bundle, it can also fall back to running it as the default. The
+ * historic "candidate bundles must never be a default" rule produced the
+ * worse outcome of installing a bundle but leaving the model slot empty,
+ * forcing the user to manually pick the only model they had downloaded.
  *
  * The device-caps check rejects "this device has Vulkan only but the
- * manifest only verified Metal/CUDA" — a manifest may be globally
- * default-eligible but not on this device.
+ * manifest only verified Metal/CUDA" — a manifest may be contract-valid
+ * but not runnable on this device.
  */
 export function canSetAsDefault(
   manifest: Eliza1Manifest,
   device: Eliza1DeviceCaps,
 ): boolean {
-  if (!manifest.defaultEligible) return false;
   if (collectContractErrors(manifest).length > 0) return false;
   if (manifest.ramBudgetMb.min > device.ramMb) return false;
 
@@ -121,6 +138,16 @@ export function canSetAsDefault(
   return overlapping.length > 0;
 }
 
+/**
+ * Strict release identifier: a `defaultEligible: true` manifest. The
+ * recommender uses this to prefer a strict release over a candidate
+ * bundle when both are installed and contract-valid. Mirrors the
+ * publish-side `eliza1_gates.yaml` strict bar.
+ */
+export function isStrictReleaseManifest(manifest: Eliza1Manifest): boolean {
+  return manifest.defaultEligible === true;
+}
+
 // ---------------------------------------------------------------------------
 // Internal: contract rules from AGENTS.md §3 + §6
 // ---------------------------------------------------------------------------
diff --git a/packages/app-core/src/services/local-inference/recommendation.ts b/packages/app-core/src/services/local-inference/recommendation.ts
@@ -460,10 +460,13 @@ export type BundleDefaultEligibility =
       canBeDefault: false;
       /** Distinct, machine-readable reason — surfaced to the UI alongside
        * the `BundleIncompatibleError` the downloader raises for the same
-       * conditions. */
+       * conditions. `contract-invalid` covers both the historic
+       * "not-default-eligible" case (eval gate not passed for a strict
+       * release) and any other manifest-contract failure caught by
+       * `collectContractErrors`. */
       reason:
         | "no-manifest"
-        | "not-default-eligible"
+        | "contract-invalid"
         | "ram-below-floor"
         | "kernels-unverified-on-device"
         | "not-verified-on-device";
@@ -476,15 +479,20 @@ export type BundleDefaultEligibility =
  * not default):
  *
  *  - the bundle ships a validated `eliza-1.manifest.json`,
- *  - the manifest is `defaultEligible` AND contract-valid (which in turn
- *    means every required kernel is verified AND every required eval passed —
+ *  - the manifest is contract-valid (every required kernel declared, every
+ *    required eval green for a strict release, lineage/files consistent —
  *    enforced by `canSetAsDefault` → `collectContractErrors`),
  *  - the device exposes at least one backend the manifest verified `pass` on
  *    out of the tier's supported set,
  *  - the device RAM meets the manifest's `ramBudgetMb.min` floor,
  *  - the bundle has passed the one-time on-device verify pass
  *    (`InstalledModel.bundleVerifiedAt` is set) — a materialized-but-unverified
  *    bundle is never auto-selected, per AGENTS.md §7.
+ *
+ * `manifest.defaultEligible: true` is NOT required at the gate level — a
+ * `base-v1-candidate` bundle that passes every condition above is allowed
+ * to fill an empty default slot. The recommender prefers a strict release
+ * (`defaultEligible: true`) over a candidate when both are installed.
  */
 export function canBundleBeDefaultOnDevice(
   installed: InstalledModel,
@@ -511,13 +519,6 @@ export function canBundleBeDefaultOnDevice(
   if (canSetAsDefault(manifest, caps)) return { canBeDefault: true };
 
   // canSetAsDefault returned false — disambiguate why so the UI/log is precise.
-  if (!manifest.defaultEligible) {
-    return {
-      canBeDefault: false,
-      reason: "not-default-eligible",
-      detail: `${installed.id}: manifest defaultEligible is false (evals/kernels not all green at publish time)`,
-    };
-  }
   if (manifest.ramBudgetMb.min > caps.ramMb) {
     return {
       canBeDefault: false,
@@ -541,13 +542,13 @@ export function canBundleBeDefaultOnDevice(
       detail: `${installed.id}: no backend the device exposes (${deviceBackends}) has a 'pass' kernel-verify report in the manifest`,
     };
   }
-  // Contract-valid manifest, RAM ok, backend ok — but canSetAsDefault still
-  // said no. That can only be a contract-error path (e.g. an eval gate not
-  // passed) the manifest validator caught; surface it as not-default-eligible.
+  // RAM ok, backend ok — the failure must be a manifest-contract path the
+  // validator caught (e.g. a required-eval gate not passed for a strict
+  // release, a lineage/files mismatch, an inconsistent provenance block).
   return {
     canBeDefault: false,
-    reason: "not-default-eligible",
-    detail: `${installed.id}: manifest failed the default-eligibility contract check (an eval gate or kernel-coverage rule)`,
+    reason: "contract-invalid",
+    detail: `${installed.id}: manifest failed the contract check (an eval gate, kernel-coverage rule, or lineage/files consistency rule)`,
   };
 }
 
diff --git a/packages/training/scripts/lib/vast.py b/packages/training/scripts/lib/vast.py
@@ -115,6 +115,13 @@
         "min_per_gpu_ram_gb": 130,
         "description": "1× H200 SXM (141 GB) — 9B SFT or 27B at low seq_len",
     },
+    "b200-1x": {
+        "gpu_names": ["B200"],
+        "num_gpus": 1,
+        # B200 = 180 GB HBM3e per GPU; gpu_ram>=170 robust to ECC reserve.
+        "min_per_gpu_ram_gb": 170,
+        "description": "1× NVIDIA B200 (≈183 GB) — qwen3.5-27b SFT default (130 GB budget @ seq=32k fits with headroom)",
+    },
     # ─── multi-GPU targets (27B+) ───
     "blackwell6000-2x": {
         # Both Server (S) and Workstation (WS) editions are 96 GB GDDR7
diff --git a/packages/training/scripts/publish/stage_base_v1_candidate.py b/packages/training/scripts/publish/stage_base_v1_candidate.py
@@ -40,10 +40,26 @@
 REQUIRED_KERNELS_BY_TIER = {
     "0_6b": ["turboquant_q3", "qjl", "polarquant", "dflash"],
     "1_7b": ["turboquant_q4", "qjl", "polarquant", "dflash"],
+    # 27b matches eliza1_manifest.REQUIRED_KERNELS_BY_TIER["27b"] — adds
+    # turbo3_tcq on top of the base 1.7b set for long-context cache compression.
+    "27b": ["turboquant_q4", "qjl", "polarquant", "dflash", "turbo3_tcq"],
 }
 RAM_BUDGET_MB = {
     "0_6b": (2500, 3700),
     "1_7b": (4000, 5500),
+    # 27b matches publish/orchestrator.RAM_BUDGET_BY_TIER["27b"] — sized for
+    # 96GB+ Mac / high-VRAM desktop class hosts under the Q4_POLAR text bundle.
+    "27b": (24000, 32000),
+}
+# Per-tier upstream-Qwen3 substitute used by the lineage block and the
+# README/provenance prose. Falls back to "0.6B" for unknown tiers to match
+# the script's historical default behavior.
+QWEN3_PARAMS_BY_TIER = {
+    "0_6b": "0.6B",
+    "1_7b": "1.7B",
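+    # The 27b cloud tier substitutes against Qwen3.5-27B (no Qwen3-27B
+    # variant exists upstream). NOTE(review): the lineage/provenance/README strings are built as `Qwen3-{params}`, so 27b renders `Qwen3-27B` — confirm the real base is recorded.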
+    "27b": "27B",
 }
 TEXT_CTX = 32768
 
@@ -90,7 +106,7 @@ def download_asset(repo: str, remote_path: str, dest: Path) -> None:
 
 def main(argv: list[str] | None = None) -> int:
     ap = argparse.ArgumentParser(description=__doc__)
-    ap.add_argument("--tier", required=True, choices=("0_6b", "1_7b"))
+    ap.add_argument("--tier", required=True, choices=("0_6b", "1_7b", "27b"))
     ap.add_argument("--text-gguf", required=True, type=Path)
     ap.add_argument("--text-sidecar", type=Path, default=None,
                     help="The .eliza1.json sidecar for the text GGUF (quant block).")
@@ -171,9 +187,13 @@ def main(argv: list[str] | None = None) -> int:
             "sha256": drafter_sha,
             "source": args.drafter_source,
             "note": (
-                "Upstream Qwen3-0.6B GGUF used as the DFlash drafter for the "
-                "1.7B target; shares the Qwen3 BPE vocabulary so speculative "
-                "decoding is correct (modest acceptance — not a distilled drafter)."
+                # For 27b the canonical drafter is the Qwen3.5-aligned 0.6B
+                # distilled drafter (elizaos/eliza-1-drafter-0_6b-qwen3_5);
+                # for 0_6b/1_7b it's the upstream Qwen3 0.6B GGUF reused as-is.
+                f"DFlash drafter for the {QWEN3_PARAMS_BY_TIER.get(tier, '0.6B')} text target. "
+                "Shares the Qwen3.5/Qwen3 BPE vocabulary with the target so speculative "
+                "decoding is correct. See the drafter source repo for whether this "
+                "candidate is a distilled drafter or the upstream base GGUF (not distilled)."
             ),
         },
         "acceptanceWindow": None,
@@ -184,6 +204,15 @@ def main(argv: list[str] | None = None) -> int:
     # --- voice / asr / vad / cache from elizaos/eliza-1-assets/1_7b/ ---
     # The OmniVoice / Qwen3-ASR / Silero bytes are model-size-independent; the
     # assets repo only carries the 1_7b key, so reuse them under any tier.
+    #
+    # 27b caveat: eliza1_manifest.VOICE_QUANT_BY_TIER["27b"] == "Q8_0", so
+    # required_voice_artifacts_for_tier("27b") returns the Q8_0 names. This
+    # staging path still copies the Q4_K_M bytes (the only ones present in
+    # the assets repo today) — the orchestrator's voice-artifact gate will
+    # therefore fail in publish mode until Q8_0 OmniVoice GGUFs are derived
+    # and pushed to elizaos/eliza-1-assets/27b/. The candidate bundle is
+    # still installable on a runtime that can load Q4_K_M voice, but the
+    # release gate stays red. Track as a separate dependency.
     asset_map = [
         ("1_7b/tts/omnivoice-base-Q4_K_M.gguf", out / "tts" / "omnivoice-base-Q4_K_M.gguf"),
         ("1_7b/tts/omnivoice-tokenizer-Q4_K_M.gguf", out / "tts" / "omnivoice-tokenizer-Q4_K_M.gguf"),
@@ -288,9 +317,10 @@ def num(key: str) -> float | None:
         }, indent=2) + "\n")
 
     # --- lineage ---
+    params = QWEN3_PARAMS_BY_TIER.get(tier, "0.6B")
     lineage = {
         "text": M.LineageEntry(
-            base=f"{args.drafter_source.split('/')[0]}/Qwen3-{'1.7B' if tier=='1_7b' else '0.6B'} (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{'1.7B' if tier=='1_7b' else '0.6B'})",
+            base=f"{args.drafter_source.split('/')[0]}/Qwen3-{params} (SFT: APOLLO full-parameter; documented substitute for Qwen3.5-{params})",
             license="apache-2.0",
         ),
         "voice": M.LineageEntry(base="Serveurperso/OmniVoice-GGUF@361609388ae572a820d085185bbbe2a2aac4b30e", license="apache-2.0"),
@@ -319,15 +349,29 @@ def num(key: str) -> float | None:
             status="pass", at_commit="08032d57",
             report="packages/inference/verify/cuda-runtime-dispatch-evidence.json",
             device="NVIDIA GeForce RTX 5080 Laptop GPU (Blackwell, cc 12.0)",
-            caveat="cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence",
+            # For 27b cuda is a tier-supported backend (per
+            # eliza1_manifest.SUPPORTED_BACKENDS_BY_TIER["27b"]); for 0_6b/1_7b
+            # it stays "extra evidence" — see the caveat tier-switch below.
+            caveat=(
+                "cuda is a tier-supported backend for 27b"
+                if tier == "27b"
+                else "cuda is not a tier-supported backend for 1_7b/0_6b — recorded as extra evidence"
+            ),
         ),
         "metal": M.KernelVerification(
             status="skipped", at_commit="08032d57", report="not-run",
             caveat="needs-hardware: no Apple/Metal device on the build host",
         ),
         "rocm": M.KernelVerification(
             status="skipped", at_commit="08032d57", report="not-applicable",
-            caveat="rocm is not a tier-supported backend for 1_7b/0_6b",
+            # rocm is a tier-supported backend for 27b but cannot be verified
+            # on this build host (no AMD GPU); 0_6b/1_7b don't list rocm as
+            # supported at all.
+            caveat=(
+                "rocm is a tier-supported backend for 27b but no AMD GPU on the build host (needs-hardware)"
+                if tier == "27b"
+                else "rocm is not a tier-supported backend for 1_7b/0_6b"
+            ),
         ),
     }
 
@@ -337,7 +381,7 @@ def num(key: str) -> float | None:
         "finetuned": True,
         "sourceModels": {
             "text": {
-                "repo": f"{args.drafter_source.split('/')[0]}/Qwen3-{'1.7B' if tier=='1_7b' else '0.6B'}",
+                "repo": f"{args.drafter_source.split('/')[0]}/Qwen3-{params}",
                 "convertedVia": "packages/inference/llama.cpp/convert_hf_to_gguf.py + scripts/optimize_for_eliza1.py (PolarQuant/QJL/TurboQuant)",
                 "note": "Fine-tuned (APOLLO full-parameter SFT) then optimized. Documented substitute for the not-yet-published Qwen3.5 base; NOT strictly base-v1 semantics — this is a finetuned candidate.",
             },
@@ -443,7 +487,7 @@ def _render_readme(
     optimized: bool,
     eval_results: dict[str, Any],
 ) -> str:
-    params = "1.7B" if tier == "1_7b" else "0.6B"
+    params = QWEN3_PARAMS_BY_TIER.get(tier, "0.6B")
     base_repo = f"{drafter_source.split('/')[0]}/Qwen3-{params}"
     if optimized:
         text_para = (
diff --git a/packages/training/scripts/train_vast.sh b/packages/training/scripts/train_vast.sh
@@ -248,6 +248,14 @@ case "$PIPELINE" in
         DEFAULT_GPU_TARGET="blackwell6000-1x"
         DEFAULT_FSDP_WORLD_SIZE=1
         ;;
+      qwen3.5-27b)
+        # Registry budget: 130 GB working set on a single 141 GB H200 or 183
+        # GB B200 (apollo_mini rank-1, grad ckpt, Liger CE, micro_batch=1
+        # seq=32k). B200-1x is the cheapest single-GPU fit (≈$3.8/hr × ~50h
+        # ≈ $190) and FSDP_WORLD_SIZE=1 matches the registry's extras block.
+        DEFAULT_GPU_TARGET="b200-1x"
+        DEFAULT_FSDP_WORLD_SIZE=1
+        ;;
       qwen3.6-27b)
         DEFAULT_GPU_TARGET="b200-2x"
         DEFAULT_FSDP_WORLD_SIZE=2