GeniePod
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 13 additions & 0 deletions
diff --git a/crates/genie-core/src/eval/bfcl.rs b/crates/genie-core/src/eval/bfcl.rs
Lines changed: 99 additions & 2 deletions
diff --git a/crates/genie-core/src/ota/mod.rs b/crates/genie-core/src/ota/mod.rs
Lines changed: 150 additions & 20 deletions
@@ -2,6 +2,19 @@
 
 ## Unreleased
 
+- **`genie-ctl --help` alignment** (#445): the `health` command no longer
+  prints flush-left. A trailing `\` line-continuation after the voice-gated
+  `speaker` block was stripping the four-space indent off the following line
+  in both `voice`-on and `voice`-off builds. Rendering moved into a pure
+  `usage_text()` helper with a regression test that fails if any command line
+  loses its indent.
+- **Quick-router compound spoken durations** (#449): "set a timer for forty
+  five minutes" now sets a 45-minute timer instead of 5. The deterministic
+  router only ever saw single tokens, so its `"forty five"` number arm was dead
+  code and the trailing "five" bound to the unit. `parse_duration` now stitches
+  a tens word (`twenty`..`ninety`) to a following ones digit, and the missing
+  `fifty`..`ninety` tens words parse on their own.
+
 ## 1.0.0-alpha.11 - 2026-06-20
 
 Alpha 11 is the **typed-tool contract + grounded BFCL** release. The single
 
@@ -599,14 +599,21 @@ fn canonicalize_entity_value(value: &Value, graph: &HomeGraph) -> Value {
 /// Canonicalize an action verb to a small set of stable forms, so the grounded
 /// metric credits synonyms the agent would execute identically. Unknown verbs
 /// pass through normalized (lowercase, separators -> `_`) so exact matches still
-/// hold. Mirrors the canonicalization the runtime tool dispatch should apply.
+/// hold. Mirrors the canonicalization the runtime tool dispatch applies in
+/// `tools::dispatch::canon_home_control_action`.
+///
+/// `activate` is intentionally *not* folded into `turn_on`: the runtime keeps it
+/// as a distinct `home_control` action for scenes/scripts (it appears separately
+/// in the action enum and `canon_home_control_action` leaves it as-is), so
+/// crediting `turn_on` for an expected `activate` would score a wrong actuation
+/// as correct and inflate the grounded metric.
+///
+/// Returns the canonical verb as an owned `String`: `"turn_off"` or `"turn_on"`
+/// for the listed synonyms, otherwise the normalized input unchanged.
 fn canon_action(text: &str) -> String {
+    // Trim, lowercase, and map spaces/hyphens to `_` before matching, so
+    // " Turn-On " and "turn_on" select the same arm.
     let normalized = text.trim().to_lowercase().replace([' ', '-'], "_");
     match normalized.as_str() {
         "deactivate" | "disable" | "switch_off" | "power_off" | "shut_off" | "turn_off" => {
             "turn_off".to_string()
         }
-        "activate" | "enable" | "switch_on" | "power_on" | "turn_on" => "turn_on".to_string(),
+        "enable" | "switch_on" | "power_on" | "turn_on" => "turn_on".to_string(),
+        // Everything else, `activate` included, passes through in normalized form.
         _ => normalized,
     }
 }
@@ -1002,6 +1009,96 @@ mod tests {
         assert_eq!(report.grounded_strict_matches, 1);
     }
 
+    #[test]
+    fn grounded_metric_keeps_activate_distinct_from_turn_on() {
+        // `activate` is its own runtime action for scenes/scripts: the
+        // dispatcher's canon_home_control_action leaves it as-is, and
+        // `home_control` lists it separately from `turn_on` in its action enum.
+        // Because the grounded metric must mirror runtime dispatch, answering
+        // `turn_on` where `activate` was expected is a *wrong actuation*
+        // (turn_on is a no-op for a scene) and must NOT count as a grounded match.
+        let wanted_call = expected(
+            "home_control",
+            serde_json::json!({"action": "activate", "entity": "kitchen lights"}),
+        );
+        let scenario = case(vec![wanted_call]);
+        let turn_on_response =
+            r#"{"tool":"home_control","arguments":{"action":"turn_on","entity":"kitchen lights"}}"#;
+
+        let outcome = score_response(&scenario, turn_on_response);
+        assert!(
+            !outcome.argument_match,
+            "raw action strings differ, exact match must fail"
+        );
+        assert!(
+            !outcome.grounded_argument_match,
+            "predicting `turn_on` for an expected `activate` is a wrong actuation \
+             and must not be credited by the grounded metric"
+        );
+    }
+
+    #[test]
+    fn canon_action_contract() {
+        // Pins the scorer's action canonicalization so a later edit cannot
+        // quietly bring back the activate→turn_on collapse removed in #458.
+        let on_synonyms = ["turn_on", "enable", "switch_on", "power_on"];
+        let off_synonyms = [
+            "turn_off",
+            "deactivate",
+            "disable",
+            "switch_off",
+            "power_off",
+            "shut_off",
+        ];
+        // The remaining home_control actions; none may collapse into another.
+        let passthrough_actions = [
+            "toggle",
+            "open",
+            "close",
+            "lock",
+            "unlock",
+            "set_brightness",
+            "set_temperature",
+        ];
+
+        for s in on_synonyms {
+            assert_eq!(canon_action(s), "turn_on", "{s} should fold to turn_on");
+        }
+        for s in off_synonyms {
+            assert_eq!(canon_action(s), "turn_off", "{s} should fold to turn_off");
+        }
+
+        // `activate` keeps its own identity and never becomes `turn_on`.
+        assert_eq!(canon_action("activate"), "activate");
+
+        for a in passthrough_actions {
+            assert_eq!(canon_action(a), a, "{a} must pass through unchanged");
+        }
+
+        // Normalization rules: trimming, case folding, space/hyphen -> underscore.
+        assert_eq!(canon_action(" Turn-On "), "turn_on");
+        assert_eq!(canon_action("ACTIVATE"), "activate");
+        assert_eq!(canon_action("set brightness"), "set_brightness");
+    }
+
+    #[test]
+    fn grounded_metric_credits_matching_activate() {
+        // Counterpart to grounded_metric_keeps_activate_distinct_from_turn_on:
+        // `activate` is passed through rather than dropped, so predicting it
+        // correctly must still earn credit on both metrics.
+        let wanted_call = expected(
+            "home_control",
+            serde_json::json!({"action": "activate", "entity": "kitchen lights"}),
+        );
+        let scenario = case(vec![wanted_call]);
+        let activate_response =
+            r#"{"tool":"home_control","arguments":{"action":"activate","entity":"kitchen lights"}}"#;
+
+        let outcome = score_response(&scenario, activate_response);
+        assert!(
+            outcome.argument_match,
+            "exact activate match must be credited"
+        );
+        assert!(
+            outcome.grounded_argument_match,
+            "a matching activate must be credited by the grounded metric"
+        );
+    }
+
     #[test]
     fn loads_jsonl_fixture_and_scores_report() {
         let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("../..");
 
@@ -189,26 +189,99 @@ impl OtaManager {
     }
 }
 
-/// Compare semver strings. Returns true if `latest` is newer than `current`.
-fn version_is_newer(latest: &str, current: &str) -> bool {
-    let parse = |s: &str| -> (u32, u32, u32) {
-        let clean = s
-            .strip_prefix('v')
-            .unwrap_or(s)
-            .split('-')
-            .next()
-            .unwrap_or(s);
-        let parts: Vec<u32> = clean.split('.').filter_map(|p| p.parse().ok()).collect();
-        (
-            parts.first().copied().unwrap_or(0),
-            parts.get(1).copied().unwrap_or(0),
-            parts.get(2).copied().unwrap_or(0),
-        )
+/// A parsed semantic version: the numeric `major.minor.patch` core plus any
+/// pre-release identifiers.
+///
+/// Equality is derived field by field. Precedence is deliberately *not*
+/// derived; it is computed by `compare_semver`.
+#[derive(Debug, PartialEq, Eq)]
+struct SemVer {
+    /// The `(major, minor, patch)` numbers, in that order.
+    core: (u32, u32, u32),
+    /// Dot-separated pre-release identifiers (the part after `-`). Empty for a
+    /// normal release. A release outranks any pre-release of the same core.
+    pre: Vec<String>,
+}
+
+/// Parse a version string into a [`SemVer`].
+///
+/// Never fails: malformed input degrades to zeros instead of an error.
+/// Surrounding whitespace is trimmed and one leading `v` is tolerated. Build
+/// metadata (`+...`) is ignored for precedence, per SemVer §10. Missing or
+/// unparseable core components default to 0, and components beyond the third
+/// are ignored. A `-` with nothing after it yields no pre-release identifiers.
+fn parse_semver(s: &str) -> SemVer {
+    let s = s.trim();
+    let s = s.strip_prefix('v').unwrap_or(s);
+    // Build metadata never affects precedence — drop anything from '+' on.
+    let s = s.split('+').next().unwrap_or(s);
+    // Core is everything before the first '-'; the rest is the pre-release.
+    let (core_str, pre_str) = match s.split_once('-') {
+        Some((core, pre)) => (core, Some(pre)),
+        None => (s, None),
     };
 
-    let l = parse(latest);
-    let c = parse(current);
-    l > c
+    // Positional parse: an unparseable component becomes 0 without shifting the
+    // ones after it (so "1.x.2" is (1, 0, 2), not (1, 2, 0)).
+    let mut nums = core_str
+        .split('.')
+        .map(|p| p.trim().parse::<u32>().unwrap_or(0));
+    let core = (
+        nums.next().unwrap_or(0),
+        nums.next().unwrap_or(0),
+        nums.next().unwrap_or(0),
+    );
+
+    // An empty pre-release part ("1.0.0-") is treated as a plain release;
+    // otherwise each dot-separated identifier is kept verbatim.
+    let pre = pre_str
+        .filter(|p| !p.is_empty())
+        .map(|p| p.split('.').map(|id| id.to_string()).collect())
+        .unwrap_or_default();
+
+    SemVer { core, pre }
+}
+
+/// Compare a single pair of pre-release identifiers per SemVer §11:
+/// numeric identifiers compare numerically and always rank below alphanumeric
+/// ones; alphanumeric identifiers compare in ASCII order.
+///
+/// An identifier is numeric only when it is non-empty and consists solely of
+/// ASCII digits (SemVer §9), so a sign-prefixed string such as `+5` is treated
+/// as alphanumeric. Numeric identifiers are compared digit-wise rather than
+/// through a fixed-width integer, so arbitrarily long values still order
+/// correctly instead of falling back to text comparison on overflow. Leading
+/// zeros are ignored, so `007` equals `7`.
+fn compare_pre_identifier(a: &str, b: &str) -> std::cmp::Ordering {
+    use std::cmp::Ordering;
+
+    /// Returns the significant digits of `id` (leading zeros removed) when it
+    /// is a numeric identifier, or `None` when it is alphanumeric.
+    fn significant_digits(id: &str) -> Option<&str> {
+        let numeric = !id.is_empty() && id.bytes().all(|byte| byte.is_ascii_digit());
+        numeric.then(|| id.trim_start_matches('0'))
+    }
+
+    match (significant_digits(a), significant_digits(b)) {
+        // Without leading zeros, more digits means a larger number; for equal
+        // lengths, lexical order of the digits matches numeric order.
+        (Some(an), Some(bn)) => an.len().cmp(&bn.len()).then_with(|| an.cmp(bn)),
+        (Some(_), None) => Ordering::Less,
+        (None, Some(_)) => Ordering::Greater,
+        (None, None) => a.cmp(b),
+    }
+}
+
+/// Order two parsed versions by full SemVer §11 precedence.
+///
+/// The numeric core decides first. For equal cores, a release (no pre-release
+/// identifiers) ranks above any pre-release; two pre-releases are compared
+/// identifier by identifier, and if one list is a prefix of the other the
+/// longer list ranks higher.
+fn compare_semver(a: &SemVer, b: &SemVer) -> std::cmp::Ordering {
+    use std::cmp::Ordering;
+
+    // 1. A differing core settles the ordering outright.
+    let by_core = a.core.cmp(&b.core);
+    if by_core != Ordering::Equal {
+        return by_core;
+    }
+
+    // 2. If either side is a plain release, the side without a pre-release
+    //    wins (`true > false`); two plain releases are equal.
+    let (a_release, b_release) = (a.pre.is_empty(), b.pre.is_empty());
+    if a_release || b_release {
+        return a_release.cmp(&b_release);
+    }
+
+    // 3. Walk the shared identifiers left to right; the first difference decides.
+    // 4. With no difference, the longer identifier list has higher precedence.
+    a.pre
+        .iter()
+        .zip(&b.pre)
+        .map(|(ai, bi)| compare_pre_identifier(ai, bi))
+        .find(|order| *order != Ordering::Equal)
+        .unwrap_or_else(|| a.pre.len().cmp(&b.pre.len()))
+}
+
+/// Report whether `latest` is strictly newer than `current`.
+///
+/// Both strings are parsed leniently and ordered with full SemVer §11
+/// pre-release precedence — so `1.0.0-alpha.12` is newer than
+/// `1.0.0-alpha.11`, and `1.0.0` is newer than any `1.0.0-alpha.N`. Versions of
+/// equal precedence return `false`.
+fn version_is_newer(latest: &str, current: &str) -> bool {
+    let candidate = parse_semver(latest);
+    let installed = parse_semver(current);
+    matches!(
+        compare_semver(&candidate, &installed),
+        std::cmp::Ordering::Greater
+    )
 }
 
 /// GET request to GitHub API (api.github.com).
@@ -286,9 +359,66 @@ mod tests {
 
     #[test]
     fn version_comparison_with_prerelease() {
-        // Pre-release suffix is stripped for comparison.
+        // A higher numeric core wins regardless of pre-release tags.
         assert!(version_is_newer("1.1.0-alpha.1", "1.0.0-alpha.1"));
-        assert!(!version_is_newer("1.0.0-alpha.2", "1.0.0-alpha.1"));
+        // Pre-releases of the SAME core order by their identifiers (SemVer §11),
+        // so a later alpha IS newer than an earlier one. This is the case the
+        // OTA checker depends on during the whole `1.0.0-alpha.N` release line —
+        // the previous "strip the suffix" logic made every alpha compare equal,
+        // so the device never saw a new alpha.
+        assert!(version_is_newer("1.0.0-alpha.2", "1.0.0-alpha.1"));
+        assert!(version_is_newer("1.0.0-alpha.11", "1.0.0-alpha.9"));
+        // The ordering is directional: an earlier alpha is never reported as newer.
+        assert!(!version_is_newer("1.0.0-alpha.1", "1.0.0-alpha.2"));
+    }
+
+    #[test]
+    fn prerelease_numeric_identifiers_compare_numerically() {
+        // Numeric identifiers must order as numbers, not text: a string compare
+        // would rank "11" below "9" and "100" below "99".
+        let newer_than = [
+            ("1.0.0-alpha.12", "1.0.0-alpha.11"),
+            ("1.0.0-alpha.100", "1.0.0-alpha.99"),
+        ];
+        for (latest, current) in newer_than {
+            assert!(version_is_newer(latest, current));
+        }
+        assert!(!version_is_newer("1.0.0-alpha.9", "1.0.0-alpha.11"));
+    }
+
+    #[test]
+    fn release_outranks_its_prerelease() {
+        let release = "1.0.0";
+        // The final release is newer than any pre-release of the same core...
+        for pre_release in ["1.0.0-alpha.11", "1.0.0-rc.1"] {
+            assert!(version_is_newer(release, pre_release));
+        }
+        // ...while no pre-release is newer than the matching final release.
+        for pre_release in ["1.0.0-alpha.1", "1.0.0-rc.1"] {
+            assert!(!version_is_newer(pre_release, release));
+        }
+    }
+
+    #[test]
+    fn prerelease_stage_ordering() {
+        // Stage names are alphanumeric identifiers, so they order by ASCII:
+        // alpha < beta < rc.
+        let beta_over_alpha = version_is_newer("1.0.0-beta.1", "1.0.0-alpha.11");
+        assert!(beta_over_alpha);
+        let rc_over_beta = version_is_newer("1.0.0-rc.1", "1.0.0-beta.9");
+        assert!(rc_over_beta);
+        let alpha_over_beta = version_is_newer("1.0.0-alpha.99", "1.0.0-beta.1");
+        assert!(!alpha_over_beta);
+    }
+
+    #[test]
+    fn numeric_identifier_ranks_below_alphanumeric() {
+        // SemVer §11: in the same position, a numeric identifier has lower
+        // precedence than an alphanumeric one.
+        let (alphanumeric, numeric) = ("1.0.0-alpha", "1.0.0-1");
+        assert!(version_is_newer(alphanumeric, numeric));
+        assert!(!version_is_newer(numeric, alphanumeric));
+    }
+
+    #[test]
+    fn longer_prerelease_set_wins_when_prefixes_match() {
+        // SemVer §11: when every shared identifier is equal, the version with
+        // more pre-release fields outranks the one with fewer.
+        let (longer, shorter) = ("1.0.0-alpha.1.1", "1.0.0-alpha.1");
+        assert!(version_is_newer(longer, shorter));
+        assert!(!version_is_newer(shorter, longer));
+    }
+
+    #[test]
+    fn build_metadata_is_ignored() {
+        // SemVer §10: build metadata (`+...`) plays no part in precedence.
+        // Same core, different metadata: neither is newer.
+        let same_core_newer = version_is_newer("1.0.0+build.5", "1.0.0+build.1");
+        assert!(!same_core_newer);
+        // Different core: the core decides, whatever the metadata says.
+        let higher_core_newer = version_is_newer("1.0.1+build.1", "1.0.0+build.9");
+        assert!(higher_core_newer);
     }
 
     #[test]