Skip to content

Commit 2c4184d

Browse files
authored
Merge branch 'main' into fix/issue-455-home-undo-set-temperature-off
2 parents e7d3e98 + e096927 commit 2c4184d

12 files changed

Lines changed: 870 additions & 201 deletions

File tree

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@

## Unreleased

- **`genie-ctl --help` alignment** (#445): the `health` command no longer
  prints flush-left. A trailing `\` line-continuation after the voice-gated
  `speaker` block was stripping the four-space indent off the following line
  in both `voice`-on and `voice`-off builds. Rendering moved into a pure
  `usage_text()` helper with a regression test that fails if any command line
  loses its indent.
- **Quick-router compound spoken durations** (#449): "set a timer for forty
  five minutes" now sets a 45-minute timer instead of 5. The deterministic
  router only ever saw single tokens, so its `"forty five"` number arm was dead
  code and the trailing "five" bound to the unit. `parse_duration` now stitches
  a tens word (`twenty`..`ninety`) to a following ones digit, and the missing
  `fifty`..`ninety` tens words parse on their own.

## 1.0.0-alpha.11 - 2026-06-20

Alpha 11 is the **typed-tool contract + grounded BFCL** release. The single

crates/genie-core/src/eval/bfcl.rs

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -599,14 +599,21 @@ fn canonicalize_entity_value(value: &Value, graph: &HomeGraph) -> Value {
599599
/// Canonicalize an action verb to a small set of stable forms, so the grounded
/// metric credits synonyms the agent would execute identically.
///
/// The input is trimmed, lowercased, and has spaces and hyphens replaced by
/// `_`. Known "off" synonyms then fold to `turn_off` and known "on" synonyms
/// fold to `turn_on`; any other verb is returned in its normalized form so
/// exact matches still hold. Mirrors the canonicalization the runtime tool
/// dispatch applies in `tools::dispatch::canon_home_control_action`.
///
/// `activate` is intentionally *not* folded into `turn_on`: the runtime keeps it
/// as a distinct `home_control` action for scenes/scripts (it appears separately
/// in the action enum and `canon_home_control_action` leaves it as-is), so
/// crediting `turn_on` for an expected `activate` would score a wrong actuation
/// as correct and inflate the grounded metric.
fn canon_action(text: &str) -> String {
    let normalized = text.trim().to_lowercase().replace([' ', '-'], "_");
    match normalized.as_str() {
        "deactivate" | "disable" | "switch_off" | "power_off" | "shut_off" | "turn_off" => {
            "turn_off".to_string()
        }
        // No `activate` here on purpose — it falls through to the pass-through arm.
        "enable" | "switch_on" | "power_on" | "turn_on" => "turn_on".to_string(),
        _ => normalized,
    }
}
@@ -1002,6 +1009,96 @@ mod tests {
10021009
assert_eq!(report.grounded_strict_matches, 1);
10031010
}
10041011

1012+
#[test]
fn grounded_metric_keeps_activate_distinct_from_turn_on() {
    // The scorer must mirror runtime dispatch. `activate` is its own runtime
    // action for scenes/scripts: canon_home_control_action leaves it alone
    // (guarded by a "distinct action, must not remap" regression test) and
    // `home_control` lists it separately from `turn_on` in its action enum.
    // A predicted `turn_on` where `activate` was expected is therefore a
    // *wrong actuation* (turn_on is a no-op for a scene) and must NOT count
    // as a grounded match.
    let expected_call = expected(
        "home_control",
        serde_json::json!({"action": "activate", "entity": "kitchen lights"}),
    );
    let bfcl_case = case(vec![expected_call]);

    let raw_response =
        r#"{"tool":"home_control","arguments":{"action":"turn_on","entity":"kitchen lights"}}"#;
    let prediction = BfclPrediction {
        id: bfcl_case.id.clone(),
        response: raw_response.to_string(),
    };

    let score = score_response(&bfcl_case, &prediction.response);

    // Exact metric: the literal action strings are different.
    assert!(
        !score.argument_match,
        "raw action strings differ, exact match must fail"
    );
    // Grounded metric: canonicalization must not bridge activate -> turn_on.
    assert!(
        !score.grounded_argument_match,
        "predicting `turn_on` for an expected `activate` is a wrong actuation \
         and must not be credited by the grounded metric"
    );
}
1043+
1044+
#[test]
fn canon_action_contract() {
    // Pins the scorer's action canonicalization so a later edit cannot bring
    // back the activate→turn_on collapse that this fix removed (#458).
    let on_synonyms = ["turn_on", "enable", "switch_on", "power_on"];
    let off_synonyms = [
        "turn_off",
        "deactivate",
        "disable",
        "switch_off",
        "power_off",
        "shut_off",
    ];
    // Remaining home_control actions: none of them may collapse into another.
    let passthrough = [
        "toggle",
        "open",
        "close",
        "lock",
        "unlock",
        "set_brightness",
        "set_temperature",
    ];

    for &s in &on_synonyms {
        assert_eq!(canon_action(s), "turn_on", "{s} should fold to turn_on");
    }
    for &s in &off_synonyms {
        assert_eq!(canon_action(s), "turn_off", "{s} should fold to turn_off");
    }

    // `activate` is its own action and must survive canonicalization.
    assert_eq!(canon_action("activate"), "activate");

    for &a in &passthrough {
        assert_eq!(canon_action(a), a, "{a} must pass through unchanged");
    }

    // Normalization step: trim, lowercase, space/hyphen become underscore.
    assert_eq!(canon_action("  Turn-On "), "turn_on");
    assert_eq!(canon_action("ACTIVATE"), "activate");
    assert_eq!(canon_action("set brightness"), "set_brightness");
}
1080+
1081+
#[test]
fn grounded_metric_credits_matching_activate() {
    // Counterpart of grounded_metric_keeps_activate_distinct_from_turn_on:
    // keeping `activate` distinct must not mean dropping it. When the model
    // predicts `activate` and `activate` was expected, both the exact and the
    // grounded metric have to credit the call.
    let expected_call = expected(
        "home_control",
        serde_json::json!({"action": "activate", "entity": "kitchen lights"}),
    );
    let bfcl_case = case(vec![expected_call]);

    let score = score_response(
        &bfcl_case,
        r#"{"tool":"home_control","arguments":{"action":"activate","entity":"kitchen lights"}}"#,
    );

    assert!(
        score.argument_match,
        "exact activate match must be credited"
    );
    assert!(
        score.grounded_argument_match,
        "a matching activate must be credited by the grounded metric"
    );
}
1101+
10051102
#[test]
10061103
fn loads_jsonl_fixture_and_scores_report() {
10071104
let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("../..");

crates/genie-core/src/ota/mod.rs

Lines changed: 150 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -189,26 +189,99 @@ impl OtaManager {
189189
}
190190
}
191191

192-
/// A parsed semantic version: the numeric `major.minor.patch` core plus any
/// pre-release identifiers.
#[derive(Debug, PartialEq, Eq)]
struct SemVer {
    /// `(major, minor, patch)`. A missing or unparseable component is 0.
    core: (u32, u32, u32),
    /// Dot-separated pre-release identifiers (the part after `-`). Empty for a
    /// normal release. A release outranks any pre-release of the same core.
    pre: Vec<String>,
}

/// Parse a version string into a [`SemVer`].
///
/// Tolerant of surrounding whitespace and a leading `v`. Build metadata
/// (`+...`) is ignored for precedence, per SemVer §10. Missing or unparseable
/// core components default to 0, so this never fails.
fn parse_semver(s: &str) -> SemVer {
    let s = s.trim();
    let s = s.strip_prefix('v').unwrap_or(s);
    // Build metadata never affects precedence — drop anything from '+' on.
    let s = s.split('+').next().unwrap_or(s);
    // Core is everything before the first '-'; the rest is the pre-release.
    let (core_str, pre_str) = match s.split_once('-') {
        Some((core, pre)) => (core, Some(pre)),
        None => (s, None),
    };

    // Positional parse: an unparseable component becomes 0 without shifting the
    // ones after it (so "1.x.2" is (1, 0, 2), not (1, 2, 0)).
    let mut nums = core_str
        .split('.')
        .map(|p| p.trim().parse::<u32>().unwrap_or(0));
    let core = (
        nums.next().unwrap_or(0),
        nums.next().unwrap_or(0),
        nums.next().unwrap_or(0),
    );

    // A bare trailing '-' ("1.0.0-") carries no identifiers and is a release.
    let pre = pre_str
        .filter(|p| !p.is_empty())
        .map(|p| p.split('.').map(str::to_owned).collect())
        .unwrap_or_default();

    SemVer { core, pre }
}

/// Compare a single pair of pre-release identifiers per SemVer §11:
/// numeric identifiers compare numerically and always rank below alphanumeric
/// ones; alphanumeric identifiers compare in ASCII order.
///
/// An identifier is numeric when it is a non-empty run of ASCII digits. Numeric
/// identifiers are compared as arbitrary-precision integers (leading zeros
/// ignored, then by digit count, then digit by digit) rather than through a
/// fixed-width integer parse, so an identifier too large for `u64` — e.g. a
/// long timestamp — is still ordered numerically instead of silently falling
/// back to string order.
fn compare_pre_identifier(a: &str, b: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    fn is_numeric(id: &str) -> bool {
        !id.is_empty() && id.bytes().all(|byte| byte.is_ascii_digit())
    }

    match (is_numeric(a), is_numeric(b)) {
        (true, true) => {
            let a = a.trim_start_matches('0');
            let b = b.trim_start_matches('0');
            // With leading zeros gone, more digits means a larger number; for
            // equal digit counts, byte order equals numeric order.
            a.len().cmp(&b.len()).then_with(|| a.cmp(b))
        }
        (true, false) => Ordering::Less,
        (false, true) => Ordering::Greater,
        (false, false) => a.cmp(b),
    }
}

/// Full SemVer §11 precedence ordering between two parsed versions.
fn compare_semver(a: &SemVer, b: &SemVer) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    // 1. Numeric core dominates.
    let by_core = a.core.cmp(&b.core);
    if by_core != Ordering::Equal {
        return by_core;
    }

    // 2. A release (no pre-release) outranks any pre-release of the same core.
    match (a.pre.is_empty(), b.pre.is_empty()) {
        (true, true) => return Ordering::Equal,
        (true, false) => return Ordering::Greater,
        (false, true) => return Ordering::Less,
        (false, false) => {}
    }

    // 3. Compare pre-release identifiers left to right; the first difference
    //    decides.
    // 4. When all shared identifiers match, the longer set has higher precedence.
    a.pre
        .iter()
        .zip(&b.pre)
        .map(|(ai, bi)| compare_pre_identifier(ai, bi))
        .find(|ord| *ord != Ordering::Equal)
        .unwrap_or_else(|| a.pre.len().cmp(&b.pre.len()))
}

/// Compare semver strings. Returns true if `latest` is strictly newer than
/// `current`, with full SemVer §11 pre-release precedence — so
/// `1.0.0-alpha.12` is newer than `1.0.0-alpha.11`, and `1.0.0` is newer than
/// any `1.0.0-alpha.N`.
fn version_is_newer(latest: &str, current: &str) -> bool {
    compare_semver(&parse_semver(latest), &parse_semver(current)) == std::cmp::Ordering::Greater
}
213286

214287
/// GET request to GitHub API (api.github.com).
@@ -286,9 +359,66 @@ mod tests {
286359

287360
#[test]
fn version_comparison_with_prerelease() {
    // A higher numeric core wins regardless of pre-release tags.
    assert!(version_is_newer("1.1.0-alpha.1", "1.0.0-alpha.1"));
    // Pre-releases of the SAME core order by their identifiers (SemVer §11),
    // so a later alpha IS newer than an earlier one. This is the case the
    // OTA checker depends on during the whole `1.0.0-alpha.N` release line —
    // the previous "strip the suffix" logic made every alpha compare equal,
    // so the device never saw a new alpha.
    assert!(version_is_newer("1.0.0-alpha.2", "1.0.0-alpha.1"));
    assert!(version_is_newer("1.0.0-alpha.11", "1.0.0-alpha.9"));
    // Strictness: an older alpha is never reported as newer.
    assert!(!version_is_newer("1.0.0-alpha.1", "1.0.0-alpha.2"));
}
373+
374+
#[test]
fn prerelease_numeric_identifiers_compare_numerically() {
    // Numeric pre-release identifiers must be ordered as numbers, not as
    // strings: a string compare would put "11" before "9" and "100" before
    // "99". Each row is (latest, current, expected result).
    let table = [
        ("1.0.0-alpha.12", "1.0.0-alpha.11", true),
        ("1.0.0-alpha.100", "1.0.0-alpha.99", true),
        ("1.0.0-alpha.9", "1.0.0-alpha.11", false),
    ];
    for &(latest, current, newer) in &table {
        assert_eq!(
            version_is_newer(latest, current),
            newer,
            "version_is_newer({latest:?}, {current:?})"
        );
    }
}
382+
383+
#[test]
fn release_outranks_its_prerelease() {
    // The final 1.0.0 beats every 1.0.0 pre-release, and no pre-release is
    // ever reported as newer than its own final release.
    for pre in ["1.0.0-alpha.11", "1.0.0-rc.1"] {
        assert!(
            version_is_newer("1.0.0", pre),
            "1.0.0 should be newer than {pre}"
        );
    }
    for pre in ["1.0.0-alpha.1", "1.0.0-rc.1"] {
        assert!(
            !version_is_newer(pre, "1.0.0"),
            "{pre} should not be newer than 1.0.0"
        );
    }
}
392+
393+
#[test]
fn prerelease_stage_ordering() {
    // Stage names are alphanumeric identifiers, so they order by ASCII:
    // alpha < beta < rc. The stage decides before the trailing number does.
    // Each row is (latest, current, expected result).
    let table = [
        ("1.0.0-beta.1", "1.0.0-alpha.11", true),
        ("1.0.0-rc.1", "1.0.0-beta.9", true),
        ("1.0.0-alpha.99", "1.0.0-beta.1", false),
    ];
    for &(latest, current, newer) in &table {
        assert_eq!(
            version_is_newer(latest, current),
            newer,
            "version_is_newer({latest:?}, {current:?})"
        );
    }
}
400+
401+
#[test]
fn numeric_identifier_ranks_below_alphanumeric() {
    // SemVer §11: in the same position, a numeric identifier always has lower
    // precedence than an alphanumeric one.
    let (alphanumeric, numeric) = ("1.0.0-alpha", "1.0.0-1");
    assert!(version_is_newer(alphanumeric, numeric));
    assert!(!version_is_newer(numeric, alphanumeric));
}
408+
409+
#[test]
fn longer_prerelease_set_wins_when_prefixes_match() {
    // SemVer §11: when every shared identifier is equal, the version with
    // more pre-release fields has the higher precedence.
    let (shorter, longer) = ("1.0.0-alpha.1", "1.0.0-alpha.1.1");
    assert!(version_is_newer(longer, shorter));
    assert!(!version_is_newer(shorter, longer));
}
416+
417+
#[test]
fn build_metadata_is_ignored() {
    // SemVer §10: build metadata (`+...`) takes no part in precedence. Two
    // versions that differ only in metadata are equal, and a core difference
    // still decides when metadata is present. Each row is
    // (latest, current, expected result).
    let table = [
        ("1.0.0+build.5", "1.0.0+build.1", false),
        ("1.0.1+build.1", "1.0.0+build.9", true),
    ];
    for &(latest, current, newer) in &table {
        assert_eq!(
            version_is_newer(latest, current),
            newer,
            "version_is_newer({latest:?}, {current:?})"
        );
    }
}
293423

294424
#[test]

0 commit comments

Comments
 (0)