Skip to content

Commit e950661

Browse files
authored
Fix nondeterminism in splitting (#482)
1 parent cdf227e commit e950661

File tree

1 file changed

+69
-12
lines changed

1 file changed

+69
-12
lines changed

src/lang/frontend_ast_split.rs

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
//! We use a greedy algorithm to order the statements in a predicate to minimize
1616
//! the number of live wildcards at split boundaries.
1717
18-
use std::collections::{HashMap, HashSet};
18+
use std::{
19+
cmp::Reverse,
20+
collections::{HashMap, HashSet},
21+
};
1922

2023
// SplittingError is now defined in error.rs
2124
pub use crate::lang::error::SplittingError;
@@ -253,6 +256,28 @@ fn compute_tie_breakers(
253256
(simplicity, closes_count, -(fanout as i32))
254257
}
255258

259+
fn statement_selection_key(
260+
idx: usize,
261+
statements: &[StatementTmpl],
262+
active_wildcards: &HashSet<String>,
263+
remaining: &HashSet<usize>,
264+
approaching_split: bool,
265+
) -> (i32, (usize, usize, i32), Reverse<usize>) {
266+
let primary_score = score_statement(
267+
&statements[idx],
268+
active_wildcards,
269+
statements,
270+
remaining,
271+
approaching_split,
272+
);
273+
let tie_breakers =
274+
compute_tie_breakers(&statements[idx], active_wildcards, statements, remaining);
275+
276+
// Final deterministic tie-breaker: prefer smaller original indices.
277+
// This avoids hash-iteration-dependent selection when scores are equal.
278+
(primary_score, tie_breakers, Reverse(idx))
279+
}
280+
256281
/// Find the best next statement to add based on scoring heuristic
257282
fn find_best_next_statement(
258283
statements: &[StatementTmpl],
@@ -268,16 +293,13 @@ fn find_best_next_statement(
268293
remaining
269294
.iter()
270295
.max_by_key(|&&idx| {
271-
let primary_score = score_statement(
272-
&statements[idx],
273-
active_wildcards,
296+
statement_selection_key(
297+
idx,
274298
statements,
299+
active_wildcards,
275300
remaining,
276301
approaching_split,
277-
);
278-
let tie_breakers =
279-
compute_tie_breakers(&statements[idx], active_wildcards, statements, remaining);
280-
(primary_score, tie_breakers)
302+
)
281303
})
282304
.copied()
283305
.unwrap()
@@ -362,10 +384,14 @@ fn generate_refactor_suggestion(
362384
return None;
363385
}
364386

387+
// Normalize wildcard order so diagnostics are deterministic.
388+
let mut sorted_crossing_wildcards = crossing_wildcards.to_vec();
389+
sorted_crossing_wildcards.sort();
390+
365391
// Analyze the span of each crossing wildcard
366392
let mut wildcard_spans: Vec<(String, usize, usize, usize)> = Vec::new();
367393

368-
for wildcard in crossing_wildcards {
394+
for wildcard in &sorted_crossing_wildcards {
369395
let mut first_use = None;
370396
let mut last_use = None;
371397

@@ -401,9 +427,9 @@ fn generate_refactor_suggestion(
401427
}
402428

403429
// If multiple wildcards cross the boundary, suggest grouping
404-
if crossing_wildcards.len() > 1 {
430+
if sorted_crossing_wildcards.len() > 1 {
405431
return Some(RefactorSuggestion::GroupWildcardUsages {
406-
wildcards: crossing_wildcards.to_vec(),
432+
wildcards: sorted_crossing_wildcards,
407433
});
408434
}
409435

@@ -459,11 +485,12 @@ fn split_into_chain(
459485
// Check: Can we fit promoted wildcards in public args?
460486
// Need to account for possible overlap between incoming_public and live_at_boundary
461487
let incoming_set: HashSet<_> = incoming_public.iter().cloned().collect();
462-
let new_promotions: Vec<_> = live_at_boundary
488+
let mut new_promotions: Vec<_> = live_at_boundary
463489
.iter()
464490
.filter(|w| !incoming_set.contains(*w))
465491
.cloned()
466492
.collect();
493+
new_promotions.sort();
467494
let total_public = incoming_public.len() + new_promotions.len();
468495
if total_public > Params::max_statement_args() {
469496
let context = crate::lang::error::SplitContext {
@@ -933,6 +960,36 @@ mod tests {
933960
);
934961
}
935962

963+
#[test]
964+
fn test_statement_selection_prefers_lower_index_on_tie() {
965+
// Two structurally symmetric statements produce identical heuristic scores.
966+
// Determinism comes from the final index-based tie breaker.
967+
let input = r#"
968+
tie_break(A, B) = AND (
969+
Equal(A["x"], B["x"])
970+
Equal(A["y"], B["y"])
971+
)
972+
"#;
973+
974+
let pred = parse_predicate(input);
975+
let statements = pred.statements;
976+
let remaining: HashSet<usize> = [0, 1].into_iter().collect();
977+
let active_wildcards = HashSet::new();
978+
979+
let key0 = statement_selection_key(0, &statements, &active_wildcards, &remaining, false);
980+
let key1 = statement_selection_key(1, &statements, &active_wildcards, &remaining, false);
981+
982+
assert_eq!(key0.0, key1.0, "Primary heuristic score should tie");
983+
assert_eq!(key0.1, key1.1, "Secondary tie-breaker metrics should tie");
984+
assert!(
985+
key0 > key1,
986+
"Lower original index should win deterministic final tie-breaker"
987+
);
988+
989+
let selected = find_best_next_statement(&statements, &remaining, &active_wildcards, 0);
990+
assert_eq!(selected, 0);
991+
}
992+
936993
#[test]
937994
fn test_greedy_ordering_reduces_liveness() {
938995
// This test verifies that our greedy ordering algorithm reduces wildcard liveness

0 commit comments

Comments
 (0)