Skip to content

Commit e6231ca

Browse files
Fix regression timeouts via range condition simplification
- Simplify trivial range bounds in derive_range: when lo=0, omit the lo<=x condition; when hi=max_char, omit the x<=hi condition. Full charset ranges return epsilon directly. - Add char_le(0,x)=true and char_le(x,max)=true to eval_cond for always-valid bounds. - Add range implication logic to simplify_ite_rec: when path has negated/positive char_le constraints, detect implied or contradicted char_le conditions (e.g., ¬(x<=127) implies 128<=x). - Add is_subset(a, .+) check: non-nullable regexes are subsets of .+ - In update_state_graph, skip recursive exploration of nullable targets to avoid state explosion. These fixes resolve timeouts on 5724 (all problems), 5721 P1, and 5693. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6aea54f commit e6231ca

2 files changed

Lines changed: 84 additions & 5 deletions

File tree

src/ast/rewriter/seq_derive.cpp

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,22 @@ namespace seq {
336336
// Extract character values from unit strings
337337
expr_ref c_lo(m), c_hi(m);
338338
if (u().str.is_unit_string(lo, c_lo) && u().str.is_unit_string(hi, c_hi)) {
339-
// ite(lo <= ele && ele <= hi, ε, ∅)
340-
expr_ref ge_lo(m_util.mk_le(c_lo, m_ele), m);
341-
expr_ref le_hi(m_util.mk_le(m_ele, c_hi), m);
342-
expr_ref in_range(m.mk_and(ge_lo, le_hi), m);
339+
// Build range condition, simplifying trivial bounds
340+
unsigned lo_val = 0, hi_val = 0;
341+
bool lo_trivial = m_util.is_const_char(c_lo, lo_val) && lo_val == 0;
342+
bool hi_trivial = m_util.is_const_char(c_hi, hi_val) && hi_val == u().max_char();
343+
344+
if (lo_trivial && hi_trivial)
345+
return eps; // full charset range — always matches
346+
347+
expr_ref in_range(m);
348+
if (lo_trivial)
349+
in_range = m_util.mk_le(m_ele, c_hi);
350+
else if (hi_trivial)
351+
in_range = m_util.mk_le(c_lo, m_ele);
352+
else
353+
in_range = m.mk_and(m_util.mk_le(c_lo, m_ele), m_util.mk_le(m_ele, c_hi));
354+
343355
return mk_ite(in_range, eps, empty);
344356
}
345357

@@ -508,8 +520,13 @@ namespace seq {
508520
if (re().is_empty(a)) return true;
509521
if (re().is_full_seq(b)) return true;
510522

511-
// a ⊆ a* (since a* accepts everything a does and more)
523+
// a ⊆ .+ iff a is non-nullable (non-nullable means ε ∉ L(a))
512524
expr* b1 = nullptr;
525+
if (re().is_plus(b, b1) && re().is_full_char(b1) &&
526+
re().get_info(a).nullable == l_false)
527+
return true;
528+
529+
// a ⊆ a* (since a* accepts everything a does and more)
513530
if (re().is_star(b, b1) && a == b1) return true;
514531

515532
// a* ⊆ b* if a ⊆ b
@@ -964,6 +981,12 @@ namespace seq {
964981
return vl <= vr ? l_true : l_false;
965982
if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr))
966983
return vl <= vr ? l_true : l_false;
984+
// char_le(0, x) is always true (chars are unsigned)
985+
if (u().is_const_char(lhs, vl) && vl == 0)
986+
return l_true;
987+
// char_le(x, max_char) is always true
988+
if (u().is_const_char(rhs, vr) && vr == u().max_char())
989+
return l_true;
967990
}
968991

969992
// not(e1)
@@ -1074,6 +1097,52 @@ namespace seq {
10741097
return simplify_ite_rec(path, e);
10751098
}
10761099
}
1100+
1101+
// Range implication between char_le conditions:
1102+
// If c is char_le(lo, x) [lo <= x] and path has ¬(x <= hi) [x > hi]:
1103+
// ¬(x <= hi) means x >= hi+1. If lo <= hi+1, then lo <= x is implied → c is true.
1104+
// If c is char_le(x, hi) [x <= hi] and path has ¬(lo <= x) [x < lo]:
1105+
// ¬(lo <= x) means x <= lo-1. If lo-1 <= hi, then x <= hi is implied → c is true.
1106+
expr* c_lhs = nullptr, * c_rhs = nullptr;
1107+
expr* p_lhs = nullptr, * p_rhs = nullptr;
1108+
if (m_util.is_char_le(c, c_lhs, c_rhs) && m_util.is_char_le(cond, p_lhs, p_rhs)) {
1109+
unsigned c_lo = 0, c_hi = 0, p_lo = 0, p_hi = 0;
1110+
if (sign) {
1111+
// cond is negated (¬cond is true)
1112+
// c is (lo <= x), cond is (x <= hi) with sign=true means ¬(x <= hi) i.e. x > hi i.e. x >= hi+1
1113+
if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele &&
1114+
p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) &&
1115+
c_lo <= p_hi + 1)
1116+
return simplify_ite_rec(path, t);
1117+
// c is (x <= hi), cond is (lo <= x) with sign=true means ¬(lo <= x) i.e. x < lo i.e. x <= lo-1
1118+
if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) &&
1119+
m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele &&
1120+
p_lo > 0 && p_lo - 1 <= c_hi)
1121+
return simplify_ite_rec(path, t);
1122+
} else {
1123+
// cond is true (not negated)
1124+
// c is (lo <= x), cond is (x <= hi) true: x <= hi. If lo > hi → c is false.
1125+
if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele &&
1126+
p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) &&
1127+
c_lo > p_hi)
1128+
return simplify_ite_rec(path, e);
1129+
// c is (x <= hi), cond is (lo <= x) true: lo <= x. If hi < lo → c is false.
1130+
if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) &&
1131+
m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele &&
1132+
c_hi < p_lo)
1133+
return simplify_ite_rec(path, e);
1134+
// c is (lo <= x), cond is (lo2 <= x) true: lo2 <= x. If lo <= lo2 → c is true.
1135+
if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele &&
1136+
m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele &&
1137+
c_lo <= p_lo)
1138+
return simplify_ite_rec(path, t);
1139+
// c is (x <= hi), cond is (x <= hi2) true: x <= hi2. If hi >= hi2 → c is true.
1140+
if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) &&
1141+
p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) &&
1142+
c_hi >= p_hi)
1143+
return simplify_ite_rec(path, t);
1144+
}
1145+
}
10771146
}
10781147

10791148
// Check if both range bounds are in path and c is (x = v) within range

src/smt/seq_regex.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,16 @@ namespace smt {
859859
m_state_graph.add_edge(r_id, dr_id, maybecycle);
860860
}
861861
m_state_graph.mark_done(r_id);
862+
// Recursively explore unexplored targets for dead state detection
863+
// Skip targets that are nullable to avoid state explosion
864+
for (auto const& dr: derivatives) {
865+
unsigned dr_id = get_state_id(dr);
866+
if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id))
867+
continue;
868+
if (re().get_info(dr).nullable == l_true)
869+
continue;
870+
update_state_graph(dr);
871+
}
862872
}
863873

864874
STRACE(seq_regex, m_state_graph.display(tout););

0 commit comments

Comments
 (0)