Skip to content

Commit 6aea54f

Browse files
Fix derivative instability and recursion bugs
- Add top-level cache (m_top_cache) to ensure stable AST node identity across repeated derivative calls, preventing state graph divergence - Add get_head_tail helper for derive_to_re with str.is_unit/str.is_concat - Add ITE hoisting in mk_union/mk_inter to keep ITEs at top level - Add De Morgan rule in mk_complement: ~(A∪B) → ~A ∩ ~B - Add ~ε → .+ simplification in mk_complement - Add prefix factoring: a·x ∪ a·y = a·(x∪y) and a·x ∩ a·y = a·(x∩y) - Add r* ∩ .+ = r+ special case in mk_inter - Enhance is_subset with union/intersection distributivity and complement - Remove De Morgan from mk_inter to prevent infinite recursion loop Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 07cea49 commit 6aea54f

2 files changed

Lines changed: 149 additions & 0 deletions

File tree

src/ast/rewriter/seq_derive.cpp

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,28 @@ namespace seq {
4242

4343
void derive::reset() {
4444
m_cache.reset();
45+
m_top_cache.reset();
4546
m_trail.reset();
4647
}
4748

4849
expr_ref derive::operator()(expr* ele, expr* r) {
4950
SASSERT(m_util.is_re(r));
5051
if (m_trail.size() > 1000)
5152
reset();
53+
// Check top-level cache (post-simplify result)
54+
expr* cached = nullptr;
55+
if (m_top_cache.find(ele, r, cached))
56+
return expr_ref(cached, m);
5257
m_ele = ele;
5358
m_depth = 0;
5459
expr_ref result = derive_rec(r);
5560
result = simplify_ite(result);
5661
m_ele = nullptr;
62+
// Cache and pin the final result
63+
m_top_cache.insert(ele, r, result);
64+
m_trail.push_back(ele);
65+
m_trail.push_back(r);
66+
m_trail.push_back(result);
5767
return result;
5868
}
5969

@@ -254,6 +264,27 @@ namespace seq {
254264
return mk_ite(cond, tail_re, empty);
255265
}
256266

267+
// δ(to_re(unit(c))) = ite(ele = c, ε, ∅)
268+
expr* ch = nullptr;
269+
if (u().str.is_unit(s, ch)) {
270+
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
271+
expr_ref empty(re().mk_empty(re_sort), m);
272+
expr_ref cond(m.mk_eq(m_ele, ch), m);
273+
return mk_ite(cond, eps, empty);
274+
}
275+
276+
// δ(to_re(s1 ++ s2)) = ite(head matches, to_re(tail ++ s2), ∅)
277+
expr* s1 = nullptr, * s2 = nullptr;
278+
if (u().str.is_concat(s, s1, s2)) {
279+
expr_ref hd(m), tl(m);
280+
if (get_head_tail(s1, s2, hd, tl)) {
281+
expr_ref cond(m.mk_eq(m_ele, hd), m);
282+
expr_ref tail_re(re().mk_to_re(tl), m);
283+
expr_ref empty(re().mk_empty(re_sort), m);
284+
return mk_ite(cond, tail_re, empty);
285+
}
286+
}
287+
257288
// δ(to_re(itos(n))) - derivative of integer-to-string
258289
// itos(n) produces digits '0'-'9' when n >= 0, empty when n < 0
259290
expr* n = nullptr;
@@ -328,6 +359,34 @@ namespace seq {
328359
return mk_ite(cond, eps, empty);
329360
}
330361

362+
// Extract head character and remaining tail from a sequence
363+
// s1 is the first part, s2 is the continuation (from str.concat(s1, s2))
364+
bool derive::get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl) {
365+
expr* ch = nullptr;
366+
expr* a = nullptr, * b = nullptr;
367+
if (u().str.is_unit(s1, ch)) {
368+
hd = ch;
369+
tl = s2;
370+
return true;
371+
}
372+
if (u().str.is_concat(s1, a, b)) {
373+
expr_ref new_s2(u().str.mk_concat(b, s2), m);
374+
return get_head_tail(a, new_s2, hd, tl);
375+
}
376+
zstring zs;
377+
if (u().str.is_string(s1, zs) && zs.length() > 0) {
378+
hd = m_util.mk_char(zs[0]);
379+
if (zs.length() == 1)
380+
tl = s2;
381+
else {
382+
expr_ref rest(u().str.mk_string(zs.extract(1, zs.length() - 1)), m);
383+
tl = u().str.mk_concat(rest, s2);
384+
}
385+
return true;
386+
}
387+
return false;
388+
}
389+
331390
// -------------------------------------------------------
332391
// Normalize reverse by pushing it inward
333392
// -------------------------------------------------------
@@ -462,11 +521,21 @@ namespace seq {
462521
if (is_subset(a, b1) || is_subset(a, a1)) return true;
463522
}
464523

524+
// a1 ∪ a2 ⊆ b if a1 ⊆ b and a2 ⊆ b
525+
if (re().is_union(a, a1, b1)) {
526+
if (is_subset(a1, b) && is_subset(b1, b)) return true;
527+
}
528+
465529
// a1 ∩ a2 ⊆ b if a1 ⊆ b or a2 ⊆ b
466530
if (re().is_intersection(a, a1, b1)) {
467531
if (is_subset(a1, b) || is_subset(b1, b)) return true;
468532
}
469533

534+
// a ⊆ b1 ∩ b2 if a ⊆ b1 and a ⊆ b2
535+
if (re().is_intersection(b, b1, a1)) {
536+
if (is_subset(a, b1) && is_subset(a, a1)) return true;
537+
}
538+
470539
// concat subsumption: a1·a2 ⊆ b1·b2 when a1 ⊆ b1 and a2 ⊆ b2
471540
expr* a2 = nullptr, * b2 = nullptr;
472541
if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) &&
@@ -479,6 +548,10 @@ namespace seq {
479548
a1 == b1 && lb <= la && ua <= ub)
480549
return true;
481550

551+
// complement: ~a ⊆ ~b if b ⊆ a
552+
if (re().is_complement(a, a1) && re().is_complement(b, b1))
553+
return is_subset(b1, a1);
554+
482555
return false;
483556
}
484557

@@ -501,6 +574,13 @@ namespace seq {
501574
if (is_subset(a, b)) return expr_ref(b, m);
502575
if (is_subset(b, a)) return expr_ref(a, m);
503576

577+
// Prefix factoring: a·x ∪ a·y = a·(x ∪ y)
578+
expr *a1, *a2, *b1, *b2;
579+
if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) {
580+
expr_ref tail = mk_union(a2, b2);
581+
return mk_deriv_concat(expr_ref(a1, m), tail);
582+
}
583+
504584
// ITE combination: if both are ITE with same condition, merge
505585
expr *c1, *t1, *e1, *c2, *t2, *e2;
506586
if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) {
@@ -509,6 +589,18 @@ namespace seq {
509589
return mk_ite(c1, then_br, else_br);
510590
}
511591

592+
// ITE hoisting: ite(c, t, e) ∪ r = ite(c, t ∪ r, e ∪ r)
593+
if (m.is_ite(a, c1, t1, e1)) {
594+
expr_ref then_br = mk_union(t1, b);
595+
expr_ref else_br = mk_union(e1, b);
596+
return mk_ite(c1, then_br, else_br);
597+
}
598+
if (m.is_ite(b, c2, t2, e2)) {
599+
expr_ref then_br = mk_union(a, t2);
600+
expr_ref else_br = mk_union(a, e2);
601+
return mk_ite(c2, then_br, else_br);
602+
}
603+
512604
// ACI: flatten, sort, deduplicate
513605
expr_ref_vector args(m);
514606
flatten_union(a, args);
@@ -556,6 +648,13 @@ namespace seq {
556648
if (is_subset(a, b)) return expr_ref(a, m);
557649
if (is_subset(b, a)) return expr_ref(b, m);
558650

651+
// Prefix factoring: a·x ∩ a·y = a·(x ∩ y)
652+
expr *a1, *b1, *a2, *b2;
653+
if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) {
654+
expr_ref tail = mk_inter(a2, b2);
655+
return mk_deriv_concat(expr_ref(a1, m), tail);
656+
}
657+
559658
// ITE combination: if both are ITE with same condition, merge
560659
expr *c1, *t1, *e1, *c2, *t2, *e2;
561660
if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) {
@@ -564,6 +663,18 @@ namespace seq {
564663
return mk_ite(c1, then_br, else_br);
565664
}
566665

666+
// ITE hoisting: ite(c, t, e) ∩ r = ite(c, t ∩ r, e ∩ r)
667+
if (m.is_ite(a, c1, t1, e1)) {
668+
expr_ref then_br = mk_inter(t1, b);
669+
expr_ref else_br = mk_inter(e1, b);
670+
return mk_ite(c1, then_br, else_br);
671+
}
672+
if (m.is_ite(b, c2, t2, e2)) {
673+
expr_ref then_br = mk_inter(a, t2);
674+
expr_ref else_br = mk_inter(a, e2);
675+
return mk_ite(c2, then_br, else_br);
676+
}
677+
567678
// ACI: flatten, sort, deduplicate
568679
expr_ref_vector args(m);
569680
flatten_inter(a, args);
@@ -587,6 +698,25 @@ namespace seq {
587698
if (args.empty())
588699
return expr_ref(re().mk_full_seq(a->get_sort()), m);
589700

701+
// Special: r* ∩ .+ = r+
702+
expr* star_body = nullptr;
703+
int star_idx = -1, dotplus_idx = -1;
704+
for (unsigned i = 0; i < args.size(); ++i) {
705+
if (re().is_star(args.get(i), star_body))
706+
star_idx = i;
707+
if (re().is_dot_plus(args.get(i)))
708+
dotplus_idx = i;
709+
}
710+
if (star_idx >= 0 && dotplus_idx >= 0 && star_body) {
711+
args.set(star_idx, re().mk_plus(star_body));
712+
// Remove .+ by shifting
713+
for (unsigned i = dotplus_idx; i + 1 < args.size(); ++i)
714+
args.set(i, args.get(i + 1));
715+
args.shrink(args.size() - 1);
716+
if (args.size() == 1)
717+
return expr_ref(args.get(0), m);
718+
}
719+
590720
return mk_inter_from_sorted(args);
591721
}
592722

@@ -635,6 +765,21 @@ namespace seq {
635765
return mk_ite(c, ct, ce);
636766
}
637767

768+
// De Morgan: ~(r1 ∪ r2) → ~r1 ∩ ~r2
769+
expr* r1 = nullptr, * r2 = nullptr;
770+
if (re().is_union(a, r1, r2)) {
771+
expr_ref c1 = mk_complement(r1);
772+
expr_ref c2 = mk_complement(r2);
773+
return mk_inter(c1, c2);
774+
}
775+
776+
// ~ε → .+
777+
sort* s = nullptr;
778+
if (re().is_to_re(a, r) && u().str.is_empty(r)) {
779+
VERIFY(m_util.is_re(a, s));
780+
return expr_ref(re().mk_plus(re().mk_full_char(a->get_sort())), m);
781+
}
782+
638783
return expr_ref(re().mk_complement(a), m);
639784
}
640785

src/ast/rewriter/seq_derive.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ namespace seq {
5656

5757
// Cache: maps (ele, regex) pair to its derivative
5858
obj_pair_map<expr, expr, expr*> m_cache;
59+
obj_pair_map<expr, expr, expr*> m_top_cache; // post-simplify cache
5960
expr_ref_vector m_trail; // pin cached results
6061

6162
// Depth limiting
@@ -104,6 +105,9 @@ namespace seq {
104105
// Distribute concatenation through ITE/union in derivative
105106
expr_ref mk_deriv_concat(expr* d, expr* tail);
106107

108+
// Extract head character and tail from a sequence expression
109+
bool get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl);
110+
107111
// Lightweight subsumption check: returns true if L(a) ⊆ L(b)
108112
bool is_subset(expr* a, expr* b);
109113

0 commit comments

Comments
 (0)