Skip to content

Commit 65e48fc

Browse files
authored
Add missing regex concat simplifications
1 parent 0cc04b5 commit 65e48fc

1 file changed

Lines changed: 57 additions & 2 deletions

File tree

src/ast/rewriter/seq_rewriter.cpp

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4491,10 +4491,60 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
44914491
r* ++ r -> r ++ r*
44924492
*/
44934493
br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
4494+
auto accepts_empty_word = [&](expr* r) {
4495+
auto info = re().get_info(r);
4496+
return info.interpreted && info.nullable == l_true && info.min_length == 0;
4497+
};
4498+
auto starts_with_full_seq = [&](expr* r) {
4499+
expr* r1 = nullptr, *r2 = nullptr;
4500+
return re().is_full_seq(r) || (re().is_concat(r, r1, r2) && re().is_full_seq(r1));
4501+
};
4502+
auto ends_with_full_seq = [&](expr* r) {
4503+
expr* r1 = nullptr, *r2 = nullptr;
4504+
while (re().is_concat(r, r1, r2))
4505+
r = r2;
4506+
return re().is_full_seq(r);
4507+
};
4508+
auto all_inter_arms_end_with_full_seq = [&](expr* r) {
4509+
ptr_buffer<expr> todo;
4510+
todo.push_back(r);
4511+
while (!todo.empty()) {
4512+
expr* r1 = nullptr, *r2 = nullptr;
4513+
expr* t = todo.back();
4514+
todo.pop_back();
4515+
if (re().is_intersection(t, r1, r2)) {
4516+
todo.push_back(r1);
4517+
todo.push_back(r2);
4518+
}
4519+
else if (!ends_with_full_seq(t)) {
4520+
return false;
4521+
}
4522+
}
4523+
return true;
4524+
};
44944525
if (re().is_full_seq(a) && re().is_full_seq(b)) {
44954526
result = a;
44964527
return BR_DONE;
44974528
}
4529+
if (re().is_full_seq(a) && accepts_empty_word(b)) {
4530+
result = a;
4531+
return BR_DONE;
4532+
}
4533+
if (re().is_full_seq(b) && accepts_empty_word(a)) {
4534+
result = b;
4535+
return BR_DONE;
4536+
}
4537+
expr* u1 = nullptr, *u2 = nullptr;
4538+
if (re().is_full_seq(a) && re().is_union(b, u1, u2) &&
4539+
(starts_with_full_seq(u1) || starts_with_full_seq(u2))) {
4540+
result = mk_regex_union_normalize(mk_regex_concat(a, u1), mk_regex_concat(a, u2));
4541+
return BR_REWRITE2;
4542+
}
4543+
if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) &&
4544+
all_inter_arms_end_with_full_seq(a)) {
4545+
result = a;
4546+
return BR_DONE;
4547+
}
44984548
if (re().is_empty(a)) {
44994549
result = a;
45004550
return BR_DONE;
@@ -4525,7 +4575,8 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
45254575
result = re().mk_to_re(str().mk_concat(a_str, b_str));
45264576
return BR_REWRITE2;
45274577
}
4528-
expr* a1 = nullptr, *b1 = nullptr;
4578+
expr* a1 = nullptr;
4579+
expr* b1 = nullptr;
45294580
if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) {
45304581
result = re().mk_to_re(str().mk_concat(a1, b1));
45314582
return BR_DONE;
@@ -4534,6 +4585,11 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
45344585
result = a;
45354586
return BR_DONE;
45364587
}
4588+
expr* b2 = nullptr, *b3 = nullptr;
4589+
if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1, b3) && a1 == b3) {
4590+
result = b;
4591+
return BR_DONE;
4592+
}
45374593
if (re().is_star(a, a1) && a1 == b) {
45384594
result = re().mk_concat(b, a);
45394595
return BR_DONE;
@@ -6163,4 +6219,3 @@ bool seq_rewriter::get_bounds(expr* e, unsigned& low, unsigned& high) {
61636219
}
61646220
return low <= high;
61656221
}
6166-

0 commit comments

Comments
 (0)