From b55efe9c9ffc03549c55a3aa1972c01f98f7d4bd Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Tue, 16 May 2023 14:51:23 +0000 Subject: [PATCH 1/4] Change is_constantt logic to enumerate not-constant expressions Rather than adding more-and-more expressions to the enumeration just call out those that are _not_ constant ((nondet) symbols and side-effect expressions as well as any access that may be out of bounds). --- src/util/expr_util.cpp | 49 +++++++++++++++++++++++++----------------- src/util/expr_util.h | 2 +- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/util/expr_util.cpp b/src/util/expr_util.cpp index 089dc217aa2..7764898a693 100644 --- a/src/util/expr_util.cpp +++ b/src/util/expr_util.cpp @@ -228,29 +228,32 @@ const exprt &skip_typecast(const exprt &expr) /// "constants" bool is_constantt::is_constant(const exprt &expr) const { - if(expr.is_constant()) - return true; + if( + expr.id() == ID_symbol || expr.id() == ID_nondet_symbol || + expr.id() == ID_side_effect) + { + return false; + } if(expr.id() == ID_address_of) { return is_constant_address_of(to_address_of_expr(expr).object()); } - else if( - expr.id() == ID_typecast || expr.id() == ID_array_of || - expr.id() == ID_plus || expr.id() == ID_mult || expr.id() == ID_array || - expr.id() == ID_with || expr.id() == ID_struct || expr.id() == ID_union || - expr.id() == ID_empty_union || expr.id() == ID_equal || - expr.id() == ID_notequal || expr.id() == ID_lt || expr.id() == ID_le || - expr.id() == ID_gt || expr.id() == ID_ge || expr.id() == ID_if || - expr.id() == ID_not || expr.id() == ID_and || expr.id() == ID_or || - expr.id() == ID_bitnot || expr.id() == ID_bitand || expr.id() == ID_bitor || - expr.id() == ID_bitxor || expr.id() == ID_byte_update_big_endian || - expr.id() == ID_byte_update_little_endian) + else if(auto index = expr_try_dynamic_cast(expr)) { - return std::all_of( - expr.operands().begin(), expr.operands().end(), [this](const exprt &e) { - return is_constant(e); - }); + if(!is_constant(index->array()) || !index->index().is_constant()) + return false; + + const auto &array_type = to_array_type(index->array().type()); + if(!array_type.size().is_constant()) + return false; + + const mp_integer size = + numeric_cast_v(to_constant_expr(array_type.size())); + const mp_integer index_int = + numeric_cast_v(to_constant_expr(index->index())); + + return index_int >= 0 && index_int < size; } else if(auto be = expr_try_dynamic_cast(expr)) { @@ -269,7 +272,7 @@ bool is_constantt::is_constant(const exprt &expr) const numeric_cast_v(to_constant_expr(be->offset())) * be->get_bits_per_byte(); - return offset_bits + *extract_bits <= *op_bits; + return offset_bits >= 0 && offset_bits + *extract_bits <= *op_bits; } else if(auto eb = expr_try_dynamic_cast(expr)) { @@ -292,8 +295,14 @@ bool is_constantt::is_constant(const exprt &expr) const return lower_bound >= 0 && lower_bound <= upper_bound && upper_bound < src_bits; } - - return false; + else + { + // std::all_of returns true when there are no operands + return std::all_of( + expr.operands().begin(), expr.operands().end(), [this](const exprt &e) { + return is_constant(e); + }); + } } /// this function determines which reference-typed expressions are constant diff --git a/src/util/expr_util.h b/src/util/expr_util.h index 3b31c1de156..2a15df0ac4e 100644 --- a/src/util/expr_util.h +++ b/src/util/expr_util.h @@ -82,7 +82,7 @@ const exprt &skip_typecast(const exprt &expr); /// Determine whether an expression is constant. A literal constant is /// constant, but so are, e.g., sums over constants or addresses of objects. -/// An implementation derive from this class to refine what it considers +/// An implementation may derive from this class to refine what it considers /// constant in a particular context by overriding is_constant and/or /// is_constant_address_of. class is_constantt From 38516c37f0e7d4ad978bdaad9d4175dc6edafeba Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 26 May 2023 11:48:59 +0000 Subject: [PATCH 2/4] goto-symex can safely propagate with_exprt We would only deem with_exprt constant when all their operands are constant, which means that the array operand must be an array or array_of expression, and both update indices as well as update values must be constant. In most such cases the simplifier would rewrite the with_exprt anyway. The exception to this is an index that cannot immediately be evaluated to a numeric constant, as is the case with, e.g., pointer_object expressions. Fixes: #3095 --- src/goto-symex/goto_symex_is_constant.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/goto-symex/goto_symex_is_constant.h b/src/goto-symex/goto_symex_is_constant.h index b3e93248943..570b9e48f8a 100644 --- a/src/goto-symex/goto_symex_is_constant.h +++ b/src/goto-symex/goto_symex_is_constant.h @@ -40,21 +40,6 @@ class goto_symex_is_constantt : public is_constantt return !found_non_constant; } - else if(expr.id() == ID_with) - { - // this is bad -#if 0 - for(const auto &op : expr.operands()) - { - if(!is_constant(op)) - return false; - } - - return true; -#else - return false; -#endif - } return is_constantt::is_constant(expr); } From 6c079857efea79f6fc25d63c7855ed3cdfe91e63 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 26 May 2023 12:29:52 +0000 Subject: [PATCH 3/4] Simplify typecast of pointer_offset and pointer_object expressions We do not prescribe a fixed type for these expressions, so we can just alter the type and have back-ends deal with producing a suitable value of the new type. Co-authored-by: Remi Delmas --- regression/cbmc/constant_folding4/main.c | 99 +++++++++++++++++++++ regression/cbmc/constant_folding4/test.desc | 12 +++ src/util/simplify_expr.cpp | 11 +++ 3 files changed, 122 insertions(+) create mode 100644 regression/cbmc/constant_folding4/main.c create mode 100644 regression/cbmc/constant_folding4/test.desc diff --git a/regression/cbmc/constant_folding4/main.c b/regression/cbmc/constant_folding4/main.c new file mode 100644 index 00000000000..e85d85e48e1 --- /dev/null +++ b/regression/cbmc/constant_folding4/main.c @@ -0,0 +1,99 @@ +#include + +typedef struct +{ + size_t k; + void **ptrs; +} shadow_map_t; + +extern size_t __CPROVER_max_malloc_size; +int __builtin_clzll(unsigned long long); + +// computes 2^OBJECT_BITS +#define __nof_objects \ + ((size_t)(1ULL << __builtin_clzll(__CPROVER_max_malloc_size))) + +// Initialises the given shadow memory map +void shadow_map_init(shadow_map_t *smap, size_t k) +{ + *smap = (shadow_map_t){ + .k = k, .ptrs = __CPROVER_allocate(__nof_objects * sizeof(void *), 1)}; +} + +// Returns a pointer to the shadow bytes of the byte pointed to by ptr +void *shadow_map_get(shadow_map_t *smap, void *ptr) +{ + size_t id = __CPROVER_POINTER_OBJECT(ptr); + void *sptr = smap->ptrs[id]; + if(!sptr) + { + sptr = __CPROVER_allocate(smap->k * __CPROVER_OBJECT_SIZE(ptr), 1); + smap->ptrs[id] = sptr; + } + return sptr + smap->k * __CPROVER_POINTER_OFFSET(ptr); +} + +shadow_map_t smap; + +// read before write instrumentation +void shadow_read(void *ptr) +{ + char *sptr = (char *)shadow_map_get(&smap, ptr); + __CPROVER_assert(*sptr, "read before write"); +} + +void shadow_write(void *ptr) +{ + char *sptr = (char *)shadow_map_get(&smap, ptr); + *sptr = *sptr | 1; +} + +size_t nondet_size_t(); + +int main() +{ + shadow_map_init(&smap, 1); + size_t size; + size = SIZE; + shadow_write(&size); + shadow_read(&size); + char **arr = malloc(size * sizeof(char *)); + shadow_write(&arr); + char a; + a = 0; + shadow_write(&a); + char b; + if(nondet_size_t() || 1) + { + b = 1; + shadow_write(&b); + } + char c; + c = 2; + shadow_write(&c); + size_t i; + i = 0; + shadow_write(&i); + +LOOP_HEAD: + shadow_read(&i); + shadow_read(&size); + if(i >= ITERS) + goto LOOP_EXIT; + arr[i] = malloc(3); + shadow_write(&arr[i]); + shadow_read(&a); + arr[i][0] = a; + shadow_write(&(arr[i][0])); + shadow_read(&b); + arr[i][1] = b; + shadow_write(&(arr[i][1])); + shadow_read(&c); + arr[i][2] = c; + shadow_write(&(arr[i][2])); + shadow_read(&i); + i = i + 1; + shadow_write(&i); + goto LOOP_HEAD; +LOOP_EXIT:; +} diff --git a/regression/cbmc/constant_folding4/test.desc b/regression/cbmc/constant_folding4/test.desc new file mode 100644 index 00000000000..1e56d4e0e4c --- /dev/null +++ b/regression/cbmc/constant_folding4/test.desc @@ -0,0 +1,12 @@ +CORE new-smt-backend +main.c +-DSIZE=1 -DITERS=1 +^Generated 33 VCC\(s\), 0 remaining after simplification$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +Requires constant propagation of with_exprt (where all operands evaluate to +constants) and simplification of type casts of pointer_object expressions. diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index c7925260cfe..be6506c33a3 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -1332,6 +1332,17 @@ simplify_exprt::simplify_typecast(const typecast_exprt &expr) return changed(simplify_address_of(result)); // recursive call } } + else if( + operand.id() == ID_pointer_object || operand.id() == ID_pointer_offset) + { + // pointer_object and pointer_offset do not have a prescribed type + if(expr_type.id() == ID_signedbv || expr_type.id() == ID_unsignedbv) + { + auto new_expr = operand; + new_expr.type() = expr_type; + return std::move(new_expr); + } + } return unchanged(expr); } From 715a65f1185ad2044f706b8ef2b20bd86e01a6e1 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 29 May 2023 14:59:02 +0000 Subject: [PATCH 4/4] WIP: constant propagate if over pointers Needs some heuristics to constrain `record_may_leak`, which can produce deeply nested expressions. Co-authored-by: Remi Delmas --- regression/cbmc/constant_folding5/main.c | 89 +++++++++++++++++++++ regression/cbmc/constant_folding5/test.desc | 12 +++ src/goto-symex/goto_symex_is_constant.h | 2 + 3 files changed, 103 insertions(+) create mode 100644 regression/cbmc/constant_folding5/main.c create mode 100644 regression/cbmc/constant_folding5/test.desc diff --git a/regression/cbmc/constant_folding5/main.c b/regression/cbmc/constant_folding5/main.c new file mode 100644 index 00000000000..fd79597570a --- /dev/null +++ b/regression/cbmc/constant_folding5/main.c @@ -0,0 +1,89 @@ +#include +#include +#include + +__CPROVER_bool nondet_bool(); + +static bool seen1[__CPROVER_constant_infinity_uint]; +static bool seen2[__CPROVER_constant_infinity_uint]; + +typedef struct { + bool assume; + bool *seen; +} context_t; + +__CPROVER_bool is_fresh(void **ptr, size_t size, context_t *ctx) { + if (ctx->assume) { + if (nondet_bool()) + return false; + *ptr = malloc(size); + __CPROVER_assume(*ptr); + ctx->seen[__CPROVER_POINTER_OBJECT(*ptr)] = true; + return true; + } else { + bool r_ok = __CPROVER_r_ok(*ptr, size); + bool seen = ctx->seen[__CPROVER_POINTER_OBJECT(*ptr)]; + ctx->seen[__CPROVER_POINTER_OBJECT(*ptr)] = true; + return r_ok && !seen; + } +} + +__CPROVER_bool ptr_equals(void **ptr, void *p, context_t *ctx) { + if (ctx->assume) { + if (nondet_bool()) + return false; + *ptr = p; + return true; + } else { + return p == *ptr; + } +} + +__CPROVER_bool pred(char **ptr1, char **ptr2, size_t size, context_t *ctx) { + return (ptr_equals(ptr1, NULL, ctx) || is_fresh(ptr1, size, ctx)) && + (ptr_equals(ptr2, *ptr1, ctx) || ptr_equals(ptr2, NULL, ctx) || + is_fresh(ptr2, size, ctx) ); +} + +int main() { + char *ptr1 = NULL; + char *ptr2 = NULL; + size_t size; + + // assume the predicate + context_t assume_ctx = {.assume = true, .seen = seen1}; + __CPROVER_assume(pred(&ptr1, &ptr2, size, &assume_ctx)); + + // check that the predicate holds + context_t assert_ctx = {.assume = false, .seen = seen2}; + assert(pred(&ptr1, &ptr2, size, &assert_ctx)); + + // in clear + assert((ptr1 == NULL || (__CPROVER_r_ok(ptr1, size) && + __CPROVER_POINTER_OFFSET(ptr1) == 0)) && + (ptr2 == NULL || ptr2 == ptr1 || + (__CPROVER_r_ok(ptr2, size) && __CPROVER_POINTER_OFFSET(ptr2) == 0) && + !__CPROVER_same_object(ptr1, ptr2))); + + // offsets are zero + assert(__CPROVER_POINTER_OFFSET(ptr1) == 0); + assert(__CPROVER_POINTER_OFFSET(ptr2) == 0); + + // this works + int *small_array = malloc(100 * sizeof(int)); + if (small_array) { + small_array[0] = 0; + assert(small_array[__CPROVER_POINTER_OFFSET(ptr1)] == 0); + assert(small_array[__CPROVER_POINTER_OFFSET(ptr2)] == 0); + } + +#ifdef BIG_ARRAY + // this blows_up + int *big_array = malloc(1000000 * sizeof(int)); + if (big_array) { + big_array[0] = 0; + assert(big_array[__CPROVER_POINTER_OFFSET(ptr1)] == 0); + assert(big_array[__CPROVER_POINTER_OFFSET(ptr2)] == 0); + } +#endif +} diff --git a/regression/cbmc/constant_folding5/test.desc b/regression/cbmc/constant_folding5/test.desc new file mode 100644 index 00000000000..1e56d4e0e4c --- /dev/null +++ b/regression/cbmc/constant_folding5/test.desc @@ -0,0 +1,12 @@ +CORE new-smt-backend +main.c +-DSIZE=1 -DITERS=1 +^Generated 33 VCC\(s\), 0 remaining after simplification$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +Requires constant propagation of with_exprt (where all operands evaluate to +constants) and simplification of type casts of pointer_object expressions. diff --git a/src/goto-symex/goto_symex_is_constant.h b/src/goto-symex/goto_symex_is_constant.h index 570b9e48f8a..bde7cfb36eb 100644 --- a/src/goto-symex/goto_symex_is_constant.h +++ b/src/goto-symex/goto_symex_is_constant.h @@ -40,6 +40,8 @@ class goto_symex_is_constantt : public is_constantt return !found_non_constant; } + else if(expr.id() == ID_if && expr.type().id() == ID_pointer) + return true; return is_constantt::is_constant(expr); }