From b3a19252fa586afed4345cfe1b1b6a3f23e06608 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 10 May 2021 15:27:40 +0000 Subject: [PATCH 1/3] Do not unnecessarily restrict byte update lowering of arrays Arrays of arrays of non-constant size (as found in regression/cbmc/Multi_Dimensional_Array6) can still be lowered by the more general lowering code, although doing so is expensive. --- src/solvers/lowering/byte_operators.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp index 6dc3e3831a4..16530511151 100644 --- a/src/solvers/lowering/byte_operators.cpp +++ b/src/solvers/lowering/byte_operators.cpp @@ -2057,11 +2057,7 @@ static exprt lower_byte_update( subtype = to_array_type(src.type()).subtype(); auto element_width = pointer_offset_bits(*subtype, ns); - CHECK_RETURN(element_width.has_value()); - CHECK_RETURN(*element_width > 0); - CHECK_RETURN(*element_width % 8 == 0); - - if(*element_width == 8) + if(element_width.has_value() && *element_width == 8) { if(value_as_byte_array.id() != ID_array) { From 99219e801da2afb0bc3d3e4f6165be01938648c5 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 10 May 2021 10:14:17 +0000 Subject: [PATCH 2/3] Two new failing tests demonstrating updates outside member bounds These examples write to a member different from the access path being used, yet within the object bounds. --- regression/cbmc/member_bounds1/main.c | 24 ++++++++++++++++++++++++ regression/cbmc/member_bounds1/test.desc | 11 +++++++++++ regression/cbmc/member_bounds2/main.c | 19 +++++++++++++++++++ regression/cbmc/member_bounds2/test.desc | 13 +++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 regression/cbmc/member_bounds1/main.c create mode 100644 regression/cbmc/member_bounds1/test.desc create mode 100644 regression/cbmc/member_bounds2/main.c create mode 100644 regression/cbmc/member_bounds2/test.desc diff --git a/regression/cbmc/member_bounds1/main.c b/regression/cbmc/member_bounds1/main.c new file mode 100644 index 00000000000..f552968851b --- /dev/null +++ b/regression/cbmc/member_bounds1/main.c @@ -0,0 +1,24 @@ +#include + +#pragma pack(push, 1) +struct S +{ + int a[2]; + int x; +}; +#pragma pack(pop) + +#ifdef _MSC_VER +# define _Static_assert(x, m) static_assert(x, m) +#endif + +int main() +{ + int A[3]; + _Static_assert(sizeof(A) == sizeof(struct S), ""); + struct S *s = A; + s->a[2] = 42; + assert(*((int *)s + 2) == 42); + assert(s->x == 42); + assert(A[2] == 42); +} diff --git a/regression/cbmc/member_bounds1/test.desc b/regression/cbmc/member_bounds1/test.desc new file mode 100644 index 00000000000..3f14bd2b01c --- /dev/null +++ b/regression/cbmc/member_bounds1/test.desc @@ -0,0 +1,11 @@ +KNOWNBUG +main.c +--no-simplify +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +This test passes when using the simplifier, but results in "WITH" expressions +that update elements outside their bounds without the simplifier. diff --git a/regression/cbmc/member_bounds2/main.c b/regression/cbmc/member_bounds2/main.c new file mode 100644 index 00000000000..de2c3a4065c --- /dev/null +++ b/regression/cbmc/member_bounds2/main.c @@ -0,0 +1,19 @@ +#include + +#pragma pack(push, 1) +struct SU +{ + union { + int a[2]; + } u; + int x; +}; +#pragma pack(pop) + +int main() +{ + struct SU su; + su.u.a[2] = 42; + assert(*((int *)&su + 2) == 42); + assert(su.x == 42); +} diff --git a/regression/cbmc/member_bounds2/test.desc b/regression/cbmc/member_bounds2/test.desc new file mode 100644 index 00000000000..bede94d14b0 --- /dev/null +++ b/regression/cbmc/member_bounds2/test.desc @@ -0,0 +1,13 @@ +KNOWNBUG +main.c + +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +Field sensitivity makes it (currently) impossible to update bytes outside the +particular field. One possible way to address this is rewriting subscripted +array accesses to pointer dereferencing, as the C standard describes. Here, this +would amount to using *(&su.u.a[0] + 2) instead of su.u.a[2]. From b980f1f152b983ed87e99a9e3e3fe52a6fe0ae09 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 10 May 2021 14:23:21 +0000 Subject: [PATCH 3/3] goto-symex: optionally support updates across member boundaries If using the new option --symex-no-member-bounds, goto-symex will rewrite all indexed writes that constant propagation does not show to be within bounds to writes to the member object. --- regression/cbmc/member_bounds1/test.desc | 6 +- scripts/expected_doxygen_warnings.txt | 2 +- src/cbmc/cbmc_parse_options.cpp | 3 + src/goto-checker/bmc_util.h | 2 + src/goto-symex/symex_assign.cpp | 171 +++++++++++++++++++++-- src/goto-symex/symex_config.h | 4 + src/goto-symex/symex_main.cpp | 4 +- 7 files changed, 179 insertions(+), 13 deletions(-) diff --git a/regression/cbmc/member_bounds1/test.desc b/regression/cbmc/member_bounds1/test.desc index 3f14bd2b01c..83d4c74d5c0 100644 --- a/regression/cbmc/member_bounds1/test.desc +++ b/regression/cbmc/member_bounds1/test.desc @@ -1,6 +1,6 @@ -KNOWNBUG +CORE broken-smt-backend main.c ---no-simplify +--no-simplify --symex-no-member-bounds ^VERIFICATION SUCCESSFUL$ ^EXIT=0$ ^SIGNAL=0$ @@ -9,3 +9,5 @@ main.c -- This test passes when using the simplifier, but results in "WITH" expressions that update elements outside their bounds without the simplifier. +--symex-no-member-bounds ensures that such expressions are rewritten by +goto-symex. diff --git a/scripts/expected_doxygen_warnings.txt b/scripts/expected_doxygen_warnings.txt index fc1b1075f9c..a7582921f35 100644 --- a/scripts/expected_doxygen_warnings.txt +++ b/scripts/expected_doxygen_warnings.txt @@ -1,6 +1,6 @@ warning: Include graph for 'goto_instrument_parse_options.cpp' not generated, too many nodes (93), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'goto_model.h' not generated, too many nodes (95), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. -warning: Included by graph for 'arith_tools.h' not generated, too many nodes (167), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. +warning: Included by graph for 'arith_tools.h' not generated, too many nodes (168), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'c_types.h' not generated, too many nodes (128), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'config.h' not generated, too many nodes (78), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'invariant.h' not generated, too many nodes (172), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index 80ef5ac922a..d717ff37c51 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -345,6 +345,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option( "symex-cache-dereferences", cmdline.isset("symex-cache-dereferences")); + options.set_option( + "symex-no-member-bounds", cmdline.isset("symex-no-member-bounds")); + if(cmdline.isset("incremental-loop")) { options.set_option( diff --git a/src/goto-checker/bmc_util.h b/src/goto-checker/bmc_util.h index 272e5007dee..30fd402045b 100644 --- a/src/goto-checker/bmc_util.h +++ b/src/goto-checker/bmc_util.h @@ -197,6 +197,7 @@ void run_property_decider( "(unwind-max):" \ "(ignore-properties-before-unwind-min)" \ "(symex-cache-dereferences)" \ + "(symex-no-member-bounds)" \ #define HELP_BMC \ " --paths [strategy] explore paths one at a time\n" \ @@ -252,6 +253,7 @@ void run_property_decider( " gets blacklisted\n" \ " --graphml-witness filename write the witness in GraphML format to filename\n" /* NOLINT(*) */ \ " --symex-cache-dereferences enable caching of repeated dereferences" \ + " --symex-no-member-bounds support updates beyond bounds of a compound member" /* NOLINT(*) */ \ // clang-format on #endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index f4358ba0e8b..f86a61119c8 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -11,12 +11,14 @@ Author: Daniel Kroening, kroening@kroening.com #include "symex_assign.h" -#include "expr_skeleton.h" -#include "goto_symex_state.h" +#include #include #include +#include #include +#include "expr_skeleton.h" +#include "goto_symex_state.h" #include "symex_config.h" // We can either use with_exprt or update_exprt when building expressions that @@ -253,11 +255,100 @@ void symex_assignt::assign_array( { const exprt &lhs_array=lhs.array(); const exprt &lhs_index=lhs.index(); - const typet &lhs_index_type = lhs_array.type(); + const array_typet &lhs_array_type = to_array_type(lhs_array.type()); + + const bool may_be_out_of_bounds = + symex_config.updates_across_member_bounds && + [&lhs_index, &lhs_array_type]() { + if( + lhs_index.id() != ID_constant || + lhs_array_type.size().id() != ID_constant) + { + return true; + } + + const auto lhs_index_int = + numeric_cast_v(to_constant_expr(lhs_index)); + + return lhs_index_int < 0 || + lhs_index_int >= numeric_cast_v( + to_constant_expr(lhs_array_type.size())); + }(); + + if( + may_be_out_of_bounds && + (lhs_array.id() == ID_member || lhs_array.id() == ID_index)) + { + const auto subtype_bytes_opt = size_of_expr(lhs_array.type().subtype(), ns); + CHECK_RETURN(subtype_bytes_opt.has_value()); - PRECONDITION(lhs_index_type.id() == ID_array); + exprt new_offset = mult_exprt{ + lhs_index, + typecast_exprt::conditional_cast(*subtype_bytes_opt, lhs_index.type())}; - if(use_update) + exprt parent; + if(lhs_array.id() == ID_member) + { + const member_exprt &member = to_member_expr(lhs_array); + const auto member_offset = member_offset_expr(member, ns); + CHECK_RETURN(member_offset.has_value()); + + parent = member.compound(); + new_offset = plus_exprt{ + typecast_exprt::conditional_cast(*member_offset, new_offset.type()), + new_offset}; + } + else + { + const index_exprt &index = to_index_expr(lhs_array); + + const auto element_size_opt = + size_of_expr(to_array_type(index.array().type()).subtype(), ns); + CHECK_RETURN(element_size_opt.has_value()); + + parent = index.array(); + new_offset = + plus_exprt{mult_exprt{typecast_exprt::conditional_cast( + *element_size_opt, new_offset.type()), + typecast_exprt::conditional_cast( + index.index(), new_offset.type())}, + new_offset}; + } + + if(symex_config.simplify_opt) + simplify(new_offset, ns); + + const byte_update_exprt new_rhs = make_byte_update(parent, new_offset, rhs); + const expr_skeletont new_skeleton = + full_lhs.compose(expr_skeletont::remove_op0(lhs_array)); + assign_rec(parent, new_skeleton, new_rhs, guard); + } + else if( + may_be_out_of_bounds && (lhs_array.id() == ID_byte_extract_big_endian || + lhs_array.id() == ID_byte_extract_little_endian)) + { + const byte_extract_exprt &byte_extract = to_byte_extract_expr(lhs_array); + + const auto subtype_bytes_opt = size_of_expr(lhs_array.type().subtype(), ns); + CHECK_RETURN(subtype_bytes_opt.has_value()); + + exprt new_offset = + plus_exprt{mult_exprt{lhs_index, + typecast_exprt::conditional_cast( + *subtype_bytes_opt, lhs_index.type())}, + typecast_exprt::conditional_cast( + byte_extract.offset(), lhs_index.type())}; + + if(symex_config.simplify_opt) + simplify(new_offset, ns); + + byte_extract_exprt new_lhs = byte_extract; + new_lhs.offset() = new_offset; + new_lhs.type() = rhs.type(); + + assign_rec(new_lhs, full_lhs, rhs, guard); + } + else if(use_update) { // turn // a[i]=e @@ -378,8 +469,70 @@ void symex_assignt::assign_byte_extract( else UNREACHABLE; - const byte_update_exprt new_rhs{byte_update_id, lhs.op(), lhs.offset(), rhs}; - const expr_skeletont new_skeleton = - full_lhs.compose(expr_skeletont::remove_op0(lhs)); - assign_rec(lhs.op(), new_skeleton, new_rhs, guard); + const bool may_be_out_of_bounds = + symex_config.updates_across_member_bounds && [&lhs, this]() { + if(lhs.offset().id() != ID_constant) + return true; + const auto extract_size_opt = pointer_offset_size(lhs.type(), ns); + if(!extract_size_opt.has_value()) + return true; + const auto object_size_opt = pointer_offset_size(lhs.op().type(), ns); + if(!object_size_opt.has_value()) + return true; + const auto lhs_offset_int = + numeric_cast_v(to_constant_expr(lhs.offset())); + return lhs_offset_int < 0 || + lhs_offset_int + *extract_size_opt > *object_size_opt; + }(); + + if( + may_be_out_of_bounds && + (lhs.op().id() == ID_member || lhs.op().id() == ID_index)) + { + exprt new_offset = lhs.offset(); + exprt parent; + if(lhs.op().id() == ID_member) + { + const member_exprt &member = to_member_expr(lhs.op()); + const auto member_offset = member_offset_expr(member, ns); + CHECK_RETURN(member_offset.has_value()); + + parent = member.compound(); + new_offset = plus_exprt{ + typecast_exprt::conditional_cast(*member_offset, new_offset.type()), + new_offset}; + } + else + { + const index_exprt &index = to_index_expr(lhs.op()); + + const auto element_size_opt = + size_of_expr(to_array_type(index.array().type()).subtype(), ns); + CHECK_RETURN(element_size_opt.has_value()); + + parent = index.array(); + new_offset = + plus_exprt{mult_exprt{typecast_exprt::conditional_cast( + *element_size_opt, new_offset.type()), + typecast_exprt::conditional_cast( + index.index(), new_offset.type())}, + new_offset}; + } + + if(symex_config.simplify_opt) + simplify(new_offset, ns); + + const byte_update_exprt new_rhs{byte_update_id, parent, new_offset, rhs}; + const expr_skeletont new_skeleton = + full_lhs.compose(expr_skeletont::remove_op0(lhs.op())); + assign_rec(parent, new_skeleton, new_rhs, guard); + } + else + { + const byte_update_exprt new_rhs{ + byte_update_id, lhs.op(), lhs.offset(), rhs}; + const expr_skeletont new_skeleton = + full_lhs.compose(expr_skeletont::remove_op0(lhs)); + assign_rec(lhs.op(), new_skeleton, new_rhs, guard); + } } diff --git a/src/goto-symex/symex_config.h b/src/goto-symex/symex_config.h index 70650744356..73b9ab1542d 100644 --- a/src/goto-symex/symex_config.h +++ b/src/goto-symex/symex_config.h @@ -63,6 +63,10 @@ struct symex_configt final /// Used in goto_symext::dereference_rec bool cache_dereferences; + /// Handle assignments beyond the bounds prescribed by a struct or array + /// access path. See tests in regression/cbmc/member_bounds* for examples. + bool updates_across_member_bounds; + /// \brief Construct a symex_configt using options specified in an /// \ref optionst explicit symex_configt(const optionst &options); diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index 451647a7ce4..ff31bc86185 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -56,7 +56,9 @@ symex_configt::symex_configt(const optionst &options) : DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE), complexity_limits_active( options.get_signed_int_option("symex-complexity-limit") > 0), - cache_dereferences{options.get_bool_option("symex-cache-dereferences")} + cache_dereferences{options.get_bool_option("symex-cache-dereferences")}, + updates_across_member_bounds{ + options.get_bool_option("symex-no-member-bounds")} { }