diff --git a/regression/contracts/loop_invariant_synthesis_01/main.c b/regression/contracts/loop_contract_synthesis_01/main.c similarity index 100% rename from regression/contracts/loop_invariant_synthesis_01/main.c rename to regression/contracts/loop_contract_synthesis_01/main.c diff --git a/regression/contracts/loop_invariant_synthesis_01/test.desc b/regression/contracts/loop_contract_synthesis_01/test.desc similarity index 91% rename from regression/contracts/loop_invariant_synthesis_01/test.desc rename to regression/contracts/loop_contract_synthesis_01/test.desc index 1ff43bb978d..71094e93b29 100644 --- a/regression/contracts/loop_invariant_synthesis_01/test.desc +++ b/regression/contracts/loop_contract_synthesis_01/test.desc @@ -1,6 +1,6 @@ CORE main.c ---synthesize-loop-invariants --apply-loop-contracts +--synthesize-loop-invariants ^EXIT=0$ ^SIGNAL=0$ ^\[main\.\d+\] line 10 Check loop invariant before entry: SUCCESS$ diff --git a/regression/contracts/loop_contract_synthesis_02/main.c b/regression/contracts/loop_contract_synthesis_02/main.c new file mode 100644 index 00000000000..09a782991fd --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_02/main.c @@ -0,0 +1,17 @@ +#define SIZE 80 + +void main() +{ + unsigned len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + unsigned s = 0; + + for(unsigned i = 0; i < SIZE; ++i) + { + if(i == len - 1) + break; + s += array[i]; + } +} diff --git a/regression/contracts/loop_contract_synthesis_02/test.desc b/regression/contracts/loop_contract_synthesis_02/test.desc new file mode 100644 index 00000000000..47012b9e16e --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_02/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +--pointer-check --synthesize-loop-invariants +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that loop invariant with form of range predicates can be correctly +synthesized for programs with only pointer checks but no other assertions. diff --git a/regression/contracts/loop_contract_synthesis_03/main.c b/regression/contracts/loop_contract_synthesis_03/main.c new file mode 100644 index 00000000000..be272d35f2c --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_03/main.c @@ -0,0 +1,16 @@ +#define SIZE 80 + +void main() +{ + unsigned long len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + const char *end = array + len; + unsigned s = 0; + + while(array != end) + { + s += *array++; + } +} diff --git a/regression/contracts/loop_contract_synthesis_03/test.desc b/regression/contracts/loop_contract_synthesis_03/test.desc new file mode 100644 index 00000000000..373ef9eb38f --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_03/test.desc @@ -0,0 +1,12 @@ +CORE +main.c +--pointer-check --synthesize-loop-invariants +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that loop invariant with form of range predicates and same +object predicates can be correctly synthesized for programs with only pointer +checks but no other assertions. diff --git a/regression/contracts/loop_contract_synthesis_04/main.c b/regression/contracts/loop_contract_synthesis_04/main.c new file mode 100644 index 00000000000..8e75063dc4a --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_04/main.c @@ -0,0 +1,17 @@ +#define SIZE 80 + +void main() +{ + unsigned long len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + unsigned long s = 0; + + unsigned long j = 0; + for(unsigned long i = 0; i < len; i++) + { + s += array[j]; + j++; + } +} diff --git a/regression/contracts/loop_contract_synthesis_04/test.desc b/regression/contracts/loop_contract_synthesis_04/test.desc new file mode 100644 index 00000000000..2f9fbe96d87 --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_04/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +--pointer-check --synthesize-loop-invariants +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that the loop-invariant synthesizer can enumerate +strengthening clauses for invariant-not-preserved violation. diff --git a/src/Makefile b/src/Makefile index e176aac6270..df7e4c25677 100644 --- a/src/Makefile +++ b/src/Makefile @@ -80,7 +80,8 @@ goto-harness.dir: util.dir goto-programs.dir langapi.dir linking.dir \ goto-instrument.dir goto-instrument.dir: languages goto-programs.dir pointer-analysis.dir \ - goto-symex.dir linking.dir analyses.dir solvers.dir + goto-symex.dir linking.dir analyses.dir solvers.dir \ + goto-checker.dir goto-checker.dir: solvers.dir goto-symex.dir goto-programs.dir diff --git a/src/analyses/dependence_graph.cpp b/src/analyses/dependence_graph.cpp index a95d0064a72..896729f179f 100644 --- a/src/analyses/dependence_graph.cpp +++ b/src/analyses/dependence_graph.cpp @@ -383,6 +383,53 @@ void dependence_grapht::add_dep( nodes[n_to].in[n_from].add(kind); } +bool dependence_grapht::is_flow_dependent( + const goto_programt::const_targett &from, + const goto_programt::const_targett &to) +{ + std::set visited = std::set(); + const dep_graph_domaint from_domain = static_cast( + *storage->abstract_state_before(from, *domain_factory)); + const dep_graph_domaint to_domain = static_cast( + *storage->abstract_state_before(to, *domain_factory)); + return is_flow_dependent(from_domain, to_domain, visited); +} + +bool dependence_grapht::is_flow_dependent( + const dep_graph_domaint &from, + const dep_graph_domaint &to, + std::set &visited) +{ + // `to` is control dependent on `from`? + for(const auto node : to.get_control_deps()) + { + if(visited.count((*this)[node].get_node_id())) + continue; + + visited.insert((*this)[node].get_node_id()); + + if( + from.get_node_id() == (*this)[node].get_node_id() || + is_flow_dependent(from, (*this)[node], visited)) + return true; + } + + // `to` is data dependent on `from`? + for(const auto node : to.get_data_deps()) + { + if(visited.count((*this)[node].get_node_id())) + continue; + + visited.insert((*this)[node].get_node_id()); + + if( + from.get_node_id() == (*this)[node].get_node_id() || + is_flow_dependent(from, (*this)[node], visited)) + return true; + } + return false; +} + void dep_graph_domaint::populate_dep_graph( dependence_grapht &dep_graph, goto_programt::const_targett this_loc) const { diff --git a/src/analyses/dependence_graph.h b/src/analyses/dependence_graph.h index 14ec16bdbb0..757b575ee28 100644 --- a/src/analyses/dependence_graph.h +++ b/src/analyses/dependence_graph.h @@ -67,6 +67,7 @@ class dep_graph_domaint:public ai_domain_baset { public: typedef grapht::node_indext node_indext; + typedef std::set depst; explicit dep_graph_domaint(node_indext id) : has_values(false), node_id(id), has_changed(false) @@ -167,13 +168,20 @@ class dep_graph_domaint:public ai_domain_baset void populate_dep_graph( dependence_grapht &, goto_programt::const_targett) const; + depst get_control_deps() const + { + return control_deps; + } + depst get_data_deps() const + { + return data_deps; + } + private: tvt has_values; node_indext node_id; bool has_changed; - typedef std::set depst; - // Set of locations with control instructions on which the instruction at this // location has a control dependency on depst control_deps; @@ -279,7 +287,16 @@ class dependence_grapht: return rd; } + /// Decide whether the instruction `to` is flow dependent on `from`. + bool is_flow_dependent( + const goto_programt::const_targett &from, + const goto_programt::const_targett &to); + protected: + bool is_flow_dependent( + const dep_graph_domaint &from, + const dep_graph_domaint &to, + std::set &visited); friend dep_graph_domain_factoryt; friend dep_graph_domaint; const namespacet &ns; diff --git a/src/goto-instrument/CMakeLists.txt b/src/goto-instrument/CMakeLists.txt index a1edca91f9a..3fdfe75bba7 100644 --- a/src/goto-instrument/CMakeLists.txt +++ b/src/goto-instrument/CMakeLists.txt @@ -13,6 +13,7 @@ target_link_libraries(goto-instrument-lib linking big-int goto-programs + goto-checker goto-symex assembler pointer-analysis diff --git a/src/goto-instrument/Makefile b/src/goto-instrument/Makefile index bfa8ae84975..74a582d9c56 100644 --- a/src/goto-instrument/Makefile +++ b/src/goto-instrument/Makefile @@ -84,6 +84,7 @@ SRC = accelerate/accelerate.cpp \ splice_call.cpp \ stack_depth.cpp \ synthesizer/enumerative_loop_invariant_synthesizer.cpp \ + synthesizer/cegis_verifier.cpp \ synthesizer/expr_enumerator.cpp \ synthesizer/synthesizer_utils.cpp \ thread_instrumentation.cpp \ @@ -108,6 +109,7 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \ ../cpp/cpp$(LIBEXT) \ ../linking/linking$(LIBEXT) \ ../big-int/big-int$(LIBEXT) \ + ../goto-checker/goto-checker$(LIBEXT) \ ../goto-programs/goto-programs$(LIBEXT) \ ../goto-symex/goto-symex$(LIBEXT) \ ../assembler/assembler$(LIBEXT) \ diff --git a/src/goto-instrument/contracts/contracts.cpp b/src/goto-instrument/contracts/contracts.cpp index 2268f74b90b..8c116c6c0dd 100644 --- a/src/goto-instrument/contracts/contracts.cpp +++ b/src/goto-instrument/contracts/contracts.cpp @@ -142,7 +142,7 @@ void code_contractst::check_apply_loop_contracts( // i.e., the loop guard was satisfied. const auto entered_loop = new_tmp_symbol( - bool_typet(), loop_head_location, mode, symbol_table, "__entered_loop") + bool_typet(), loop_head_location, mode, symbol_table, ENTERED_LOOP) .symbol_expr(); pre_loop_head_instrs.add( goto_programt::make_decl(entered_loop, loop_head_location)); @@ -173,7 +173,7 @@ void code_contractst::check_apply_loop_contracts( // instrumentation of the loop. const auto in_base_case = new_tmp_symbol( - bool_typet(), loop_head_location, mode, symbol_table, "__in_base_case") + bool_typet(), loop_head_location, mode, symbol_table, IN_BASE_CASE) .symbol_expr(); pre_loop_head_instrs.add( goto_programt::make_decl(in_base_case, loop_head_location)); @@ -294,6 +294,12 @@ void code_contractst::check_apply_loop_contracts( // Generate havocing code for assignment targets. havoc_assigns_targetst havoc_gen(to_havoc, ns); havoc_gen.append_full_havoc_code(loop_head_location, pre_loop_head_instrs); + // Add loop number to the havoc_code + for(auto &havoc_instr : pre_loop_head_instrs.instructions) + { + if(is_loop_havoc(havoc_instr)) + havoc_instr.loop_number = loop_end->loop_number; + } // Insert the second block of pre_loop_head_instrs: the havocing code. // We do not `add_pragma_disable_assigns_check`, diff --git a/src/goto-instrument/contracts/utils.cpp b/src/goto-instrument/contracts/utils.cpp index 2c2ab147534..8122e89f811 100644 --- a/src/goto-instrument/contracts/utils.cpp +++ b/src/goto-instrument/contracts/utils.cpp @@ -447,3 +447,36 @@ void generate_history_variables_initialization( // Add all the history variable initialization instructions program.destructive_append(history); } + +bool is_transformed_loop_end(const goto_programt::const_targett &target) +{ + // The end of the loop end of transformed loop is + // ASSIGN entered_loop = true + if(!target->is_assign()) + return false; + + return from_expr(target->assign_lhs()).find("__entered_loop") != + std::string::npos && + target->assign_rhs() == true_exprt(); +} + +bool is_assignment_to_instrumented_variable( + const goto_programt::const_targett &target, + std::string var_name) +{ + INVARIANT( + var_name == IN_BASE_CASE || var_name == ENTERED_LOOP, + "var_name is not of instrumented variables."); + + if(!target->is_assign()) + return false; + + if(can_cast_expr(target->assign_lhs())) + { + const auto &lhs = to_symbol_expr(target->assign_lhs()); + return id2string(lhs.get_identifier()).find("::" + var_name) != + std::string::npos; + } + + return false; +} diff --git a/src/goto-instrument/contracts/utils.h b/src/goto-instrument/contracts/utils.h index ff4ced40065..1faa5d2f326 100644 --- a/src/goto-instrument/contracts/utils.h +++ b/src/goto-instrument/contracts/utils.h @@ -17,6 +17,9 @@ Date: September 2021 #include +#define IN_BASE_CASE "__in_base_case" +#define ENTERED_LOOP "__entered_loop" + /// \brief A class that overrides the low-level havocing functions in the base /// utility class, to havoc only when expressions point to valid memory, /// i.e. if all dereferences within an expression are valid @@ -205,4 +208,13 @@ void generate_history_variables_initialization( const irep_idt &mode, goto_programt &program); +/// Return true if `target` is the loop end of some transformed code. +bool is_transformed_loop_end(const goto_programt::const_targett &target); + +/// Return true if `target` is an assignment to an instrumented variable with +/// name `var_name`. +bool is_assignment_to_instrumented_variable( + const goto_programt::const_targett &target, + std::string var_name); + #endif // CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 1a21921c827..333547ca5f8 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -1148,16 +1148,6 @@ void goto_instrument_parse_optionst::instrument_goto_program() goto_model.goto_functions.update(); } - if(cmdline.isset("synthesize-loop-invariants")) - { - log.warning() << "Loop invariant synthesizer is still work in progress. " - "It only generates TRUE as invariants." - << messaget::eom; - - // Synthesize loop invariants and annotate them into `goto_model` - enumerative_loop_invariant_synthesizert synthesizer(goto_model, log); - annotate_invariants(synthesizer.synthesize_all(), goto_model, log); - } bool use_dfcc = cmdline.isset(FLAG_DFCC); if(use_dfcc) @@ -1432,6 +1422,29 @@ void goto_instrument_parse_optionst::instrument_goto_program() goto_check_c(options, goto_model, ui_message_handler); transform_assertions_assumptions(options, goto_model); + if(cmdline.isset(FLAG_SYNTHESIZE_LOOP_INVARIANTS)) + { + if(cmdline.isset(FLAG_LOOP_CONTRACTS)) + { + throw invalid_command_line_argument_exceptiont( + "Incompatible options detected", + "--" FLAG_SYNTHESIZE_LOOP_INVARIANTS " and --" FLAG_LOOP_CONTRACTS, + "Use either --" FLAG_SYNTHESIZE_LOOP_INVARIANTS + " or --" FLAG_LOOP_CONTRACTS); + } + + // Synthesize loop invariants and annotate them into `goto_model` + enumerative_loop_invariant_synthesizert synthesizer(goto_model, log); + annotate_invariants(synthesizer.synthesize_all(), goto_model); + + // Apply loop contracts. + std::set to_exclude_from_nondet_static( + cmdline.get_values("nondet-static-exclude").begin(), + cmdline.get_values("nondet-static-exclude").end()); + code_contractst contracts(goto_model, log); + contracts.apply_loop_contracts(to_exclude_from_nondet_static); + } + // check for uninitalized local variables if(cmdline.isset("uninitialized-check")) { diff --git a/src/goto-instrument/havoc_utils.cpp b/src/goto-instrument/havoc_utils.cpp index f7d6dc5c5c7..89e9f1c98e2 100644 --- a/src/goto-instrument/havoc_utils.cpp +++ b/src/goto-instrument/havoc_utils.cpp @@ -16,7 +16,14 @@ Date: July 2021 #include #include -#include +#define IS_LOOP_HAVOC "Loop havocing instrumented by applying loop contracts." + +bool is_loop_havoc(const goto_programt::instructiont &instruction) +{ + if(id2string(instruction.source_location().get_comment()) == IS_LOOP_HAVOC) + return true; + return false; +} void havoc_utilst::append_full_havoc_code( const source_locationt location, @@ -52,6 +59,8 @@ void havoc_utilst::append_object_havoc_code_for_expr( havoc.add_source_location() = location; havoc.add_to_operands(expr); dest.add(goto_programt::make_other(havoc, location)); + dest.instructions.back().source_location_nonconst().set_comment( + IS_LOOP_HAVOC); } void havoc_utilst::append_scalar_havoc_code_for_expr( @@ -62,4 +71,6 @@ void havoc_utilst::append_scalar_havoc_code_for_expr( side_effect_expr_nondett rhs(expr.type(), location); dest.add(goto_programt::make_assignment( code_assignt{expr, std::move(rhs), location}, location)); + dest.instructions.back().source_location_nonconst().set_comment( + IS_LOOP_HAVOC); } diff --git a/src/goto-instrument/havoc_utils.h b/src/goto-instrument/havoc_utils.h index e5b425c9996..5acd5f005a1 100644 --- a/src/goto-instrument/havoc_utils.h +++ b/src/goto-instrument/havoc_utils.h @@ -17,12 +17,15 @@ Date: July 2021 #include #include -#include +#include -class goto_programt; +#include typedef std::set assignst; +/// Return true if `instruction` is a loop havoc instruction. +bool is_loop_havoc(const goto_programt::instructiont &instruction); + /// \brief A class containing utility functions for havocing expressions. class havoc_utils_is_constantt : public is_constantt { diff --git a/src/goto-instrument/synthesizer/cegis_verifier.cpp b/src/goto-instrument/synthesizer/cegis_verifier.cpp new file mode 100644 index 00000000000..541c3b41a8f --- /dev/null +++ b/src/goto-instrument/synthesizer/cegis_verifier.cpp @@ -0,0 +1,524 @@ +/*******************************************************************\ + +Module: Verifier for Counterexample-Guided Synthesis + +Author: Qinheping Hu + +\*******************************************************************/ + +/// \file +/// Verifier for Counterexample-Guided Synthesis + +#include "cegis_verifier.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool contains_symbol_prefix(const exprt &expr, const std::string &prefix) +{ + if( + expr.id() == ID_symbol && + has_prefix(id2string(to_symbol_expr(expr).get_identifier()), prefix)) + { + return true; + } + + forall_operands(it, expr) + { + if(contains_symbol_prefix(*it, prefix)) + return true; + } + return false; +} + +optionst cegis_verifiert::get_options() +{ + optionst options; + + // Default options + // These options are the same as we run CBMC without any set flag. + options.set_option("built-in-assertions", true); + options.set_option("propagation", true); + options.set_option("simple-slice", true); + options.set_option("simplify", true); + options.set_option("show-goto-symex-steps", false); + options.set_option("show-points-to-sets", false); + options.set_option("show-array-constraints", false); + options.set_option("built-in-assertions", true); + options.set_option("assertions", true); + options.set_option("assumptions", true); + options.set_option("arrays-uf", "auto"); + options.set_option("depth", UINT32_MAX); + options.set_option("exploration-strategy", "lifo"); + options.set_option("symex-cache-dereferences", false); + options.set_option("rewrite-union", true); + options.set_option("self-loops-to-assumptions", true); + options.set_option("trace", true); + + // Preporcess `goto_model`. Copied from `cbmc_parse_options.cpp`. + remove_asm(goto_model); + link_to_library( + goto_model, log.get_message_handler(), cprover_cpp_library_factory); + link_to_library( + goto_model, log.get_message_handler(), cprover_c_library_factory); + process_goto_program(goto_model, options, log); + + add_failed_symbols(goto_model.symbol_table); + + remove_skip(goto_model); + label_properties(goto_model); + return options; +} + +loop_idt cegis_verifiert::get_cause_loop_id( + const goto_tracet &goto_trace, + const goto_programt::const_targett violation) +{ + loop_idt result; + + // build the dependence graph + const namespacet ns(goto_model.symbol_table); + dependence_grapht dependence_graph(ns); + dependence_graph(goto_model); + + // A loop is the cause loop if the violation is dependent on the write set of + // the loop. + for(const auto &step : goto_trace.steps) + { + // Being dependent on a write set is equivalent to being dependent on one + // of the loop havocing instruction. + if(is_loop_havoc(*step.pc)) + { + // Checking if `to` is dependent on `from`. + // `from` : loop havocing + // `to` : violation + goto_programt::const_targett from; + goto_programt::const_targett to; + + // Get `from`---a loop havoc instruction. + irep_idt from_fun_name = step.function_id; + const goto_functionst::goto_functiont &from_function = + goto_model.get_goto_function(from_fun_name); + for(goto_programt::const_targett it = + from_function.body.instructions.begin(); + it != from_function.body.instructions.end(); + ++it) + { + if(it->location_number == step.pc->location_number) + { + from = it; + } + } + + // Get `to`---the instruction where the violation happens + irep_idt to_fun_name = goto_trace.get_last_step().function_id; + const goto_functionst::goto_functiont &to_function = + goto_model.get_goto_function(to_fun_name); + for(goto_programt::const_targett it = + to_function.body.instructions.begin(); + it != to_function.body.instructions.end(); + ++it) + { + if(it->location_number == violation->location_number) + { + to = it; + } + } + + // The violation is caused by the loop havoc + // if it is dependent on the loop havoc. + if(dependence_graph.is_flow_dependent(from, to)) + { + // TODO: not safe for inlined static functions. + result.function_id = step.pc->source_location().get_function(); + result.loop_number = step.pc->loop_number; + } + } + } + return result; +} + +cext cegis_verifiert::build_cex( + const goto_tracet &goto_trace, + const source_locationt &loop_entry_loc) +{ + const namespacet ns(goto_model.symbol_table); + + // Valuations of havoced variables right after havoc instructions. + std::map object_sizes; + std::map havoced_values; + std::map havoced_pointer_offsets; + + // Loop-entry valuations of havoced variables. + std::map loop_entry_values; + std::map loop_entry_offsets; + + // Live variables upon the loop head. + std::set live_variables; + + bool entered_loop = false; + + // Scan the trace step by step to store needed valuations. + for(const auto &step : goto_trace.steps) + { + switch(step.type) + { + case goto_trace_stept::typet::DECL: + case goto_trace_stept::typet::ASSIGNMENT: + { + if(!step.full_lhs_value.is_nil()) + { + // Entered loop? + if(is_assignment_to_instrumented_variable(step.pc, ENTERED_LOOP)) + entered_loop = step.full_lhs_value == true_exprt(); + + // skip hidden steps + if(step.hidden) + break; + + // Live variables + // 1. must be in the same function as the target loop; + // 2. alive before entering the target loop; + // 3. a pointer or a primitive-typed variable; + // TODO: add support for union pointer + if( + step.pc->source_location().get_function() == + loop_entry_loc.get_function() && + !entered_loop && + (step.full_lhs.type().id() == ID_unsignedbv || + step.full_lhs.type().id() == ID_pointer) && + step.full_lhs.id() == ID_symbol) + { + const auto &symbol = + expr_try_dynamic_cast(step.full_lhs); + + // malloc_size is not-hidden tmp variable. + if(id2string(symbol->get_identifier()) != "malloc::malloc_size") + { + live_variables.emplace(step.full_lhs); + } + } + + // Record valuation of primitive-typed variable. + if(step.full_lhs.type().id() == ID_unsignedbv) + { + // Store the value into the map for loop_entry value if we haven't + // entered the loop. + if(!entered_loop) + { + loop_entry_values[step.full_lhs] = + hex_to_size_t(step.full_lhs_value.get_string(ID_value)); + } + + // Store the value into the the map for havoced value if this step + // is a loop havocing instruction. + if(is_loop_havoc(*step.pc)) + { + havoced_values[step.full_lhs] = + hex_to_size_t(step.full_lhs_value.get_string(ID_value)); + } + } + + // Record object_size, pointer_offset, and loop_entry(pointer_offset). + if( + can_cast_expr( + step.full_lhs_value) && + contains_symbol_prefix( + step.full_lhs_value, SYMEX_DYNAMIC_PREFIX "dynamic_object")) + { + const auto &pointer_constant_expr = + expr_try_dynamic_cast( + step.full_lhs_value); + + pointer_arithmetict pointer_arithmetic( + pointer_constant_expr->symbolic_pointer()); + if(pointer_constant_expr->symbolic_pointer().id() == ID_typecast) + { + pointer_arithmetic = pointer_arithmetict( + pointer_constant_expr->symbolic_pointer().operands()[0]); + } + + // Extract object size. + exprt &underlying_array = pointer_arithmetic.pointer; + // Object sizes are stored in the type of underlying arrays. + while(!can_cast_type(underlying_array.type())) + { + if( + underlying_array.id() == ID_address_of || + underlying_array.id() == ID_index) + { + underlying_array = underlying_array.operands()[0]; + continue; + } + UNREACHABLE; + } + std::size_t object_size = + pointer_offset_size(to_array_type(underlying_array.type()), ns) + .value() + .to_ulong(); + object_sizes[step.full_lhs] = object_size; + + // Extract offsets. + std::size_t offset = 0; + if(pointer_arithmetic.offset.is_not_nil()) + { + constant_exprt offset_expr = + expr_dynamic_cast(pointer_arithmetic.offset); + offset = hex_to_size_t(id2string(offset_expr.get_value())); + } + + // Store the offset into the map for loop_entry if we haven't + // entered the loop. + if(!entered_loop) + { + loop_entry_offsets[step.full_lhs] = offset; + } + + // Store the offset into the the map for havoced offset if this step + // is a loop havocing instruction. + if(is_loop_havoc(*step.pc)) + { + havoced_pointer_offsets[step.full_lhs] = offset; + } + } + } + } + + case goto_trace_stept::typet::ASSERT: + case goto_trace_stept::typet::FUNCTION_CALL: + case goto_trace_stept::typet::FUNCTION_RETURN: + case goto_trace_stept::typet::ASSUME: + case goto_trace_stept::typet::LOCATION: + case goto_trace_stept::typet::GOTO: + case goto_trace_stept::typet::OUTPUT: + case goto_trace_stept::typet::INPUT: + case goto_trace_stept::typet::SPAWN: + case goto_trace_stept::typet::MEMORY_BARRIER: + case goto_trace_stept::typet::ATOMIC_BEGIN: + case goto_trace_stept::typet::ATOMIC_END: + case goto_trace_stept::typet::DEAD: + case goto_trace_stept::typet::CONSTRAINT: + case goto_trace_stept::typet::SHARED_READ: + case goto_trace_stept::typet::SHARED_WRITE: + break; + + case goto_trace_stept::typet::NONE: + UNREACHABLE; + } + } + + return cext( + object_sizes, + havoced_values, + havoced_pointer_offsets, + loop_entry_values, + loop_entry_offsets, + live_variables); +} + +void cegis_verifiert::restore_functions() +{ + for(const auto &fun_entry : goto_model.goto_functions.function_map) + { + irep_idt fun_name = fun_entry.first; + goto_model.goto_functions.function_map[fun_name].body.swap( + original_functions[fun_name]); + } +} + +bool cegis_verifiert::verify() +{ + // This method does the following three things to verify the `goto_model` and + // return a formatted counterexample if there is any violated property. + // + // 1. annotate and apply the loop contracts stored in `invariant_candidates`. + // + // 2. run the CBMC API to verify the intrumented goto model. As the API is not + // merged yet, we preprocess the goto model and run the symex checker on it + // to simulate CBMC API. + // FIXEME: ^^^ replace the symex checker once the real API is merged. + // + // 3. construct the formatted counterexample from the violated property and + // its trace. + + // Store the original functions. We will restore them after the verification. + for(const auto &fun_entry : goto_model.goto_functions.function_map) + { + original_functions[fun_entry.first].copy_from(fun_entry.second.body); + } + + // Annotate the candidates tot the goto_model for checking. + annotate_invariants(invariant_candidates, goto_model); + + // Control verbosity. + // We allow non-error output message only when verbosity is set to at least 9. + const unsigned original_verbosity = log.get_message_handler().get_verbosity(); + if(original_verbosity < 9) + log.get_message_handler().set_verbosity(1); + + // Apply loop contracts we annotated. + code_contractst cont(goto_model, log); + cont.apply_loop_contracts(); + + // Get the checker same as CBMC api without any flag. + // TODO: replace the checker with CBMC api once it is implemented. + ui_message_handlert ui_message_handler(log.get_message_handler()); + const auto options = get_options(); + std::unique_ptr< + all_properties_verifier_with_trace_storaget> + checker = util_make_unique< + all_properties_verifier_with_trace_storaget>( + options, ui_message_handler, goto_model); + + goto_convert( + goto_model.symbol_table, + goto_model.goto_functions, + log.get_message_handler()); + + // Run the checker to get the result. + const resultt result = (*checker)(); + + if(original_verbosity >= 9) + checker->report(); + + // Restore the verbosity. + log.get_message_handler().set_verbosity(original_verbosity); + + // + // Strat to counstruct the counterexample. + // + + if(result == resultt::PASS) + { + restore_functions(); + return true; + } + + if(result == resultt::ERROR || result == resultt::UNKNOWN) + { + INVARIANT(false, "Verification failed during loop contract synthesis."); + } + + properties = checker->get_properties(); + // Find the violation and construct conterexample from its trace. + for(const auto &property_it : properties) + { + if(property_it.second.status != property_statust::FAIL) + continue; + + first_violation = property_it.first; + exprt violated_predicate = property_it.second.pc->condition(); + + // The pointer checked in the null-pointer-check violation. + exprt checked_pointer = true_exprt(); + + // Type of the violation + cext::violation_typet violation_type = cext::violation_typet::cex_other; + + // The violation is a pointer OOB check. + if((property_it.second.description.find( + "dereference failure: pointer outside object bounds in") != + std::string::npos)) + { + violation_type = cext::violation_typet::cex_out_of_boundary; + } + + // The violation is a null pointer check. + if(property_it.second.description.find("pointer NULL") != std::string::npos) + { + violation_type = cext::violation_typet::cex_null_pointer; + checked_pointer = property_it.second.pc->condition() + .operands()[0] + .operands()[1] + .operands()[0]; + INVARIANT(checked_pointer.id() == ID_symbol, "Checking pointer symbol"); + } + + // The violation is a loop-invariant-preservation check. + if(property_it.second.description.find("preserved") != std::string::npos) + { + violation_type = cext::violation_typet::cex_not_preserved; + } + + // The violation is a loop-invariant-preservation check. + if( + property_it.second.description.find("invariant before entry") != + std::string::npos) + { + violation_type = cext::violation_typet::cex_not_hold_upon_entry; + } + + // The loop which could be the cause of the violation. + // We say a loop is the cause loop if the violated predicate is dependent + // upon the write set of the loop. + loop_idt cause_loop_id = get_cause_loop_id( + checker->get_traces()[property_it.first], property_it.second.pc); + + if(!cause_loop_id.loop_number.has_value()) + { + log.debug() << "No cause loop found!\n"; + restore_functions(); + + return_cex.cause_loop_id = cause_loop_id; + return_cex.cex_type = violation_type; + return false; + } + + log.debug() << "Found cause loop with function id: " + << cause_loop_id.function_id + << ", and loop number: " << cause_loop_id.loop_number.value() + << "\n"; + + // Decide whether the violation is in the cause loop. + bool violation_in_loop = is_instruction_in_transfomed_loop( + cause_loop_id, + goto_model.get_goto_function(cause_loop_id.function_id), + property_it.second.pc->location_number); + + // We always strengthen in_clause if the violation is + // invariant-not-preserved. + if(violation_type == cext::violation_typet::cex_not_preserved) + violation_in_loop = true; + + restore_functions(); + + return_cex = build_cex( + checker->get_traces()[property_it.first], + get_loop_head( + cause_loop_id.loop_number.value(), + goto_model.goto_functions.function_map[cause_loop_id.function_id]) + ->source_location()); + return_cex.violated_predicate = violated_predicate; + return_cex.cause_loop_id = cause_loop_id; + return_cex.checked_pointer = checked_pointer; + return_cex.is_violation_in_loop = violation_in_loop; + return_cex.cex_type = violation_type; + + return false; + } + + UNREACHABLE; +} diff --git a/src/goto-instrument/synthesizer/cegis_verifier.h b/src/goto-instrument/synthesizer/cegis_verifier.h new file mode 100644 index 00000000000..b80f55d266a --- /dev/null +++ b/src/goto-instrument/synthesizer/cegis_verifier.h @@ -0,0 +1,142 @@ +/*******************************************************************\ + +Module: Verifier for Counterexample-Guided Synthesis + +Author: Qinheping Hu + +\*******************************************************************/ + +/// \file +/// Verifier for Counterexample-Guided Synthesis + +#ifndef CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_CEGIS_VERIFIER_H +#define CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_CEGIS_VERIFIER_H + +#include +#include + +#include + +#include "synthesizer_utils.h" + +class messaget; + +/// Formatted counterexample. +class cext +{ +public: + enum class violation_typet + { + cex_out_of_boundary, + cex_null_pointer, + cex_not_preserved, + cex_not_hold_upon_entry, + cex_other + }; + + cext( + const std::map &object_sizes, + const std::map &havoced_values, + const std::map &havoced_pointer_offsets, + const std::map &loop_entry_values, + const std::map &loop_entry_offsets, + const std::set &live_variables) + : object_sizes(object_sizes), + havoced_values(havoced_values), + havoced_pointer_offsets(havoced_pointer_offsets), + loop_entry_values(loop_entry_values), + loop_entry_offsets(loop_entry_offsets), + live_variables(live_variables) + { + } + + cext() + { + } + + // pointer that failed the null pointer check + exprt checked_pointer; + exprt violated_predicate; + + // true if the violation happens in the cause loop + // false if the violation happens after the cause loop + bool is_violation_in_loop = true; + + // We collect havoced evaluations of havoced variables and their object sizes + // and pointer offsets. + + // __CPROVER_OBJECT_SIZE + std::map object_sizes; + // all the valuation of havoced variables with primitived type. + std::map havoced_values; + // __CPROVER_POINTER_OFFSET + std::map havoced_pointer_offsets; + + // We also collect loop-entry evaluations of havoced variables. + // __CPROVER_loop_entry + std::map loop_entry_values; + // __CPROVER_POINTER_OFFSET(__CPROVER_loop_entry( )) + std::map loop_entry_offsets; + + // Set of live variables upon loop head. + std::set live_variables; + + violation_typet cex_type; + loop_idt cause_loop_id; +}; + +/// Verifier that take a goto program as input, and ouptut formatted +/// counterexamples for counterexample-guided-synthesis. +class cegis_verifiert +{ +public: + cegis_verifiert( + const invariant_mapt &invariant_candidates, + goto_modelt &goto_model, + messaget &log) + : invariant_candidates(invariant_candidates), + goto_model(goto_model), + log(log) + { + } + + /// Verify `goto_model`. Return true if there is no violation. + /// Otherwise, sotre the formatted counterexample in `return_cex` + bool verify(); + + /// Result counterexample. + cext return_cex; + propertiest properties; + irep_idt first_violation; + +protected: + // Get the options same as using CBMC api without any flag, and + // preprocess `goto_model`. + // TODO: replace the checker with CBMC api once it is implemented. + optionst get_options(); + + // Compute the cause loop of `violation`. + // We say a loop is the cause loop if the violated predicate is dependent + // upon the write set of the loop. + loop_idt get_cause_loop_id( + const goto_tracet &goto_trace, + const goto_programt::const_targett violation); + + /// Restore transformed functions to original functions. + void restore_functions(); + + // Build counterexample from trace, and store it in `return_cex`. + cext build_cex( + const goto_tracet &goto_trace, + const source_locationt &loop_entry_loc); + + const invariant_mapt &invariant_candidates; + goto_modelt &goto_model; + messaget log; + + /// Map from function names to original functions. It is used to + /// restore functions with annotated loops to original functions. + std::map original_functions; +}; + +#endif // CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_CEGIS_VERIFIER_H diff --git a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp index 67dbc3ed068..a4f56632c6f 100644 --- a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp +++ b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp @@ -11,8 +11,68 @@ Author: Qinheping Hu #include "enumerative_loop_invariant_synthesizer.h" +#include +#include +#include +#include +#include + #include +#include "cegis_verifier.h" +#include "expr_enumerator.h" + +// substitute all tmp_post variables with their origins in `expr` +void substitute_tmp_post_rec( + exprt &dest, + const std::map &tmp_post_map) +{ + if(dest.id() != ID_address_of) + Forall_operands(it, dest) + substitute_tmp_post_rec(*it, tmp_post_map); + + // possibly substitute? + if(dest.id() == ID_symbol && tmp_post_map.count(dest)) + { + dest = tmp_post_map.at(dest); + } +} + +std::vector construct_terminals(std::set symbols) +{ + std::vector result = std::vector(); + for(const auto &e : symbols) + { + if(e.type().id() == ID_unsignedbv) + { + // For a variable v with primitive type, we add + // v, __CPROVER_loop_entry(v) + // into the result. + result.push_back(typecast_exprt(e, size_type())); + result.push_back( + typecast_exprt(unary_exprt(ID_loop_entry, e, e.type()), size_type())); + } + if((e.type().id() == ID_signedbv)) + { + result.push_back(e); + result.push_back(unary_exprt(ID_loop_entry, e, e.type())); + } + if((e.type().id() == ID_pointer)) + { + // For a variable v with pointer type, we add + // __CPROVER_pointer_offset(v), + // __CPROVER_pointer_offset(__CPROVER_loop_entry(v)) + // into the result. + result.push_back(unary_exprt(ID_pointer_offset, e, size_type())); + result.push_back(pointer_offset_exprt( + unary_exprt(ID_loop_entry, e, e.type()), size_type())); + } + } + result.push_back(from_integer(1, unsigned_int_type())); + result.push_back(from_integer(1, unsigned_long_int_type())); + return result; +} + void enumerative_loop_invariant_synthesizert::init_candidates() { for(auto &function_p : goto_model.goto_functions.function_map) @@ -32,19 +92,275 @@ void enumerative_loop_invariant_synthesizert::init_candidates() // we only synthesize invariants for unannotated loops if(loop_end->condition().find(ID_C_spec_loop_invariant).is_nil()) { - invariant_candiate_map[new_id] = true_exprt(); + // Store the loop guard. + exprt guard = + get_loop_head( + loop_end->loop_number, + goto_model.goto_functions.function_map[function_p.first]) + ->condition(); + neg_guards[new_id] = guard; + + // Initialize invariant clauses as `true`. + in_invariant_clause_map[new_id] = true_exprt(); + pos_invariant_clause_map[new_id] = true_exprt(); } } } } +void enumerative_loop_invariant_synthesizert::build_tmp_post_map() +{ + for(auto &goto_function : goto_model.goto_functions.function_map) + { + for(const auto &instruction : goto_function.second.body.instructions) + { + // tmp_post variables are symbol lhs of ASSIGN. + if(!instruction.is_assign() || instruction.assign_lhs().id() != ID_symbol) + continue; + + const auto symbol_lhs = + expr_try_dynamic_cast(instruction.assign_lhs()); + + // tmp_post variables have identifiers with the prefix tmp::tmp_post. + if( + id2string(symbol_lhs->get_identifier()).find("tmp::tmp_post") != + std::string::npos) + { + tmp_post_map[instruction.assign_lhs()] = instruction.assign_rhs(); + } + } + } +} + +std::set +enumerative_loop_invariant_synthesizert::compute_dependent_symbols( + const loop_idt &cause_loop_id, + const exprt &new_clause, + const std::set &live_vars) +{ + // We overapproximate dependent symbols as all symbols in live variables. + // TODO: using flow-dependency analysis to rule out not dependent symbols. + + std::set result; + for(const auto &e : live_vars) + find_symbols(e, result); + + return result; +} + +exprt enumerative_loop_invariant_synthesizert::synthesize_range_predicate( + const exprt &violated_predicate) +{ + // For the case where the violated predicate is dependent on no instruction + // other than loop havocing, the violated_predicate is + // WLP(loop_body_before_violation, violated_predicate). + // TODO: implement a more complete WLP algorithm. + return violated_predicate; +} + +exprt enumerative_loop_invariant_synthesizert::synthesize_same_object_predicate( + const exprt &checked_pointer) +{ + // The same object predicate says that the checked pointer points to the + // same object as it pointed before entering the loop. + // It works for the array-manuplating loops where only offset of pointer + // are modified but not the object pointers point to. + return same_object( + checked_pointer, unary_exprt(ID_loop_entry, checked_pointer)); +} + +exprt enumerative_loop_invariant_synthesizert::synthesize_strengthening_clause( + const std::vector terminal_symbols, + const loop_idt &cause_loop_id, + const irep_idt &violation_id) +{ + // Synthesis of strengthning clauses is a enumerate-and-check proecess. + // We first construct the enumerator for the following grammar. + // And then enumerate clause and check that if it can make the invariant + // inductive. + + // Initialize factory representing grammar + // StartBool -> StartBool && StartBool | Start == Start + // | StartBool <= StartBool | StartBool < StartBool + // Start -> Start + Start | terminal_symbols + // where a0, and a1 are symbol expressions. + namespacet ns(goto_model.symbol_table); + enumerator_factoryt factory = enumerator_factoryt(ns); + recursive_enumerator_placeholdert start_bool_ph(factory, "StartBool", ns); + recursive_enumerator_placeholdert start_ph(factory, "Start", ns); + + // terminals + expr_sett leafexprs(terminal_symbols.begin(), terminal_symbols.end()); + + // rules for Start + enumeratorst start_args; + // Start -> terminals + leaf_enumeratort leaf_g(leafexprs, ns); + start_args.push_back(&leaf_g); + + // Start -> Start + Start + binary_functional_enumeratort plus_g( + ID_plus, + start_ph, + start_ph, + [](const partitiont &partition) { + if(partition.size() <= 1) + return true; + return partition.front() == 1; + }, + ns); + start_args.push_back(&plus_g); + + // rules for StartBool + enumeratorst start_bool_args; + // StartBool -> StartBool && StartBool + binary_functional_enumeratort and_g(ID_and, start_bool_ph, start_bool_ph, ns); + start_bool_args.push_back(&and_g); + // StartBool -> Start == Start + binary_functional_enumeratort equal_g(ID_equal, start_ph, start_ph, ns); + start_bool_args.push_back(&equal_g); + // StartBool -> Start <= Start + binary_functional_enumeratort le_g(ID_le, start_ph, start_ph, ns); + start_bool_args.push_back(&le_g); + // StartBool -> Start < Start + binary_functional_enumeratort lt_g(ID_lt, start_ph, start_ph, ns); + start_bool_args.push_back(<_g); + + // add the two nonterminals to the factory + factory.attach_productions("Start", start_args); + factory.attach_productions("StartBool", start_bool_args); + + // size of candidates we are searching now, + // starting from 0 + size_t size_bound = 0; + + // numbers of candidates we have seen, + // used for quantitative analysis + size_t seen_terms = 0; + + // Start to enumerate and check. + while(true) + { + size_bound++; + + // generate candidate and verify + for(auto strengthening_candidate : start_bool_ph.enumerate(size_bound)) + { + seen_terms++; + invariant_mapt new_in_clauses = invariant_mapt(in_invariant_clause_map); + new_in_clauses[cause_loop_id] = + and_exprt(new_in_clauses[cause_loop_id], strengthening_candidate); + const auto &combined_invariant = combine_in_and_post_invariant_clauses( + new_in_clauses, pos_invariant_clause_map, neg_guards); + + // The verifier we use to check current invariant candidates. + cegis_verifiert verifier(combined_invariant, goto_model, log); + + // A good strengthening clause if + // 1. all checks pass, or + // 2. the loop invariant is inductive and hold upon the entry. + if( + verifier.verify() || (verifier.properties.at(violation_id).status != + property_statust::FAIL && + verifier.return_cex.cex_type != + cext::violation_typet::cex_not_hold_upon_entry)) + { + return strengthening_candidate; + } + } + } + UNREACHABLE; +} + invariant_mapt enumerative_loop_invariant_synthesizert::synthesize_all() { init_candidates(); + build_tmp_post_map(); + + // The invariants we are going to synthesize and verify are the combined + // invariants from in- and pos- invariant clauses. + auto combined_invariant = combine_in_and_post_invariant_clauses( + in_invariant_clause_map, pos_invariant_clause_map, neg_guards); + + // The verifier we use to check current invariant candidates. + cegis_verifiert verifier(combined_invariant, goto_model, log); + + // Set of symbols the violation may be dependent on. + // We enumerate strenghening clauses built from symbols from the set. + std::set dependent_symbols; + // Set of symbols we used to enumerate strenghening clauses. + std::vector terminal_symbols; + + while(!verifier.verify()) + { + exprt new_invariant_clause = true_exprt(); + + // Synthsize the new_clause + // We use difference strategies for different type of violations. + switch(verifier.return_cex.cex_type) + { + case cext::violation_typet::cex_out_of_boundary: + new_invariant_clause = + synthesize_range_predicate(verifier.return_cex.violated_predicate); + break; + + case cext::violation_typet ::cex_null_pointer: + new_invariant_clause = + synthesize_same_object_predicate(verifier.return_cex.checked_pointer); + break; + + case cext::violation_typet::cex_not_preserved: + terminal_symbols = construct_terminals(dependent_symbols); + new_invariant_clause = synthesize_strengthening_clause( + terminal_symbols, + verifier.return_cex.cause_loop_id, + verifier.first_violation); + break; + + case cext::violation_typet::cex_not_hold_upon_entry: + case cext::violation_typet::cex_other: + INVARIANT(false, "unsupported violation type"); + break; + } + + INVARIANT( + new_invariant_clause != true_exprt(), + "failed to synthesized meaningful clause"); + + // There could be tmp_post varialbes in the synthesized clause. + // We substitute them with their original variables. + substitute_tmp_post_rec(new_invariant_clause, tmp_post_map); + + // Update the dependent symbols. + dependent_symbols = compute_dependent_symbols( + verifier.return_cex.cause_loop_id, + new_invariant_clause, + verifier.return_cex.live_variables); + + // add the new cluase to the candidate invairants + if(verifier.return_cex.is_violation_in_loop) + { + in_invariant_clause_map[verifier.return_cex.cause_loop_id] = and_exprt( + in_invariant_clause_map[verifier.return_cex.cause_loop_id], + new_invariant_clause); + } + else + { + // violation happens pos-loop. + pos_invariant_clause_map[verifier.return_cex.cause_loop_id] = and_exprt( + pos_invariant_clause_map[verifier.return_cex.cause_loop_id], + new_invariant_clause); + } + + // Re-combine invariant clauses and update the candidate map. + combined_invariant = combine_in_and_post_invariant_clauses( + in_invariant_clause_map, pos_invariant_clause_map, neg_guards); + } + + log.result() << "result : " << log.green << "PASS" << messaget::eom + << log.reset; - // Now this method only generate true for all unnotated loops. - // The implementation will be added later. - return invariant_candiate_map; + return combined_invariant; } exprt enumerative_loop_invariant_synthesizert::synthesize(loop_idt loop_id) diff --git a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h index 160531198f4..97918c7e5a6 100644 --- a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h +++ b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h @@ -29,7 +29,7 @@ class enumerative_loop_invariant_synthesizert { public: enumerative_loop_invariant_synthesizert( - const goto_modelt &goto_model, + goto_modelt &goto_model, messaget &log) : loop_invariant_synthesizer_baset(goto_model, log) { @@ -44,7 +44,38 @@ class enumerative_loop_invariant_synthesizert /// Initialize invariants as true for all unannotated loops. void init_candidates(); - invariant_mapt invariant_candiate_map; + /// Scan all ASSIGN instructions to build the map from tmp_post variables + /// to their original variables. + void build_tmp_post_map(); + + /// Compute the depedent symbols for a loop with invariant-not-preserved + /// violation which happen after `new_clause` was added. + std::set compute_dependent_symbols( + const loop_idt &cause_loop_id, + const exprt &new_clause, + const std::set &live_vars); + + /// Synthesize range predicate for OOB violation with `violated_predicate`. + exprt synthesize_range_predicate(const exprt &violated_predicate); + + /// Synthesize same object predicate for null-pointer violation for + /// `checked_pointer`. + exprt synthesize_same_object_predicate(const exprt &checked_pointer); + + /// Synthesize strengthening clause for invariant-not-preserved violation. + exprt synthesize_strengthening_clause( + const std::vector terminal_symbols, + const loop_idt &cause_loop_id, + const irep_idt &violation_id); + + /// Synthesize invariant of form + /// (in_inv || !guard) && (!guard -> pos_inv) + invariant_mapt in_invariant_clause_map; + invariant_mapt pos_invariant_clause_map; + invariant_mapt neg_guards; + + /// Map from tmp_post variables to their original variables. + std::map tmp_post_map; }; // NOLINTNEXTLINE(whitespace/line_length) diff --git a/src/goto-instrument/synthesizer/expr_enumerator.cpp b/src/goto-instrument/synthesizer/expr_enumerator.cpp index 617a7aed25d..660e0ca87e3 100644 --- a/src/goto-instrument/synthesizer/expr_enumerator.cpp +++ b/src/goto-instrument/synthesizer/expr_enumerator.cpp @@ -316,7 +316,18 @@ exprt binary_functional_enumeratort::instantiate(const expr_listt &exprs) const exprs.size() == 2, "number of arguments should be 2: " + integer2string(exprs.size())); if(op_id == ID_equal) + { + if(exprs.front().type() != exprs.back().type()) + return true_exprt(); + return equal_exprt(exprs.front(), exprs.back()); + } + if(op_id == ID_notequal) + { + if(exprs.front().type() != exprs.back().type()) + return true_exprt(); + return notequal_exprt(exprs.front(), exprs.back()); + } if(op_id == ID_le) return less_than_or_equal_exprt(exprs.front(), exprs.back()); if(op_id == ID_lt) @@ -333,8 +344,6 @@ exprt binary_functional_enumeratort::instantiate(const expr_listt &exprs) const return plus_exprt(exprs.front(), exprs.back()); if(op_id == ID_minus) return minus_exprt(exprs.front(), exprs.back()); - if(op_id == ID_notequal) - return notequal_exprt(exprs.front(), exprs.back()); return binary_exprt(exprs.front(), op_id, exprs.back()); } diff --git a/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h b/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h index 1c2fbca73f8..cbc0c7aea6e 100644 --- a/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h +++ b/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h @@ -16,7 +16,8 @@ Author: Qinheping Hu #include "synthesizer_utils.h" -#define OPT_SYNTHESIZE_LOOP_INVARIANTS "(synthesize-loop-invariants)" +#define FLAG_SYNTHESIZE_LOOP_INVARIANTS "synthesize-loop-invariants" +#define OPT_SYNTHESIZE_LOOP_INVARIANTS "(" FLAG_SYNTHESIZE_LOOP_INVARIANTS ")" #define HELP_LOOP_INVARIANT_SYNTHESIZER \ " --synthesize-loop-invariants\n" \ " synthesize and apply loop invariants\n" @@ -32,7 +33,7 @@ class messaget; class loop_invariant_synthesizer_baset { public: - loop_invariant_synthesizer_baset(const goto_modelt &goto_model, messaget &log) + loop_invariant_synthesizer_baset(goto_modelt &goto_model, messaget &log) : goto_model(goto_model), log(log) { } @@ -48,7 +49,7 @@ class loop_invariant_synthesizer_baset virtual exprt synthesize(loop_idt) = 0; protected: - const goto_modelt &goto_model; + goto_modelt &goto_model; messaget &log; }; diff --git a/src/goto-instrument/synthesizer/module_dependencies.txt b/src/goto-instrument/synthesizer/module_dependencies.txt index 290dab44704..f422fb5621d 100644 --- a/src/goto-instrument/synthesizer/module_dependencies.txt +++ b/src/goto-instrument/synthesizer/module_dependencies.txt @@ -1,8 +1,14 @@ analyses ansi-c +assembler +contracts +cpp +goto-checker goto-instrument/contracts goto-instrument/synthesizer goto-instrument goto-programs langapi +pointer-analysis +solvers util diff --git a/src/goto-instrument/synthesizer/synthesizer_utils.cpp b/src/goto-instrument/synthesizer/synthesizer_utils.cpp index f1a618b1dc9..ddfca59a316 100644 --- a/src/goto-instrument/synthesizer/synthesizer_utils.cpp +++ b/src/goto-instrument/synthesizer/synthesizer_utils.cpp @@ -9,6 +9,11 @@ Author: Qinheping Hu #include "synthesizer_utils.h" #include +#include +#include + +#include +#include goto_programt::const_targett get_loop_end_from_loop_head_and_content( const goto_programt::const_targett &loop_head, @@ -94,14 +99,13 @@ get_loop_head(const unsigned int target_loop_number, goto_functiont &function) void annotate_invariants( const invariant_mapt &invariant_map, - goto_modelt &goto_model, - messaget &log) + goto_modelt &goto_model) { for(const auto &invariant_map_entry : invariant_map) { loop_idt loop_id = invariant_map_entry.first; irep_idt function_id = loop_id.function_id; - unsigned int loop_number = loop_id.loop_number; + unsigned int loop_number = loop_id.loop_number.value(); // get the last instruction of the target loop auto &function = goto_model.goto_functions.function_map[function_id]; @@ -112,3 +116,65 @@ void annotate_invariants( invariant_map_entry.second; } } + +bool is_instruction_in_transfomed_loop( + const loop_idt &loop_id, + const goto_functiont &function, + unsigned location_number_of_target) +{ + // The loop body after transformation are instructions from + // loop havocing instructions + // to + // loop end of transformed code. + + bool result = false; + unsigned location_number_of_havocing = UINT_MAX; + for(goto_programt::const_targett it = function.body.instructions.begin(); + it != function.body.instructions.end(); + ++it) + { + if(is_loop_havoc(*it) && it->loop_number == loop_id.loop_number.value()) + { + location_number_of_havocing = it->location_number; + } + + if( + is_transformed_loop_end(it) && + it->loop_number == loop_id.loop_number.value()) + { + result = + result || (location_number_of_havocing < location_number_of_target && + location_number_of_target < it->location_number); + } + } + + return result; +} + +std::size_t hex_to_size_t(const std::string &hex_str) +{ + std::istringstream converter(hex_str); + size_t result; + converter >> std::hex >> result; + return result; +} + +invariant_mapt combine_in_and_post_invariant_clauses( + const invariant_mapt &in_clauses, + const invariant_mapt &post_clauses, + const invariant_mapt &neg_guards) +{ + // Combine invariant + // (in_inv || !guard) && (!guard -> pos_inv) + invariant_mapt result; + for(const auto &in_clause : in_clauses) + { + const auto &id = in_clause.first; + INVARIANT(neg_guards.count(id), "Some loop guard is missing."); + + result[id] = and_exprt( + or_exprt(neg_guards.at(id), in_clause.second), + implies_exprt(neg_guards.at(id), post_clauses.at(id))); + } + return result; +} diff --git a/src/goto-instrument/synthesizer/synthesizer_utils.h b/src/goto-instrument/synthesizer/synthesizer_utils.h index 40620f7feac..2938774696a 100644 --- a/src/goto-instrument/synthesizer/synthesizer_utils.h +++ b/src/goto-instrument/synthesizer/synthesizer_utils.h @@ -9,6 +9,9 @@ Author: Qinheping Hu #ifndef CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_SYNTHESIZER_UTILS_H #define CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_SYNTHESIZER_UTILS_H +#include + +#include #include #include @@ -50,7 +53,23 @@ get_loop_head(const unsigned int loop_number, goto_functiont &function); /// loops. Corresponding loops are specified by keys of `invariant_map` void annotate_invariants( const invariant_mapt &invariant_map, - goto_modelt &goto_model, - messaget &log); + goto_modelt &goto_model); + +/// Decide whether the target instruction is in the body of the transformed loop +/// specified by `loop_id`. +bool is_instruction_in_transfomed_loop( + const loop_idt &loop_id, + const goto_functiont &function, + unsigned location_number_of_target); + +/// convert hex string to size_t. +std::size_t hex_to_size_t(const std::string &hex_str); + +/// Combine invariant of form +/// (in_inv || !guard) && (!guard -> pos_inv) +invariant_mapt combine_in_and_post_invariant_clauses( + const invariant_mapt &in_clauses, + const invariant_mapt &post_clauses, + const invariant_mapt &neg_guards); #endif // CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_SYNTHESIZER_UTILS_H diff --git a/src/goto-programs/CMakeLists.txt b/src/goto-programs/CMakeLists.txt index f0748416435..1d0cdcbbbea 100644 --- a/src/goto-programs/CMakeLists.txt +++ b/src/goto-programs/CMakeLists.txt @@ -3,4 +3,4 @@ add_library(goto-programs ${sources}) generic_includes(goto-programs) -target_link_libraries(goto-programs util assembler langapi analyses linking ansi-c) +target_link_libraries(goto-programs util assembler langapi analyses linking ansi-c json) diff --git a/src/goto-programs/loop_ids.h b/src/goto-programs/loop_ids.h index 69f77e00834..76c89b3334e 100644 --- a/src/goto-programs/loop_ids.h +++ b/src/goto-programs/loop_ids.h @@ -31,14 +31,28 @@ struct loop_idt { } + loop_idt() : function_id(""), loop_number() + { + } + + loop_idt(const loop_idt &other) + : function_id(other.function_id), loop_number(other.loop_number) + { + } + irep_idt function_id; - unsigned int loop_number; + optionalt loop_number; bool operator==(const loop_idt &o) const { return function_id == o.function_id && loop_number == o.loop_number; } + bool operator!=(const loop_idt &o) const + { + return !operator==(o); + } + bool operator<(const loop_idt &o) const { return function_id < o.function_id ||