From 8fa325f78f97149ab72dddb07dc1e02ac94f637f Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Mon, 5 Dec 2022 15:00:57 -0600 Subject: [PATCH 1/3] Add enumerative loop invariant synthesizer Implement the functionality described below. Motivation --- This loop invariant synthesizer use the idea counter-example-guided synthesis (CEGIS) to synthesize loop invariants for programs with only checks instrumented by `goto-instrument` with flag `--pointer-check`. This PR contain the driver of the synthesizer and the verifier we use to check invariant candidates. Verifier --- The verifier take as input a goto program with pointer checks and a map from loop id to loop invariant candidates. It first annotate and apply the loop invariant candidates into the goto model; and then simulate the CBMC api to verify the instrumented goto program. If there are some violations---loop invariants are not inductive, or some pointer checks fail---, it record valuation from trace generated by the back end to construct a formatted counterexample `cext`. Counterexample --- A counterexample `cext` record valuations of variables in the trace, and other information about the violation. The valuation we record including 1. set of live variables upon the entry of the loop. 2. the havoced value of all primitive-typed variables; 3. the havoced offset and the object size of all pointer-typed variables; 4. history values of 2 and 3. The valuations will be used as true positive (history values) and true negative (havcoed valuation) to filter out bad invariant clause with the idea of the Daikon invariant detector in a following PR. However, in this PR we only construct the valuation but not actually use them. Synthesizer --- Loop invariants we synthesize are of the form `` (in_clause || !guard) && (!guard -> pos_clause)`` where `in_clause` and `out_clause` are predicates we store in two different map, and `guard` is the loop guard. The idea is that we separately synthesize the condition `in_clause` that should hold before the execution of the loop body, and condition `pos_clause` that should hold as post-condition of the loop. When the violation happen in the loop, we update `in_clause`. When the violation happen after the loop, we update `pos_clause`. When the invariant candidate it not inductive, we enumerate strengthening clause to make it inductive. To be more efficient, we choose different synthesis strategy for different type of violation * For out-of-boundary violation, we choose to use the violated predicate as the new clause, which is the WLP of the violation if the violation is only dependent on the havocing instruction. **TODO**: to make it more complete, we need to implement a WLP algorithm * For null-pointer violation, we choose `__CPROVER_same_object(ptr, __CPROVER_loop_entry(ptr))` as the new clause. That is, the havoced pointer should points to the same object at the start of every iteration of the loop. It is a heuristic choice. This can be extended with the idea of alias analysis if needed. * For invariant-not-preserved violation, we enumerate strengthening clauses and check that if the invariant will be inductive after strengthening (disjunction with the new clause). The synthesizer works as follow 1. initialize the two invariant maps, 2. verify the current candidates built from the two maps, _a. return the candidate if there is no violation _b. synthesize a new clause to resolve the **first** violation and add it to the correct map, 3. repeat 2. The flag `synthesize-loop-invariants` will also apply synthesized loop contracts. --- .../main.c | 0 .../test.desc | 2 +- .../loop_contract_synthesis_02/main.c | 17 + .../loop_contract_synthesis_02/test.desc | 11 + .../loop_contract_synthesis_03/main.c | 16 + .../loop_contract_synthesis_03/test.desc | 12 + .../loop_contract_synthesis_04/main.c | 17 + .../loop_contract_synthesis_04/test.desc | 11 + src/Makefile | 3 +- src/analyses/dependence_graph.cpp | 47 ++ src/analyses/dependence_graph.h | 21 +- src/goto-instrument/CMakeLists.txt | 1 + src/goto-instrument/Makefile | 2 + src/goto-instrument/contracts/contracts.cpp | 10 +- src/goto-instrument/contracts/utils.cpp | 33 ++ src/goto-instrument/contracts/utils.h | 12 + .../goto_instrument_parse_options.cpp | 37 +- src/goto-instrument/havoc_utils.cpp | 13 +- src/goto-instrument/havoc_utils.h | 7 +- .../synthesizer/cegis_verifier.cpp | 518 ++++++++++++++++++ .../synthesizer/cegis_verifier.h | 142 +++++ ...enumerative_loop_invariant_synthesizer.cpp | 317 ++++++++++- .../enumerative_loop_invariant_synthesizer.h | 35 +- .../loop_invariant_synthesizer_base.h | 7 +- .../synthesizer/module_dependencies.txt | 6 + .../synthesizer/synthesizer_utils.cpp | 72 ++- .../synthesizer/synthesizer_utils.h | 23 +- src/goto-programs/loop_ids.h | 16 +- 28 files changed, 1374 insertions(+), 34 deletions(-) rename regression/contracts/{loop_invariant_synthesis_01 => loop_contract_synthesis_01}/main.c (100%) rename regression/contracts/{loop_invariant_synthesis_01 => loop_contract_synthesis_01}/test.desc (91%) create mode 100644 regression/contracts/loop_contract_synthesis_02/main.c create mode 100644 regression/contracts/loop_contract_synthesis_02/test.desc create mode 100644 regression/contracts/loop_contract_synthesis_03/main.c create mode 100644 regression/contracts/loop_contract_synthesis_03/test.desc create mode 100644 regression/contracts/loop_contract_synthesis_04/main.c create mode 100644 regression/contracts/loop_contract_synthesis_04/test.desc create mode 100644 src/goto-instrument/synthesizer/cegis_verifier.cpp create mode 100644 src/goto-instrument/synthesizer/cegis_verifier.h diff --git a/regression/contracts/loop_invariant_synthesis_01/main.c b/regression/contracts/loop_contract_synthesis_01/main.c similarity index 100% rename from regression/contracts/loop_invariant_synthesis_01/main.c rename to regression/contracts/loop_contract_synthesis_01/main.c diff --git a/regression/contracts/loop_invariant_synthesis_01/test.desc b/regression/contracts/loop_contract_synthesis_01/test.desc similarity index 91% rename from regression/contracts/loop_invariant_synthesis_01/test.desc rename to regression/contracts/loop_contract_synthesis_01/test.desc index 1ff43bb978d..71094e93b29 100644 --- a/regression/contracts/loop_invariant_synthesis_01/test.desc +++ b/regression/contracts/loop_contract_synthesis_01/test.desc @@ -1,6 +1,6 @@ CORE main.c ---synthesize-loop-invariants --apply-loop-contracts +--synthesize-loop-invariants ^EXIT=0$ ^SIGNAL=0$ ^\[main\.\d+\] line 10 Check loop invariant before entry: SUCCESS$ diff --git a/regression/contracts/loop_contract_synthesis_02/main.c b/regression/contracts/loop_contract_synthesis_02/main.c new file mode 100644 index 00000000000..09a782991fd --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_02/main.c @@ -0,0 +1,17 @@ +#define SIZE 80 + +void main() +{ + unsigned len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + unsigned s = 0; + + for(unsigned i = 0; i < SIZE; ++i) + { + if(i == len - 1) + break; + s += array[i]; + } +} diff --git a/regression/contracts/loop_contract_synthesis_02/test.desc b/regression/contracts/loop_contract_synthesis_02/test.desc new file mode 100644 index 00000000000..47012b9e16e --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_02/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +--pointer-check --synthesize-loop-invariants +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that loop invariant with form of range predicates can be correctly +synthesized for programs with only pointer checks but no other assertions. diff --git a/regression/contracts/loop_contract_synthesis_03/main.c b/regression/contracts/loop_contract_synthesis_03/main.c new file mode 100644 index 00000000000..be272d35f2c --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_03/main.c @@ -0,0 +1,16 @@ +#define SIZE 80 + +void main() +{ + unsigned long len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + const char *end = array + len; + unsigned s = 0; + + while(array != end) + { + s += *array++; + } +} diff --git a/regression/contracts/loop_contract_synthesis_03/test.desc b/regression/contracts/loop_contract_synthesis_03/test.desc new file mode 100644 index 00000000000..373ef9eb38f --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_03/test.desc @@ -0,0 +1,12 @@ +CORE +main.c +--pointer-check --synthesize-loop-invariants +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that loop invariant with form of range predicates and same +object predicates can be correctly synthesized for programs with only pointer +checks but no other assertions. diff --git a/regression/contracts/loop_contract_synthesis_04/main.c b/regression/contracts/loop_contract_synthesis_04/main.c new file mode 100644 index 00000000000..8e75063dc4a --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_04/main.c @@ -0,0 +1,17 @@ +#define SIZE 80 + +void main() +{ + unsigned long len; + __CPROVER_assume(len <= SIZE); + __CPROVER_assume(len >= 8); + char *array = malloc(len); + unsigned long s = 0; + + unsigned long j = 0; + for(unsigned long i = 0; i < len; i++) + { + s += array[j]; + j++; + } +} diff --git a/regression/contracts/loop_contract_synthesis_04/test.desc b/regression/contracts/loop_contract_synthesis_04/test.desc new file mode 100644 index 00000000000..2f9fbe96d87 --- /dev/null +++ b/regression/contracts/loop_contract_synthesis_04/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +--pointer-check --synthesize-loop-invariants +^EXIT=0$ +^SIGNAL=0$ +^\[main.pointer\_dereference.\d+\] .* SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test shows that the loop-invariant synthesizer can enumerate +strengthening clauses for invariant-not-preserved violation. diff --git a/src/Makefile b/src/Makefile index 698292d6e60..f7bc64e60dd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -78,7 +78,8 @@ goto-harness.dir: util.dir goto-programs.dir langapi.dir linking.dir \ goto-instrument.dir goto-instrument.dir: languages goto-programs.dir pointer-analysis.dir \ - goto-symex.dir linking.dir analyses.dir solvers.dir + goto-symex.dir linking.dir analyses.dir solvers.dir \ + goto-checker.dir goto-checker.dir: solvers.dir goto-symex.dir goto-programs.dir diff --git a/src/analyses/dependence_graph.cpp b/src/analyses/dependence_graph.cpp index 86ce3cc8f5b..8d944ddcb0e 100644 --- a/src/analyses/dependence_graph.cpp +++ b/src/analyses/dependence_graph.cpp @@ -383,6 +383,53 @@ void dependence_grapht::add_dep( nodes[n_to].in[n_from].add(kind); } +bool dependence_grapht::is_flow_dependent( + const goto_programt::const_targett &from, + const goto_programt::const_targett &to) +{ + std::set visited = std::set(); + const dep_graph_domaint from_domain = static_cast( + *storage->abstract_state_before(from, *domain_factory)); + const dep_graph_domaint to_domain = static_cast( + *storage->abstract_state_before(to, *domain_factory)); + return is_flow_dependent(from_domain, to_domain, visited); +} + +bool dependence_grapht::is_flow_dependent( + const dep_graph_domaint &from, + const dep_graph_domaint &to, + std::set &visited) +{ + // `to` is control dependent on `from`? + for(const auto node : to.get_control_deps()) + { + if(visited.count((*this)[node].get_node_id())) + continue; + + visited.insert((*this)[node].get_node_id()); + + if( + from.get_node_id() == (*this)[node].get_node_id() || + is_flow_dependent(from, (*this)[node], visited)) + return true; + } + + // `to` is data dependent on `from`? + for(const auto node : to.get_data_deps()) + { + if(visited.count((*this)[node].get_node_id())) + continue; + + visited.insert((*this)[node].get_node_id()); + + if( + from.get_node_id() == (*this)[node].get_node_id() || + is_flow_dependent(from, (*this)[node], visited)) + return true; + } + return false; +} + void dep_graph_domaint::populate_dep_graph( dependence_grapht &dep_graph, goto_programt::const_targett this_loc) const { diff --git a/src/analyses/dependence_graph.h b/src/analyses/dependence_graph.h index dadf90638dd..63f8faf4306 100644 --- a/src/analyses/dependence_graph.h +++ b/src/analyses/dependence_graph.h @@ -67,6 +67,7 @@ class dep_graph_domaint:public ai_domain_baset { public: typedef grapht::node_indext node_indext; + typedef std::set depst; explicit dep_graph_domaint(node_indext id) : has_values(false), node_id(id), has_changed(false) @@ -167,13 +168,20 @@ class dep_graph_domaint:public ai_domain_baset void populate_dep_graph( dependence_grapht &, goto_programt::const_targett) const; + depst get_control_deps() const + { + return control_deps; + } + depst get_data_deps() const + { + return data_deps; + } + private: tvt has_values; node_indext node_id; bool has_changed; - typedef std::set depst; - // Set of locations with control instructions on which the instruction at this // location has a control dependency on depst control_deps; @@ -279,7 +287,16 @@ class dependence_grapht: return rd; } + /// Decide whether the instruction `to` is flow dependent on `from`. + bool is_flow_dependent( + const goto_programt::const_targett &from, + const goto_programt::const_targett &to); + protected: + bool is_flow_dependent( + const dep_graph_domaint &from, + const dep_graph_domaint &to, + std::set &visited); friend dep_graph_domain_factoryt; friend dep_graph_domaint; const namespacet &ns; diff --git a/src/goto-instrument/CMakeLists.txt b/src/goto-instrument/CMakeLists.txt index a1edca91f9a..3fdfe75bba7 100644 --- a/src/goto-instrument/CMakeLists.txt +++ b/src/goto-instrument/CMakeLists.txt @@ -13,6 +13,7 @@ target_link_libraries(goto-instrument-lib linking big-int goto-programs + goto-checker goto-symex assembler pointer-analysis diff --git a/src/goto-instrument/Makefile b/src/goto-instrument/Makefile index 640a1a2e3e1..4438a041720 100644 --- a/src/goto-instrument/Makefile +++ b/src/goto-instrument/Makefile @@ -82,6 +82,7 @@ SRC = accelerate/accelerate.cpp \ splice_call.cpp \ stack_depth.cpp \ synthesizer/enumerative_loop_invariant_synthesizer.cpp \ + synthesizer/cegis_verifier.cpp \ synthesizer/expr_enumerator.cpp \ synthesizer/synthesizer_utils.cpp \ thread_instrumentation.cpp \ @@ -106,6 +107,7 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \ ../cpp/cpp$(LIBEXT) \ ../linking/linking$(LIBEXT) \ ../big-int/big-int$(LIBEXT) \ + ../goto-checker/goto-checker$(LIBEXT) \ ../goto-programs/goto-programs$(LIBEXT) \ ../goto-symex/goto-symex$(LIBEXT) \ ../assembler/assembler$(LIBEXT) \ diff --git a/src/goto-instrument/contracts/contracts.cpp b/src/goto-instrument/contracts/contracts.cpp index 7bbe5ee90f7..56bd0381944 100644 --- a/src/goto-instrument/contracts/contracts.cpp +++ b/src/goto-instrument/contracts/contracts.cpp @@ -142,7 +142,7 @@ void code_contractst::check_apply_loop_contracts( // i.e., the loop guard was satisfied. const auto entered_loop = new_tmp_symbol( - bool_typet(), loop_head_location, mode, symbol_table, "__entered_loop") + bool_typet(), loop_head_location, mode, symbol_table, ENTERED_LOOP) .symbol_expr(); pre_loop_head_instrs.add( goto_programt::make_decl(entered_loop, loop_head_location)); @@ -173,7 +173,7 @@ void code_contractst::check_apply_loop_contracts( // instrumentation of the loop. const auto in_base_case = new_tmp_symbol( - bool_typet(), loop_head_location, mode, symbol_table, "__in_base_case") + bool_typet(), loop_head_location, mode, symbol_table, IN_BASE_CASE) .symbol_expr(); pre_loop_head_instrs.add( goto_programt::make_decl(in_base_case, loop_head_location)); @@ -294,6 +294,12 @@ void code_contractst::check_apply_loop_contracts( // Generate havocing code for assignment targets. havoc_assigns_targetst havoc_gen(to_havoc, ns); havoc_gen.append_full_havoc_code(loop_head_location, pre_loop_head_instrs); + // Add loop number to the havoc_code + for(auto &havoc_instr : pre_loop_head_instrs.instructions) + { + if(is_loop_havoc(havoc_instr)) + havoc_instr.loop_number = loop_end->loop_number; + } // Insert the second block of pre_loop_head_instrs: the havocing code. // We do not `add_pragma_disable_assigns_check`, diff --git a/src/goto-instrument/contracts/utils.cpp b/src/goto-instrument/contracts/utils.cpp index 5570a62a194..e3c25fee076 100644 --- a/src/goto-instrument/contracts/utils.cpp +++ b/src/goto-instrument/contracts/utils.cpp @@ -442,3 +442,36 @@ void generate_history_variables_initialization( // Add all the history variable initialization instructions program.destructive_append(history); } + +bool is_transformed_loop_end(const goto_programt::const_targett &target) +{ + // The end of the loop end of transformed loop is + // ASSIGN entered_loop = true + if(!target->is_assign()) + return false; + + return from_expr(target->assign_lhs()).find("__entered_loop") != + std::string::npos && + target->assign_rhs() == true_exprt(); +} + +bool is_assignment_to_instrumented_variable( + const goto_programt::const_targett &target, + std::string var_name) +{ + INVARIANT( + var_name == IN_BASE_CASE || var_name == ENTERED_LOOP, + "var_name is not of instrumented variables."); + + if(!target->is_assign()) + return false; + + if(can_cast_expr(target->assign_lhs())) + { + const auto &lhs = to_symbol_expr(target->assign_lhs()); + return id2string(lhs.get_identifier()).find("::" + var_name) != + std::string::npos; + } + + return false; +} diff --git a/src/goto-instrument/contracts/utils.h b/src/goto-instrument/contracts/utils.h index 99595940b90..7f369cf52fd 100644 --- a/src/goto-instrument/contracts/utils.h +++ b/src/goto-instrument/contracts/utils.h @@ -17,6 +17,9 @@ Date: September 2021 #include +#define IN_BASE_CASE "__in_base_case" +#define ENTERED_LOOP "__entered_loop" + /// \brief A class that overrides the low-level havocing functions in the base /// utility class, to havoc only when expressions point to valid memory, /// i.e. if all dereferences within an expression are valid @@ -205,4 +208,13 @@ void generate_history_variables_initialization( const irep_idt &mode, goto_programt &program); +/// Return true if `target` is the loop end of some transformed code. +bool is_transformed_loop_end(const goto_programt::const_targett &target); + +/// Return true if `target` is an assignment to an instrumented variable with +/// name `var_name`. +bool is_assignment_to_instrumented_variable( + const goto_programt::const_targett &target, + std::string var_name); + #endif // CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 1a21921c827..c700a835628 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -1148,16 +1148,6 @@ void goto_instrument_parse_optionst::instrument_goto_program() goto_model.goto_functions.update(); } - if(cmdline.isset("synthesize-loop-invariants")) - { - log.warning() << "Loop invariant synthesizer is still work in progress. " - "It only generates TRUE as invariants." - << messaget::eom; - - // Synthesize loop invariants and annotate them into `goto_model` - enumerative_loop_invariant_synthesizert synthesizer(goto_model, log); - annotate_invariants(synthesizer.synthesize_all(), goto_model, log); - } bool use_dfcc = cmdline.isset(FLAG_DFCC); if(use_dfcc) @@ -1432,6 +1422,33 @@ void goto_instrument_parse_optionst::instrument_goto_program() goto_check_c(options, goto_model, ui_message_handler); transform_assertions_assumptions(options, goto_model); + if(cmdline.isset(FLAG_SYNTHESIZE_LOOP_INVARIANTS)) + { + if(cmdline.isset(FLAG_LOOP_CONTRACTS)) + { + throw invalid_command_line_argument_exceptiont( + "Incompatible options detected", + "--" FLAG_SYNTHESIZE_LOOP_INVARIANTS " and --" FLAG_LOOP_CONTRACTS, + "Use either --" FLAG_SYNTHESIZE_LOOP_INVARIANTS + " or --" FLAG_LOOP_CONTRACTS); + } + + log.warning() << "Loop invariant synthesizer is still work in progress. " + "It only generates TRUE as invariants." + << messaget::eom; + + // Synthesize loop invariants and annotate them into `goto_model` + enumerative_loop_invariant_synthesizert synthesizer(goto_model, log); + annotate_invariants(synthesizer.synthesize_all(), goto_model); + + // Apply loop contracts. + std::set to_exclude_from_nondet_static( + cmdline.get_values("nondet-static-exclude").begin(), + cmdline.get_values("nondet-static-exclude").end()); + code_contractst contracts(goto_model, log); + contracts.apply_loop_contracts(to_exclude_from_nondet_static); + } + // check for uninitalized local variables if(cmdline.isset("uninitialized-check")) { diff --git a/src/goto-instrument/havoc_utils.cpp b/src/goto-instrument/havoc_utils.cpp index f7d6dc5c5c7..89e9f1c98e2 100644 --- a/src/goto-instrument/havoc_utils.cpp +++ b/src/goto-instrument/havoc_utils.cpp @@ -16,7 +16,14 @@ Date: July 2021 #include #include -#include +#define IS_LOOP_HAVOC "Loop havocing instrumented by applying loop contracts." + +bool is_loop_havoc(const goto_programt::instructiont &instruction) +{ + if(id2string(instruction.source_location().get_comment()) == IS_LOOP_HAVOC) + return true; + return false; +} void havoc_utilst::append_full_havoc_code( const source_locationt location, @@ -52,6 +59,8 @@ void havoc_utilst::append_object_havoc_code_for_expr( havoc.add_source_location() = location; havoc.add_to_operands(expr); dest.add(goto_programt::make_other(havoc, location)); + dest.instructions.back().source_location_nonconst().set_comment( + IS_LOOP_HAVOC); } void havoc_utilst::append_scalar_havoc_code_for_expr( @@ -62,4 +71,6 @@ void havoc_utilst::append_scalar_havoc_code_for_expr( side_effect_expr_nondett rhs(expr.type(), location); dest.add(goto_programt::make_assignment( code_assignt{expr, std::move(rhs), location}, location)); + dest.instructions.back().source_location_nonconst().set_comment( + IS_LOOP_HAVOC); } diff --git a/src/goto-instrument/havoc_utils.h b/src/goto-instrument/havoc_utils.h index 19a0ca57815..ad0cc9cc334 100644 --- a/src/goto-instrument/havoc_utils.h +++ b/src/goto-instrument/havoc_utils.h @@ -17,12 +17,15 @@ Date: July 2021 #include #include -#include +#include -class goto_programt; +#include typedef std::set assignst; +/// Return true if `instruction` is a loop havoc instruction. +bool is_loop_havoc(const goto_programt::instructiont &instruction); + /// \brief A class containing utility functions for havocing expressions. class havoc_utils_is_constantt : public is_constantt { diff --git a/src/goto-instrument/synthesizer/cegis_verifier.cpp b/src/goto-instrument/synthesizer/cegis_verifier.cpp new file mode 100644 index 00000000000..ac7b182c3cb --- /dev/null +++ b/src/goto-instrument/synthesizer/cegis_verifier.cpp @@ -0,0 +1,518 @@ +/*******************************************************************\ + +Module: Verifier for Counterexample-Guided Synthesis + +Author: Qinheping Hu + +\*******************************************************************/ + +/// \file +/// Verifier for Counterexample-Guided Synthesis + +#include "cegis_verifier.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool contains_symbol_prefix(const exprt &expr, const std::string &prefix) +{ + if( + expr.id() == ID_symbol && + has_prefix(id2string(to_symbol_expr(expr).get_identifier()), prefix)) + { + return true; + } + + forall_operands(it, expr) + { + if(contains_symbol_prefix(*it, prefix)) + return true; + } + return false; +} + +optionst cegis_verifiert::get_options() +{ + optionst options; + + // Default options + // These options are the same as we run CBMC without any set flag. + options.set_option("built-in-assertions", true); + options.set_option("propagation", true); + options.set_option("simple-slice", true); + options.set_option("simplify", true); + options.set_option("show-goto-symex-steps", false); + options.set_option("show-points-to-sets", false); + options.set_option("show-array-constraints", false); + options.set_option("built-in-assertions", true); + options.set_option("assertions", true); + options.set_option("assumptions", true); + options.set_option("arrays-uf", "auto"); + options.set_option("depth", UINT32_MAX); + options.set_option("exploration-strategy", "lifo"); + options.set_option("symex-cache-dereferences", false); + options.set_option("rewrite-union", true); + options.set_option("self-loops-to-assumptions", true); + options.set_option("trace", true); + + // Preporcess `goto_model`. Copied from `cbmc_parse_options.cpp`. + remove_asm(goto_model); + link_to_library( + goto_model, log.get_message_handler(), cprover_cpp_library_factory); + link_to_library( + goto_model, log.get_message_handler(), cprover_c_library_factory); + process_goto_program(goto_model, options, log); + + add_failed_symbols(goto_model.symbol_table); + + remove_skip(goto_model); + label_properties(goto_model); + return options; +} + +loop_idt cegis_verifiert::get_cause_loop_id( + const goto_tracet &goto_trace, + const goto_programt::const_targett violation) +{ + loop_idt result; + + // build the dependence graph + const namespacet ns(goto_model.symbol_table); + dependence_grapht dependence_graph(ns); + dependence_graph(goto_model); + + // A loop is the cause loop if the violation is dependent on the write set of + // the loop. + for(const auto &step : goto_trace.steps) + { + // Being dependent on a write set is equivalent to being dependent on one + // of the loop havocing instruction. + if(is_loop_havoc(*step.pc)) + { + // Checking if `to` is dependent on `from`. + // `from` : loop havocing + // `to` : violation + goto_programt::const_targett from; + goto_programt::const_targett to; + + // Get `from`---a loop havoc instruction. + irep_idt from_fun_name = step.function_id; + const goto_functionst::goto_functiont &from_function = + goto_model.get_goto_function(from_fun_name); + for(goto_programt::const_targett it = + from_function.body.instructions.begin(); + it != from_function.body.instructions.end(); + ++it) + { + if(it->location_number == step.pc->location_number) + { + from = it; + } + } + + // Get `to`---the instruction where the violation happens + irep_idt to_fun_name = goto_trace.get_last_step().function_id; + const goto_functionst::goto_functiont &to_function = + goto_model.get_goto_function(to_fun_name); + for(goto_programt::const_targett it = + to_function.body.instructions.begin(); + it != to_function.body.instructions.end(); + ++it) + { + if(it->location_number == violation->location_number) + { + to = it; + } + } + + // The violation is caused by the loop havoc + // if it is dependent on the loop havoc. + if(dependence_graph.is_flow_dependent(from, to)) + { + // TODO: not safe for inlined static functions. + result.function_id = step.pc->source_location().get_function(); + result.loop_number = step.pc->loop_number; + } + } + } + return result; +} + +cext cegis_verifiert::build_cex( + const goto_tracet &goto_trace, + const source_locationt &loop_entry_loc) +{ + const namespacet ns(goto_model.symbol_table); + + // Valuations of havoced variables right after havoc instructions. + std::map object_sizes; + std::map havoced_values; + std::map havoced_pointer_offsets; + + // Loop-entry valuations of havoced variables. + std::map loop_entry_values; + std::map loop_entry_offsets; + + // Live variables upon the loop head. + std::set live_variables; + + bool entered_loop = false; + + // Scan the trace step by step to store needed valuations. + for(const auto &step : goto_trace.steps) + { + switch(step.type) + { + case goto_trace_stept::typet::DECL: + case goto_trace_stept::typet::ASSIGNMENT: + { + if(!step.full_lhs_value.is_nil()) + { + // Entered loop? + if(is_assignment_to_instrumented_variable(step.pc, ENTERED_LOOP)) + entered_loop = step.full_lhs_value == true_exprt(); + + // skip hidden steps + if(step.hidden) + break; + + // Live variables + // 1. must be in the same function as the target loop; + // 2. alive before entering the target loop; + // 3. a pointer or a primitive-typed variable; + // TODO: add support for union pointer + if( + step.pc->source_location().get_function() == + loop_entry_loc.get_function() && + !entered_loop && + (step.full_lhs.type().id() == ID_unsignedbv || + step.full_lhs.type().id() == ID_pointer) && + step.full_lhs.id() == ID_symbol) + { + const auto &symbol = + expr_try_dynamic_cast(step.full_lhs); + + // malloc_size is not-hidden tmp variable. + if(id2string(symbol->get_identifier()) != "malloc::malloc_size") + { + live_variables.emplace(step.full_lhs); + } + } + + // Record valuation of primitive-typed variable. + if(step.full_lhs.type().id() == ID_unsignedbv) + { + // Store the value into the map for loop_entry value if we haven't + // entered the loop. + if(!entered_loop) + { + loop_entry_values[step.full_lhs] = + hex_to_size_t(step.full_lhs_value.get_string(ID_value)); + } + + // Store the value into the the map for havoced value if this step + // is a loop havocing instruction. + if(is_loop_havoc(*step.pc)) + { + havoced_values[step.full_lhs] = + hex_to_size_t(step.full_lhs_value.get_string(ID_value)); + } + } + + // Record object_size, pointer_offset, and loop_entry(pointer_offset). + if( + can_cast_expr( + step.full_lhs_value) && + contains_symbol_prefix( + step.full_lhs_value, SYMEX_DYNAMIC_PREFIX "dynamic_object")) + { + const auto &pointer_constant_expr = + expr_try_dynamic_cast( + step.full_lhs_value); + + pointer_arithmetict pointer_arithmetic( + pointer_constant_expr->symbolic_pointer()); + if(pointer_constant_expr->symbolic_pointer().id() == ID_typecast) + { + pointer_arithmetic = pointer_arithmetict( + pointer_constant_expr->symbolic_pointer().operands()[0]); + } + + // Extract object size. + exprt &underlying_array = pointer_arithmetic.pointer; + // Object sizes are stored in the type of underlying arrays. + while(!can_cast_type(underlying_array.type())) + { + if( + underlying_array.id() == ID_address_of || + underlying_array.id() == ID_index) + { + underlying_array = underlying_array.operands()[0]; + continue; + } + UNREACHABLE; + } + std::size_t object_size = + pointer_offset_size(to_array_type(underlying_array.type()), ns) + .value() + .to_ulong(); + object_sizes[step.full_lhs] = object_size; + + // Extract offsets. + std::size_t offset = 0; + if(pointer_arithmetic.offset.is_not_nil()) + { + constant_exprt offset_expr = + expr_dynamic_cast(pointer_arithmetic.offset); + offset = hex_to_size_t(id2string(offset_expr.get_value())); + } + + // Store the offset into the map for loop_entry if we haven't + // entered the loop. + if(!entered_loop) + { + loop_entry_offsets[step.full_lhs] = offset; + } + + // Store the offset into the the map for havoced offset if this step + // is a loop havocing instruction. + if(is_loop_havoc(*step.pc)) + { + havoced_pointer_offsets[step.full_lhs] = offset; + } + } + } + } + + case goto_trace_stept::typet::ASSERT: + case goto_trace_stept::typet::FUNCTION_CALL: + case goto_trace_stept::typet::FUNCTION_RETURN: + case goto_trace_stept::typet::ASSUME: + case goto_trace_stept::typet::LOCATION: + case goto_trace_stept::typet::GOTO: + case goto_trace_stept::typet::OUTPUT: + case goto_trace_stept::typet::INPUT: + case goto_trace_stept::typet::SPAWN: + case goto_trace_stept::typet::MEMORY_BARRIER: + case goto_trace_stept::typet::ATOMIC_BEGIN: + case goto_trace_stept::typet::ATOMIC_END: + case goto_trace_stept::typet::DEAD: + case goto_trace_stept::typet::CONSTRAINT: + case goto_trace_stept::typet::SHARED_READ: + case goto_trace_stept::typet::SHARED_WRITE: + break; + + case goto_trace_stept::typet::NONE: + UNREACHABLE; + } + } + + return cext( + object_sizes, + havoced_values, + havoced_pointer_offsets, + loop_entry_values, + loop_entry_offsets, + live_variables); +} + +void cegis_verifiert::restore_functions() +{ + for(const auto &fun_entry : goto_model.goto_functions.function_map) + { + irep_idt fun_name = fun_entry.first; + goto_model.goto_functions.function_map[fun_name].body.swap( + original_functions[fun_name]); + } +} + +bool cegis_verifiert::verify() +{ + // This method does the following three things to verify the `goto_model` and + // return a formatted counterexample if there is any violated property. + // + // 1. annotate and apply the loop contracts stored in `invariant_candidates`. + // + // 2. run the CBMC API to verify the intrumented goto model. As the API is not + // merged yet, we preprocess the goto model and run the symex checker on it + // to simulate CBMC API. + // FIXEME: ^^^ replace the symex checker once the real API is merged. + // + // 3. construct the formatted counterexample from the violated property and + // its trace. + + // Store the original functions. We will restore them after the verification. + for(const auto &fun_entry : goto_model.goto_functions.function_map) + { + original_functions[fun_entry.first].copy_from(fun_entry.second.body); + } + + // Annotate the candidates tot the goto_model for checking. + annotate_invariants(invariant_candidates, goto_model); + + // Control verbosity. + // We allow non-error output message only when verbosity is set to at least 9. + const unsigned original_verbosity = log.get_message_handler().get_verbosity(); + if(original_verbosity < 9) + log.get_message_handler().set_verbosity(1); + + // Apply loop contracts we annotated. + code_contractst cont(goto_model, log); + cont.apply_loop_contracts(); + + // Get the checker same as CBMC api without any flag. + // TODO: replace the checker with CBMC api once it is implemented. + ui_message_handlert ui_message_handler(log.get_message_handler()); + const auto options = get_options(); + std::unique_ptr< + all_properties_verifier_with_trace_storaget> + checker = util_make_unique< + all_properties_verifier_with_trace_storaget>( + options, ui_message_handler, goto_model); + + // Run the checker to get the result. + const resultt result = (*checker)(); + + if(original_verbosity >= 9) + checker->report(); + + // Restore the verbosity. + log.get_message_handler().set_verbosity(original_verbosity); + + // + // Strat to counstruct the counterexample. + // + + if(result == resultt::PASS) + { + restore_functions(); + return true; + } + + if(result == resultt::ERROR || result == resultt::UNKNOWN) + { + INVARIANT(false, "Verification failed during loop contract synthesis."); + } + + properties = checker->get_properties(); + // Find the violation and construct conterexample from its trace. + for(const auto &property_it : properties) + { + if(property_it.second.status != property_statust::FAIL) + continue; + + first_violation = property_it.first; + exprt violated_predicate = property_it.second.pc->condition(); + + // The pointer checked in the null-pointer-check violation. + exprt checked_pointer = true_exprt(); + + // Type of the violation + cext::violation_typet violation_type = cext::violation_typet::cex_other; + + // The violation is a pointer OOB check. + if((property_it.second.description.find( + "dereference failure: pointer outside object bounds in") != + std::string::npos)) + { + violation_type = cext::violation_typet::cex_out_of_boundary; + } + + // The violation is a null pointer check. + if(property_it.second.description.find("pointer NULL") != std::string::npos) + { + violation_type = cext::violation_typet::cex_null_pointer; + checked_pointer = property_it.second.pc->condition() + .operands()[0] + .operands()[1] + .operands()[0]; + INVARIANT(checked_pointer.id() == ID_symbol, "Checking pointer symbol"); + } + + // The violation is a loop-invariant-preservation check. + if(property_it.second.description.find("preserved") != std::string::npos) + { + violation_type = cext::violation_typet::cex_not_preserved; + } + + // The violation is a loop-invariant-preservation check. + if( + property_it.second.description.find("invariant before entry") != + std::string::npos) + { + violation_type = cext::violation_typet::cex_not_hold_upon_entry; + } + + // The loop which could be the cause of the violation. + // We say a loop is the cause loop if the violated predicate is dependent + // upon the write set of the loop. + loop_idt cause_loop_id = get_cause_loop_id( + checker->get_traces()[property_it.first], property_it.second.pc); + + if(!cause_loop_id.loop_number.has_value()) + { + log.debug() << "No cause loop found!\n"; + restore_functions(); + + return_cex.cause_loop_id = cause_loop_id; + return_cex.cex_type = violation_type; + return false; + } + + log.debug() << "Found cause loop with function id: " + << cause_loop_id.function_id + << ", and loop number: " << cause_loop_id.loop_number.value() + << "\n"; + + // Decide whether the violation is in the cause loop. + bool violation_in_loop = is_instruction_in_transfomed_loop( + cause_loop_id, + goto_model.get_goto_function(cause_loop_id.function_id), + property_it.second.pc->location_number); + + // We always strengthen in_clause if the violation is + // invariant-not-preserved. + if(violation_type == cext::violation_typet::cex_not_preserved) + violation_in_loop = true; + + restore_functions(); + + return_cex = build_cex( + checker->get_traces()[property_it.first], + get_loop_head( + cause_loop_id.loop_number.value(), + goto_model.goto_functions.function_map[cause_loop_id.function_id]) + ->source_location()); + return_cex.violated_predicate = violated_predicate; + return_cex.cause_loop_id = cause_loop_id; + return_cex.checked_pointer = checked_pointer; + return_cex.is_violation_in_loop = violation_in_loop; + return_cex.cex_type = violation_type; + + return false; + } + + UNREACHABLE; +} diff --git a/src/goto-instrument/synthesizer/cegis_verifier.h b/src/goto-instrument/synthesizer/cegis_verifier.h new file mode 100644 index 00000000000..b80f55d266a --- /dev/null +++ b/src/goto-instrument/synthesizer/cegis_verifier.h @@ -0,0 +1,142 @@ +/*******************************************************************\ + +Module: Verifier for Counterexample-Guided Synthesis + +Author: Qinheping Hu + +\*******************************************************************/ + +/// \file +/// Verifier for Counterexample-Guided Synthesis + +#ifndef CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_CEGIS_VERIFIER_H +#define CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_CEGIS_VERIFIER_H + +#include +#include + +#include + +#include "synthesizer_utils.h" + +class messaget; + +/// Formatted counterexample. +class cext +{ +public: + enum class violation_typet + { + cex_out_of_boundary, + cex_null_pointer, + cex_not_preserved, + cex_not_hold_upon_entry, + cex_other + }; + + cext( + const std::map &object_sizes, + const std::map &havoced_values, + const std::map &havoced_pointer_offsets, + const std::map &loop_entry_values, + const std::map &loop_entry_offsets, + const std::set &live_variables) + : object_sizes(object_sizes), + havoced_values(havoced_values), + havoced_pointer_offsets(havoced_pointer_offsets), + loop_entry_values(loop_entry_values), + loop_entry_offsets(loop_entry_offsets), + live_variables(live_variables) + { + } + + cext() + { + } + + // pointer that failed the null pointer check + exprt checked_pointer; + exprt violated_predicate; + + // true if the violation happens in the cause loop + // false if the violation happens after the cause loop + bool is_violation_in_loop = true; + + // We collect havoced evaluations of havoced variables and their object sizes + // and pointer offsets. + + // __CPROVER_OBJECT_SIZE + std::map object_sizes; + // all the valuation of havoced variables with primitived type. + std::map havoced_values; + // __CPROVER_POINTER_OFFSET + std::map havoced_pointer_offsets; + + // We also collect loop-entry evaluations of havoced variables. + // __CPROVER_loop_entry + std::map loop_entry_values; + // __CPROVER_POINTER_OFFSET(__CPROVER_loop_entry( )) + std::map loop_entry_offsets; + + // Set of live variables upon loop head. + std::set live_variables; + + violation_typet cex_type; + loop_idt cause_loop_id; +}; + +/// Verifier that take a goto program as input, and ouptut formatted +/// counterexamples for counterexample-guided-synthesis. +class cegis_verifiert +{ +public: + cegis_verifiert( + const invariant_mapt &invariant_candidates, + goto_modelt &goto_model, + messaget &log) + : invariant_candidates(invariant_candidates), + goto_model(goto_model), + log(log) + { + } + + /// Verify `goto_model`. Return true if there is no violation. + /// Otherwise, sotre the formatted counterexample in `return_cex` + bool verify(); + + /// Result counterexample. + cext return_cex; + propertiest properties; + irep_idt first_violation; + +protected: + // Get the options same as using CBMC api without any flag, and + // preprocess `goto_model`. + // TODO: replace the checker with CBMC api once it is implemented. + optionst get_options(); + + // Compute the cause loop of `violation`. + // We say a loop is the cause loop if the violated predicate is dependent + // upon the write set of the loop. + loop_idt get_cause_loop_id( + const goto_tracet &goto_trace, + const goto_programt::const_targett violation); + + /// Restore transformed functions to original functions. + void restore_functions(); + + // Build counterexample from trace, and store it in `return_cex`. + cext build_cex( + const goto_tracet &goto_trace, + const source_locationt &loop_entry_loc); + + const invariant_mapt &invariant_candidates; + goto_modelt &goto_model; + messaget log; + + /// Map from function names to original functions. It is used to + /// restore functions with annotated loops to original functions. + std::map original_functions; +}; + +#endif // CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_CEGIS_VERIFIER_H diff --git a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp index 67dbc3ed068..1c316800e42 100644 --- a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp +++ b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp @@ -11,8 +11,61 @@ Author: Qinheping Hu #include "enumerative_loop_invariant_synthesizer.h" +#include +#include +#include +#include +#include + #include +#include "cegis_verifier.h" +#include "expr_enumerator.h" + +// substitute all tmp_post variables with their origins in `expr` +void substitute_tmp_post_rec( + exprt &dest, + const std::map &tmp_post_map) +{ + if(dest.id() != ID_address_of) + Forall_operands(it, dest) + substitute_tmp_post_rec(*it, tmp_post_map); + + // possibly substitute? + if(dest.id() == ID_symbol && tmp_post_map.count(dest)) + { + dest = tmp_post_map.at(dest); + } +} + +std::vector construct_terminals(std::set symbols) +{ + std::vector result = std::vector(); + for(const auto &e : symbols) + { + if(e.type().id() == ID_unsignedbv) + { + // For a variable v with primitive type, we add + // v, __CPROVER_loop_entry(v) + // into the result. + result.push_back(e); + result.push_back(unary_exprt(ID_loop_entry, e, size_type())); + } + if((e.type().id() == ID_pointer)) + { + // For a variable v with pointer type, we add + // __CPROVER_pointer_offset(v), + // __CPROVER_pointer_offset(__CPROVER_loop_entry(v)) + // into the result. + result.push_back(unary_exprt(ID_pointer_offset, e, size_type())); + result.push_back(pointer_offset_exprt( + unary_exprt(ID_loop_entry, e, e.type()), size_type())); + } + } + result.push_back(from_integer(1, size_type())); + return result; +} + void enumerative_loop_invariant_synthesizert::init_candidates() { for(auto &function_p : goto_model.goto_functions.function_map) @@ -32,19 +85,275 @@ void enumerative_loop_invariant_synthesizert::init_candidates() // we only synthesize invariants for unannotated loops if(loop_end->condition().find(ID_C_spec_loop_invariant).is_nil()) { - invariant_candiate_map[new_id] = true_exprt(); + // Store the loop guard. + exprt guard = + get_loop_head( + loop_end->loop_number, + goto_model.goto_functions.function_map[function_p.first]) + ->condition(); + neg_guards[new_id] = guard; + + // Initialize invariant clauses as `true`. + in_invariant_clause_map[new_id] = true_exprt(); + pos_invariant_clause_map[new_id] = true_exprt(); + } + } + } +} + +void enumerative_loop_invariant_synthesizert::build_tmp_post_map() +{ + for(auto &goto_function : goto_model.goto_functions.function_map) + { + for(const auto &instruction : goto_function.second.body.instructions) + { + // tmp_post variables are symbol lhs of ASSIGN. + if(!instruction.is_assign() || instruction.assign_lhs().id() != ID_symbol) + continue; + + const auto symbol_lhs = + expr_try_dynamic_cast(instruction.assign_lhs()); + + // tmp_post variables have identifiers with the prefix tmp::tmp_post. + if( + id2string(symbol_lhs->get_identifier()).find("tmp::tmp_post") != + std::string::npos) + { + tmp_post_map[instruction.assign_lhs()] = instruction.assign_rhs(); } } } } +std::set +enumerative_loop_invariant_synthesizert::compute_dependent_symbols( + const loop_idt &cause_loop_id, + const exprt &new_clause, + const std::set &live_vars) +{ + // We overapproximate dependent symbols as all symbols in live variables. + // TODO: using flow-dependency analysis to rule out not dependent symbols. + + std::set result; + for(const auto &e : live_vars) + find_symbols(e, result); + + return result; +} + +exprt enumerative_loop_invariant_synthesizert::synthesize_range_predicate( + const exprt &violated_predicate) +{ + // For the case where the violated predicate is dependent on no instruction + // other than loop havocing, the violated_predicate is + // WLP(loop_body_before_violation, violated_predicate). + // TODO: implement a more complete WLP algorithm. + return violated_predicate; +} + +exprt enumerative_loop_invariant_synthesizert::synthesize_same_object_predicate( + const exprt &checked_pointer) +{ + // The same object predicate says that the checked pointer points to the + // same object as it pointed before entering the loop. + // It works for the array-manuplating loops where only offset of pointer + // are modified but not the object pointers point to. + return same_object( + checked_pointer, unary_exprt(ID_loop_entry, checked_pointer)); +} + +exprt enumerative_loop_invariant_synthesizert::synthesize_strengthening_clause( + const std::vector terminal_symbols, + const loop_idt &cause_loop_id, + const irep_idt &violation_id) +{ + // Synthesis of strengthning clauses is a enumerate-and-check proecess. + // We first construct the enumerator for the following grammar. + // And then enumerate clause and check that if it can make the invariant + // inductive. + + // Initialize factory representing grammar + // StartBool -> StartBool && StartBool | Start == Start + // | StartBool <= StartBool | StartBool < StartBool + // Start -> Start + Start | terminal_symbols + // where a0, and a1 are symbol expressions. + namespacet ns(goto_model.symbol_table); + enumerator_factoryt factory = enumerator_factoryt(ns); + recursive_enumerator_placeholdert start_bool_ph(factory, "StartBool", ns); + recursive_enumerator_placeholdert start_ph(factory, "Start", ns); + + // terminals + expr_sett leafexprs(terminal_symbols.begin(), terminal_symbols.end()); + + // rules for Start + enumeratorst start_args; + // Start -> terminals + leaf_enumeratort leaf_g(leafexprs, ns); + start_args.push_back(&leaf_g); + + // Start -> Start + Start + binary_functional_enumeratort plus_g( + ID_plus, + start_ph, + start_ph, + [](const partitiont &partition) { + if(partition.size() <= 1) + return true; + return partition.front() == 1; + }, + ns); + start_args.push_back(&plus_g); + + // rules for StartBool + enumeratorst start_bool_args; + // StartBool -> StartBool && StartBool + binary_functional_enumeratort and_g(ID_and, start_bool_ph, start_bool_ph, ns); + start_bool_args.push_back(&and_g); + // StartBool -> Start == Start + binary_functional_enumeratort equal_g(ID_equal, start_ph, start_ph, ns); + start_bool_args.push_back(&equal_g); + // StartBool -> Start <= Start + binary_functional_enumeratort le_g(ID_le, start_ph, start_ph, ns); + start_bool_args.push_back(&le_g); + // StartBool -> Start < Start + binary_functional_enumeratort lt_g(ID_lt, start_ph, start_ph, ns); + start_bool_args.push_back(<_g); + + // add the two nonterminals to the factory + factory.attach_productions("Start", start_args); + factory.attach_productions("StartBool", start_bool_args); + + // size of candidates we are searching now, + // starting from 0 + size_t size_bound = 0; + + // numbers of candidates we have seen, + // used for quantitative analysis + size_t seen_terms = 0; + + // Start to enumerate and check. + while(true) + { + size_bound++; + + // generate candidate and verify + for(auto strengthening_candidate : start_bool_ph.enumerate(size_bound)) + { + seen_terms++; + invariant_mapt new_in_clauses = invariant_mapt(in_invariant_clause_map); + new_in_clauses[cause_loop_id] = + and_exprt(new_in_clauses[cause_loop_id], strengthening_candidate); + const auto &combined_invariant = combine_in_and_post_invariant_clauses( + new_in_clauses, pos_invariant_clause_map, neg_guards); + + // The verifier we use to check current invariant candidates. + cegis_verifiert verifier(combined_invariant, goto_model, log); + + // A good strengthening clause if + // 1. all checks pass, or + // 2. the loop invariant is inductive and hold upon the entry. + if( + verifier.verify() || (verifier.properties.at(violation_id).status != + property_statust::FAIL && + verifier.return_cex.cex_type != + cext::violation_typet::cex_not_hold_upon_entry)) + { + return strengthening_candidate; + } + } + } + UNREACHABLE; +} + invariant_mapt enumerative_loop_invariant_synthesizert::synthesize_all() { init_candidates(); + build_tmp_post_map(); + + // The invariants we are going to synthesize and verify are the combined + // invariants from in- and pos- invariant clauses. + auto combined_invariant = combine_in_and_post_invariant_clauses( + in_invariant_clause_map, pos_invariant_clause_map, neg_guards); + + // The verifier we use to check current invariant candidates. + cegis_verifiert verifier(combined_invariant, goto_model, log); + + // Set of symbols the violation may be dependent on. + // We enumerate strenghening clauses built from symbols from the set. + std::set dependent_symbols; + // Set of symbols we used to enumerate strenghening clauses. + std::vector terminal_symbols; + + while(!verifier.verify()) + { + exprt new_invariant_clause = true_exprt(); + + // Synthsize the new_clause + // We use difference strategies for different type of violations. + switch(verifier.return_cex.cex_type) + { + case cext::violation_typet::cex_out_of_boundary: + new_invariant_clause = + synthesize_range_predicate(verifier.return_cex.violated_predicate); + break; + + case cext::violation_typet ::cex_null_pointer: + new_invariant_clause = + synthesize_same_object_predicate(verifier.return_cex.checked_pointer); + break; + + case cext::violation_typet::cex_not_preserved: + terminal_symbols = construct_terminals(dependent_symbols); + new_invariant_clause = synthesize_strengthening_clause( + terminal_symbols, + verifier.return_cex.cause_loop_id, + verifier.first_violation); + break; + + case cext::violation_typet::cex_not_hold_upon_entry: + case cext::violation_typet::cex_other: + INVARIANT(false, "unsupported violation type"); + break; + } + + INVARIANT( + new_invariant_clause != true_exprt(), + "failed to synthesized meaningful clause"); + + // There could be tmp_post varialbes in the synthesized clause. + // We substitute them with their original variables. + substitute_tmp_post_rec(new_invariant_clause, tmp_post_map); + + // Update the dependent symbols. + dependent_symbols = compute_dependent_symbols( + verifier.return_cex.cause_loop_id, + new_invariant_clause, + verifier.return_cex.live_variables); + + // add the new cluase to the candidate invairants + if(verifier.return_cex.is_violation_in_loop) + { + in_invariant_clause_map[verifier.return_cex.cause_loop_id] = and_exprt( + in_invariant_clause_map[verifier.return_cex.cause_loop_id], + new_invariant_clause); + } + else + { + // violation happens pos-loop. + pos_invariant_clause_map[verifier.return_cex.cause_loop_id] = and_exprt( + pos_invariant_clause_map[verifier.return_cex.cause_loop_id], + new_invariant_clause); + } + + // Re-combine invariant clauses and update the candidate map. + combined_invariant = combine_in_and_post_invariant_clauses( + in_invariant_clause_map, pos_invariant_clause_map, neg_guards); + } + + log.result() << "result : " << log.green << "PASS" << messaget::eom + << log.reset; - // Now this method only generate true for all unnotated loops. - // The implementation will be added later. - return invariant_candiate_map; + return combined_invariant; } exprt enumerative_loop_invariant_synthesizert::synthesize(loop_idt loop_id) diff --git a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h index 160531198f4..97918c7e5a6 100644 --- a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h +++ b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.h @@ -29,7 +29,7 @@ class enumerative_loop_invariant_synthesizert { public: enumerative_loop_invariant_synthesizert( - const goto_modelt &goto_model, + goto_modelt &goto_model, messaget &log) : loop_invariant_synthesizer_baset(goto_model, log) { @@ -44,7 +44,38 @@ class enumerative_loop_invariant_synthesizert /// Initialize invariants as true for all unannotated loops. void init_candidates(); - invariant_mapt invariant_candiate_map; + /// Scan all ASSIGN instructions to build the map from tmp_post variables + /// to their original variables. + void build_tmp_post_map(); + + /// Compute the depedent symbols for a loop with invariant-not-preserved + /// violation which happen after `new_clause` was added. + std::set compute_dependent_symbols( + const loop_idt &cause_loop_id, + const exprt &new_clause, + const std::set &live_vars); + + /// Synthesize range predicate for OOB violation with `violated_predicate`. + exprt synthesize_range_predicate(const exprt &violated_predicate); + + /// Synthesize same object predicate for null-pointer violation for + /// `checked_pointer`. + exprt synthesize_same_object_predicate(const exprt &checked_pointer); + + /// Synthesize strengthening clause for invariant-not-preserved violation. + exprt synthesize_strengthening_clause( + const std::vector terminal_symbols, + const loop_idt &cause_loop_id, + const irep_idt &violation_id); + + /// Synthesize invariant of form + /// (in_inv || !guard) && (!guard -> pos_inv) + invariant_mapt in_invariant_clause_map; + invariant_mapt pos_invariant_clause_map; + invariant_mapt neg_guards; + + /// Map from tmp_post variables to their original variables. + std::map tmp_post_map; }; // NOLINTNEXTLINE(whitespace/line_length) diff --git a/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h b/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h index 1c2fbca73f8..cbc0c7aea6e 100644 --- a/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h +++ b/src/goto-instrument/synthesizer/loop_invariant_synthesizer_base.h @@ -16,7 +16,8 @@ Author: Qinheping Hu #include "synthesizer_utils.h" -#define OPT_SYNTHESIZE_LOOP_INVARIANTS "(synthesize-loop-invariants)" +#define FLAG_SYNTHESIZE_LOOP_INVARIANTS "synthesize-loop-invariants" +#define OPT_SYNTHESIZE_LOOP_INVARIANTS "(" FLAG_SYNTHESIZE_LOOP_INVARIANTS ")" #define HELP_LOOP_INVARIANT_SYNTHESIZER \ " --synthesize-loop-invariants\n" \ " synthesize and apply loop invariants\n" @@ -32,7 +33,7 @@ class messaget; class loop_invariant_synthesizer_baset { public: - loop_invariant_synthesizer_baset(const goto_modelt &goto_model, messaget &log) + loop_invariant_synthesizer_baset(goto_modelt &goto_model, messaget &log) : goto_model(goto_model), log(log) { } @@ -48,7 +49,7 @@ class loop_invariant_synthesizer_baset virtual exprt synthesize(loop_idt) = 0; protected: - const goto_modelt &goto_model; + goto_modelt &goto_model; messaget &log; }; diff --git a/src/goto-instrument/synthesizer/module_dependencies.txt b/src/goto-instrument/synthesizer/module_dependencies.txt index 290dab44704..f422fb5621d 100644 --- a/src/goto-instrument/synthesizer/module_dependencies.txt +++ b/src/goto-instrument/synthesizer/module_dependencies.txt @@ -1,8 +1,14 @@ analyses ansi-c +assembler +contracts +cpp +goto-checker goto-instrument/contracts goto-instrument/synthesizer goto-instrument goto-programs langapi +pointer-analysis +solvers util diff --git a/src/goto-instrument/synthesizer/synthesizer_utils.cpp b/src/goto-instrument/synthesizer/synthesizer_utils.cpp index f1a618b1dc9..ddfca59a316 100644 --- a/src/goto-instrument/synthesizer/synthesizer_utils.cpp +++ b/src/goto-instrument/synthesizer/synthesizer_utils.cpp @@ -9,6 +9,11 @@ Author: Qinheping Hu #include "synthesizer_utils.h" #include +#include +#include + +#include +#include goto_programt::const_targett get_loop_end_from_loop_head_and_content( const goto_programt::const_targett &loop_head, @@ -94,14 +99,13 @@ get_loop_head(const unsigned int target_loop_number, goto_functiont &function) void annotate_invariants( const invariant_mapt &invariant_map, - goto_modelt &goto_model, - messaget &log) + goto_modelt &goto_model) { for(const auto &invariant_map_entry : invariant_map) { loop_idt loop_id = invariant_map_entry.first; irep_idt function_id = loop_id.function_id; - unsigned int loop_number = loop_id.loop_number; + unsigned int loop_number = loop_id.loop_number.value(); // get the last instruction of the target loop auto &function = goto_model.goto_functions.function_map[function_id]; @@ -112,3 +116,65 @@ void annotate_invariants( invariant_map_entry.second; } } + +bool is_instruction_in_transfomed_loop( + const loop_idt &loop_id, + const goto_functiont &function, + unsigned location_number_of_target) +{ + // The loop body after transformation are instructions from + // loop havocing instructions + // to + // loop end of transformed code. + + bool result = false; + unsigned location_number_of_havocing = UINT_MAX; + for(goto_programt::const_targett it = function.body.instructions.begin(); + it != function.body.instructions.end(); + ++it) + { + if(is_loop_havoc(*it) && it->loop_number == loop_id.loop_number.value()) + { + location_number_of_havocing = it->location_number; + } + + if( + is_transformed_loop_end(it) && + it->loop_number == loop_id.loop_number.value()) + { + result = + result || (location_number_of_havocing < location_number_of_target && + location_number_of_target < it->location_number); + } + } + + return result; +} + +std::size_t hex_to_size_t(const std::string &hex_str) +{ + std::istringstream converter(hex_str); + size_t result; + converter >> std::hex >> result; + return result; +} + +invariant_mapt combine_in_and_post_invariant_clauses( + const invariant_mapt &in_clauses, + const invariant_mapt &post_clauses, + const invariant_mapt &neg_guards) +{ + // Combine invariant + // (in_inv || !guard) && (!guard -> pos_inv) + invariant_mapt result; + for(const auto &in_clause : in_clauses) + { + const auto &id = in_clause.first; + INVARIANT(neg_guards.count(id), "Some loop guard is missing."); + + result[id] = and_exprt( + or_exprt(neg_guards.at(id), in_clause.second), + implies_exprt(neg_guards.at(id), post_clauses.at(id))); + } + return result; +} diff --git a/src/goto-instrument/synthesizer/synthesizer_utils.h b/src/goto-instrument/synthesizer/synthesizer_utils.h index 40620f7feac..2938774696a 100644 --- a/src/goto-instrument/synthesizer/synthesizer_utils.h +++ b/src/goto-instrument/synthesizer/synthesizer_utils.h @@ -9,6 +9,9 @@ Author: Qinheping Hu #ifndef CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_SYNTHESIZER_UTILS_H #define CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_SYNTHESIZER_UTILS_H +#include + +#include #include #include @@ -50,7 +53,23 @@ get_loop_head(const unsigned int loop_number, goto_functiont &function); /// loops. Corresponding loops are specified by keys of `invariant_map` void annotate_invariants( const invariant_mapt &invariant_map, - goto_modelt &goto_model, - messaget &log); + goto_modelt &goto_model); + +/// Decide whether the target instruction is in the body of the transformed loop +/// specified by `loop_id`. +bool is_instruction_in_transfomed_loop( + const loop_idt &loop_id, + const goto_functiont &function, + unsigned location_number_of_target); + +/// convert hex string to size_t. +std::size_t hex_to_size_t(const std::string &hex_str); + +/// Combine invariant of form +/// (in_inv || !guard) && (!guard -> pos_inv) +invariant_mapt combine_in_and_post_invariant_clauses( + const invariant_mapt &in_clauses, + const invariant_mapt &post_clauses, + const invariant_mapt &neg_guards); #endif // CPROVER_GOTO_INSTRUMENT_SYNTHESIZER_SYNTHESIZER_UTILS_H diff --git a/src/goto-programs/loop_ids.h b/src/goto-programs/loop_ids.h index 69f77e00834..76c89b3334e 100644 --- a/src/goto-programs/loop_ids.h +++ b/src/goto-programs/loop_ids.h @@ -31,14 +31,28 @@ struct loop_idt { } + loop_idt() : function_id(""), loop_number() + { + } + + loop_idt(const loop_idt &other) + : function_id(other.function_id), loop_number(other.loop_number) + { + } + irep_idt function_id; - unsigned int loop_number; + optionalt loop_number; bool operator==(const loop_idt &o) const { return function_id == o.function_id && loop_number == o.loop_number; } + bool operator!=(const loop_idt &o) const + { + return !operator==(o); + } + bool operator<(const loop_idt &o) const { return function_id < o.function_id || From 4b558d87afe099d3b72fae1e1423c833f52e8c75 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Tue, 6 Dec 2022 11:28:05 -0600 Subject: [PATCH 2/3] Disallow enumerate equal expr between sub-exprs with different types. --- .../goto_instrument_parse_options.cpp | 4 ---- .../enumerative_loop_invariant_synthesizer.cpp | 11 +++++++++-- src/goto-instrument/synthesizer/expr_enumerator.cpp | 13 +++++++++++-- src/goto-programs/CMakeLists.txt | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index c700a835628..333547ca5f8 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -1433,10 +1433,6 @@ void goto_instrument_parse_optionst::instrument_goto_program() " or --" FLAG_LOOP_CONTRACTS); } - log.warning() << "Loop invariant synthesizer is still work in progress. " - "It only generates TRUE as invariants." - << messaget::eom; - // Synthesize loop invariants and annotate them into `goto_model` enumerative_loop_invariant_synthesizert synthesizer(goto_model, log); annotate_invariants(synthesizer.synthesize_all(), goto_model); diff --git a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp index 1c316800e42..a4f56632c6f 100644 --- a/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp +++ b/src/goto-instrument/synthesizer/enumerative_loop_invariant_synthesizer.cpp @@ -48,8 +48,14 @@ std::vector construct_terminals(std::set symbols) // For a variable v with primitive type, we add // v, __CPROVER_loop_entry(v) // into the result. + result.push_back(typecast_exprt(e, size_type())); + result.push_back( + typecast_exprt(unary_exprt(ID_loop_entry, e, e.type()), size_type())); + } + if((e.type().id() == ID_signedbv)) + { result.push_back(e); - result.push_back(unary_exprt(ID_loop_entry, e, size_type())); + result.push_back(unary_exprt(ID_loop_entry, e, e.type())); } if((e.type().id() == ID_pointer)) { @@ -62,7 +68,8 @@ std::vector construct_terminals(std::set symbols) unary_exprt(ID_loop_entry, e, e.type()), size_type())); } } - result.push_back(from_integer(1, size_type())); + result.push_back(from_integer(1, unsigned_int_type())); + result.push_back(from_integer(1, unsigned_long_int_type())); return result; } diff --git a/src/goto-instrument/synthesizer/expr_enumerator.cpp b/src/goto-instrument/synthesizer/expr_enumerator.cpp index 617a7aed25d..660e0ca87e3 100644 --- a/src/goto-instrument/synthesizer/expr_enumerator.cpp +++ b/src/goto-instrument/synthesizer/expr_enumerator.cpp @@ -316,7 +316,18 @@ exprt binary_functional_enumeratort::instantiate(const expr_listt &exprs) const exprs.size() == 2, "number of arguments should be 2: " + integer2string(exprs.size())); if(op_id == ID_equal) + { + if(exprs.front().type() != exprs.back().type()) + return true_exprt(); + return equal_exprt(exprs.front(), exprs.back()); + } + if(op_id == ID_notequal) + { + if(exprs.front().type() != exprs.back().type()) + return true_exprt(); + return notequal_exprt(exprs.front(), exprs.back()); + } if(op_id == ID_le) return less_than_or_equal_exprt(exprs.front(), exprs.back()); if(op_id == ID_lt) @@ -333,8 +344,6 @@ exprt binary_functional_enumeratort::instantiate(const expr_listt &exprs) const return plus_exprt(exprs.front(), exprs.back()); if(op_id == ID_minus) return minus_exprt(exprs.front(), exprs.back()); - if(op_id == ID_notequal) - return notequal_exprt(exprs.front(), exprs.back()); return binary_exprt(exprs.front(), op_id, exprs.back()); } diff --git a/src/goto-programs/CMakeLists.txt b/src/goto-programs/CMakeLists.txt index f0748416435..1d0cdcbbbea 100644 --- a/src/goto-programs/CMakeLists.txt +++ b/src/goto-programs/CMakeLists.txt @@ -3,4 +3,4 @@ add_library(goto-programs ${sources}) generic_includes(goto-programs) -target_link_libraries(goto-programs util assembler langapi analyses linking ansi-c) +target_link_libraries(goto-programs util assembler langapi analyses linking ansi-c json) From c63e32814483de00fed55ce56d0ac05e1cb9f17c Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Mon, 12 Dec 2022 10:14:06 -0600 Subject: [PATCH 3/3] Add goto-convert before verifying candidate --- src/goto-instrument/synthesizer/cegis_verifier.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/goto-instrument/synthesizer/cegis_verifier.cpp b/src/goto-instrument/synthesizer/cegis_verifier.cpp index ac7b182c3cb..541c3b41a8f 100644 --- a/src/goto-instrument/synthesizer/cegis_verifier.cpp +++ b/src/goto-instrument/synthesizer/cegis_verifier.cpp @@ -17,6 +17,7 @@ Author: Qinheping Hu #include #include +#include #include #include #include @@ -392,6 +393,11 @@ bool cegis_verifiert::verify() all_properties_verifier_with_trace_storaget>( options, ui_message_handler, goto_model); + goto_convert( + goto_model.symbol_table, + goto_model.goto_functions, + log.get_message_handler()); + // Run the checker to get the result. const resultt result = (*checker)();