diff --git a/src/analyses/c2poAnalysis.ml b/src/analyses/c2poAnalysis.ml new file mode 100644 index 0000000000..13b323ba91 --- /dev/null +++ b/src/analyses/c2poAnalysis.ml @@ -0,0 +1,339 @@ +(** C-2PO: A Weakly-Relational Pointer Analysis for C based on 2 Pointer Logic. The analysis can infer equalities and disequalities between terms which are built from pointer variables, with the addition of constants and dereferencing. ([c2po])*) + +open Analyses +open GoblintCil +open C2poDomain +open CongruenceClosure +open Batteries +open SingleThreadedLifter +open DuplicateVars + +module Spec = +struct + include DefaultSpec + include Analyses.IdentitySpec + module D = D + module C = D + + let name () = "c2po" + let startcontext () = D.top () + + (** Find reachable variables in a function *) + let reachable_from_args ctx args = + let collect_reachable_from_exp acc e = + let reachable_from_exp = ctx.ask (ReachableFrom e) in + let reachable_from_exp = Queries.AD.to_var_may reachable_from_exp in + reachable_from_exp @ acc + in + let res = List.fold collect_reachable_from_exp [] args in + if M.tracing then M.tracel "c2po-reachable" "reachable vars: %s\n" (List.fold_left (fun s v -> s ^ v.vname ^"; ") "" res); + res + + (* Returns Some true if we know for sure that it is true, + and Some false if we know for sure that it is false, + and None if we don't know anyhing. *) + let eval_guard ask cc e ik = + let open Queries in + let prop_list = T.prop_of_cil ask e true in + match split prop_list with + | [], [], [] -> + ID.top() + | x::xs, _, [] -> + if fst (eq_query cc x) then + ID.of_bool ik true + else if neq_query cc x then + ID.of_bool ik false + else + ID.top() + | _, y::ys, [] -> + if neq_query cc y then + ID.of_bool ik true + else if fst (eq_query cc y) then + ID.of_bool ik false + else + ID.top() + | _ -> + ID.top() (*there should never be block disequalities here...*) + + (** Convert a conjunction to an invariant.*) + let conj_to_invariant ask conjs = + let f a prop = + try + let exp = T.prop_to_cil prop in (* May raise UnsupportedCilExpression *) + if M.tracing then M.trace "c2po-invariant" "Adding invariant: %a" d_exp exp; + Invariant.(a && of_exp exp) + with T.UnsupportedCilExpression _ -> + a + in + List.fold f (Invariant.top()) conjs + + let query ctx (type a) (q: a Queries.t): a Queries.result = + let open Queries in + match ctx.local with + | `Bot -> Result.top q + | `Lifted cc -> + match q with + | EvalInt e -> + let ik = Cilfacade.get_ikind_exp e in + eval_guard (ask_of_man ctx) cc e ik + | Queries.Invariant context -> + let scope = Node.find_fundec ctx.node in + let t = D.remove_vars_not_in_scope scope cc in + let conj = get_conjunction t in + let ask = ask_of_man ctx in + conj_to_invariant ask conj + | _ -> + Result.top q + + (** Assign the right hand side rhs (that is already + converted to a C-2PO term) to the term `lterm`. *) + let assign_term cc ask lterm rhs lval_t = + (* ignore assignments to values that are not 64 bits *) + match T.get_element_size_in_bits lval_t, rhs with + (* Indefinite assignment *) + | lval_size, (None, _) -> + D.remove_may_equal_terms ask lval_size lterm cc + (* Definite assignment *) + | lval_size, (Some rterm, Some roffset) -> + let dummy_var = MayBeEqual.dummy_var lval_t in + + if M.tracing then M.trace "c2po-assign" "assigning: var: %s; expr: %s + %s. \nTo_cil: lval: %a; expr: %a\n" (T.show lterm) (T.show rterm) (Z.to_string roffset) d_exp (T.to_cil lterm) d_exp (T.to_cil rterm); + + let equal_dummy_rterm = [Equal (dummy_var, rterm, roffset)] in + let equal_dummy_lterm = [Equal (lterm, dummy_var, Z.zero)] in + + cc |> + meet_conjs_opt equal_dummy_rterm |> + D.remove_may_equal_terms ask lval_size lterm |> + meet_conjs_opt equal_dummy_lterm |> + D.remove_terms_containing_aux_variable + + | _ -> (* this is impossible *) + C2PODomain.top () + + (** Assign Cil Lval to a right hand side that is already converted to + C-2PO terms.*) + let assign_lval cc ask lval expr = + let lval_t = typeOfLval lval in + try + let lterm = T.of_lval ask lval in + assign_term cc ask lterm expr lval_t + with T.UnsupportedCilExpression _ -> + (* the assigned variables couldn't be parsed, so we don't know which addresses were written to. + We have to forget all the information we had. + This should almost never happen. + Except if the left hand side is a complicated expression like myStruct.field1[i]->field2[z+k], and Goblint can't infer the offset.*) + if M.tracing then M.trace "c2po-invalidate" "Invalidate lval: %a" d_lval lval; + C2PODomain.top () + + let assign ctx lval expr = + let ask = (ask_of_man ctx) in + match ctx.local with + | `Bot -> + `Bot + | `Lifted cc -> + let cc = assign_lval cc ask lval (T.of_cil ask expr) in + let cc = reset_normal_form cc in + let res = `Lifted cc in + if M.tracing then M.trace "c2po-assign" "assign: var: %a; expr: %a; result: %s.\n" d_lval lval d_plainexp expr (D.show res); + res + + let branch ctx e pos = + let props = T.prop_of_cil (ask_of_man ctx) e pos in + let valid_props = T.filter_valid_pointers props in + let res = + match ctx.local with + | `Bot -> `Bot + | `Lifted cc -> + if List.is_empty valid_props then + `Lifted cc + else + try + let meet = meet_conjs_opt valid_props cc in + let t = reset_normal_form meet in + `Lifted t + with Unsat -> + `Bot + in + if M.tracing then M.trace "c2po" "branch:\n Actual equality: %a; pos: %b; valid_prop_list: %s; is_bot: %b\n" d_exp e pos (show_conj valid_props) (D.is_bot res); + if D.is_bot res then raise Deadcode; + res + + let body ctx f = + ctx.local + + let assign_return ask d return_var expr = + (* the return value is not stored on the heap, therefore we don't need to remove any terms *) + match T.of_cil ask expr with + | (Some term, Some offset) -> + let ret_var_eq_term = [Equal (return_var, term, offset)] in + let assign_by_meet = meet_conjs_opt ret_var_eq_term d in + reset_normal_form assign_by_meet + | _ -> d + + let return ctx exp_opt f = + let res = + match exp_opt with + | Some e -> + begin match ctx.local with + | `Bot -> + `Bot + | `Lifted d -> + let return_var = MayBeEqual.return_var (typeOf e) in + let d = assign_return (ask_of_man ctx) d return_var e in + `Lifted d + end + | None -> ctx.local + in + if M.tracing then M.trace "c2po-function" "return: exp_opt: %a; state: %s; result: %s\n" d_exp (BatOption.default (MayBeEqual.dummy_lval_print (TVoid [])) exp_opt) (D.show ctx.local) (D.show res); + res + + (** var_opt is the variable we assign to. It has type lval. v=malloc.*) + let special ctx lval_opt v exprs = + let desc = LibraryFunctions.find v in + let ask = ask_of_man ctx in + match ctx.local with + | `Bot -> `Bot + | `Lifted cc -> + let t = + begin match lval_opt with + | None -> + cc + | Some lval -> + (* forget information about var, + but ignore assignments to values that are not 64 bits *) + try + let size = T.get_element_size_in_bits (typeOfLval lval) in + let lterm = T.of_lval ask lval in + let cc = D.remove_may_equal_terms ask size lterm cc in + begin match desc.special exprs with + | Malloc _ + | Calloc _ + | Alloca _ -> + add_block_diseqs cc lterm + | _ -> cc + end + with T.UnsupportedCilExpression _ -> + C2PODomain.top () + end + in + match desc.special exprs with + | Assert { exp; refine; _ } -> + if not refine then + ctx.local + else + branch ctx exp true + | _ -> + `Lifted (reset_normal_form t) + + (** First all local variables of the function are duplicated, + then we remember the value of each local variable at the beginning of the function by using the analysis startState. + This way we can infer the relations between the local variables of the caller and the pointers that were modified by the function. *) + let enter ctx var_opt f args = + (* add duplicated variables, and set them equal to the original variables *) + match ctx.local with + | `Bot -> [`Bot, `Bot] + | `Lifted cc -> + let ghost_equality v = + Equal (T.term_of_varinfo (DuplicVar v), T.term_of_varinfo (NormalVar v), Z.zero) + in + let ghost_equalities_for_params = List.map ghost_equality f.sformals in + let equalities_to_add = T.filter_valid_pointers ghost_equalities_for_params in + let state_with_ghosts = meet_conjs_opt equalities_to_add cc in + if M.tracing then begin + let dummy_lval = Var (Var.dummy_varinfo (TVoid [])), NoOffset in + let lval = BatOption.default dummy_lval var_opt in + M.trace "c2po-function" "enter1: var_opt: %a; state: %s; state_with_ghosts: %s\n" d_lval lval (D.show ctx.local) (C2PODomain.show state_with_ghosts); + end; + (* remove callee vars that are not reachable and not global *) + let reachable_variables = + let reachable = f.sformals @ f.slocals @ reachable_from_args ctx args in + Var.from_varinfo reachable f.sformals + in + let new_state = D.remove_terms_not_containing_variables reachable_variables state_with_ghosts in + if M.tracing then M.trace "c2po-function" "enter2: result: %s\n" (C2PODomain.show new_state); + let new_state = reset_normal_form new_state in + [ctx.local, `Lifted new_state] + + let remove_out_of_scope_vars cc f = + let local_vars = f.sformals @ f.slocals in + let duplicated_vars = f.sformals in + let cc = D.remove_terms_containing_return_variable cc in + D.remove_terms_containing_variables (Var.from_varinfo local_vars duplicated_vars) cc + + let combine_env ctx lval_opt expr f args t_context_opt f_d (f_ask: Queries.ask) = + match ctx.local with + | `Bot -> `Bot + | `Lifted cc -> + let caller_ask = ask_of_man ctx in + (* assign function parameters to duplicated values *) + let arg_assigns = GobList.combine_short f.sformals args in + let assign_term st (var, arg) = + let ghost_var = T.term_of_varinfo (DuplicVar var) in + let arg = T.of_cil f_ask arg in + assign_term st caller_ask ghost_var arg var.vtype + in + let state_with_assignments = List.fold_left assign_term cc arg_assigns in + + if M.tracing then M.trace "c2po-function" "combine_env0: state_with_assignments: %s\n" (C2PODomain.show state_with_assignments); + + (*remove all variables that were tainted by the function*) + let tainted = f_ask.f (MayBeTainted) in + if M.tracing then M.trace "c2po-tainted" "combine_env1: %a\n" MayBeEqual.AD.pretty tainted; + + let local = D.remove_tainted_terms caller_ask tainted state_with_assignments in + match D.meet (`Lifted local) f_d with + | `Bot -> `Bot + | `Lifted cc -> + let cc = reset_normal_form @@ remove_out_of_scope_vars cc f in + if M.tracing then begin + let dummy_lval = Var (Var.dummy_varinfo (TVoid[])), NoOffset in + let lval = BatOption.default dummy_lval lval_opt in + M.trace "c2po-function" "combine_env2: var_opt: %a; local_state: %s; f_state: %s; meeting everything: %s\n" d_lval lval (D.show ctx.local) (D.show f_d) (C2PODomain.show cc) + end; + `Lifted cc + + let combine_assign ctx var_opt expr f args t_context_opt f_d (f_ask: Queries.ask) = + match ctx.local with + | `Bot -> `Bot + | `Lifted cc -> + let caller_ask = ask_of_man ctx in + (* assign function parameters to duplicated values *) + let arg_assigns = GobList.combine_short f.sformals args in + let assign_term st (var, arg) = + let ghost_var = T.term_of_varinfo (DuplicVar var) in + let arg = T.of_cil f_ask arg in + assign_term st caller_ask ghost_var arg var.vtype + in + let state_with_assignments = List.fold_left assign_term cc arg_assigns in + match D.meet (`Lifted state_with_assignments) f_d with + | `Bot -> `Bot + | `Lifted cc -> + let cc = match var_opt with + | None -> + cc + | Some lval -> + let return_type = typeOfLval lval in + let return_var = MayBeEqual.return_var return_type in + let return_var = (Some return_var, Some Z.zero) in + assign_lval cc f_ask lval return_var + in + if M.tracing then M.trace "c2po-function" "combine_assign1: assigning return value: %s\n" (C2PODomain.show cc); + let cc = remove_out_of_scope_vars cc f in + let cc = reset_normal_form cc in + if M.tracing then M.trace "c2po-function" "combine_assign2: result: %s\n" (C2PODomain.show cc); + `Lifted cc + + let startstate v = + D.top () + let threadenter ctx ~multiple lval f args = + [D.top ()] + let threadspawn ctx ~multiple lval f args fctx = + D.top() + let exitstate v = + D.top () + +end + +let _ = + MCP.register_analysis ~dep:["startState"; "taintPartialContexts"] (module SingleThreadedLifter(Spec) : MCPSpec) diff --git a/src/analyses/singleThreadedLifter.ml b/src/analyses/singleThreadedLifter.ml new file mode 100644 index 0000000000..3fed5ed222 --- /dev/null +++ b/src/analyses/singleThreadedLifter.ml @@ -0,0 +1,75 @@ +(** This lifter takes an analysis that only works for single-threaded code and allows it to run on multi-threaded programs by returning top when the code might be multi-threaded. +*) + +open Analyses + +module SingleThreadedLifter (S: MCPSpec) = +struct + include S + + let is_multithreaded (ask:Queries.ask) = + not @@ ask.f (MustBeSingleThreaded {since_start = true}) + + let query ctx = + let return_top (type a) (q: a Queries.t) = + Queries.Result.top q + in + if is_multithreaded (ask_of_man ctx) then + return_top + else + query ctx + + let assign ctx lval expr = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + assign ctx lval expr + + let branch ctx e pos = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + branch ctx e pos + + let body ctx f = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + body ctx f + + let return ctx exp_opt f = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + return ctx exp_opt f + + let special ctx var_opt v exprs = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + special ctx var_opt v exprs + + let enter ctx var_opt f args = + if is_multithreaded (ask_of_man ctx) then + [D.top (),D.top ()] + else + enter ctx var_opt f args + + let combine_env ctx var_opt expr f exprs t_context_opt t (ask: Queries.ask) = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + combine_env ctx var_opt expr f exprs t_context_opt t ask + + let combine_assign ctx var_opt expr f args t_context_opt t (ask: Queries.ask) = + if is_multithreaded (ask_of_man ctx) then + D.top () + else + combine_assign ctx var_opt expr f args t_context_opt t ask + + let threadenter ctx ~multiple lval f args = + [D.top ()] + + let threadspawn ctx ~multiple lval f args fctx = + D.top () +end diff --git a/src/analyses/startStateAnalysis.ml b/src/analyses/startStateAnalysis.ml new file mode 100644 index 0000000000..2b334bf592 --- /dev/null +++ b/src/analyses/startStateAnalysis.ml @@ -0,0 +1,66 @@ +(** Remembers the abstract address value of each parameter at the beginning of each function by adding a ghost variable for each parameter. + Used by the c2po analysis. *) + +open GoblintCil +open Batteries +open Analyses +open DuplicateVars.Var + +(** First, all parameters (=formals) of the function are duplicated (by using DuplicateVars), + then, we remember the value of each local variable at the beginning of the function a new duplicated variable. *) +module Spec : Analyses.MCPSpec = +struct + let name () = "startState" + module AD = ValueDomain.AD + module D = MapDomain.MapBot (Basetype.Variables) (AD) + module C = D + + include Analyses.IdentitySpec + + let get_value (ask: Queries.ask) exp = + try + ask.f (MayPointTo exp) + with IntDomain.ArithmeticOnIntegerBot _ -> + AD.top() + + (** If e is a known variable (=one of the duplicated variables), then it returns the value for this variable. + If e is an unknown variable or an expression that is not simply a variable, then it returns top. *) + let eval (ask: Queries.ask) (d: D.t) (exp: exp): AD.t = + match exp with + | Lval (Var x, NoOffset) -> + let maybe_value = D.find_opt x d in + Option.default (AD.top ()) maybe_value + | _ -> + AD.top () + + let startcontext () = + D.empty () + + let startstate v = + D.bot () + + let exitstate = + startstate + + let query ctx (type a) (q: a Queries.t): a Queries.result = + let open Queries in + match q with + | MayPointTo e -> + eval (ask_of_man ctx) ctx.local e + | _ -> + Result.top q + + let body ctx (f: fundec) = + let add_var_value st var = + let lval = Lval (Var var, NoOffset) in + let value = get_value (ask_of_man ctx) lval in + let duplicated_var = to_varinfo (DuplicVar var) in + if M.tracing then M.trace "startState" "added value: var: %a; value: %a" d_lval (Var duplicated_var, NoOffset) AD.pretty value; + D.add duplicated_var value st + in + (* assign function parameters *) + List.fold_left add_var_value (D.empty ()) f.sformals +end + +let _ = + MCP.register_analysis (module Spec : MCPSpec) diff --git a/src/cdomains/c2poDomain.ml b/src/cdomains/c2poDomain.ml new file mode 100644 index 0000000000..3d7f8445b1 --- /dev/null +++ b/src/cdomains/c2poDomain.ml @@ -0,0 +1,280 @@ +(** Domain for weakly relational pointer analysis C-2PO. *) + +open Batteries +open GoblintCil +open CongruenceClosure +module M = Messages +open DuplicateVars + +module C2PODomain = struct + include Printable.StdLeaf + + type t = CongruenceClosure.t[@@deriving ord, hash] + + let show x = show_conj (get_conjunction x) + let name () = "c2po" + + type domain = t + include Printable.SimpleShow (struct type t = domain let show = show end) + + let equal_standard cc1 cc2 = + let res = + if exactly_equal cc1 cc2 then + true + else + (* add all terms to both elements *) + let terms1 = SSet.union cc1.set (BlDis.term_set cc1.bldis) in + let terms2 = SSet.union cc2.set (BlDis.term_set cc2.bldis) in + let terms = SSet.union terms1 terms2 in + + let cc1 = insert_set cc1 terms in + let cc2 = insert_set cc2 terms in + + equal_eq_classes cc1 cc2 + && equal_diseqs cc1 cc2 + && equal_bldis cc1 cc2 + in + if M.tracing then M.trace "c2po-equal" "equal eq classes. %b\nx=\n%s\ny=\n%s" res (show_all cc1) (show_all cc2); + res + + let equal_normal_form x y = + let res = + if exactly_equal x y then + true + else + let nf1 = get_normal_form x in + let nf2 = get_normal_form y in + if M.tracing then M.trace "c2po-min-repr" "Normal form of x = %s; Normal form of y = %s" (show_conj nf1) (show_conj nf2); + T.props_equal nf1 nf2 + in + if M.tracing then M.trace "c2po-equal" "equal min repr. %b\nx=\n%s\ny=\n%s" res (show_all x) (show_all y); + res + + let equal a b = + if M.tracing then M.trace "c2po-normal-form" "COMPUTING EQUAL"; + match GobConfig.get_string "ana.c2po.equal" with + | "normal_form" -> + equal_normal_form a b + | _ -> + equal_standard a b + + let equal a b = Timing.wrap "c2po-equal" (equal a) b + + let bot() = failwith "not supported" + let is_bot x = false + let empty () = init_cc () + let init () = empty () + let top () = empty () + let is_top cc = + TUF.is_empty cc.uf && + Disequalities.is_empty cc.diseq && + BlDis.is_empty cc.bldis + + let join_f a b join_cc_function = + let res = + if exactly_equal a b then + a + else begin + if M.tracing then M.tracel "c2po-join" "JOIN AUTOMATON. FIRST ELEMENT: %s\nSECOND ELEMENT: %s\n" + (show_all a) (show_all b); + let cc, _ = join_cc_function a b in + let cmap1, _ = Disequalities.comp_map a.uf in + let cmap2, _ = Disequalities.comp_map b.uf in + let cc = join_bldis a.bldis b.bldis a b cc cmap1 cmap2 in + let cc = join_neq a.diseq b.diseq a b cc cmap1 cmap2 in + reset_normal_form cc + end + in + if M.tracing then M.tracel "c2po-join" "JOIN. JOIN: %s\n" + (show_all res); + res + + let join a b = + match GobConfig.get_string "ana.c2po.join_algorithm" with + | "precise" -> + if M.tracing then M.trace "c2po-join" "Join Automaton"; + join_f a b join_eq + | _ -> + if M.tracing then M.trace "c2po-join" "Join Eq classes"; + join_f a b join_eq_no_automata + + let join a b = + Timing.wrap "join" (join a) b + + let widen_automata a b = + (* we calculate the join and then restrict to the term set of a' *) + let join_result = join a b in + let not_in_a_set t = + not @@ SSet.mem t a.set + in + let filtered_join = remove_terms not_in_a_set join_result in + reset_normal_form filtered_join + + let widen_eq_classes a b = + join_f a b widen_eq_no_automata + + let widen a b = + if M.tracing then M.trace "c2po-widen" "WIDEN\n"; + match GobConfig.get_string "ana.c2po.join_algorithm" with + | "precise" -> + widen_automata a b + | _ -> + widen_eq_classes a b + + + let meet a b = + if M.tracing then M.trace "c2po-meet" "Meet x= %s; y=%s" (show a) (show b); + let res = + if exactly_equal a b then + a + else + match get_conjunction a with + | [] -> + b + | a_conj -> + reset_normal_form (meet_conjs_opt a_conj b) + in + if M.tracing then M.trace "c2po-meet" "Meet result = %s" (show res); + res + + let narrow a b = + let res = + if exactly_equal a b then + a + else + let terms_contained_in_a = function + | Equal (t1,t2,_) + | Nequal (t1,t2,_) + | BlNequal (t1,t2) -> + SSet.mem t1 a.set && SSet.mem t2 a.set + in + let b_conj = get_conjunction b in + let b_conj = List.filter terms_contained_in_a b_conj in + let meet = meet_conjs_opt b_conj a in + reset_normal_form meet + in + if M.tracing then M.trace "c2po-meet" "NARROW RESULT = %s" (show res); + res + + let leq x y = + try + equal (meet x y) x + with + Unsat -> false + + let pretty_diff () (x,y) = + let x_conj = get_conjunction x in + let y_conj = get_conjunction y in + let not_in conj a = + not (List.mem_cmp compare_prop a conj) + in + let x_diff = List.filter (not_in y_conj) x_conj in + let y_diff = List.filter (not_in x_conj) y_conj in + Pretty.dprintf ("Additional propositions of first element:\n%s\nAdditional propositions of second element:\n%s\n") (show_conj x_diff) (show_conj y_diff) + +end + + +module D = struct + include Lattice.LiftBot (C2PODomain) + + let show_all = function + | `Bot -> + show `Bot + | `Lifted x -> + show_all x + + let meet a b = + try + meet a b + with Unsat -> + `Bot + + let narrow a b = + try + narrow a b + with Unsat -> + `Bot + + let printXml f x = match x with + | `Lifted x -> + BatPrintf.fprintf f "\n\n\nnormal form\n\n\n%s\n\nuf\n\n\n%s\n\nsubterm set\n\n\n%s\n\nmap\n\n\n%s\n\nmin. repr\n\n\n%s\n\ndiseq\n\n\n%s\n\n\n" + (XmlUtil.escape (Format.asprintf "%s" (show (`Lifted x)))) + (XmlUtil.escape (Format.asprintf "%s" (TUF.show_uf x.uf))) + (XmlUtil.escape (Format.asprintf "%s" (SSet.show_set x.set))) + (XmlUtil.escape (Format.asprintf "%s" (LMap.show_map x.map))) + (XmlUtil.escape (Format.asprintf "%s" (show_normal_form x.normal_form))) + (XmlUtil.escape (Format.asprintf "%s" (Disequalities.show_neq x.diseq))) + | `Bot -> + BatPrintf.fprintf f "\nbottom\n\n" + + (** Remove terms from the data structure. + It removes all terms that contain an AssignAux variable, + while maintaining all equalities about variables that are not being removed.*) + let remove_terms_containing_aux_variable cc = + if M.tracing then M.trace "c2po" "remove_terms_containing_aux_variable\n"; + let is_assign_aux_term t = + let var = T.get_var t in + Var.is_assign_aux var + in + remove_terms is_assign_aux_term cc + + (** Remove terms from the data structure. + It removes all terms that contain an ReturnAux variable, + while maintaining all equalities about variables that are not being removed.*) + let remove_terms_containing_return_variable cc = + if M.tracing then M.trace "c2po" "remove_terms_containing_aux_variable\n"; + let is_return_aux_term t = + let var = T.get_var t in + Var.is_return_aux var + in + remove_terms is_return_aux_term cc + + (** Remove terms from the data structure. + It removes all terms which contain one of the "vars", + while maintaining all equalities about variables that are not being removed.*) + let remove_terms_containing_variables vars cc = + if M.tracing then M.trace "c2po" "remove_terms_containing_variables: %s\n" (List.fold_left (fun s v -> s ^"; " ^Var.show v) "" vars); + remove_terms (T.contains_variable vars) cc + + (** Remove terms from the data structure. + It removes all terms which do not contain one of the "vars", + except the global vars are also kept (when vglob = true), + while maintaining all equalities about variables that are not being removed.*) + let remove_terms_not_containing_variables vars cc = + if M.tracing then M.trace "c2po" "remove_terms_not_containing_variables: %s\n" (List.fold_left (fun s v -> s ^"; " ^Var.show v) "" vars); + let not_global_and_not_contains_variable t = + let var = T.get_var t in + not (DuplicateVars.VarType.vglob var) && not (T.contains_variable vars t) + in + remove_terms not_global_and_not_contains_variable cc + + (** Remove terms from the data structure. + It removes all terms that may be changed after an assignment to "term".*) + let remove_may_equal_terms ask size term cc = + if M.tracing then M.trace "c2po" "remove_may_equal_terms: %s\n" (T.show term); + let _, cc = insert cc term in + let may_equal_term = + MayBeEqual.may_be_equal ask cc size term + in + remove_terms may_equal_term cc + + (** Remove terms from the data structure. + It removes all terms that may point to one of the tainted addresses.*) + let remove_tainted_terms ask address cc = + if M.tracing then M.tracel "c2po-tainted" "remove_tainted_terms: %a\n" MayBeEqual.AD.pretty address; + let may_be_tainted = + MayBeEqual.may_point_to_one_of_these_addresses ask address cc + in + remove_terms may_be_tainted cc + + (** Remove terms from the data structure. + It removes all terms that are not in the scope, and also those that are tmp variables.*) + let remove_vars_not_in_scope scope cc = + let not_in_scope t = + let var = T.get_var t in + let var = Var.to_varinfo var in + InvariantCil.var_is_tmp var || not (InvariantCil.var_is_in_scope scope var) + in + remove_terms not_in_scope cc +end diff --git a/src/cdomains/congruenceClosure.ml b/src/cdomains/congruenceClosure.ml new file mode 100644 index 0000000000..80e22ad4c5 --- /dev/null +++ b/src/cdomains/congruenceClosure.ml @@ -0,0 +1,1591 @@ +(** OCaml implementation of a quantitative congruence closure. + It is used by the C-2PO Analysis and based on the UnionFind implementation. +*) +include UnionFind +open Batteries +open GoblintCil +open DuplicateVars +module M = Messages + + +module TUF = UnionFind +module LMap = LookupMap + + +(* block disequalities *) +module BlDis = struct + (** Block disequalities: + a term t1 is mapped to a set of terms that have a different block than t1. + It is allowed to contain terms that are not present in the data structure, + so we shouldn't assume that all terms in bldis are present in the union find! + *) + type t = TSet.t TMap.t [@@deriving eq, ord, hash] + + let bindings = TMap.bindings + let empty = TMap.empty + let is_empty = TMap.is_empty + + let to_conj bldiseq = List.fold + (fun list (t1, tset) -> + TSet.fold (fun t2 bldiseqs -> BlNequal(t1, t2)::bldiseqs) tset [] @ list + ) [] (bindings bldiseq) + + let add bldiseq t1 t2 = + match TMap.find_opt t1 bldiseq with + | None -> TMap.add t1 (TSet.singleton t2) bldiseq + | Some tset -> TMap.add t1 (TSet.add t2 tset) bldiseq + + (** Add disequalities bl(t1) != bl(t2) and bl(t2) != bl(t1). *) + let add_block_diseq bldiseq (t1, t2) = + add (add bldiseq t1 t2) t2 t1 + + (** + params: + + t1-> a term that is not necessarily present in the data structure + + tlist: a list of representative terms + + For each term t2 in tlist, it adds the disequality t1 != t2 to diseqs. + *) + let add_block_diseqs bldiseq uf t1 tlist = + let add_block_diseq_t1 bldiseq t2 = + add_block_diseq bldiseq (t1, t2) + in + List.fold add_block_diseq_t1 bldiseq tlist + + (** For each block disequality bl(t1) != bl(t2) we add all disequalities + that follow from equalities. I.e., if t1 = z1 + t1' and t2 = z2 + t2', + then we add the disequaity bl(t1') != bl(t2'). + *) + let element_closure bldis cmap = + let comp_closure = function + | BlNequal (r1,r2) -> + let eq_class1 = LMap.comp_t_cmap_repr cmap r1 in + let eq_class2 = LMap.comp_t_cmap_repr cmap r2 in + let terms1 = List.map snd eq_class1 in + let terms2 = List.map snd eq_class2 in + List.cartesian_product terms1 terms2 + | _ -> [] + in + List.concat_map comp_closure bldis + + (** Returns true if bl(v) != bl(v'). *) + let map_set_mem v v' (map:t) = + match TMap.find_opt v map with + | None -> false + | Some set -> TSet.mem v' set + + let filter_map f (diseq:t) = + let filter_map_set _ s = + let set = TSet.filter_map f s in + if TSet.is_empty set then None else Some set + in + TMap.filter_map filter_map_set diseq + + let map f (diseq:t) = + let map_set s = + TSet.map f s + in + TMap.map map_set diseq + + let term_set bldis = + TSet.of_enum (TMap.keys bldis) + + let map_lhs = + let add_change bldis (t1,t2) = + match TMap.find_opt t1 bldis with + | None -> bldis + | Some tset -> TMap.add t2 tset (TMap.remove t1 bldis) + in + List.fold add_change + + let filter_map_lhs f (diseq:t) = + let filter_map_lhs diseq t = + match f t with + | None -> TMap.remove t diseq + | Some t2 -> + if not (T.equal t t2) then + TMap.add t2 (TMap.find t diseq) (TMap.remove t diseq) + else + diseq + in + Enum.fold filter_map_lhs diseq (TMap.keys diseq) +end + +module Disequalities = struct + + (* disequality map: + if t_1 -> z -> {t_2, t_3} + then we know that t_1 + z != t_2 + and also that t_1 + z != t_3 + *) + type t = TSet.t ZMap.t TMap.t [@@deriving eq, ord, hash] (* disequalitites *) + type arg_t = (T.t * Z.t) ZMap.t TMap.t (* maps each state in the automata to its predecessors *) + + let empty = TMap.empty + let is_empty = TMap.is_empty + let remove = TMap.remove + (** Returns a list of tuples, which each represent a disequality *) + let bindings = + List.flatten % + List.concat_map + (fun (t, smap) -> + List.map (fun (z, tset) -> + List.map (fun term -> + (t,z,term)) (TSet.elements tset)) + (ZMap.bindings smap) + ) % TMap.bindings + + (** adds a mapping v -> r -> size -> { v' } to the map, + or if there are already elements + in v -> r -> {..} then v' is added to the previous set *) + let map_set_add (v,r) v' (map:t) = + match TMap.find_opt v map with + | None -> + let inner_map = ZMap.add r (TSet.singleton v') ZMap.empty in + TMap.add v inner_map map + | Some imap -> + let set = match ZMap.find_opt r imap with + | None -> TSet.singleton v' + | Some set -> TSet.add v' set + in + let inner_map = ZMap.add r set imap in + TMap.add v inner_map map + + let map_set_mem (v,r) v' (map:t) = + let mem_inner_map inner_map = + Option.map_default (TSet.mem v') false (ZMap.find_opt r inner_map) + in + Option.map_default mem_inner_map false (TMap.find_opt v map) + + (** Map of partition, transform union find to a map + of type V -> Z -> V set + with reference variable |-> offset |-> all terms that are in the union find with this ref var and offset. *) + let comp_map uf = + let f (comp,uf) (v,_) = + let t, z, uf = TUF.find uf v in + map_set_add (t,z) v comp,uf + in + List.fold_left f (TMap.empty, uf) (TMap.bindings uf) + + (** Find all elements that are in the same equivalence class as t. *) + let comp_t uf t = + let (t',z',uf) = TUF.find uf t in + let f (comp,uf) (v,((p,z),_)) = + let v', z'',uf = TUF.find uf v in + if T.equal v' t' then + (v, Z.(z'-z''))::comp,uf + else + comp, uf + in + let equivs, _ = List.fold_left f ([],uf) (TMap.bindings uf) in + equivs + + (** Returns: "arg" map: + + maps each representative term t to a map that maps an integer Z to + a list of representatives t' of v where *(v + z') is + in the representative class of t. + + It basically maps each state in the automata to its predecessors. *) + let get_args uf = + let do_term (uf,xs) el = + match el with + | Deref (v', r', _) -> + let v0, r0, uf = TUF.find uf v' in + let x = (v0, Z.(r0 + r')) in + uf, x::xs + | _ -> uf, xs + in + let do_set iarg (r,set) = + let uf,list = List.fold_left do_term (uf,[]) (TSet.elements set) in + ZMap.add r list iarg + in + let do_bindings arg (v, imap) = + let ilist = ZMap.bindings imap in + let iarg = List.fold_left do_set ZMap.empty ilist in + TMap.add v iarg arg + in + let cmap, uf = comp_map uf in + let clist = TMap.bindings cmap in + let arg = List.fold_left do_bindings TMap.empty clist in + (uf, cmap, arg) + + let fold_left2 f acc l1 l2 = + List.fold_left ( + fun acc x -> List.fold_left ( + fun acc y -> f acc x y) acc l2) acc l1 + + let map2 f l1 l2 = + let map_f x = List.map (f x) l2 in + List.concat_map map_f l1 + + let map_find_opt (v,r) map = + let inner_map = TMap.find_opt v map in + Option.map_default (ZMap.find_opt r) None inner_map + + let map_find_all t map = + let inner_map = TMap.find_opt t map in + let bindings = Option.map ZMap.bindings inner_map in + let concat = + (* TODO: Can one change returned order to xs@acc? *) + List.fold (fun acc (z, xs) -> acc@xs) [] + in + Option.map_default concat [] (bindings) + + (* Helper function for check_neq and check_neq_bl, to add a disequality, if it adds information and is not impossible *) + let add_diseq acc (v1, r1') (v2, r2') = + if T.equal v1 v2 then + if Z.equal r1' r2' then + raise Unsat + else + acc + else + (v1, v2, Z.(r2'-r1'))::acc + + (** Find all disequalities of the form t1 != z2-z1 + t2 + that can be inferred from equalities of the form *(z1 + t1) = *(z2 + t2). + *) + let check_neq (_,arg) acc (v,zmap) = + let f acc (r1,_) (r2,_) = + if Z.equal r1 r2 then + acc + else (* r1 <> r2 *) + let l1 = Option.default [] (map_find_opt (v,r1) arg) in + let l2 = Option.default [] (map_find_opt (v,r2) arg) in + fold_left2 add_diseq acc l1 l2 + in + let zlist = ZMap.bindings zmap in + fold_left2 f acc zlist zlist + + (** Find all disequalities of the form t1 != z2-z1 + t2 + that can be inferred from block equalities of the form bl( *(z1 + t1) ) = bl( *(z2 + t2) ). + *) + let check_neq_bl (uf,arg) acc (t1, tset) = + let l1 = map_find_all t1 arg in + let f acc t2 = + (* We know that r1 <> r2, otherwise it would be Unsat. *) + let l2 = map_find_all t2 arg in + fold_left2 add_diseq acc l1 l2 + in + List.fold f acc (TSet.to_list tset) + + (** Initialize the list of disequalities taking only implicit dis-equalities into account. + + Returns: List of dis-equalities implied by the equalities. *) + let init_neq (uf,cmap,arg) = + List.fold_left (check_neq (uf,arg)) [] (TMap.bindings cmap) + + (** Initialize the list of disequalities taking only implicit dis-equalities into account. + + Returns: List of dis-equalities implied by the block disequalities. *) + let init_neg_block_diseq (uf, bldis, cmap, arg) = + List.fold_left (check_neq_bl (uf,arg)) [] (TMap.bindings bldis) + + (** Initialize the list of disequalities taking explicit dis-equalities into account. + + Parameters: union-find partition, explicit disequalities. + + Returns: list of normalized provided dis-equalities *) + let init_list_neq uf neg = + let add_diseq (uf, list) (v1,v2,r) = + let v1, r1, uf = TUF.find uf v1 in + let v2, r2, uf = TUF.find uf v2 in + if T.equal v1 v2 then + if Z.(equal r1 (r2+r)) then + raise Unsat + else + uf, list + else + uf, (v1, v2, Z.(r2-r1+r))::list + in + List.fold_left add_diseq (uf,[]) neg + + (** Parameter: list of disequalities (t1, t2, z), where t1 and t2 are roots. + + Returns: map `neq` where each representative is mapped to a set of representatives it is not equal to. + *) + let rec propagate_neq (uf,(cmap: TSet.t ZMap.t TMap.t),arg,neq) bldis = function (* v1, v2 are distinct roots with v1 != v2+r *) + | [] -> neq (* uf need not be returned: has been flattened during constr. of cmap *) + | (v1,v2,r) :: rest -> + (* we don't want to explicitly store disequalities of the kind &x != &y *) + if T.is_addr v1 && T.is_addr v2 || BlDis.map_set_mem v1 v2 bldis then + propagate_neq (uf,cmap,arg,neq) bldis rest + else (* v1, v2 are roots; v2 -> r,v1 not yet contained in neq *) + if T.equal v1 v2 then + if Z.equal r Z.zero then + raise Unsat + else + propagate_neq (uf,cmap,arg,neq) bldis rest + else (* check whether it is already in neq *) + if map_set_mem (v1,Z.(-r)) v2 neq then + propagate_neq (uf,cmap,arg,neq) bldis rest + else + let neq = map_set_add (v1,Z.(-r)) v2 neq |> map_set_add (v2,r) v1 in + (* + search components of v1, v2 for elements at distance r to obtain inferred equalities + at the same level (not recorded) and then compare their predecessors + *) + match TMap.find_opt v1 (cmap:t), TMap.find_opt v2 cmap with + | None,_ + | _,None -> (*should not happen*) + propagate_neq (uf,cmap,arg,neq) bldis rest + | Some imap1, Some imap2 -> + let f rest (r1,_) = + match ZMap.find_opt Z.(r1+r) imap2 with + | None -> rest + | Some _ -> + let l1 = Option.default [] (map_find_opt (v1,r1) arg) in + let l2 = Option.default [] (map_find_opt (v2,Z.(r1+r)) arg) in + fold_left2 add_diseq rest l1 l2 + in + let ilist1 = ZMap.bindings imap1 in + let rest = List.fold_left f rest ilist1 in + propagate_neq (uf,cmap,arg,neq) bldis rest + (* + collection of disequalities: + * disequalities originating from different offsets of same root + * disequalities originating from block disequalities + * stated disequalities + * closure by collecting appropriate args + for a disequality v1 != v2 +r for distinct roots v1,v2 + check whether there is some r1, r2 such that r1 = r2 +r + then dis-equate the sets at v1,r1 with v2,r2. + *) + + (** Produces a string for the number used as offset; helper function for show* functions below.*) + let show_number r = + if Z.equal r Z.zero then + "" + else if Z.leq r Z.zero then + Z.to_string r + else + " + " ^ Z.to_string r + + let show_neq neq = + let clist = bindings neq in + let do_neq = + (fun s (v,r,v') -> + s ^ "\t" ^ T.show v ^ show_number r ^ " != " ^ T.show v' ^ "\n") + in + List.fold_left do_neq "" clist + + let get_disequalities = + let to_disequality (t1, z, t2) = + Nequal (t1, t2, Z.(-z)) + in + List.map to_disequality % bindings + + (** For each disequality t1 != z + t2 we add all disequalities + that follow from equalities. I.e., if t1 = z1 + t1' and t2 = z2 + t2', + then we add the disequaity t1' != z + z2 - z1 + t2'. + *) + let element_closure diseqs cmap uf = + let comp_closure (r1,r2,z) = + let eq_class1 = LMap.comp_t_cmap_repr cmap r1 in + let eq_class2 = LMap.comp_t_cmap_repr cmap r2 in + let to_diseq ((z1, nt1), (z2, nt2)) = + (nt1, nt2, Z.(-z2+z+z1)) + in + List.map to_diseq (List.cartesian_product eq_class1 eq_class2) + in + List.concat_map comp_closure diseqs +end + +(** Set of subterms which are present in the current data structure. *) +module SSet = struct + type t = TSet.t [@@deriving eq, ord, hash] + + let elements = TSet.elements + let mem = TSet.mem + let add = TSet.add + let fold = TSet.fold + let empty = TSet.empty + let to_list = TSet.to_list + let inter = TSet.inter + let find_opt = TSet.find_opt + let union = TSet.union + + let show_set set = + let show_element v s = + s ^ "\t" ^ T.show v ^ ";\n" + in + TSet.fold show_element set "" ^ "\n" + + (** Adds all subterms of t to the SSet and the LookupMap*) + let rec subterms_of_term (set,map) t = + match t with + | Addr _ + | Aux _ -> (add t set, map) + | Deref (t',z,_) -> + let set = add t set in + let map = LMap.map_add (t',z) t map in + subterms_of_term (set, map) t' + + let subterms_of_conj list = + (** Adds all subterms of the proposition to the SSet and the LookupMap*) + let subterms_of_prop (set, map) (t1, t2, _) = + let subterms' = subterms_of_term (set, map) t1 in + subterms_of_term subterms' t2 + in + List.fold_left subterms_of_prop (TSet.empty, LMap.empty) list + + let fold_atoms f (acc: 'a) set:'a = + let exception AtomsDone in + let res = ref acc in + let do_atom (v: T.t) acc = + match v with + | Addr _ + | Aux _ -> + f acc v + | _ -> + res := acc; + raise AtomsDone + in + try + TSet.fold do_atom set acc + with AtomsDone -> !res + + let get_atoms set = + (* `elements set` returns a sorted list of the elements. The atoms are always smaller that other terms, + according to our comparison function. Therefore take_while is enough. *) + let is_atom = function + | Addr _ + | Aux _ -> true + | _ -> false + in + BatList.take_while is_atom (elements set) + + (** We try to find the dereferenced term between the already existing terms, in order to remember the information about the exp. *) + let deref_term t z set = + let exp = T.to_cil t in + match find_opt (Deref (t, z, exp)) set with + | None -> Deref (t, z, T.dereference_exp exp z) + | Some t -> t + + (** Sometimes it's important to keep the dereferenced term, + even if it's not technically possible to dereference it from a point of view of the C types. + We still need the dereferenced term for the correctness of some algorithms, + and the resulting expression will never be used, so it doesn't need to be a + C expression that makes sense. *) + let deref_term_even_if_its_not_possible min_term z set = + try + deref_term min_term z set + with (T.UnsupportedCilExpression _) -> + let random_type = (TPtr (TPtr (TInt (ILong,[]),[]),[])) in (*the type is not so important for min_repr and get_normal_form*) + let cil_offset = + (* TODO: Is there a "valid" reason for to throw an exception? If not, just propagate exception here. *) + try T.to_cil_constant z (Some random_type) + with (T.UnsupportedCilExpression _) -> + Const (CInt (Z.zero, T.default_int_type, Some (Z.to_string z))) + in + Deref (min_term, z, Lval (Mem (BinOp (PlusPI, T.to_cil(min_term), cil_offset, random_type)), NoOffset)) + +end + +(** Minimal representatives map. + It maps each representative term of an equivalence class to the minimal term of this representative class. + rep -> (t, z) means that t = rep + z *) +module MRMap = struct + type t = (T.t * Z.t) TMap.t [@@deriving eq, ord, hash] + + let bindings = TMap.bindings + let find = TMap.find + let find_opt = TMap.find_opt + let add = TMap.add + let remove = TMap.remove + let mem = TMap.mem + let empty = TMap.empty + + let show_min_rep min_representatives = + let show_one_rep s (state, (rep, z)) = + s ^ "\tState: " ^ T.show state ^ + "\n\tMin: (" ^ T.show rep ^ ", " ^ Z.to_string z ^ ")--\n\n" + in + List.fold_left show_one_rep "" (bindings min_representatives) + + let rec update_min_repr (uf, set, map) min_representatives = function + | [] -> min_representatives + | state::queue -> (* process all outgoing edges in order of ascending edge labels *) + match LMap.successors state map with + | edges -> + let process_edge (min_representatives, queue, uf) (edge_z, next_term) = + let next_state, next_z = TUF.find_no_pc uf next_term in + let (min_term, min_z) = find state min_representatives in + let next_min = + (SSet.deref_term_even_if_its_not_possible min_term Z.(edge_z - min_z) set, next_z) in + match TMap.find_opt next_state min_representatives + with + | None -> + (add next_state next_min min_representatives, queue @ [next_state], uf) + | Some current_min when T.compare (fst next_min) (fst current_min) < 0 -> + (add next_state next_min min_representatives, queue @ [next_state], uf) + | _ -> (min_representatives, queue, uf) + in + let (min_representatives, queue, uf) = List.fold_left process_edge (min_representatives, queue, uf) edges + in update_min_repr (uf, set, map) min_representatives queue + + (** Uses dijkstra algorithm to update the minimal representatives of + the successor nodes of all edges in the queue + and if necessary it recursively updates the minimal representatives of the successor nodes. + The states in the queue must already have an updated min_repr. + This function visits only the successor nodes of the nodes in queue, not the nodes themselves. + Before visiting the nodes, it sorts the queue by the size of the current mininmal representative. + + parameters: + + - `(uf, set, map)` represent the union find data structure and the corresponding term set and lookup map. + - `min_representatives` maps each representative of the union find data structure to the minimal representative of the equivalence class. + - `queue` contains the states that need to be processed. + The states of the automata are the equivalence classes and each state of the automata is represented by the representative term. + Therefore the queue is a list of representative terms. + + Returns: + - The updated `min_representatives` map with the minimal representatives*) + let update_min_repr (uf, set, map) min_representatives queue = + (* order queue by size of the current min representative *) + let compare el1 el2 = + let compare_repr = TUF.compare_repr (find el1 min_representatives) (find el2 min_representatives) in + if compare_repr = 0 then + T.compare el1 el2 + else + compare_repr + in + let roots = List.filter (TUF.is_root uf) queue in + let sorted_roots = List.sort_unique compare roots in + update_min_repr (uf, set, map) min_representatives sorted_roots + + (** + Computes a map that maps each representative of an equivalence class to the minimal representative of the equivalence class. + + Returns: + - The map with the minimal representatives. + *) + let compute_minimal_representatives (uf, set, map) = + if M.tracing then M.trace "c2po-normal-form" "compute_minimal_representatives\n"; + let atoms = SSet.get_atoms set in + (* process all atoms in increasing order *) + let compare el1 el2 = + let v1, z1 = TUF.find_no_pc uf el1 in + let v2, z2 = TUF.find_no_pc uf el2 in + let repr_compare = TUF.compare_repr (v1, z1) (v2, z2) in + if repr_compare = 0 then + T.compare el1 el2 + else + repr_compare + in + let atoms = List.sort compare atoms in + let add_atom_to_map (min_representatives, queue, uf) a = + let rep, offs = TUF.find_no_pc uf a in + if mem rep min_representatives then + (min_representatives, queue, uf) + else + (add rep (a, offs) min_representatives, queue @ [rep], uf) + in + let (min_representatives, queue, uf) = List.fold_left add_atom_to_map (empty, [], uf) atoms + (* compute the minimal representative of all remaining edges *) + in update_min_repr (uf, set, map) min_representatives queue + + let compute_minimal_representatives a = + Timing.wrap "c2po-compute-min-repr" compute_minimal_representatives a + + (** Computes the initial map of minimal representatives. + It maps each element `e` in the set to `(e, 0)`. *) + let initial_minimal_representatives set = + let add_element map element = + add element (element, Z.zero) map + in + List.fold_left add_element empty (SSet.elements set) +end + +(** + This is the type for the abstract domain. + + - `uf` is the union find tree + + - `set` is the set of terms that are currently considered. + It is the set of terms that have a mapping in the `uf` tree. + + - `map` maps reference variables v to a map that maps integers z to terms that are equivalent to *(v + z). + It represents the transitions of the quantitative finite automata. + + - `normal_form` is the unique normal form of the domain element, it is a lazily computed when needed and stored such that it can be used again later. + + - `diseq` represents the disequalities. It is a map from a term t1 to a map from an integer z to a set of terms T, which represents the disequality t1 != z + t2 for each t2 in T. + + - `bldis` represents the block disequalities. It is a map that maps each term to a set of terms that are definitely in a different block. +*) +type t = {uf: TUF.t; + set: SSet.t; + map: LMap.t; + normal_form: (T.v_prop list Lazy.t[@compare fun _ _ -> 0][@eq fun _ _ -> true][@hash fun _ -> 0]); + diseq: Disequalities.t; + bldis: BlDis.t} +[@@deriving eq, ord, hash] + +let show_conj list = + match list with + | [] -> "top" + | list -> + let show_prop s d = + s ^ "\t" ^ T.show_prop d ^ ";\n" + in + List.fold_left show_prop "" list + +(** Returns a list of all the transition that are present in the automata. *) +let get_transitions (uf , map) = + let do_binding t (edge_z, res_t) = + (edge_z, t, TUF.find_no_pc uf res_t) + in + let do_bindings ((t : term), zmap) = + let bindings = LMap.zmap_bindings zmap in + List.map (do_binding t) bindings + in + List.concat_map do_bindings (LMap.bindings map) + +let exactly_equal cc1 cc2 = + cc1.uf == cc2.uf && + cc1.map == cc2.map && + cc1.diseq == cc2.diseq && + cc1.bldis == cc2.bldis + +(** Converts the domain representation to a conjunction, using + the function `get_normal_repr` to compute the representatives that need to be used in the conjunction.*) +let get_normal_conjunction cc get_normal_repr = + let is_non_trivial_equality (t1, t2, z) = + not (T.equal t1 t2 && Z.(equal z zero)) + in + let to_equality (t1, t2, z) = + Equal (t1, t2, z) + in + let conjunctions_of prop_of xs = + Seq.of_list xs + |> Seq.map prop_of + |> Seq.filter is_non_trivial_equality + |> Seq.map to_equality + |> List.of_seq + in + let conjunctions_of_atoms = + let eq_of_atom atom = + let rep_state, rep_z = TUF.find_no_pc cc.uf atom in + let min_state, min_z = get_normal_repr rep_state in + (atom, min_state, Z.(rep_z - min_z)) + in + let atoms = SSet.get_atoms cc.set in + conjunctions_of eq_of_atom atoms + in + let conjunctions_of_transitions = + let transitions = get_transitions (cc.uf, cc.map) in + let eq_of_transition (z, s, (s', z')) = + let min_state, min_z = get_normal_repr s in + let min_state', min_z' = get_normal_repr s' in + (SSet.deref_term_even_if_its_not_possible min_state Z.(z - min_z) cc.set, min_state', Z.(z' - min_z')) + in + conjunctions_of eq_of_transition transitions + in + (* disequalities *) + let disequalities = Disequalities.get_disequalities cc.diseq in + (* find disequalities between min_repr *) + let normalize_disequality (t1, t2, z) = + let min_state1, min_z1 = get_normal_repr t1 in + let min_state2, min_z2 = get_normal_repr t2 in + let new_offset = Z.(-min_z2 + min_z1 + z) in + if T.compare min_state1 min_state2 < 0 then + Nequal (min_state1, min_state2, new_offset) + else + Nequal (min_state2, min_state1, Z.(-new_offset)) + in + let normalize_disequality = function + | Nequal (t1,t2,z) -> normalize_disequality (t1, t2, z) + | Equal (t1,t2,z) -> failwith "No equality expected." + | BlNequal (t1,t2) -> failwith "No block disequality expected." + in + if M.tracing then M.trace "c2po-diseq" "DISEQUALITIES: %s;\nUnion find: %s\nMap: %s\n" (show_conj disequalities) (TUF.show_uf cc.uf) (LMap.show_map cc.map); + let disequalities = List.map normalize_disequality disequalities in + (* block disequalities *) + let normalize_bldis t = match t with + | BlNequal (t1,t2) -> + let min_state1, _ = get_normal_repr t1 in + let min_state2, _ = get_normal_repr t2 in + if T.compare min_state1 min_state2 < 0 then + BlNequal (min_state1, min_state2) + else + BlNequal (min_state2, min_state1) + | _ -> failwith "Expected a block disequality, but received something different." + in + let conjunctions_of_bl_diseqs = List.map normalize_bldis @@ BlDis.to_conj cc.bldis in + (* all propositions *) + let all_props = conjunctions_of_atoms @ conjunctions_of_transitions @ disequalities @ conjunctions_of_bl_diseqs in + BatList.sort_unique T.compare_v_prop all_props + +(** Returns the canonical normal form of the data structure in form of a sorted list of conjunctions. *) +let get_normal_form cc = + if M.tracing && not (Lazy.is_val cc.normal_form) then M.trace "c2po-normal-form" "Computing normal form"; + Lazy.force cc.normal_form + +(** Converts the normal form to string, but only if it was already computed. *) +let show_normal_form normal_form = + if Lazy.is_val normal_form then + show_conj (Lazy.force normal_form) + else + "not computed" + +(** Returns a list of conjunctions that follow from the data structure in form of a sorted list of conjunctions. + This is not a normal form, but it is useful to print information + about the current state of the analysis. *) +let get_conjunction cc = + get_normal_conjunction cc (fun t -> t, Z.zero) + +(** Sets the normal_form to an uncomputed value, + that will be lazily computed when it is needed. *) +let reset_normal_form cc = + let f min_repr t = + match MRMap.find_opt t min_repr with + | None -> t, Z.zero + | Some minr -> minr + in + {cc with normal_form = + lazy( + let min_repr = MRMap.compute_minimal_representatives (cc.uf, cc.set, cc.map) in + let conj = get_normal_conjunction cc (f min_repr) in + + if M.tracing then M.trace "c2po-min-repr" "Computed minimal represntative: %s" (MRMap.show_min_rep min_repr); + if M.tracing then M.trace "c2po-equal" "Computed normal form: %s" (show_conj conj); + + conj + )} + +let show_all x = + "Normal form:\n" ^ + show_conj((get_conjunction x)) ^ + "Union Find partition:\n" ^ + TUF.show_uf x.uf + ^ "\nSubterm set:\n" + ^ SSet.show_set x.set + ^ "\nLookup map/transitions:\n" + ^ LMap.show_map x.map + ^ "\nNeq:\n" + ^ Disequalities.show_neq x.diseq + ^ "\nBlock diseqs:\n" + ^ show_conj (BlDis.to_conj x.bldis) + ^ "\nMin repr:\n" + ^ show_normal_form x.normal_form + +(** Splits the conjunction into three groups: the first one contains all equality propositions, + the second one contains all inequality propositions + and the third one contains all block disequality propositions. *) +let split conj = + let group (eqs, neqs, blds) = function + | Equal eq -> (eq::eqs, neqs, blds) + | Nequal neq -> (eqs, neq::neqs, blds) + | BlNequal bld -> (eqs, neqs, bld::blds) + in + List.fold_left group ([],[],[]) conj + +(** + Returns \{uf, set, map, normal_form, bldis, diseq\}, + where all data structures are initialized with an empty map/set. +*) +let init_cc () = + {uf = TUF.empty; + set = SSet.empty; + map = LMap.empty; + normal_form = lazy([]); + diseq = Disequalities.empty; + bldis = BlDis.empty} + +(** Computes the closure of disequalities. *) +let congruence_neq cc neg' = + let _, neg, _ = split (Disequalities.get_disequalities cc.diseq) in + let neg = neg @ neg' in + (* get args of dereferences *) + let uf, cmap, arg = Disequalities.get_args cc.uf in + (* take implicit dis-equalities into account *) + let neq_list = Disequalities.init_neq (uf, cmap, arg) in + let neq_list_from_bldis = Disequalities.init_neg_block_diseq (uf, cc.bldis, cmap, arg) in + let neq_list = neq_list @ neq_list_from_bldis in + let neq = Disequalities.propagate_neq (uf, cmap, arg, Disequalities.empty) cc.bldis neq_list in + + (* take explicit dis-equalities into account *) + let uf, neq_list = Disequalities.init_list_neq uf neg in + let neq = Disequalities.propagate_neq (uf, cmap, arg, neq) cc.bldis neq_list in + if M.tracing then M.trace "c2po-neq" "congruence_neq: %s\nUnion find: %s\n" (Disequalities.show_neq neq) (TUF.show_uf uf); + {cc with uf; diseq = neq} + +(** + parameters: (uf, map, new_repr) equalities. + + returns updated (uf, map, new_repr), where: + + `uf` is the new union find data structure after having added all equalities. + + `map` maps reference variables v to a map that maps integers z to terms that are equivalent to *(v + z). + + `new_repr` maps each term that changed its representative term to the new representative. + It can be given as a parameter to `update_bldis` in order to update the representatives in the block disequalities. + + Throws "Unsat" if a contradiction is found. +*) +let rec closure (uf, map, new_repr) = function + | [] -> (uf, map, new_repr) + | (t1, t2, r)::rest -> + let v1, r1, uf = TUF.find uf t1 in + let v2, r2, uf = TUF.find uf t2 in + let sizet1, sizet2 = T.get_size t1, T.get_size t2 in + if not (Z.equal sizet1 sizet2) then ( + if M.tracing then M.trace "c2po" "ignoring equality because the sizes are not the same: %s = %s + %s" (T.show t1) (Z.to_string r) (T.show t2); + closure (uf, map, new_repr) rest + ) + else if T.equal v1 v2 then + (* t1 and t2 are in the same equivalence class *) + if Z.equal r1 Z.(r2 + r) then + closure (uf, map, new_repr) rest + else + raise Unsat + else + let diff_r = Z.(r2 - r1 + r) in + let v, uf, b = TUF.union uf v1 v2 diff_r in (* union *) + (* update new_representative *) + let new_repr = + if T.equal v v1 then + TMap.add v2 v new_repr + else + TMap.add v1 v new_repr + in + let f (zmap, rest) (r', v') = + let rest = match LMap.zmap_find_opt r' zmap with + | None -> rest + | Some v'' -> (v', v'',Z.zero) ::rest + in + let zmap = LMap.zmap_add r' v' zmap in + zmap, rest + in + (* update map *) + let map, rest = + match LMap.find_opt v1 map, LMap.find_opt v2 map, b with + | None, _, false + | _, None, true -> (* either v1 or v2 does not occur inside Deref *) + map, rest + | None, Some _, true -> + LMap.shift v1 Z.(r1 - r2 - r) v2 map, rest + | Some _, None, false -> + LMap.shift v2 Z.(r2 - r1 + r) v1 map, rest + | Some imap1, Some imap2, true -> (* v1 is new root *) + (* zmap describes args of Deref *) + let r0 = Z.(r2 - r1 + r) in (* difference between roots *) + (* we move all entries of imap2 to imap1 *) + let infl2 = List.map (fun (r', v') -> Z.(-r0 + r'), v') (LMap.zmap_bindings imap2) in + let zmap, rest = List.fold_left f (imap1, rest) infl2 in + let map = LMap.add v zmap map in + let map = LMap.remove v2 map in + map, rest + | Some imap1, Some imap2, false -> (* v2 is new root *) + let r0 = Z.(r1 - r2 - r) in + let infl1 = List.map (fun (r', v') -> Z.(-r0 + r'),v') (LMap.zmap_bindings imap1) in + let zmap, rest = List.fold_left f (imap2, rest) infl1 in + let map = LMap.add v zmap map in + let map = LMap.remove v1 map in + map, rest + in + closure (uf, map, new_repr) rest + +(** Update block disequalities with the new representatives. *) +let update_bldis new_repr bldis = + let bldis = BlDis.map_lhs bldis (TMap.bindings new_repr) in + (* update block disequalities with the new representatives *) + let find_new_root t1 = Option.default t1 (TMap.find_opt t1 new_repr) in + BlDis.map find_new_root bldis + +(** + Parameters: cc conjunctions. + + returns updated cc, where: + + - `uf` is the new union find data structure after having added all equalities. + + - `set` doesn't change + + - `map` maps reference variables v to a map that maps integers z to terms that are equivalent to *(v + z). + + - `diseq` doesn't change (it must be updated later to the new representatives). + + - `bldis` are the block disequalities between the new representatives. + + Throws "Unsat" if a contradiction is found. + This does NOT update the disequalities. + They need to be updated with `congruence_neq`. +*) +let closure cc conjs = + let (uf, map, new_repr) = closure (cc.uf, cc.map, TMap.empty) conjs in + let bldis = update_bldis new_repr cc.bldis in + {cc with uf; map; bldis} + +(** Adds the block disequalities to the cc, but first rewrites them such that + they are disequalities between representatives. The cc should already contain + all the terms that are present in those block disequalities. +*) +let rec add_normalized_bl_diseqs cc = function + | [] -> cc + | (t1,t2)::bl_conjs -> + let t1',_,uf = TUF.find cc.uf t1 in + let t2',_,uf = TUF.find uf t2 in + if T.equal t1' t2' then + raise Unsat (*unsatisfiable*) + else + let bldis = BlDis.add_block_diseq cc.bldis (t1',t2') in + add_normalized_bl_diseqs {cc with bldis; uf} bl_conjs + +(** Add a term to the data structure. + + Returns (reference variable, offset), updated congruence closure *) +let rec insert cc t = + if SSet.mem t cc.set then + let v, z, uf = TUF.find cc.uf t in + (v,z), {cc with uf} + else + match t with + | Addr _ + | Aux _ -> + let uf = TUF.ValMap.add t ((t, Z.zero), 1) cc.uf in + let set = SSet.add t cc.set in + (t, Z.zero), {cc with uf; set} + | Deref (t', z, exp) -> + let (v, r), cc = insert cc t' in + let set = SSet.add t cc.set in + match LMap.map_find_opt (v, Z.(r + z)) cc.map with + | Some v' -> + let v2, z2, uf = TUF.find cc.uf v' in + let uf = LMap.add t ((t, Z.zero),1) uf in + let cc = closure {cc with uf; set} [(t, v', Z.zero)] in + (v2, z2), cc + | None -> + let map = LMap.map_add (v, Z.(r + z)) t cc.map in + let uf = LMap.add t ((t, Z.zero),1) cc.uf in + (t, Z.zero), {cc with uf; set; map} + +(** Add all terms in a specific set to the data structure. + + Returns updated cc. *) +let insert_set cc t_set = + let insert_term t cc = + let _, cc = insert cc t in + cc + in + SSet.fold insert_term t_set cc + +(** Returns true if t1 and t2 are equivalent. *) +let rec eq_query cc (t1,t2,r) = + let (v1, r1), cc = insert cc t1 in + let (v2, r2), cc = insert cc t2 in + if T.equal v1 v2 && Z.equal r1 Z.(r2 + r) then + (true, cc) + (* If the equality is *(t1' + z1) = *(t2' + z2), then we check if the two pointers are equal, + i.e. if t1' + z1 = t2' + z2. + This is useful when the dereferenced elements are not pointers and therefore not stored in our data strutcure. + But we still know that they are equal if the pointers are equal. *) + else if Z.equal r Z.zero then + match t1, t2 with + | Deref (t1', z1, _), Deref (t2', z2, _) -> + eq_query cc (t1', t2', Z.(z2 - z1)) + | _ -> + (false, cc) + else + (false,cc) + +let block_neq_query cc (t1,t2) = + let (v1, r1), cc = insert cc t1 in + let (v2, r2), cc = insert cc t2 in + BlDis.map_set_mem v1 v2 cc.bldis + +(** Returns true if t1 and t2 are not equivalent. *) +let neq_query cc (t1, t2, r) = + (* we implicitly assume that &x != &y + z *) + if T.is_addr t1 && T.is_addr t2 then + true + else + let (v1, r1), cc = insert cc t1 in + let (v2, r2), cc = insert cc t2 in + (* implicit disequalities following from equalities *) + if T.equal v1 v2 then + not Z.(equal r1 (r2 + r)) + else + (* implicit disequalities following from block disequalities *) + BlDis.map_set_mem v1 v2 cc.bldis || + (* explicit dsequalities *) + Disequalities.map_set_mem (v2, Z.(r2 - r1 + r)) v1 cc.diseq + +(** Adds equalities to the data structure. + Throws "Unsat" if a contradiction is found. + Does not update disequalities. *) +let meet_pos_conjs cc pos_conjs = + let res = + let subterms, _ = SSet.subterms_of_conj pos_conjs in + let cc = insert_set cc subterms in + closure cc pos_conjs + in + if M.tracing then M.trace "c2po-meet" "meet_pos_conjs result: %s\n" (show_conj (get_conjunction res)); + res + +(** Adds propositions to the data structure. + Returns None if a contradiction is found. *) +let meet_conjs_opt conjs cc = + let pos_conjs, neg_conjs, bl_conjs = split conjs in + + (* Convert bl_conjs into format that can be given to SSet.subterms_of_conj *) + let bl_conjs_as_props = List.map (fun (t1, t2)->(t1, t2, Z.zero)) bl_conjs in + let terms_to_add, _ = SSet.subterms_of_conj (neg_conjs @ bl_conjs_as_props) in + + let cc = meet_pos_conjs cc pos_conjs in + let cc = insert_set cc terms_to_add in + let cc = add_normalized_bl_diseqs cc bl_conjs in + congruence_neq cc neg_conjs + +(** Add proposition t1 = t2 + r to the data structure. + Does not update the disequalities. *) +let add_eq cc (t1, t2, r) = + let (v1, r1), cc = insert cc t1 in + let (v2, r2), cc = insert cc t2 in + let cc = closure cc [v1, v2, Z.(r2 - r1 + r)] in + cc + +(** Adds block disequalities to cc: + for each representative t in cc it adds the disequality bl(lterm) != bl(t)*) +let add_block_diseqs cc lterm = + let representatives = TUF.get_representatives cc.uf in + let bldis = BlDis.add_block_diseqs cc.bldis cc.uf lterm representatives in + {cc with bldis} + +(* Remove variables: *) + +(** Removes terms from the union find and the lookup map. *) +let remove_terms_from_eq predicate cc = + let insert_term cc t = + let _, cc = insert cc t in + cc + in + let insert_terms cc = List.fold insert_term cc in + (* start from all initial states that are still valid and find new representatives if necessary *) + (* new_reps maps each representative term to the new representative of the equivalence class *) + (* but new_reps contains an element but not necessarily the representative *) + let find_new_repr state old_rep old_z new_reps = + match LMap.find_opt old_rep new_reps with + | Some (new_rep,z) -> + new_rep, Z.(old_z - z), new_reps + | None -> + if not @@ predicate old_rep then + let new_reps = TMap.add old_rep (old_rep, Z.zero) new_reps in + old_rep, old_z, new_reps + else (* <- we keep the same representative as before *) + (* the representative need to be removed from the data structure: state is the new repr->*) + let new_reps = TMap.add old_rep (state, old_z) new_reps in + state, Z.zero, new_reps + in + let add_atom (uf, new_reps, new_cc, reachable_old_reps) state = + let old_rep, old_z, uf = TUF.find uf state in + let new_rep, new_z, new_reps = find_new_repr state old_rep old_z new_reps in + let new_cc = insert_terms new_cc [state; new_rep] in + let new_cc = closure new_cc [(state, new_rep, new_z)] in + let reachable_old_reps = (old_rep, new_rep, Z.(old_z - new_z))::reachable_old_reps in + (uf, new_reps, new_cc, reachable_old_reps) + in + let uf, new_reps, new_cc, reachable_old_reps = + (* Only perform add_atom on those that do not fullfill the predicate *) + let filter_out acc x = + if predicate x then + acc + else + add_atom acc x + in + SSet.fold_atoms filter_out (cc.uf, TMap.empty, init_cc (),[]) cc.set in + let cmap, uf = Disequalities.comp_map uf in + (* breadth-first search of reachable states *) + let add_transition (old_rep, new_rep, z1) (uf, new_reps, new_cc, reachable_old_reps) (s_z,s_t) = + let old_rep_s, old_z_s, uf = TUF.find uf s_t in + let find_successor_in_set (z, term_set) = + let exception Found in + let res = ref None in + let do_term t = + try + let successor = SSet.deref_term t Z.(s_z - z) cc.set in + if predicate successor then + () + else begin + res := Some successor; + raise Found + end + with T.UnsupportedCilExpression _ -> + () + in + try + TSet.iter do_term term_set; + !res + with Found -> !res + in + (* find successor term -> find any element in equivalence class that can be dereferenced *) + let old_rep_zmap = TMap.find old_rep cmap in + let old_rep_bindings = ZMap.bindings @@ old_rep_zmap in + + match List.find_map_opt find_successor_in_set old_rep_bindings with + | Some successor_term -> + if predicate successor_term && T.check_valid_pointer (T.to_cil successor_term) then + (uf, new_reps, new_cc, reachable_old_reps) + else begin + let new_cc = insert_terms new_cc [successor_term] in + match LMap.find_opt old_rep_s new_reps with + | Some (new_rep_s,z2) -> (* the successor already has a new representative, therefore we can just add it to the lookup map*) + let new_eq = [(successor_term, new_rep_s, Z.(old_z_s - z2))] in + let new_cc = closure new_cc new_eq in + uf, new_reps, new_cc, reachable_old_reps + | None -> (* the successor state was not visited yet, therefore we need to find the new representative of the state. + -> we choose a successor term *(t+z) for any t + -> we need add the successor state to the list of states that still need to be visited + *) + let new_reps = TMap.add old_rep_s (successor_term, old_z_s) new_reps in + let reachable_old_reps = (old_rep_s, successor_term, old_z_s)::reachable_old_reps in + uf, new_reps, new_cc, reachable_old_reps + end + | None -> + (* the term cannot be dereferenced, so we won't add this transition. *) + (uf, new_reps, new_cc, reachable_old_reps) + in + (* find all successors that are still reachable *) + let rec add_transitions uf new_reps new_cc = function + | [] -> + new_reps, new_cc + | (old_rep, new_rep, z)::rest -> + let successors = LMap.successors old_rep cc.map in + let uf, new_reps, new_cc, reachable_old_reps = + List.fold (add_transition (old_rep, new_rep,z)) (uf, new_reps, new_cc, []) successors + in + add_transitions uf new_reps new_cc (rest @ reachable_old_reps) + in + let cmp = Tuple3.compare ~cmp1:(T.compare) ~cmp2:(T.compare) ~cmp3:(Z.compare) in + let reachable_old_reps = List.unique_cmp ~cmp reachable_old_reps in + add_transitions uf new_reps new_cc reachable_old_reps + + +(** Find the representative term of the equivalence classes of an element that has already been deleted from the data structure. + Returns None if there are no elements in the same equivalence class as t before it was deleted.*) +let find_new_root new_reps uf v = + match TMap.find_opt v new_reps with + | None -> uf, None + | Some (new_t, z1) -> + let t_rep, z2, uf = TUF.find uf new_t in + uf, Some (t_rep, Z.(z2 - z1)) + +let remove_terms_from_diseq diseq new_reps cc = + let disequalities = Disequalities.get_disequalities diseq in + let add_disequality (uf, new_diseq) = function + | Nequal (t1, t2 ,z) -> + begin + match find_new_root new_reps uf t1 with + | uf, Some (t1',z1') -> + begin + match find_new_root new_reps uf t2 with + | uf, Some (t2', z2') -> + uf, (t1', t2', Z.(z2'+z-z1'))::new_diseq + | _ -> uf, new_diseq + end + | _ -> uf, new_diseq + end + | _-> uf, new_diseq + in + let uf,new_diseq = List.fold add_disequality (cc.uf,[]) disequalities + in congruence_neq {cc with uf} new_diseq + +let remove_terms_from_bldis bldis new_reps cc = + let uf_ref = ref cc.uf in + let find_new_root_term t = + let uf, new_root = find_new_root new_reps !uf_ref t in + uf_ref := uf; + Option.map fst new_root + in + let bldis = BlDis.filter_map_lhs find_new_root_term bldis in + let bldis = BlDis.filter_map find_new_root_term bldis in + !uf_ref, bldis + +(** Remove terms from the data structure. + It removes all terms for which "predicate" is false, + while maintaining all equalities about variables that are not being removed.*) +let remove_terms predicate cc = + let old_cc = cc in + let new_reps, cc = remove_terms_from_eq predicate cc in + let uf, bldis = remove_terms_from_bldis old_cc.bldis new_reps cc in + let cc = {cc with uf; bldis} in + let cc = remove_terms_from_diseq old_cc.diseq new_reps cc in + cc + +let remove_terms p cc = Timing.wrap "removing terms" (remove_terms p) cc + +(** Join version 1: by using the automaton. + The product automaton of cc1 and cc2 is computed and then we add the terms to the right equivalence class. + We also add new terms in order to have some terms for each state in the automaton. *) +let join_eq cc1 cc2 = + let do_atom a = + let r1, off1 = TUF.find_no_pc cc1.uf a in + let r2, off2 = TUF.find_no_pc cc2.uf a in + (r1, r2, Z.(off2 - off1)), (a, off1) + in + let add_term (pmap, cc, new_pairs) (new_element, (new_term, a_off)) = + match Map.find_opt new_element pmap with + | None -> + let pmap = Map.add new_element (new_term, a_off) pmap in + let new_pairs = new_element::new_pairs in + pmap, cc, new_pairs + | Some (c, c1_off) -> + let cc = add_eq cc (new_term, c, Z.(-c1_off + a_off)) in + pmap, cc, new_pairs + in + (* add equalities that make sure that all atoms that have the same + representative are equal. *) + let add_one_edge y t t1_off diff (pmap, cc, new_pairs) (offset, a) = + let a', a_off = TUF.find_no_pc cc1.uf a in + match LMap.map_find_opt (y, Z.(diff + offset)) cc2.map with + | None -> pmap, cc, new_pairs + | Some b -> + let b', b_off = TUF.find_no_pc cc2.uf b in + try + let new_term = SSet.deref_term t Z.(offset - t1_off) cc1.set (* may throw T.UnsupportedCilExpression *) in + let _ , cc = insert cc new_term in + let new_element = a',b',Z.(b_off - a_off) in + add_term (pmap, cc, new_pairs) (new_element, (new_term, a_off)) + with (T.UnsupportedCilExpression _) -> + pmap, cc, new_pairs + in + let rec add_edges_to_map pmap cc = function + | [] -> cc, pmap + | (x, y, diff)::rest -> + let t, t1_off = Map.find (x, y, diff) pmap in + let add_edge = add_one_edge y t t1_off diff in + let x_succs = LMap.successors x cc1.map in + let pmap, cc, new_pairs = List.fold_left add_edge (pmap, cc, []) x_succs in + let rest = rest @ new_pairs in + add_edges_to_map pmap cc rest + in + let atoms = SSet.get_atoms (SSet.inter cc1.set cc2.set) in + let mappings = List.map do_atom atoms in + let empty = (Map.empty, init_cc (), []) in + let pmap, cc, working_set = List.fold_left add_term empty mappings in + add_edges_to_map pmap cc working_set + +(** Join version 2: just look at equivalence classes and not the automaton. *) +let product_no_automata_over_terms cc1 cc2 terms = + let cc1 = insert_set cc1 terms in + let cc2 = insert_set cc2 terms in + let f a = + let r1, off1 = TUF.find_no_pc cc1.uf a in + let r2, off2 = TUF.find_no_pc cc2.uf a in + (r1, r2, Z.(off2 - off1)), (a, off1) + in + let mappings = List.map f (SSet.to_list terms) in + let add_term (cc, pmap) (new_element, (new_term, a_off)) = + match Map.find_opt new_element pmap with + | None -> + let pmap = Map.add new_element (new_term, a_off) pmap in + cc, pmap + | Some (c, c1_off) -> + let cc = add_eq cc (new_term, c, Z.(-c1_off + a_off)) in + cc, pmap + in + let initial = (init_cc (), Map.empty) in + List.fold_left add_term initial mappings + +(** Here we do the join without using the automata. + We construct a new cc that contains the elements of cc1.set U cc2.set. + and two elements are in the same equivalence class iff they are in the same eq. class + both in cc1 and in cc2. *) +let join_eq_no_automata cc1 cc2 = + let terms = SSet.union cc1.set cc2.set in + product_no_automata_over_terms cc1 cc2 terms + +(** Same as join, but we only take the terms from the left argument. *) +let widen_eq_no_automata cc1 cc2 = + product_no_automata_over_terms cc1 cc2 cc1.set + +(** Joins the disequalities diseq1 and diseq2, given a congruence closure data structure. + + This is done by checking for each disequality if it is implied by both cc. *) +let join_neq diseq1 diseq2 cc1 cc2 cc cmap1 cmap2 = + let _, diseq1, _ = split (Disequalities.get_disequalities diseq1) in + let _, diseq2, _ = split (Disequalities.get_disequalities diseq2) in + (* keep all disequalities from diseq1 that are implied by cc2 and + those from diseq2 that are implied by cc1 *) + let diseq1 = Disequalities.element_closure diseq1 cmap1 cc.uf in + let diseq2 = Disequalities.element_closure diseq2 cmap2 cc.uf in + let diseq1 = List.filter (neq_query cc2) diseq1 in + let diseq2 = List.filter (neq_query cc1) diseq2 in + let diseq = diseq1 @ diseq2 in + + let subterms, _ = SSet.subterms_of_conj diseq in + let cc = insert_set cc subterms in + let res = congruence_neq cc diseq in + (if M.tracing then M.trace "c2po-neq" "join_neq: %s\n\n" (Disequalities.show_neq res.diseq)); + res + +(** Joins the block disequalities bldiseq1 and bldiseq2, given a congruence closure data structure. + + This is done by checking for each block disequality if it is implied by both cc. *) +let join_bldis bldiseq1 bldiseq2 cc1 cc2 cc cmap1 cmap2 = + let both_root (t1, t2) = + TUF.is_root cc.uf t1 && TUF.is_root cc.uf t2 + in + let bldiseq1 = BlDis.to_conj bldiseq1 in + let bldiseq2 = BlDis.to_conj bldiseq2 in + (* keep all disequalities from diseq1 that are implied by cc2 and + those from diseq2 that are implied by cc1 *) + let bldiseq1 = BlDis.element_closure bldiseq1 cmap1 in + let bldiseq2 = BlDis.element_closure bldiseq2 cmap2 in + let bldiseq1 = List.filter (block_neq_query cc2) bldiseq1 in + let bldiseq2 = List.filter (block_neq_query cc1) bldiseq2 in + let bldiseq = bldiseq1 @ bldiseq2 in + + (* Bring block disequalities into format that can be passed to subterms_of_conj *) + let bldiseq_prop_format = List.map (fun (a, b) -> (a, b, Z.zero)) bldiseq in + let subterms, _ = SSet.subterms_of_conj bldiseq_prop_format in + + let cc = insert_set cc subterms in + let diseqs_ref_terms = List.filter both_root bldiseq in + let bldis = List.fold BlDis.add_block_diseq BlDis.empty diseqs_ref_terms in + (if M.tracing then M.trace "c2po-neq" "join_bldis: %s\n\n" (show_conj (BlDis.to_conj bldis))); + {cc with bldis} + +(** Check for equality of two congruence closures, + by comparing the equivalence classes instead of computing the minimal_representative. *) +let equal_eq_classes cc1 cc2 = + let comp1, _ = Disequalities.comp_map cc1.uf in + let comp2, _ = Disequalities.comp_map cc2.uf in + (* they should have the same number of equivalence classes *) + if TMap.cardinal comp1 = TMap.cardinal comp2 then + (* compare each equivalence class of cc1 with the corresponding eq. class of cc2 *) + let compare_zmap_entry offset zmap2 (z, tset1) = + match ZMap.find_opt Z.(z + offset) zmap2 with + | None -> + false + | Some tset2 -> + SSet.equal tset1 tset2 + in + let compare_with_cc2_eq_class (rep1, zmap1) = + let rep2, offset = TUF.find_no_pc cc2.uf rep1 in + let zmap2 = TMap.find rep2 comp2 in + if ZMap.cardinal zmap2 = ZMap.cardinal zmap1 then + List.for_all (compare_zmap_entry offset zmap2) (ZMap.bindings zmap1) + else + false + in + List.for_all compare_with_cc2_eq_class (TMap.bindings comp1) + else + false + +let equal_diseqs cc1 cc2 = + let normalize_diseqs (min_state1, min_state2, new_offset) = + if T.compare min_state1 min_state2 < 0 then + Nequal (min_state1, min_state2, new_offset) + else + Nequal (min_state2, min_state1, Z.(-new_offset)) + in + + let rename_diseqs dis = + match dis with + | Nequal (t1,t2,z) -> + let (min_state1, min_z1) = TUF.find_no_pc cc2.uf t1 in + let (min_state2, min_z2) = TUF.find_no_pc cc2.uf t2 in + let new_offset = Z.(min_z2 - min_z1 + z) in + normalize_diseqs (min_state1, min_state2, new_offset) + | _ -> dis + in + let diseq1 = Disequalities.get_disequalities cc1.diseq in + let renamed_diseqs = List.map rename_diseqs diseq1 in + let renamed_diseqs = BatList.sort_unique T.compare_v_prop renamed_diseqs in + + let normalize_diseqs = function + | Nequal neq -> + normalize_diseqs neq + | _ -> + failwith "Unexpected equality or block disequality." + in + let diseqs2 = Disequalities.get_disequalities cc2.diseq in + let normalized_diseqs = List.map normalize_diseqs diseqs2 in + let normalized_diseqs = BatList.sort_unique T.compare_v_prop normalized_diseqs in + List.equal T.equal_v_prop renamed_diseqs normalized_diseqs + +let equal_bldis cc1 cc2 = + let normalize_bldis (min_state1, min_state2) = + if T.compare min_state1 min_state2 < 0 then + BlNequal (min_state1, min_state2) + else + BlNequal (min_state2, min_state1) + in + let rename_bldis dis = + match dis with + | BlNequal (t1, t2) -> + let min_state1, _ = TUF.find_no_pc cc2.uf t1 in + let min_state2, _ = TUF.find_no_pc cc2.uf t2 in + normalize_bldis (min_state1, min_state2) + | _ -> dis + in + let normalize_bldis = function + | BlNequal (t1, t2) -> + normalize_bldis(t1, t2) + | Nequal (t1, t2, _) + | Equal(t1, t2, _) -> + failwith "Unexpected disequality or equality." + in + let renamed_bldis = List.map rename_bldis (BlDis.to_conj cc1.bldis) in + let renamed_bldis = BatList.sort_unique T.compare_v_prop renamed_bldis in + + let normalized_bldis = List.map normalize_bldis (BlDis.to_conj cc2.bldis) in + let normalized_bldis = BatList.sort_unique T.compare_v_prop normalized_bldis in + + List.equal T.equal_v_prop renamed_bldis normalized_bldis + +(**Find out if two addresses are not equal by using the MayPointTo query*) +module MayBeEqual = struct + + module AD = Queries.AD + open Var + + let dummy_var typ = + T.aux_term_of_varinfo (AssignAux typ) + + let dummy_lval_print typ = + Lval (Var (to_varinfo (AssignAux typ)), NoOffset) + + let return_var typ = + T.aux_term_of_varinfo (ReturnAux typ) + + let ask_may_point_to (ask: Queries.ask) exp = + try + ask.f (MayPointTo exp) + with IntDomain.ArithmeticOnIntegerBot _ -> + AD.top () + + (** Ask MayPointTo not only for the term `term`, but also + for all terms that are in the same equivalence class as `term`. Then meet the result. + *) + let may_point_to_all_equal_terms ask exp cc term offset = + let equal_terms = + if TMap.mem term cc.uf then + let comp = Disequalities.comp_t cc.uf term in + let valid_term (t, z) = + let typ = T.type_of_term t in + GoblintCil.isPointerType typ + in + List.filter valid_term comp + else + [(term, Z.zero)] + in + + let intersect_query_result res (term, z) = + let next_query = + try + let cil_exp = T.to_cil_sum Z.(z + offset) (T.to_cil term) in + let res = ask_may_point_to ask cil_exp in + if AD.is_bot res then + AD.top() + else + res + with T.UnsupportedCilExpression _ -> + AD.top() + in + try + AD.meet res next_query + with IntDomain.ArithmeticOnIntegerBot _ -> + res + in + + if M.tracing then M.trace "c2po-query" "may-point-to %a -> equal terms: %s" + d_exp exp (List.fold (fun s (t,z) -> s ^ "(" ^ T.show t ^","^ Z.to_string Z.(z + offset) ^")") "" equal_terms); + + List.fold intersect_query_result (AD.top ()) equal_terms + + (** Find out if an addresse is possibly equal to one of the addresses in `addresses` by using the MayPointTo query. *) + let may_point_to_address (ask:Queries.ask) addresses t2 off cc = + match T.to_cil_sum off (T.to_cil t2) with + | exception (T.UnsupportedCilExpression _) -> + true + | exp2 -> + let mpt1 = addresses in + let mpt2 = may_point_to_all_equal_terms ask exp2 cc t2 off in + let res = + try + not (AD.is_bot (AD.meet mpt1 mpt2)) + with + IntDomain.ArithmeticOnIntegerBot _ -> true + in + if M.tracing then begin + let meet = + try + AD.meet mpt1 mpt2 + with IntDomain.ArithmeticOnIntegerBot _ -> + AD.bot () + in + M.tracel "c2po-maypointto2" "QUERY MayPointTo. \nres: %a;\nt2: %s; exp2: %a; res: %a; \nmeet: %a; result: %s\n" + AD.pretty mpt1 (T.show t2) d_plainexp exp2 AD.pretty mpt2 AD.pretty meet (string_of_bool res); + end; + res + + (** Find out if two addresses `t1` and `t2` are possibly equal by using the MayPointTo query. *) + let may_point_to_same_address (ask: Queries.ask) t1 t2 off cc = + if T.equal t1 t2 then + true + else + let exp1 = T.to_cil t1 in + let mpt1 = may_point_to_all_equal_terms ask exp1 cc t1 Z.zero in + let res = may_point_to_address ask mpt1 t2 off cc in + if M.tracing && res then M.tracel "c2po-maypointto2" "QUERY MayPointTo. \nres: %a;\nt1: %s; exp1: %a;\n" + AD.pretty mpt1 (T.show t1) d_plainexp exp1; + res + + (** Returns true if `t1` and `t2` may possibly be equal or may possibly overlap. *) + let rec may_be_equal ask cc size t1 t2 = + let there_is_an_overlap s s' diff = + if Z.(gt diff zero) then + Z.(lt diff s') + else + Z.(lt (-diff) s) + in + match t1, t2 with + | Deref (t, z,_), Deref (v, z',_) -> + let (q', z1') = TUF.find_no_pc cc.uf v in + let (q, z1) = TUF.find_no_pc cc.uf t in + let s' = T.get_size t2 in + let diff = Z.(-z' - z1 + z1' + z) in + (* If they are in the same equivalence class and they overlap, then they are equal *) + let cond1 = + if T.equal q' q && there_is_an_overlap size s' diff then + true + (* If we have a disequality, then they are not equal *) + else if neq_query cc (t,v,Z.(z'-z)) then + false + (* or if we know that they are not equal according to the query MayPointTo*) + else if GobConfig.get_bool "ana.c2po.askbase" then + may_point_to_same_address ask t v Z.(z' - z) cc + else + true + in + cond1 || (may_be_equal ask cc size t1 v) + | Deref _, _ -> + false (* The value of addresses or auxiliaries never change when we overwrite the memory*) + | Addr _ , _ + | Aux _, _ -> + T.is_subterm t1 t2 + + (**Returns true iff by assigning to t1, the value of t2 could change. + The parameter s is the size in bits of the variable t1 we are assigning to. *) + let may_be_equal ask cc size t1 t2 = + let res = may_be_equal ask cc size t1 t2 in + if M.tracing then M.tracel "c2po-maypointto" "May be equal: %s %s: %b\n" (T.show t1) (T.show t2) res; + res + + (**Returns true if `t2` or any subterm of `t2` may possibly point to one of the addresses in `addresses`.*) + let rec may_point_to_one_of_these_addresses ask addresses cc t2 = + match t2 with + | Deref (v, z',_) -> + (may_point_to_address ask addresses v z' cc) + || (may_point_to_one_of_these_addresses ask addresses cc v) + | Addr _ -> + false + | Aux (v,e) -> + may_point_to_address ask addresses (Addr v) Z.zero cc +end diff --git a/src/cdomains/duplicateVars.ml b/src/cdomains/duplicateVars.ml new file mode 100644 index 0000000000..d321238a4c --- /dev/null +++ b/src/cdomains/duplicateVars.ml @@ -0,0 +1,125 @@ +(** Used by C2poDomain and StartStateAnalysis. + Contains functions to duplicate variables in order to have shadow variables for each function parameter, + that can be used to remeber the initial value of these parameters. + It uses RichVarinfo to create the duplicated variables. *) +open CilType +open GoblintCil +open Batteries +open GoblintCil +module M = Messages + +(** Variable Type used by the C-2PO Analysis. + It contains normal variables with a varinfo as well as auxiliary variables for + assignment and return and duplicated variables for remembering the value of variables at the beginning of a function. *) +module VarType = struct + let equal_typ a b = + CilType.Typ.equal a b + + let hash_typ x = + CilType.Typ.hash x + + let compare_typ a b = + CilType.Typ.compare a b + + type t = + | AssignAux of typ + | ReturnAux of typ + | NormalVar of Varinfo.t + | DuplicVar of Varinfo.t [@@deriving eq,ord,hash] + + let from_varinfo normal duplicated = + let to_normal v = + NormalVar v + in + let to_duplicated v = + DuplicVar v + in + + let normal = Seq.of_list normal in + let duplicated = Seq.of_list duplicated in + + let normal = Seq.map to_normal normal in + let duplicated = Seq.map to_duplicated duplicated in + let complete = Seq.append normal duplicated in + + List.of_seq complete + + (* Need to lookup attributes such as vaddrof in the original varinfo *) + let vaddrof : t -> bool = function + | AssignAux _ + | ReturnAux _ -> false + | NormalVar v + | DuplicVar v -> v.vaddrof + + let vglob : t -> bool = function + | AssignAux _ + | ReturnAux _ -> false + | NormalVar v + | DuplicVar v -> v.vglob + + let duplic_var_prefix = + "c2po__" + let duplic_var_postfix = + "'" + + let show v = match v with + | AssignAux t -> + "AuxAssign" + | ReturnAux t -> + "AuxReturn" + | NormalVar v -> + v.vname + | DuplicVar v -> + duplic_var_prefix ^ v.vname ^ duplic_var_postfix + + let get_type v = match v with + | AssignAux t + | ReturnAux t -> t + | NormalVar v + | DuplicVar v -> v.vtype + + let is_assign_aux = function + | AssignAux _ -> true + | _ -> false + + let is_return_aux = function + | ReturnAux _ -> true + | _ -> false + + let name_varinfo v = + match v with + | AssignAux t -> + "AuxAssign" + | ReturnAux t -> + "AuxReturn" + | NormalVar v -> + v.vname + | DuplicVar v -> + duplic_var_prefix ^ string_of_int v.vid ^ duplic_var_postfix + + let typ v = + get_type v + + (* Description that gets appended to the varinfo-name in user output. *) + let describe_varinfo (var: varinfo) v = + show v +end + +module VarVarinfoMap = RichVarinfo.BiVarinfoMap.Make(VarType) + +module Var = +struct + include VarType + + let dummy_varinfo typ: varinfo = + VarVarinfoMap.to_varinfo (AssignAux typ) + + let return_varinfo typ = + VarVarinfoMap.to_varinfo (ReturnAux typ) + + let to_varinfo v = + let res = VarVarinfoMap.to_varinfo v in + if M.tracing then M.trace "c2po-varinfo" "to_varinfo: %a -> %a" d_type (get_type v) d_type res.vtype; + res + +end diff --git a/src/cdomains/unionFind.ml b/src/cdomains/unionFind.ml new file mode 100644 index 0000000000..a2ef103117 --- /dev/null +++ b/src/cdomains/unionFind.ml @@ -0,0 +1,1064 @@ +(** + The Union Find is used by the C-2PO Analysis. + This file contains the code for a quantitative union find and the quantitative finite automata. + They will be necessary in order to construct the congruence closure of terms. +*) +open Batteries +open GoblintCil +open DuplicateVars +module M = Messages + +exception Unsat + +(* equality of terms should not depend on the expression *) +let compare_exp _ _ = 0 +let equal_exp _ _ = true +let hash_exp _ = 1 + +type term = + | Addr of Var.t + | Aux of Var.t * exp + | Deref of term * Z.t * exp [@@deriving eq, hash, ord] + +let normal_form_tuple_3 (t1, t2, z) = + let cmp = compare_term t1 t2 in + if cmp < 0 || (cmp = 0 && Z.geq z Z.zero) then + (t1, t2, z) + else + (t2, t1, Z.(-z)) + +let normal_form_tuple_2 (t1, t2) = + if compare_term t1 t2 < 0 then + (t1, t2) + else + (t2, t1) + +(** Two propositions are equal if they are syntactically equal + or if one is t_1 = z + t_2 and the other t_2 = - z + t_1. *) +let tuple3_equal p1 p2 = Tuple3.eq equal_term equal_term Z.equal (normal_form_tuple_3 p1) (normal_form_tuple_3 p2) +let tuple3_cmp p1 p2 = Tuple3.comp compare_term compare_term Z.compare (normal_form_tuple_3 p1) (normal_form_tuple_3 p2) +let tuple3_hash p1 = Hashtbl.hash (normal_form_tuple_3 p1) +let tuple2_equal p1 p2 = Tuple2.eq equal_term equal_term (normal_form_tuple_2 p1) (normal_form_tuple_2 p2) +let tuple2_cmp p1 p2 = Tuple2.comp compare_term compare_term (normal_form_tuple_2 p1) (normal_form_tuple_2 p2) +let tuple2_hash p1 = Hashtbl.hash (normal_form_tuple_2 p1) + +type term_offset_relation = term * term * Z.t [@@deriving eq, hash, ord] + +type block_relation = term * term [@@deriving eq, hash, ord] + +type prop = Equal of term_offset_relation + | Nequal of term_offset_relation + | BlNequal of block_relation [@@deriving eq, hash, ord] + +(** The terms consist of address constants and dereferencing function with sum of an integer. + The dereferencing function is parametrized by the size of the element in the memory. + We store the CIL expression of the term in the data type, such that it it easier to find the types of the dereferenced elements. + Also so that we can easily convert back from term to Cil expression. +*) +module T = struct + type exp = Cil.exp + + let bitsSizeOfPtr () = Z.of_int @@ bitsSizeOf (TPtr (TVoid [],[])) + + (* we store the varinfo and the Cil expression corresponding to the term in the data type *) + type t = term[@@deriving eq, hash, ord] + type v_prop = prop[@@deriving eq, hash, ord] + + let props_equal = List.equal equal_v_prop + + let is_addr = function + | Addr _ -> true + | _ -> false + + exception UnsupportedCilExpression of string + + let rec get_size_in_bits typ = match Cil.unrollType typ with + | TArray (typ, _, _) -> (* we treat arrays like pointers *) + get_size_in_bits (TPtr (typ,[])) + | _ -> + try Z.of_int (bitsSizeOf typ) with + | SizeOfError ("abstract type", _) -> + Z.one + | SizeOfError (msg, _) -> + raise (UnsupportedCilExpression msg) + + let show_type exp = + try + let typ = typeOf exp in + let typ_abbreviation = match Cil.unrollType typ with + | TPtr _ -> "Ptr" + | TInt _ -> "Int" + | TArray _ -> "Arr" + | TVoid _ -> "Voi" + | TFloat (_, _)-> "Flo" + | TComp (_, _) -> "TCo" + | TFun (_, _, _, _) + | TNamed (_, _) + | TEnum (_, _) + | TBuiltin_va_list _ -> "?" + in + let bit_size = get_size_in_bits typ in + let bit_size = Z.to_string bit_size in + "[" ^ typ_abbreviation ^ bit_size ^ "]" + + with UnsupportedCilExpression _ -> + "[?]" + + let rec show : t -> string = function + | Addr v -> + "&" ^ Var.show v + | Aux (v,exp) -> + "~" ^ Var.show v ^ show_type exp + | Deref (Addr v, z, exp) when Z.equal z Z.zero -> + Var.show v ^ show_type exp + | Deref (t, z, exp) when Z.equal z Z.zero -> + "*" ^ show t^ show_type exp + | Deref (t, z, exp) -> + "*(" ^ Z.to_string z ^ "+" ^ show t ^ ")"^ show_type exp + + let show_prop = function + | Equal (t1,t2,r) when Z.equal r Z.zero -> + show t1 ^ " = " ^ show t2 + | Equal (t1,t2,r) -> + show t1 ^ " = " ^ Z.to_string r ^ "+" ^ show t2 + | Nequal (t1,t2,r) when Z.equal r Z.zero -> + show t1 ^ " != " ^ show t2 + | Nequal (t1,t2,r) -> + show t1 ^ " != " ^ Z.to_string r ^ "+" ^ show t2 + | BlNequal (t1,t2) -> + "bl(" ^ show t1 ^ ") != bl(" ^ show t2 ^ ")" + + (** Returns true if the first parameter is a subterm of the second one. *) + let rec is_subterm needle haystack = + let is_subterm_of haystack = + match haystack with + | Deref (t, _, _) -> is_subterm needle t + | _ -> false + in + equal needle haystack || is_subterm_of haystack + + let rec get_var = function + | Addr v + | Aux (v,_) -> + v + | Deref (t, _, _) -> + get_var t + + (** Returns true if the second parameter contains one of the variables defined in the list "variables". *) + let contains_variable variables term = + let term_var = get_var term in + List.mem_cmp Var.compare term_var variables + + (** Use query EvalInt for an expression. *) + let eval_int (ask:Queries.ask) exp = + match Cilfacade.get_ikind_exp exp with + | exception Invalid_argument _ -> + raise (UnsupportedCilExpression "non-constant value") + | ikind -> + begin match ask.f (Queries.EvalInt exp) with + | `Lifted i -> + let casted_i = IntDomain.IntDomTuple.cast_to ikind i in + let maybe_i = IntDomain.IntDomTuple.to_int casted_i in + begin match maybe_i with + | Some i -> i + | None -> raise (UnsupportedCilExpression "non-constant value") + end + | _ -> raise (UnsupportedCilExpression "non-constant value") + end + + let eval_int_opt (ask:Queries.ask) exp = + match eval_int ask exp with + | i -> Some i + | exception (UnsupportedCilExpression _) -> None + + (** Returns Some type for a pointer to a type + and None if the result is not a pointer. *) + let rec type_of_element typ = + match Cil.unrollType typ with + | TArray (typ, _, _) -> + type_of_element typ + | TPtr (typ, _) -> + Some typ + | _ -> + None + + (** Returns the size of the type. If typ is a pointer, it returns the + size of the elements it points to. If typ is an array, it returns the size of the + elements of the array (even if it is a multidimensional array. Therefore get_element_size_in_bits int\[]\[]\[] = sizeof(int)). *) + let get_element_size_in_bits typ = + match type_of_element typ with + | Some typ -> + get_size_in_bits typ + | None -> + Z.one + + let is_struct_type t = + match Cil.unrollType t with + | TComp _ -> + true + | _ -> + false + + let is_struct_ptr_type t = + match Cil.unrollType t with + | TPtr(typ, _) -> + is_struct_type typ + | _ -> + false + + let aux_term_of_varinfo vinfo = + let var = Var (Var.to_varinfo vinfo) in + let lval = Lval (var, NoOffset) in + Aux (vinfo, lval) + + (* *) + let term_of_varinfo var_type = + let var = Var.to_varinfo var_type in + let lval = Lval (Var var, NoOffset) in + + if is_struct_type var.vtype || DuplicateVars.VarType.vaddrof var_type then + Deref (Addr var_type, Z.zero, lval) + else + aux_term_of_varinfo var_type + + (** From a offset, compute the index in bits *) + let offset_to_index ?typ offset = + let ptr_diff_ikind = Cilfacade.ptrdiff_ikind () in + let offset_in_bytes = PreValueDomain.Offs.to_index ?typ offset in + let bytes_to_bits = IntDomain.of_const (Z.of_int 8, ptr_diff_ikind, None) in + IntDomain.IntDomTuple.mul bytes_to_bits offset_in_bytes + + (** Convert a Cil offset to an integer offset. *) + let cil_offs_to_idx (ask: Queries.ask) offs typ = + (* TODO: Some duplication with convert_offset in base.ml and cil_offs_to_idx in memOutOfBounds.ml, unclear how to immediately get more reuse. *) + let rec convert_offset (ofs: offset) = + match ofs with + | NoOffset -> + `NoOffset + | Field (fld, ofs) -> + `Field (fld, convert_offset ofs) + | Index (exp, ofs) when CilType.Exp.equal exp (Lazy.force Offset.Index.Exp.any) -> (* special offset added by convertToQueryLval *) + let exp_ikind = Cilfacade.get_ikind_exp exp in + `Index (ValueDomain.ID.top_of exp_ikind, convert_offset ofs) + | Index (exp, ofs) -> + let ptr_diff_ikind = Cilfacade.ptrdiff_ikind () in + let i = match ask.f (Queries.EvalInt exp) with + | `Lifted x -> + IntDomain.IntDomTuple.cast_to ptr_diff_ikind x + | _ -> + ValueDomain.ID.top_of ptr_diff_ikind + in + let converted_ofs = convert_offset ofs in + `Index (i, converted_ofs) + in + let to_constant exp = + try + let z = eval_int ask exp in + let z_str = Some (Z.to_string z)in + let exp_ikind = Cilfacade.get_ikind_exp exp in + Const (CInt (z, exp_ikind, z_str)) + with + | Invalid_argument _ + | UnsupportedCilExpression _ -> exp + in + let rec convert_type typ = (* compute length of arrays when it is known*) + match typ with + | TArray (typ, exp, attr) -> + let const = Option.map to_constant exp in + let converted_type = convert_type typ in + TArray (converted_type, const, attr) + | TPtr (typ, attr) -> + let converted_type = convert_type typ in + TPtr (converted_type, attr) + | TFun (typ, form, var_arg, attr) -> + let converted_typ = convert_type typ in + TFun (converted_typ, form, var_arg, attr) + | TNamed (typeinfo, attr) -> + let converted_type = convert_type typeinfo.ttype in + TNamed ({typeinfo with ttype = converted_type}, attr) + | TVoid _ + | TInt (_, _) + | TFloat (_, _) + | TComp (_, _) + | TEnum (_, _) + | TBuiltin_va_list _ -> typ + in + let typ = Cil.unrollType typ in + let converted_type = Some (convert_type typ) in + let converted_offset = convert_offset offs in + offset_to_index ?typ:converted_type converted_offset + + (** Convert an offset to an integer of Z, if posible. + Otherwise, this throws UnsupportedCilExpression. *) + let z_of_offset ask offs typ = + match IntDomain.IntDomTuple.to_int (cil_offs_to_idx ask offs typ) with + | Some i -> i + | None + | exception (SizeOfError _) + | exception (Cilfacade.TypeOfError _) -> + if M.tracing then M.trace "c2po-invalidate" "Reason: unknown offset"; + raise (UnsupportedCilExpression "unknown offset") + + let can_be_dereferenced t = + match Cil.unrollType t with + | TPtr _ + | TArray _ + | TComp _ -> true + | _ -> false + + let type_of_term = + function + | Addr v -> + let var_type = (Var.to_varinfo v).vtype in + TPtr (var_type, []) + | Aux (_, exp) + | Deref (_, _, exp) -> + typeOf exp + + let to_cil = + function + | Addr v -> + let varinfo = Var.to_varinfo v in + let lval = (Var varinfo, NoOffset) in + AddrOf lval + | Aux (_, exp) + | (Deref (_, _, exp)) -> exp + + let default_int_type = + ILong + + (** Returns a Cil expression which is the constant z divided by the size of the elements of t.*) + let to_cil_constant z t = + let z = + if Z.equal z Z.zero then + Z.zero + else + let typ_size = match t with + | Some t -> get_element_size_in_bits t + | None -> Z.one + in + if Z.lt (Z.abs z) typ_size && Z.gt (Z.abs z) Z.zero then + raise (UnsupportedCilExpression "Cil can't represent something like &(c->d).") + else if Z.equal typ_size Z.zero then + Z.zero + else + Z.(z / typ_size) + in + let z_str = Some (Z.to_string z) in + Const (CInt (z, default_int_type, z_str)) + + let to_cil_sum off cil_t = + let res = + if Z.(equal zero off) then + cil_t + else + let typ = typeOf cil_t in + let const = to_cil_constant off (Some typ) in + BinOp (PlusPI, cil_t, const, typ) + in + if M.tracing then M.trace "c2po-2cil" "exp: %a; offset: %s; res: %a" d_exp cil_t (Z.to_string off) d_exp res; + res + + (** Returns the integer offset of a field of a struct. *) + let get_field_offset finfo = + let field = `Field (finfo, `NoOffset) in + let field_to_index = offset_to_index field in + match IntDomain.IntDomTuple.to_int field_to_index with + | Some i -> i + | None -> + raise (UnsupportedCilExpression "unknown offset") + + let is_field = function + | Field _ -> true + | _ -> false + + let rec add_index_to_exp exp index = + try + let exp_type = typeOf exp in + if is_struct_type exp_type = is_field index then + begin + match exp with + | Lval (Var v, NoOffset) -> + Lval (Var v, index) + | Lval (Mem v, NoOffset) -> + Lval (Mem v, index) + | BinOp (PlusPI, exp1, Const (CInt (z, _ , _ )), _) when Z.equal z Z.zero -> + add_index_to_exp exp1 index + | _ -> + raise (UnsupportedCilExpression "not supported yet") + end + else + if is_struct_ptr_type exp_type && (is_field index) then + Lval (Mem (exp), index) + else + raise (UnsupportedCilExpression "Field on a non-compound") + with Cilfacade.TypeOfError _ -> + raise (UnsupportedCilExpression "typeOf error") + + (** Returns true if the Cil expression represents a 64 bit data type + which is not a float. So it must be either a pointer or an integer + that has the same size as a pointer.*) + let check_valid_pointer term = + match typeOf term with (* we want to make sure that the expression is valid *) + | exception Cilfacade.TypeOfError _ -> + false + | typ -> (* we only track equalties between pointers (variable of size 64)*) + get_size_in_bits typ = bitsSizeOfPtr () && + not (Cilfacade.isFloatType typ) + + (** Only keeps the variables that are actually pointers (or 64-bit integers). *) + let filter_valid_pointers = + let check_both_terms_valid_pointers = function + | Equal(t1,t2,_) + | Nequal(t1,t2,_) + | BlNequal(t1,t2) -> + let t1 = to_cil t1 in + let t2 = to_cil t2 in + check_valid_pointer t1 && check_valid_pointer t2 + in + List.filter check_both_terms_valid_pointers + + (** Get a Cil expression that is equivalent to *(exp + offset), + by taking into account type correctness.*) + let dereference_exp exp offset = + if M.tracing then M.trace "c2po-deref" "exp: %a, offset: %s" d_exp exp (Z.to_string offset); + let res = + let find_field cinfo = + try + let equal_to_offset field = + Z.equal (get_field_offset field) offset + in + let field_equal_to_offset = List.find equal_to_offset cinfo.cfields in + Field (field_equal_to_offset, NoOffset) + with Not_found -> + raise (UnsupportedCilExpression "invalid field offset") + in + let res = + match exp with + | AddrOf lval -> + Lval lval + | _ -> + let typ = typeOf exp in + match Cil.unrollType typ with + | TPtr (TComp (cinfo, _), _) -> + let field = find_field cinfo in + add_index_to_exp exp field + | TPtr (typ, _) -> + Lval (Mem (to_cil_sum offset exp), NoOffset) + | TArray (typ, _, _) when not (can_be_dereferenced typ) -> + let constant = to_cil_constant offset (Some typ) in + let index = Index (constant, NoOffset) in + begin match exp with + | Lval (Var v, NoOffset) -> + Lval (Var v, index) + | Lval (Mem v, NoOffset) -> + Lval (Mem v, index) + | _ -> + raise (UnsupportedCilExpression "not supported yet") + end + | TComp (cinfo, _) -> + let field = find_field cinfo in + add_index_to_exp exp field + | _ -> + let void_ptr_type = TPtr(TVoid [], []) in + let offset_plus_exp = to_cil_sum offset exp in + Lval (Mem (CastE (void_ptr_type, offset_plus_exp)), NoOffset) + in + if check_valid_pointer res then + res + else + raise (UnsupportedCilExpression "not a pointer variable") + in + if M.tracing then M.trace "c2po-deref" "deref result: %a" d_exp res; + res + + let get_size = get_size_in_bits % type_of_term + + let of_offset ask t off typ exp = + if off = NoOffset then + t + else + let z = z_of_offset ask off typ in + Deref (t, z, exp) + + (** Converts a cil expression to Some term, Some offset; + or None, Some offset is the expression equals an integer, + or None, None if the expression can't be described by our analysis.*) + let rec of_cil (ask:Queries.ask) e = match e with + | Const (CInt (i, _, _)) -> None, i + | Const _ -> raise (UnsupportedCilExpression "non-integer constant") + | AlignOf _ + | AlignOfE _ -> raise (UnsupportedCilExpression "unsupported AlignOf") + | Lval lval + | StartOf lval -> + Some (of_lval ask lval), Z.zero + | AddrOf (Var var, NoOffset) -> + Some (Addr (Var.NormalVar var)), Z.zero + | AddrOf (Mem exp, NoOffset) -> + of_cil ask exp + | UnOp (op,exp,typ) -> + begin match op with + | Neg -> + let off = eval_int ask exp in + None, Z.(-off) + | _ -> + raise (UnsupportedCilExpression "unsupported UnOp") + end + | BinOp (binop, exp1, exp2, typ)-> + let typ1_size = get_element_size_in_bits (Cilfacade.typeOf exp1) in + let typ2_size = get_element_size_in_bits (Cilfacade.typeOf exp2) in + begin match binop with + | PlusA + | PlusPI + | IndexPI -> + begin match eval_int_opt ask exp1, eval_int_opt ask exp2 with + | None, None -> + raise (UnsupportedCilExpression "unsupported BinOp +") + | None, Some off2 -> + let term, off1 = of_cil ask exp1 in + term, Z.(off1 + typ1_size * off2) + | Some off1, None -> + let term, off2 = of_cil ask exp2 in + term, Z.(typ2_size * off1 + off2) + | Some off1, Some off2 -> + None, Z.(off1 + off2) + end + | MinusA + | MinusPI + | MinusPP -> + begin match of_cil ask exp1, eval_int_opt ask exp2 with + | (Some term, off1), Some off2 -> + let typ1_size = get_element_size_in_bits (Cilfacade.typeOf exp1) in + Some term, Z.(off1 - typ1_size * off2) + | _ -> + raise (UnsupportedCilExpression "unsupported BinOp -") + end + | _ -> + raise (UnsupportedCilExpression "unsupported BinOp") + end + | CastE (typ, exp)-> + begin match of_cil ask exp with + | Some (Addr x), z -> + Some (Addr x), z + | Some (Aux (x, _)), z -> + Some (Aux (x, CastE (typ, exp))), z + | Some (Deref (x, z, _)), z' -> + Some (Deref (x, z, CastE (typ, exp))), z' + | t, z -> t, z + end + | _ -> raise (UnsupportedCilExpression "unsupported Cil Expression") + and of_lval ask lval = + let res = + match lval with + | (Var var, off) -> + if is_struct_type var.vtype then + let var_addr = Addr (Var.NormalVar var) in + of_offset ask var_addr off var.vtype (Lval lval) + else + let var_term = term_of_varinfo (Var.NormalVar var) in + of_offset ask var_term off var.vtype (Lval lval) + | (Mem exp, off) -> + begin match of_cil ask exp with + | (Some term, offset) -> + let typ = typeOf exp in + let typ = unrollType typ in + if is_struct_ptr_type typ then + match of_offset ask term off typ (Lval lval) with + | Addr x -> Addr x + | Aux (v,exp) -> Aux (v,exp) + | Deref (x, z, exp) -> Deref (x, Z.(z+offset), exp) + else + let deref_exp = (Mem exp, NoOffset) in + let deref_lval = Lval deref_exp in + let deref_typ = typeOfLval deref_exp in + let deref = Deref (term, offset, deref_lval) in + of_offset ask deref off deref_typ (Lval lval) + | _ -> raise (UnsupportedCilExpression "cannot dereference constant") + end + in + (if M.tracing then + match res with + | exception (UnsupportedCilExpression s) -> + M.trace "c2po-cil-conversion" "unsupported exp: %a\n%s\n" d_plainlval lval s + | t -> + M.trace "c2po-cil-conversion" "lval: %a --> %s\n" d_plainlval lval (show t)); + res + + let rec of_cil_neg ask neg e = match e with + | UnOp (op,exp,typ)-> + begin + match op with + | Neg -> + of_cil_neg ask (not neg) exp + | _ -> + if neg then + raise (UnsupportedCilExpression "unsupported UnOp Neg") + else + of_cil ask e + end + | _ -> + if neg then + raise (UnsupportedCilExpression "unsupported Neg") + else + of_cil ask e + + (** Converts the negation of the expression to a term if neg = true. + If neg = false then it simply converts the expression to a term. *) + let of_cil_neg ask neg e = + match Cilfacade.isFloatType (typeOf e) with + | exception Cilfacade.TypeOfError _ + | true -> None, None + | false -> + let res = match of_cil_neg ask neg (Cil.constFold false e) with + | exception (UnsupportedCilExpression s) -> + if M.tracing then M.trace "c2po-cil-conversion" "unsupported exp: %a\n%s\n" d_plainexp e s; + None, None + | t, z -> t, Some z + in + (if M.tracing && not neg then + match res with + | None, Some z -> M.trace "c2po-cil-conversion" "constant exp: %a --> %s\n" d_plainexp e (Z.to_string z) + | Some t, Some z -> M.trace "c2po-cil-conversion" "exp: %a --> %s + %s\n" d_plainexp e (show t) (Z.to_string z); + | _ -> ()); + res + + (** Convert the expression to a term, + and additionally check that the term is 64 bits. + If it's not a 64bit pointer, it returns None, None. *) + let of_cil ask e = + match of_cil_neg ask false e with + | Some t, Some z -> + (* check if t is a valid pointer *) + let exp = to_cil t in + if check_valid_pointer exp then + Some t, Some z + else begin + if M.tracing then M.trace "c2po-cil-conversion" "invalid exp: %a --> %s + %s\n" d_plainexp e (show t) (Z.to_string z); + None, None + end + | t, z -> t, z + + let map_z_opt op z = Tuple2.map2 (Option.map (op z)) + + (** Converts a cil expression e = "t1 + off1 - (t2 + off2)" to two terms (Some t1, Some off1), (Some t2, Some off2)*) + let rec two_terms_of_cil ask neg e = + let pos_t, neg_t = + match e with + | UnOp (Neg,exp,typ) -> + two_terms_of_cil ask (not neg) exp + | BinOp (binop, exp1, exp2, typ)-> + begin match binop with + | PlusA + | PlusPI + | IndexPI -> + begin match of_cil_neg ask false exp1 with + | (None, Some off1) -> + let pos_t, neg_t = two_terms_of_cil ask true exp2 in + map_z_opt Z.(+) off1 pos_t, neg_t + | (Some term, Some off1) -> + (Some term, Some off1), of_cil_neg ask true exp2 + | _ -> + (None, None), (None, None) + end + | MinusA + | MinusPI + | MinusPP -> + begin match of_cil_neg ask false exp1 with + | (None, Some off1) -> + let pos_t, neg_t = two_terms_of_cil ask false exp2 in + map_z_opt Z.(+) off1 pos_t, neg_t + | (Some term, Some off1) -> + (Some term, Some off1), of_cil_neg ask false exp2 + | _ -> + of_cil_neg ask false e, (None, Some Z.zero) + end + | _ -> of_cil_neg ask false e, (None, Some Z.zero) + end + | _ -> of_cil_neg ask false e, (None, Some Z.zero) + in + if neg then + neg_t, pos_t + else + pos_t, neg_t + + (** `prop_of_cil e pos` parses the expression `e` (or `not e` if `pos = false`) and + returns a list of length 1 with the parsed expresion or an empty list if + the expression can't be expressed with the type `prop`. *) + let rec prop_of_cil ask e pos = + let e = Cil.constFold false e in + match e with + | BinOp (r, e1, e2, _) -> + let e1_minus_e2 = (BinOp (MinusPI, e1, e2, TInt (Cilfacade.get_ikind_exp e,[]))) in + begin match two_terms_of_cil ask false e1_minus_e2 with + | ((Some t1, Some z1), (Some t2, Some z2)) -> + begin match r with + | Eq -> if pos then [Equal (t1, t2, Z.(z2-z1))] else [Nequal (t1, t2, Z.(z2-z1))] + | Ne -> if pos then [Nequal (t1, t2, Z.(z2-z1))] else [Equal (t1, t2, Z.(z2-z1))] + | _ -> [] + end + | _,_ -> [] + end + | UnOp (LNot, e1, _) -> + prop_of_cil ask e1 (not pos) + | _ -> [] + + let prop_to_cil p = + let op, t1, t2, z = match p with + | Equal (t1,t2,z) -> + Eq, t1, t2, z + | Nequal (t1,t2,z) -> + Ne, t1, t2, z + | BlNequal (t1,t2) -> + Ne, t1, t2, Z.zero + in + let t1 = to_cil t1 in + let z_plus_t2 = to_cil_sum z (to_cil t2) in + let bool_typ = TInt (IBool, []) in + + BinOp (op, t1, z_plus_t2, bool_typ) + +end + +module TMap = struct + include Map.Make(T) + let hash node_hash y = + let accumulate_key_value_has x node acc = + acc + T.hash x + node_hash node + in + fold accumulate_key_value_has y 0 +end + +module TSet = struct + include Set.Make(T) + let hash x = + let accumulate_element_hash x acc = + acc + T.hash x + in + fold accumulate_element_hash x 0 +end + +(** Quantitative union find *) +module UnionFind = struct + module ValMap = TMap + + (** (value * offset) ref * size of equivalence class *) + type 'v node = ('v * Z.t) * int [@@deriving eq, ord, hash] + + type t = T.t node ValMap.t [@@deriving eq, ord, hash] (** Union Find Map: maps value to a node type *) + + exception UnknownValue of T.t + exception InvalidUnionFind of string + + let empty = ValMap.empty + + (** `parent uf v` returns (p, z) where p is the parent element of + v in the union find tree and z is the offset. + + Throws "Unknown value" if v is not present in the data structure.*) + let parent uf v = + try + let parent, _ = ValMap.find v uf in + parent + with Not_found -> + raise (UnknownValue v) + + (** `parent_opt uf v` returns Some (p, z) where p is the parent element of + v in the union find tree and z is the offset. + It returns None if v is not present in the data structure. *) + let parent_opt uf v = + Option.map fst (ValMap.find_opt v uf) + + let parent_term uf v = + fst (parent uf v) + + let parent_offset uf v = + snd (parent uf v) + + let subtree_size uf v = + snd (ValMap.find v uf) + + (** Modifies the size of the equivalence class for the current element and + for the whole path to the root of this element. + + The third parameter `modification` is the function to apply to the sizes. *) + let rec modify_size t uf modification = + let (p, old_size) = ValMap.find t uf in + let uf = ValMap.add t (p, modification old_size) uf in + let parent = fst p in + if T.equal parent t then + uf + else + modify_size parent uf modification + + let modify_parent uf v (t, offset) = + let _, size = ValMap.find v uf in + ValMap.add v ((t, offset), size) uf + + let modify_offset uf v modification = + let ((t, offset), size) = ValMap.find v uf in + ValMap.add v ((t, modification offset), size) uf + + (** Returns true if each equivalence class in the data structure contains only one element, + i.e. every node is a root. *) + let is_empty uf = + let is_same_term (v, (t, _)) = + T.equal v (fst t) + in + let bindings = ValMap.bindings uf in + List.for_all is_same_term bindings + + (** Returns true if v is the representative value of its equivalence class. + + Throws "Unknown value" if v is not present in the data structure. *) + let is_root uf v = + match parent_opt uf v with + | None -> + true + | Some (parent_t, _) -> + T.equal v parent_t + + (** + For a variable v it returns the reference variable v' and the offset r'. + This find performs path compression. + It returns als the updated union-find tree after the path compression. + + Throws "Unknown value" if v is not present in the data structure. + Throws "Invalid Union Find" if it finds an element in the data structure that is a root but it has a non-zero distance to itself. + *) + let find uf v = + let (v', r') = parent uf v in + if T.equal v' v then + (* v is a root *) + if Z.equal r' Z.zero then + v', r', uf + else + raise (InvalidUnionFind "non-zero self-distance!") + else if is_root uf v' then + (* the parent of v is a root *) + v', r', uf + else + begin + if M.tracing then M.trace "c2po-find" "find DEEP TREE"; + let rec search v list = + let (v', r') = parent uf v in + if is_root uf v' then + let f (r0, uf) v = + let (parent_v, r''), size_v = ValMap.find v uf in + let uf = modify_parent uf v (v', Z.(r0 + r'')) in + let uf = modify_size parent_v uf (fun s -> s - size_v) in + let uf = modify_size v' uf ((+) size_v) in + Z.(r0 + r''), uf + in + (* perform path compresion *) + let (r', uf) = List.fold_left f (Z.zero, uf) (v::list) + in v', r', uf + else + search v' (v :: list) + in + search v' [v] + end + + (** + For a variable v it returns the reference variable v' and the offset r'. + This find DOES NOT perform path compression. + + Throws "Unknown value" if t is not present in the data structure. + Throws "Invalid Union Find" if it finds an element in the data structure that is a root but it has a non-zero distance to itself. + *) + let rec find_no_pc uf v = + let (v', r') = parent uf v in + if T.equal v' v then + if Z.equal r' Z.zero then + (v', r') + else + raise (InvalidUnionFind "non-zero self-distance!") + else + let (v'', r'') = find_no_pc uf v' in + (v'', Z.(r' + r'')) + + let compare_repr = Tuple2.compare ~cmp1:T.compare ~cmp2:Z.compare + + (** Compare only first element of the tuples (= the parent term). + It ignores the offset. *) + let compare_repr_v (v1, _) (v2, _) = T.compare v1 v2 + + (** + Parameters: uf v1 v2 r + + changes the union find data structure `uf` such that the equivalence classes of `v1` and `v2` are merged and `v1 = v2 + r` + + returns v,uf,b where + + - `v` is the new reference term of the merged equivalence class. It is either the old reference term of v1 or of v2, depending on which equivalence class is bigger. + + - `uf` is the new union find data structure + + - `b` is true iff v = find v1 + + *) + let union uf v'1 v'2 r = + let v1,r1,uf = find uf v'1 in + let v2,r2,uf = find uf v'2 in + if T.equal v1 v2 then + if Z.(equal r1 (r2 + r)) then + v1, uf, true + else + raise (Failure "incomparable union") + else + let (_,s1) = ValMap.find v1 uf in + let (_,s2) = ValMap.find v2 uf in + if s1 <= s2 then + let uf = modify_parent uf v1 (v2, Z.(r2 - r1 + r)) in + let uf = modify_size v2 uf ((+) s1) in + v2, uf, false + else + let uf = modify_parent uf v2 (v1, Z.(r1 - r2 - r)) in + let uf = modify_size v1 uf ((+) s2) in + v1, uf, true + + (** Returns a list of equivalence classes. *) + let get_eq_classes uf = + let compare (el1,_) (el2,_) = + compare_repr_v (find_no_pc uf el1) (find_no_pc uf el2) + in + let bindings = ValMap.bindings uf in + List.group compare bindings + + (** Throws "Unknown value" if the data structure is invalid. *) + let show_uf uf = + let show_element acc (v, (t, size)) = + acc ^ + "\t" ^ + (if is_root uf v then "R: " else "") ^ + "(" ^ + T.show v ^ + "; P: " ^ + T.show (fst t) ^ + "; o: " ^ + Z.to_string (snd t) ^ + "; s: " ^ + string_of_int size ^ + ")\n" + in + let show_eq_class acc eq_class = + acc ^ + List.fold_left show_element "" eq_class ^ + "----\n" + in + List.fold_left show_eq_class "" (get_eq_classes uf) ^ "\n" + + (** Returns a list of representative elements.*) + let get_representatives uf = + let get_if_root (el, _) = + if is_root uf el then + Some el + else + None + in + let bindings = TMap.bindings uf in + List.filter_map get_if_root bindings + +end + +module ZMap = struct + include Map.Make(Z) + + let hash hash_f y = + let accumulate_key_value_hash x node acc = + acc + Z.hash x + hash_f node + in + fold accumulate_key_value_hash y 0 +end + +(** For each representative t' of an equivalence class, the LookupMap maps t' to a map that maps z to a term in the data structure that is equal to *(z + t').*) +module LookupMap = struct + (* map: term -> z -> *(z + t) *) + type t = T.t ZMap.t TMap.t [@@deriving eq, ord, hash] + + let bindings = TMap.bindings + let add = TMap.add + let remove = TMap.remove + let empty = TMap.empty + let find_opt = TMap.find_opt + let find = TMap.find + + let zmap_bindings = ZMap.bindings + let zmap_find_opt = ZMap.find_opt + let zmap_add = ZMap.add + + (** Returns the element to which (v, r) is mapped, or None if (v, r) is mapped to nothing. *) + let map_find_opt (v,r) (map:t) = + match find_opt v map with + | None -> + None + | Some zmap -> + zmap_find_opt r zmap + + let map_add (v,r) v' (map:t) = + let zmap = match find_opt v map with + | None -> + zmap_add r v' ZMap.empty + | Some zmap -> + zmap_add r v' zmap + in + add v zmap map + + let show_map (map:t) = + let show_inner_binding acc (r, v) = + acc ^ + "\t" ^ + Z.to_string r ^ + ": " ^ + T.show v ^ + "; " + in + let show_inner_map zmap = + let inner_bindings = zmap_bindings zmap in + List.fold_left show_inner_binding "" inner_bindings + in + let show_binding s (v, zmap) = + s ^ + T.show v ^ + "\t:\n" ^ + show_inner_map zmap ^ "\n" + in + let bindings = bindings map in + List.fold_left show_binding "" bindings + + (** The value at v' is shifted by r and then added for v. + The old entry for v' is removed. *) + let shift v r v' map = + match find_opt v' map with + | None -> + map + | Some zmap -> + let infl = ZMap.bindings zmap in + let shift zmap (r', v') = + zmap_add Z.(r' + r) v' zmap + in + let zmap = List.fold_left shift ZMap.empty infl in + remove v' (add v zmap map) + + (** Find all outgoing edges of v in the automata.*) + let successors v (map:t) = + match find_opt v map with + | None -> + [] + | Some zmap -> + ZMap.bindings zmap + + (** Find all elements that are in the same equivalence class as t, + given the cmap. *) + let comp_t_cmap_repr cmap t = + match TMap.find_opt t cmap with + | None -> + [Z.zero, t] + | Some zmap -> + let offset_term_product (z, term_set) = + let terms = TSet.to_list term_set in + List.cartesian_product [z] terms + in + let zmap_bindings = ZMap.bindings zmap in + List.concat_map offset_term_product zmap_bindings + +end diff --git a/src/config/options.schema.json b/src/config/options.schema.json index ce0f398bee..c9a17b652c 100644 --- a/src/config/options.schema.json +++ b/src/config/options.schema.json @@ -1179,6 +1179,39 @@ }, "additionalProperties": false }, + "c2po": { + "title": "ana.c2po", + "type": "object", + "properties": { + "askbase": { + "title": "ana.c2po.askbase", + "description": "If true, the C-2PO Analysis uses the MayPointTo query to infer additional disequalities.", + "type": "boolean", + "default": true + }, + "join_algorithm": { + "title": "ana.c2po.join_algorithm", + "description": "Which join algorithm 'c2po' should use. Values: 'partition' (default): the more efficient version, it uses only the partition to compute the join. Circular equalities are lost during the join; 'precise': a more precise version, uses the automaton to compute the join.", + "type": "string", + "enum": [ + "precise", + "partition" + ], + "default": "partition" + }, + "equal": { + "title": "ana.c2po.equal", + "description": "Which equal algorithm 'c2po' should use. Values: 'partition' (default): it compares the partitions each time; 'normal_form': computes a normal form of the domain to compare them. It is lazily computed. The two possibilities are equivalent in precision, but in some cases computing the normal form is more efficient, as it is computed only once per distinct domain element.", + "type": "string", + "enum": [ + "normal_form", + "partition" + ], + "default": "partition" + } + }, + "additionalProperties": false + }, "unassume": { "title": "ana.unassume", "type": "object", diff --git a/src/goblint_lib.ml b/src/goblint_lib.ml index 415fb21605..338e1dddcb 100644 --- a/src/goblint_lib.ml +++ b/src/goblint_lib.ml @@ -82,6 +82,7 @@ module LinearTwoVarEqualityAnalysis = LinearTwoVarEqualityAnalysis module VarEq = VarEq module CondVars = CondVars module TmpSpecial = TmpSpecial +module C2poAnalysis = C2poAnalysis (** {2 Heap} @@ -171,6 +172,8 @@ module UnassumeAnalysis = UnassumeAnalysis module ExpRelation = ExpRelation module AbortUnless = AbortUnless module PtranalAnalysis = PtranalAnalysis +module StartStateAnalysis = StartStateAnalysis +module SingleThreadedLifter = SingleThreadedLifter (** {1 Analysis lifters} @@ -262,6 +265,13 @@ module ApronDomain = ApronDomain module AffineEqualityDomain = AffineEqualityDomain module LinearTwoVarEqualityDomain = LinearTwoVarEqualityDomain +(** {5 2-Pointer Logic} + + Domains for {!C2poAnalysis}. *) +module CongruenceClosure = CongruenceClosure +module UnionFind = UnionFind +module C2poDomain = C2poDomain + (** {3 Concurrency} *) module MutexAttrDomain = MutexAttrDomain @@ -410,6 +420,7 @@ module CilType = CilType module Cilfacade = Cilfacade module CilLocation = CilLocation module RichVarinfo = RichVarinfo +module DuplicateVars = DuplicateVars module CilCfg = CilCfg module LoopUnrolling = LoopUnrolling diff --git a/tests/regression/84-c2po/01-simple.c b/tests/regression/84-c2po/01-simple.c new file mode 100644 index 0000000000..abb6b2c69a --- /dev/null +++ b/tests/regression/84-c2po/01-simple.c @@ -0,0 +1,20 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +#include +#include + +void main(void) { + int *i; + int **j; + j = (int**)malloc(sizeof(int*)+7); + *(j + 3) = (int *)malloc(sizeof(int)); + int *k; + i = *(j + 3); + *j = k; + + __goblint_check(**j == *k); + __goblint_check(i == *(j + 3)); + + j = &k + 1; + + __goblint_check(j == &k); // FAIL +} diff --git a/tests/regression/84-c2po/02-rel-simple.c b/tests/regression/84-c2po/02-rel-simple.c new file mode 100644 index 0000000000..0e42b79ce8 --- /dev/null +++ b/tests/regression/84-c2po/02-rel-simple.c @@ -0,0 +1,70 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +#include +#include +#include + +int main(void) { + int *i = (int *)malloc(sizeof(int)); + int ***j = (int ***)malloc(sizeof(int) * 4); + int **j2 = (int **)malloc(sizeof(int)); + int **j23 = (int **)malloc(sizeof(int)); + *j = j2; + *(j + 3) = j23; + int *j3 = (int *)malloc(sizeof(int)); + int *j33 = (int *)malloc(sizeof(int)); + *j2 = j3; + **(j + 3) = j33; + *j3 = 4; + *j33 = 5; + int *k = i; + *k = 3; + // j --> *j=j2 --> **j=j3 --> ***j=|4| + // (j+3) --> j23 --> j33 --> |5| + // k=i --> |3| + + // printf("***j = %d\n", ***j); // 4 + // printf("***(j + 3) = %d\n", ***(j + 3)); // 5 + // printf("*i = %d\n", *i); // 3 + // printf("*k = %d\n", *k); // 3 + // printf("\n"); + + __goblint_check(*j23 == j33); + __goblint_check(*j2 == j3); + __goblint_check(*i == *k); + + i = **(j + 3); + + // j --> *j=j2 --> **j=j3 --> ***j=|4| + // (j+3) --> j23 --> j33=i --> |5| + // k --> |3| + + // printf("***j = %d\n", ***j); // 4 + // printf("***(j + 3) = %d\n", ***(j + 3)); // 5 + // printf("*i = %d\n", *i); // 5 + // printf("*k = %d\n", *k); // 3 + // printf("\n"); + + __goblint_check(*j23 == j33); + __goblint_check(*j2 == j3); + __goblint_check(*i == *j33); + + *j = &k; + + // j2 --> j3 --> |4| + // (j+3) --> j23 --> j33=i --> |5| + // j --> *j --> k --> |3| + + // printf("***j = %d\n", ***j); // 3 + // printf("***(j + 3) = %d\n", ***(j + 3)); // 5 + // printf("*i = %d\n", *i); // 5 + // printf("*k = %d\n", *k); // 3 + // printf("**j2 = %d\n", **j2); // 4 + + __goblint_check(*j23 == j33); + __goblint_check(*j2 == j3); + __goblint_check(**j == k); + + // not assignable: &k = *j; + + return 0; +} diff --git a/tests/regression/84-c2po/03-function-call.c b/tests/regression/84-c2po/03-function-call.c new file mode 100644 index 0000000000..73dd0f0c3c --- /dev/null +++ b/tests/regression/84-c2po/03-function-call.c @@ -0,0 +1,29 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include +#include + +int *i; +int **j; + +int *f(int **a, int *b) { return *a; } + +int *g(int **a, int *b) { + a = (int **)malloc(sizeof(int *)); + return *a; +} + +int main(void) { + + j = (int **)malloc(sizeof(int *)); + *j = (int *)malloc(sizeof(int)); + int *k = f(j, i); + + __goblint_check(k == *j); + + k = g(j, i); + + __goblint_check(k == *j); // UNKNOWN! + + return 0; +} diff --git a/tests/regression/84-c2po/04-remove-vars.c b/tests/regression/84-c2po/04-remove-vars.c new file mode 100644 index 0000000000..b5e906bd9c --- /dev/null +++ b/tests/regression/84-c2po/04-remove-vars.c @@ -0,0 +1,23 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +#include +#include + +int *f(int **j) { + int *i = (int *)malloc(sizeof(int)); + + *j = i; + + return i; +} + +int main(void) { + int *i; + int **j; + j = (int**)malloc(sizeof(int*)); + *j = (int *)malloc(sizeof(int)); + int *k = f(j); + + __goblint_check(k == *j); + + return 0; +} diff --git a/tests/regression/84-c2po/05-branch.c b/tests/regression/84-c2po/05-branch.c new file mode 100644 index 0000000000..7d8b3bbd99 --- /dev/null +++ b/tests/regression/84-c2po/05-branch.c @@ -0,0 +1,47 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +#include +#include + +void main(void) { + int *i; + int **j; + int *k; + i = *(j + 3); + *j = k; + j = &k + 1; + int *f; + if (j != &k) { + f = k; + printf("branch2"); + __goblint_check(1); // reachable + } else { + f = i; + printf("branch1"); + __goblint_check(0); // NOWARN (unreachable) + } + + __goblint_check(f == k); + + j = &k; + if (j != &k) { + f = k; + printf("branch1"); + __goblint_check(0); // NOWARN (unreachable) + } else { + f = i; + printf("branch2"); + __goblint_check(1); // reachable + } + + __goblint_check(f == i); + + if (**j + *k * 23 - 2 * *k == 0 && j != &k) { + f = k; + printf("branch1"); + __goblint_check(0); // NOWARN (unreachable) + } else { + f = i; + printf("branch2"); + __goblint_check(1); // reachable + } +} diff --git a/tests/regression/84-c2po/06-invertible-assignment.c b/tests/regression/84-c2po/06-invertible-assignment.c new file mode 100644 index 0000000000..420e8117f3 --- /dev/null +++ b/tests/regression/84-c2po/06-invertible-assignment.c @@ -0,0 +1,17 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +#include + +void main(void) { + long *i; + long **j; + long *k; + j = &k + 1; + j++; + __goblint_check(j == &k + 2); + + i = *(j + 3); + i++; + __goblint_check(i == *(j + 3) + 1); + j++; + __goblint_check(i == *(j + 2) + 1); +} diff --git a/tests/regression/84-c2po/07-invertible-assignment2.c b/tests/regression/84-c2po/07-invertible-assignment2.c new file mode 100644 index 0000000000..559508e01d --- /dev/null +++ b/tests/regression/84-c2po/07-invertible-assignment2.c @@ -0,0 +1,22 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +// example of the paper "2-Pointer Logic" by Seidl et al., Example 9, pag. 22 +#include +#include + +void main(void) { + long x; + long *z; + z = &x; + long y; + + y = -1 + x; + + __goblint_check(z == &x); + __goblint_check(y == -1 + x); + + *z = 1 + x; + + __goblint_check(&x == z); + __goblint_check(y == -2 + x); + +} diff --git a/tests/regression/84-c2po/08-simple-assignment.c b/tests/regression/84-c2po/08-simple-assignment.c new file mode 100644 index 0000000000..5d80308f07 --- /dev/null +++ b/tests/regression/84-c2po/08-simple-assignment.c @@ -0,0 +1,15 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +// example of the paper "2-Pointer Logic" by Seidl et al., pag. 21 +#include + +void main(void) { + long x; + long *z = -1 + &x; + + __goblint_check(z == -1 + &x); + + z = (long*) *(1 + z); + + __goblint_check(x == (long)z); + +} diff --git a/tests/regression/84-c2po/09-different-offsets.c b/tests/regression/84-c2po/09-different-offsets.c new file mode 100644 index 0000000000..5d420d5851 --- /dev/null +++ b/tests/regression/84-c2po/09-different-offsets.c @@ -0,0 +1,20 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +#include + +struct Pair { + int *first; + int *second; +}; + +void main(void) { + int *x; + struct Pair p; + p.first = x; + + struct Pair p2; + p2.first = x; + + __goblint_check(p.first == p2.first); + +} diff --git a/tests/regression/84-c2po/10-different-types.c b/tests/regression/84-c2po/10-different-types.c new file mode 100644 index 0000000000..0f6e7317aa --- /dev/null +++ b/tests/regression/84-c2po/10-different-types.c @@ -0,0 +1,38 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +#include + +void main(void) { + // no problem if they are all ints + int *ipt = (int *)malloc(sizeof(int)); + int *ipt2; + int i; + ipt = &i; + // *ipt: 0; i: 0 + __goblint_check(*ipt == i); + ipt2 = (int *)ipt; + *(ipt2 + 1) = 'a'; + // *ipt: 0; i: 0 + __goblint_check(*ipt == i); + + // long pointer is cast to char pointer -> *(cpt + 1) overwrites *lpt + long *lpt = (long *)malloc(sizeof(long)); + char *cpt; + long lo; + *lpt = lo; + // *lpt: 0; l: 0 + __goblint_check(*lpt == lo); + cpt = (char *)lpt; + *(cpt + 1) = 'a'; + + // *lpt: 24832; l: 0 + __goblint_check(*lpt == lo); // UNKNOWN! + + lo = 0; + *lpt = lo; + // *lpt: 0; l: 0 + __goblint_check(*lpt == lo); + *((char *)lpt + 1) = 'a'; + // *lpt: 24832; l: 0 + __goblint_check(*lpt == lo); // UNKNOWN! +} diff --git a/tests/regression/84-c2po/11-array.c b/tests/regression/84-c2po/11-array.c new file mode 100644 index 0000000000..2b6c264852 --- /dev/null +++ b/tests/regression/84-c2po/11-array.c @@ -0,0 +1,21 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts +#include +#include + +void main(void) { + int m[5]; + + int **j; + int *l; + j = (int **)malloc(sizeof(int *) + 7); + j[3] = (int *)malloc(sizeof(int)); + int *k; + l = j[3]; + j[0] = k; + j[2] = m; + + __goblint_check(**j == *k); + __goblint_check(l == *(j + 3)); + __goblint_check(j[2] == m); + +} diff --git a/tests/regression/84-c2po/12-rel-function.c b/tests/regression/84-c2po/12-rel-function.c new file mode 100644 index 0000000000..ee7db3b01c --- /dev/null +++ b/tests/regression/84-c2po/12-rel-function.c @@ -0,0 +1,22 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts + +#include +#include + +void *f(int **a, int **b) { + int *j; + int **i = &j; + j = (int *)malloc(sizeof(int) * 2); + *a = j; + *b = *i + 1; +} + +int main(void) { + int **c = (int**)malloc(sizeof(int*)); + int **d = (int**)malloc(sizeof(int*));; + f(c, d); + + __goblint_check(*d == *c + 1); + + return 0; +} diff --git a/tests/regression/84-c2po/13-experiments.c b/tests/regression/84-c2po/13-experiments.c new file mode 100644 index 0000000000..d2023e1e47 --- /dev/null +++ b/tests/regression/84-c2po/13-experiments.c @@ -0,0 +1,42 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +#include + +struct Pair { + int (*first)[7]; + int second; +}; + +struct Crazy { + int whatever; + int arr[5]; +}; + +void main(void) { + int arr[7] = {1, 2, 3, 4, 5, 6, 7}; + int(*x)[7] = (int(*)[7])malloc(sizeof(int)); + struct Pair p; + p.first = x; + p.second = (*x)[3]; + + struct Pair p2; + p2.first = x; + + __goblint_check(p.first == p2.first); + + int arr2[2][2] = {{1, 2}, {1, 2}}; + p.second = arr2[1][1]; + + int *test; + + int *x2[2] = {test, test}; + + int test2 = *(x2[1]); + + struct Crazy crazyy[3][2]; + + __goblint_check(crazyy[2][1].arr[4] == ((struct Crazy *)crazyy)[5].arr[4]); + + int *sx[4]; + int k = *sx[1]; +} diff --git a/tests/regression/84-c2po/14-join.c b/tests/regression/84-c2po/14-join.c new file mode 100644 index 0000000000..33fad953f4 --- /dev/null +++ b/tests/regression/84-c2po/14-join.c @@ -0,0 +1,23 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include + +void main(void) { + long y; + long i; + long x; + long *z; + int top; + + if (top) { + z = -1 + &x; + y = x; + } else { + z = -1 + &x; + i = x; + } + + __goblint_check(z == -1 + &x); + __goblint_check(x == i); // UNKNOWN! + __goblint_check(y == x); // UNKNOWN! +} diff --git a/tests/regression/84-c2po/15-arrays-structs.c b/tests/regression/84-c2po/15-arrays-structs.c new file mode 100644 index 0000000000..3f68dcf87a --- /dev/null +++ b/tests/regression/84-c2po/15-arrays-structs.c @@ -0,0 +1,62 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +#include + +struct mystruct { + int first; + int second; +}; + +struct arrstruct { + int first[3]; + int second[3]; +}; + +void main(void) { + // array of struct + struct mystruct arrayStructs[3]; + + __goblint_check(arrayStructs[0].first == + ((int *)arrayStructs)[0]); // they are the same element + __goblint_check(arrayStructs[1].second == + ((int *)arrayStructs)[3]); // they are the same element + __goblint_check(arrayStructs[2].first == + ((int *)arrayStructs)[4]); // they are the same element + + // struct of array + struct arrstruct structArray; + int *pstruct = (int *)&structArray; // pointer to struct + __goblint_check(structArray.first[0] == + pstruct[0]); // they are the same element + __goblint_check(structArray.first[2] == + pstruct[2]); // they are the same element + __goblint_check(structArray.second[0] == + pstruct[3]); // they are the same element + __goblint_check(structArray.second[2] == + pstruct[5]); // they are the same element + + // array of array + int array2D[2][2] = {{1, 2}, {3, 4}}; + __goblint_check(array2D[0][0] == + *((int *)array2D + 0)); // they are the same element + __goblint_check(array2D[1][0] == + *((int *)array2D + 2)); // they are the same element + __goblint_check(array2D[1][1] == + *((int *)array2D + 3)); // they are the same element + + // arr2D[0][1] is the element and arr2D[2] is a pointer to an array + __goblint_check(array2D[0][1] == (long)array2D[2]); // UNKNOWN! + + __goblint_check((int *)array2D[0] + 4 == (int *)array2D[2]); + __goblint_check((int *)array2D + 4 == (int *)array2D[2]); + + __goblint_check(array2D[1][2] == *((int *)array2D + 4)); + __goblint_check((int *)array2D + 4 == (int *)array2D[2]); + + // 3D array + int array3D[2][3][4]; + __goblint_check(array3D[1][0][3] == *((int *)array3D + 15)); + __goblint_check(array3D[1][2][0] == *((int *)array3D + 20)); + __goblint_check(array3D[1][2][3] == *((int *)array3D + 23)); + __goblint_check(array3D[0][1][1] == *((int *)array3D + 5)); +} diff --git a/tests/regression/84-c2po/16-loops.c b/tests/regression/84-c2po/16-loops.c new file mode 100644 index 0000000000..3e508325b8 --- /dev/null +++ b/tests/regression/84-c2po/16-loops.c @@ -0,0 +1,30 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include +#include + +int main(void) { + long y; + long i; + long *x = malloc(sizeof(long) * 300); + long *x2 = x; + long *z; + int top; + top = top % 300; // top is some number that is < 300 + + y = *x; + z = -1 + x; + + while (top > 0) { + z = (long *)malloc(sizeof(long)); + x++; + z = -1 + x; + y++; + top--; + } + + __goblint_check(z == -1 + x); + __goblint_check(y == *x2); // UNKNOWN! + + return 0; +} diff --git a/tests/regression/84-c2po/17-join2.c b/tests/regression/84-c2po/17-join2.c new file mode 100644 index 0000000000..97bcbdb2be --- /dev/null +++ b/tests/regression/84-c2po/17-join2.c @@ -0,0 +1,21 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts + +#include + +void main(void) { + long *y = (long *)malloc(4 * sizeof(long)); + long a; + long b; + long *x = (long *)malloc(4 * sizeof(long)); + int top; + + if (top) { + *(x + 2) = a + 1; + *(y + 1) = a + 2; + } else { + *(x + 2) = b + 2; + *(y + 1) = b + 3; + } + + __goblint_check(*(x + 2) == *(y + 1) - 1); +} diff --git a/tests/regression/84-c2po/18-complicated-join.c b/tests/regression/84-c2po/18-complicated-join.c new file mode 100644 index 0000000000..ff55e82d94 --- /dev/null +++ b/tests/regression/84-c2po/18-complicated-join.c @@ -0,0 +1,25 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.join_algorithm precise +// Example 1 from the paper Join Algorithms for the Theory of Uninterpreted +// Functions by Gulwani et al. + +#include +#include + +void main(void) { + long ********y = (long ********)malloc(100 * sizeof(long *)); + *y = (long *******)malloc(100 * sizeof(long *)); + **y = (long ******)malloc(100 * sizeof(long *)); + int top; + + if (top) { + **y = (long ******)y; + __goblint_check(**y == (long ******)y); + __goblint_check(******y == (long**)y); + } else { + ***y = (long ***)y; + __goblint_check(***y == (long *****)y); + __goblint_check(******y == (long**)y); + } + + __goblint_check(******y == (long**)y); +} diff --git a/tests/regression/84-c2po/19-disequalities.c b/tests/regression/84-c2po/19-disequalities.c new file mode 100644 index 0000000000..c7c8c8d22a --- /dev/null +++ b/tests/regression/84-c2po/19-disequalities.c @@ -0,0 +1,40 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +#include + +void main(void) { + long *i; + long **j; + j = (int **)malloc(sizeof(int *) + 7); + *(j + 3) = (int *)malloc(sizeof(int)); + int *k; + *j = k; + + __goblint_check(**j != *k + 1); + __goblint_check(**j != *k + 2); + + if (*i != **(j + 3)) { + __goblint_check(i != *(j + 3)); + __goblint_check(&i != j + 3); + j = NULL; + __goblint_check(i != *(j + 3)); // UNKNOWN + } + + int *k2 = (int *)malloc(sizeof(int)); + *j = k2; + k = k2; + + __goblint_check(*j == k); + __goblint_check(k2 == k); + + int *f1 = (int *)malloc(sizeof(int)); + int *f2 = f2; + + if (*j != f2) { + __goblint_check(*j != f2); + __goblint_check(k != f1); + j = NULL; + __goblint_check(*j != f2); // UNKNOWN + __goblint_check(k != f1); + } +} diff --git a/tests/regression/84-c2po/20-self-pointing-struct.c b/tests/regression/84-c2po/20-self-pointing-struct.c new file mode 100644 index 0000000000..3b6af9e865 --- /dev/null +++ b/tests/regression/84-c2po/20-self-pointing-struct.c @@ -0,0 +1,21 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +#include + +struct list { + int data; + struct list *next; +}; + +void main(void) { + struct list last = { + 41 + }; + struct list first = { + 42, &last + }; + + last.next = &last; + + __goblint_check(first.next->next->next->next == &last); +} diff --git a/tests/regression/84-c2po/21-global-var.c b/tests/regression/84-c2po/21-global-var.c new file mode 100644 index 0000000000..a4cf669f20 --- /dev/null +++ b/tests/regression/84-c2po/21-global-var.c @@ -0,0 +1,40 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts + +#include +#include + +int **i; +int **j; +int counter; + +void f() { __goblint_check(*i == *j); } + +void recursive_f() { + __goblint_check(*i == *j); + counter++; + if (counter < 25) + recursive_f(); +} + +void non_terminating_f() { + if (*i == *j) + non_terminating_f(); +} + +int main(void) { + + j = (int **)malloc(sizeof(int *)); + i = (int **)malloc(sizeof(int *)); + *i = (int *)malloc(sizeof(int)); + + *j = *i; + f(); + + recursive_f(); + + non_terminating_f(); + + __goblint_check(0); // NOWARN (unreachable) + + return 0; +} diff --git a/tests/regression/84-c2po/22-join-diseq.c b/tests/regression/84-c2po/22-join-diseq.c new file mode 100644 index 0000000000..c0192e5f93 --- /dev/null +++ b/tests/regression/84-c2po/22-join-diseq.c @@ -0,0 +1,37 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include + +void main(void) { + long *a; + long *b; + long *c; + long *d = (long *)malloc(4 * sizeof(long)); + long *e = (long *)malloc(4 * sizeof(long)); + + long *unknown; + + int top; + + if (a != b + 4 && e != c && c != d) { + __goblint_check(a != b + 4); + __goblint_check(e != c); + __goblint_check(c != d); + if (top) { + d = unknown; + __goblint_check(a != b + 4); + __goblint_check(e != c); + __goblint_check(c != d); // UNKNOWN! + + } else { + e = unknown; + __goblint_check(a != b + 4); + __goblint_check(e != c); // UNKNOWN! + __goblint_check(c != d); + } + // JOIN + __goblint_check(a != b + 4); + __goblint_check(e != c); // UNKNOWN! + __goblint_check(c != d); // UNKNOWN! + } +} diff --git a/tests/regression/84-c2po/23-function-deref.c b/tests/regression/84-c2po/23-function-deref.c new file mode 100644 index 0000000000..001dc6887e --- /dev/null +++ b/tests/regression/84-c2po/23-function-deref.c @@ -0,0 +1,25 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include +#include + +void *g(int **a, int *b) { + b = (int *)malloc(sizeof(int *)); + *a = b; +} + +int main(void) { + int *i = (int *)malloc(sizeof(int)); + int **j; + j = (int **)malloc(sizeof(int *)); + *j = (int *)malloc(sizeof(int)); + int *k = *j; + + __goblint_check(k == *j); + + g(j, i); + + __goblint_check(k == *j); // UNKNOWN! + + return 0; +} diff --git a/tests/regression/84-c2po/24-disequalities-small-example.c b/tests/regression/84-c2po/24-disequalities-small-example.c new file mode 100644 index 0000000000..4bb007d017 --- /dev/null +++ b/tests/regression/84-c2po/24-disequalities-small-example.c @@ -0,0 +1,13 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include + +int *a, b; +c() { b = 0; } +main() { + int *d; + if (a == d) + ; + else + __goblint_check(a != d); + c(); +} diff --git a/tests/regression/84-c2po/25-struct-circular.c b/tests/regression/84-c2po/25-struct-circular.c new file mode 100644 index 0000000000..1073684dc7 --- /dev/null +++ b/tests/regression/84-c2po/25-struct-circular.c @@ -0,0 +1,29 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include +#include + +struct mem { + int val; +}; + +struct list_node { + int x; + struct mem *mem; + struct list_node *next; +}; + +int main() { + struct mem *m = malloc(sizeof(*m)); + int x = ((struct mem *) m)->val; + m->val = 100; + + struct list_node *head = malloc(sizeof(*head)); + + head->x = 1; + head->mem = m; + head->next = head; + + __goblint_check(head->next == head); + __goblint_check(head->next->next == head->next); +} diff --git a/tests/regression/84-c2po/26-join3.c b/tests/regression/84-c2po/26-join3.c new file mode 100644 index 0000000000..f2a710a9b0 --- /dev/null +++ b/tests/regression/84-c2po/26-join3.c @@ -0,0 +1,45 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include +#include +#include + +void main(void) { + long *x; + long *y; + long *z = malloc(sizeof(long)); + int top; + + if (top) { + x = z + 7; + y = z + 3; + } else { + x = z + 1; + y = z + 1; + } + + __goblint_check(x == z + 7); // UNKNOWN! + __goblint_check(x == z + 3); // UNKNOWN! + __goblint_check(x == z + 1); // UNKNOWN! + __goblint_check(x == z + 1); // UNKNOWN! + + long *x1; + long *x2; + long *y1; + long *y2; + + if (top) { + x1 = z + 1; + y1 = z + 2; + x2 = z + 1; + y2 = z + 2; + } else { + x1 = z + 2; + y1 = z + 3; + x2 = z + 4; + y2 = z + 5; + } + + __goblint_check(x1 == y1 - 1); + __goblint_check(x2 == y2 - 1); +} diff --git a/tests/regression/84-c2po/27-join-diseq2.c b/tests/regression/84-c2po/27-join-diseq2.c new file mode 100644 index 0000000000..4c02a4b93a --- /dev/null +++ b/tests/regression/84-c2po/27-join-diseq2.c @@ -0,0 +1,39 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include +#include + +int main(void) { + long *a; + long *b; + long *c; + long *d = (long *)malloc(4 * sizeof(long)); + long *e = (long *)malloc(4 * sizeof(long)); + + long *unknown; + + int top; + + if (a != b && e != c && c != d) { + __goblint_check(a != b); + __goblint_check(e != c); + __goblint_check(c != d); + if (top) { + d = unknown; + d = c + 1; + __goblint_check(a != b); + __goblint_check(e != c); + __goblint_check(c != d); // implicit disequality + } else { + e = unknown; + __goblint_check(a != b); + __goblint_check(e != c); // UNKNOWN! + __goblint_check(c != d); + } + // JOIN + __goblint_check(a != b); + __goblint_check(e != c); // UNKNOWN! + __goblint_check(c != d); + } + return 0; +} diff --git a/tests/regression/84-c2po/28-return-value.c b/tests/regression/84-c2po/28-return-value.c new file mode 100644 index 0000000000..5715080751 --- /dev/null +++ b/tests/regression/84-c2po/28-return-value.c @@ -0,0 +1,36 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +int a; + +struct h { + int c; +}; + +struct i { + int d; +}; + +struct h* b; +struct i* c; + +const void *d(const int* e) { + return e + 200; +} + +int *f() { + return (int*) 0; +} + +int g(int, struct h *, struct i *) { + int *j = f(); + d(j); + __goblint_check(1); // reachable +} + +int main() { + g(a, b, c); + if (0) { + __goblint_check(0); // NOWARN (unreachable) + } + __goblint_check(1); // reachable +} diff --git a/tests/regression/84-c2po/29-widen.c b/tests/regression/84-c2po/29-widen.c new file mode 100644 index 0000000000..d8247e09ab --- /dev/null +++ b/tests/regression/84-c2po/29-widen.c @@ -0,0 +1,25 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false +#include +int a; +long b, c, d, e, f, g, h; +int *i; +k() { + int j; + long top; + while (top) { + b = a * 424; + c = j; + d = j + b; + e = a * 424; + f = e + 8; + g = j; + h = j + f; + i = h; + a = a + 1; + __goblint_check(g == c); + // __goblint_check(h == 8 + d); + __goblint_check((long)i == h); + __goblint_check(j == c); + } +} +main() { k(); } diff --git a/tests/regression/84-c2po/30-call.c b/tests/regression/84-c2po/30-call.c new file mode 100644 index 0000000000..e2faf91b99 --- /dev/null +++ b/tests/regression/84-c2po/30-call.c @@ -0,0 +1,26 @@ +// PARAM: --set ana.activated[+] c2po --set ana.activated[+] startState --set ana.activated[+] taintPartialContexts --set ana.c2po.askbase false + +#include + +int g; +int h; + +int *foo(int *p, int *q){ + // Improved treatment of function calls required for this assertion + __goblint_check(p == q); // TODO + return p; +} + +int main(){ + int *ptr = &g; + int *ptr2 = &g; + int top; + if (top){ + ptr = &h; + ptr2 = &h; + } + __goblint_check(ptr == ptr2); + int* ret = foo(ptr, ptr); + __goblint_check(ptr == ret); + return 0; +} \ No newline at end of file