Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/analyses/base.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2736,6 +2736,32 @@ struct
set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set)
| _ -> st
end
| OCamlAlloc wosize, _ ->
(*begin match lv with
| Some lv ->
let heap_var = AD.unknown_ptr (*AD.of_var (heap_var false ctx)*) in
let sizeval = eval_int ~ctx st wosize in
set_many ~ctx st [
(heap_var, TVoid [], Blob (VD.bot (), sizeval, true));
(eval_lv ~ctx st lv, (Cilfacade.typeOfLval lv), Address heap_var)
]
| _ -> st
end*)
let open Queries.AllocationLocation in
begin
(* The behavior for alloc(0) is implementation defined. Here, we rely on the options specified for the malloc also applying to alloca. *)
match lv with
| Some lv ->
let loc = Heap in
let (heap_var, addr) = alloc loc wosize in
(* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *)
let blob_set = Option.map_default (fun heap_var -> [(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st wosize, ZeroInit.malloc))]) [] heap_var in
set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set)
| _ -> st
end
(* TODO: Rethink OCamlParam's placement *)
| OCamlParam params, _ ->
List.fold_left (fun acc param -> let _ = eval_rv ~man acc param in acc) st params
| Calloc { count = n; size }, _ ->
begin match lv with
| Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *)
Expand Down
239 changes: 239 additions & 0 deletions src/analyses/ocaml.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
(** Simple interprocedural analysis of OCaml C-stubs ([ocaml]). *)

(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *)
(* Helpful link on CIL: https://goblint.github.io/cil/ *)
(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml` *)
(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *)

open GoblintCil
open Analyses

module VarinfoSet = SetDomain.Make(CilType.Varinfo)

(** "Fake" variable to handle returning from a function *)
let return_varinfo = dummyFunDec.svar

module Spec : Analyses.MCPSpec =
struct
include Analyses.DefaultSpec

let name () = "ocaml"
module D =
struct
(* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. The third contains variables the analysis tracks. *)
module P = Lattice.Prod3 (VarinfoSet) (VarinfoSet) (VarinfoSet)
include P

let empty () = (VarinfoSet.empty (), VarinfoSet.empty (), VarinfoSet.empty ())

(* After garbage collection, the second set is written to the first set *)
let after_gc (accounted, registered, tracked) = (registered, registered, tracked)

(* Untracked variables are always fine. *)
let mem_a v (accounted, registered, tracked) =
VarinfoSet.mem v accounted

let mem_r v (accounted, registered, tracked) =
VarinfoSet.mem v registered

let mem_t v (accounted, registered, tracked) =
VarinfoSet.mem v tracked

let add_a v (accounted, registered, tracked) =
(VarinfoSet.add v accounted, registered, tracked)

let add_r v (accounted, registered, tracked) =
(accounted, VarinfoSet.add v registered, tracked)

let add_t v (accounted, registered, tracked) =
(accounted, registered, VarinfoSet.add v tracked)

let remove_a v (accounted, registered, tracked) =
(VarinfoSet.remove v accounted, registered, tracked)

let remove_r v (accounted, registered, tracked) =
(accounted, VarinfoSet.remove v registered, tracked)

let remove_t v (accounted, registered, tracked) =
(accounted, registered, VarinfoSet.remove v tracked)
end
module C = Printable.Unit

(* We are context insensitive in this analysis *)
let context ctx _ _ = ()
let startcontext () = ()


(** Determines whether an expression [e] is healthy, given a [state]. *)
let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with
(* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *)
| AddrOf v
| StartOf v
| Lval v -> lval_accounted_for state v
| BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2
| Real e
| Imag e
| SizeOfE e
| AlignOfE e
| CastE (_,e)
| UnOp (_,e,_) -> exp_accounted_for state e
| SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true
| Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f
and lval_accounted_for state = function
| (Var v, _) ->
(* Checks whether variable v is accounted for *) (*false*)
if D.mem_a v state || not (D.mem_t v state) then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false)
| _ ->
(* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *)
false

(** Determines whether an expression [e] has parts in the OCaml heap, given a [state]. *)
let rec exp_tracked (state:D.t) (e:Cil.exp) = match e with
(* Recurse over the structure in the expression, returning true if some varinfo appearing in the expression is tracked *)
| AddrOf v
| StartOf v
| Lval v -> lval_tracked state v
| BinOp (_,e1,e2,_) -> exp_tracked state e1 || exp_tracked state e2
| Real e
| Imag e
| SizeOfE e
| AlignOfE e
| CastE (_,e)
| UnOp (_,e,_) -> exp_tracked state e
| SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> false
| Question (b, t, f, _) -> exp_tracked state b || exp_tracked state t || exp_tracked state f
and lval_tracked state = function
| (Var v, _) ->
(* Checks whether variable v is tracked *)
D.mem_t v state
| _ ->
false

(* transfer functions *)

(** Handles assignment of [rval] to [lval]. *)
let assign ctx (lval:lval) (rval:exp) : D.t =
let state = ctx.local in
match lval with
| Var v,_ ->
(* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *)
(* Emits an event for the variable v not being zero. *)

if Cil.isPointerType (Cil.typeOf rval) then

Check warning

Code scanning / Semgrep OSS

Semgrep Finding: semgrep.cilfacade Warning

use Cilfacade instead
if exp_accounted_for state rval then
if exp_tracked state rval then D.add_a v (D.add_t v state)
else D.add_a v (D.remove_t v state) (* TODO: Is add_a necessary for untracked variables? *)
else (M.info "The above is being assigned"; D.remove_a v (D.add_t v state))
else D.remove_t v state
| _ -> state

(** Handles conditional branching yielding truth value [tv]. *)
let branch ctx (exp:exp) (tv:bool) : D.t =
(* Nothing needs to be done *)
ctx.local

(** Handles going from start node of function [f] into the function body of [f].
Meant to handle e.g. initializiation of local variables. *)
let body ctx (f:fundec) : D.t =
(* The (non-formals) locals are tracked and initially accounted for *)
let state = ctx.local in
(* It is assumed that value-typed arguments are never nptrs. *)
let is_value_type (t:typ): bool = match t with
| TNamed (info, attr) -> info.tname = "value"
| _ -> false in
List.fold_left (fun st v -> if is_value_type v.vtype then
(ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v (D.add_t v st))
else D.add_a v (D.add_t v st))
state f.sformals

(** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *)
let return ctx (exp:exp option) (f:fundec) : D.t =
let state = ctx.local in
match exp with
| Some e ->
(* Checks that value returned is accounted for. *)
(* Return_varinfo is used in place of a "real" variable. *)
(* TODO: Consider how the return_varinfo needs to be tracked. *)
(* state *)
if exp_accounted_for state e then D.add_a return_varinfo state
else (M.warn "Value returned might be garbage collected"; D.remove_a return_varinfo state)
| None -> state

(** For a function call "lval = f(args)" or "f(args)",
[enter] returns a caller state, and the initial state of the callee.
In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below)
will compute the caller state after the function call, given the return state of the callee. *)
let enter ctx (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list =
let caller_state = ctx.local in
(* Create list of (formal, actual_exp)*)
(*
let zipped = List.combine f.sformals args in
(* TODO: For the initial callee_state, collect formal parameters where the actual is healthy. *)
let callee_state = List.fold_left (fun ts (f,a) ->
if exp_accounted_for caller_state a
then D.add f ts (* TODO: Change accumulator ts here? *)
else D.remove f ts)
(D.bot ())
zipped in
*)
(* TODO: Should this be checked with locals or formals, and how exactly? Likely with locals. *)
let callee_state = caller_state in
(* first component is state of caller, second component is state of callee *)
[caller_state, callee_state]

(** For a function call "lval = f(args)" or "f(args)",
computes the global environment state of the caller after the call.
Argument [callee_local] is the state of [f] at its return node. *)
let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t =
(* Nothing needs to be done *)
ctx.local

(** For a function call "lval = f(args)" or "f(args)",
computes the state of the caller after assigning the return value from the call.
Argument [callee_local] is the state of [f] at its return node. *)
let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t =
let caller_state = ctx.local in
(* Records whether lval was accounted for. *) (* caller_state *)
(* TODO: Consider how the return_varinfo needs to be tracked. *)
match lval with (* The variable returned is played by return_varinfo *)
| Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state
else (M.warn "Returned value may be garbage-collected"; D.remove_a v caller_state)
| _ -> caller_state

(** For a call to a _special_ function f "lval = f(args)" or "f(args)",
computes the caller state after the function call.
For this analysis, source and sink functions will be considered _special_ and have to be treated here. *)
let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t =
let caller_state = ctx.local in
(* TODO: Check if f is a sink / source and handle it appropriately *)
(* To warn about a potential issue in the code, use M.warn. *)
(* caller_state *)
let desc = LibraryFunctions.find f in
match desc.special arglist with
| OCamlParam params ->
(* Variables are registered with a Param macro. Such variables are also tracked. *)
List.fold_left (fun state param -> match param with
| AddrOf (Var v, _) -> D.add_r v (D.add_t v state)
| _ -> state
) caller_state params
| OCamlAlloc size_exp ->
(* Garbage collection may trigger here and overwrite unregistered variables. *)
M.debug "Garbage collection triggers";
List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings *)
(match lval with
| Some (Var v, _) -> D.add_a v (D.add_t v (D.after_gc caller_state))
| _ -> D.after_gc caller_state
)
| _ ->
List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *)
caller_state

(* You may leave these alone *)
let startstate v = D.bot ()
let threadenter ctx ~multiple lval f args = [D.top ()]
let threadspawn ctx ~multiple lval f args fctx = ctx.local
let exitstate v = D.top ()
end

let _ =
MCP.register_analysis (module Spec : MCPSpec)
6 changes: 4 additions & 2 deletions src/config/options.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,8 @@
"pcre",
"zlib",
"liblzma",
"legacy"
"legacy",
"ocaml"
]
},
"default": [
Expand All @@ -1513,7 +1514,8 @@
"linux-userspace",
"goblint",
"ncurses",
"legacy"
"legacy",
"ocaml"
]
}
},
Expand Down
3 changes: 3 additions & 0 deletions src/util/library/libraryDesc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ type math =
type special =
| Alloca of Cil.exp
| Malloc of Cil.exp
| OCamlAlloc of Cil.exp
(* TODO: Rethink OCamlParam's placement. *)
| OCamlParam of Cil.exp list
| Calloc of { count: Cil.exp; size: Cil.exp; }
| Realloc of { ptr: Cil.exp; size: Cil.exp; }
| Free of Cil.exp
Expand Down
19 changes: 19 additions & 0 deletions src/util/library/libraryFunctions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ let c_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[
("longjmp", special [__ "env" [r]; __ "value" []] @@ fun env value -> Longjmp { env; value });
("atexit", unknown [drop "function" [if_ (fun () -> not (get_bool "sem.atexit.ignore")) s]]);
("atoi", unknown [drop "nptr" [r]]);
("atof", unknown [drop "nptr" [r]]); (* TODO: Rethink atof's position in this list. *)
("atol", unknown [drop "nptr" [r]]);
("atoll", unknown [drop "nptr" [r]]);
("setlocale", unknown [drop "category" []; drop "locale" [r]]);
Expand Down Expand Up @@ -1242,6 +1243,23 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[
]
[@@coverage off]

let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[
("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1)));
(* Example: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *)
("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize);
("caml_alloc_2", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]] @@ fun tag _arg1 _arg2 -> OCamlAlloc (GoblintCil.integer 2));
("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3));
("caml_alloc_4", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 -> OCamlAlloc (GoblintCil.integer 4));
("caml_alloc_5", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]; __ "arg5" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 _arg5 -> OCamlAlloc (GoblintCil.integer 5));
("__goblint_caml_param0", special [] @@ OCamlParam []);
("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]);
("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]);
("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam [param1; param2; param3]);
("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam [param1; param2; param3; param4]);
("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam [param1; param2; param3; param4; param5]);
]
[@@coverage off]

let libraries = Hashtbl.of_list [
("c", c_descs_list @ math_descs_list);
("posix", posix_descs_list);
Expand All @@ -1259,6 +1277,7 @@ let libraries = Hashtbl.of_list [
("zlib", zlib_descs_list);
("liblzma", liblzma_descs_list);
("legacy", legacy_libs_misc_list);
("ocaml", ocaml_descs_list)
]

let libraries =
Expand Down
36 changes: 36 additions & 0 deletions tests/regression/90-ocaml/01-bagnall.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "pringo_LXM_copy" --set "mainfun[+]" "pringo_LXM_copy_correct" --set "mainfun[+]" "pringo_LXM_init_unboxed" --disable warn.imprecise --set "exp.extraspecials[+]" printInt

// Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered.

#include <stdint.h>
#include <string.h>
#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include "goblint_caml.h"

CAMLprim value pringo_LXM_copy(value v)
{
value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag);
memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN
return res;
}

CAMLprim value pringo_LXM_copy_correct(value v)
{
CAMLparam1(v);
value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag);
memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // NOWARN
CAMLreturn(res);
}

CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2,
uint64_t i3, uint64_t i4)
{
value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); // NOWARN
struct LXM_state * st = LXM_val(v);
st->a = i1 | 1; /* must be odd */
st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */
st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */
st->s = i4;
return v;
}
Loading
Loading