Skip to content

Commit a94cf1c

Browse files
author
Ronald Judin
committed
OCaml C-stub analysis
1 parent f69b2d3 commit a94cf1c

File tree

6 files changed

+289
-2
lines changed

6 files changed

+289
-2
lines changed

src/analyses/base.ml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2736,6 +2736,29 @@ struct
27362736
set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set)
27372737
| _ -> st
27382738
end
2739+
| OCamlAlloc wosize, _ ->
2740+
(*begin match lv with
2741+
| Some lv ->
2742+
let heap_var = AD.unknown_ptr (*AD.of_var (heap_var false ctx)*) in
2743+
let sizeval = eval_int ~ctx st wosize in
2744+
set_many ~ctx st [
2745+
(heap_var, TVoid [], Blob (VD.bot (), sizeval, true));
2746+
(eval_lv ~ctx st lv, (Cilfacade.typeOfLval lv), Address heap_var)
2747+
]
2748+
| _ -> st
2749+
end*)
2750+
let open Queries.AllocationLocation in
2751+
begin
2752+
(* The behavior for alloc(0) is implementation defined. Here, we rely on the options specified for the malloc also applying to alloca. *)
2753+
match lv with
2754+
| Some lv ->
2755+
let loc = Heap in
2756+
let (heap_var, addr) = alloc loc wosize in
2757+
(* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *)
2758+
let blob_set = Option.map_default (fun heap_var -> [(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st wosize, ZeroInit.malloc))]) [] heap_var in
2759+
set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set)
2760+
| _ -> st
2761+
end
27392762
| Calloc { count = n; size }, _ ->
27402763
begin match lv with
27412764
| Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *)

src/analyses/ocaml.ml

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
(** Simple interprocedural analysis of OCaml C-stubs ([ocaml]). *)
2+
3+
(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *)
4+
(* Helpful link on CIL: https://goblint.github.io/cil/ *)
5+
(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml` *)
6+
(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *)
7+
8+
open GoblintCil
9+
open Analyses
10+
11+
module VarinfoSet = SetDomain.Make(CilType.Varinfo)
12+
13+
(* Use to check if a specific function is a sink / source *)
14+
(* Sources take value-typed arguments *)
15+
(* Sinks may trigger garbage collection *)
16+
let is_sink varinfo = Cil.hasAttribute "ocaml_sink" varinfo.vattr
17+
let is_source varinfo = Cil.hasAttribute "ocaml_source" varinfo.vattr
18+
19+
20+
(** "Fake" variable to handle returning from a function *)
21+
let return_varinfo = dummyFunDec.svar
22+
23+
module Spec : Analyses.MCPSpec =
24+
struct
25+
include Analyses.DefaultSpec
26+
27+
let name () = "ocaml"
28+
module D =
29+
struct
30+
(* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. *)
31+
module P = Lattice.Prod (VarinfoSet) (VarinfoSet)
32+
include P
33+
34+
let empty () = (VarinfoSet.empty (), VarinfoSet.empty ())
35+
36+
(* After garbage collection, the second set is written to the first set *)
37+
let after_gc (accounted, registered) = (registered, registered)
38+
39+
let mem_a v (accounted, registered) =
40+
VarinfoSet.mem v accounted
41+
42+
let mem_r v (accounted, registered) =
43+
VarinfoSet.mem v registered
44+
45+
let add_a v (accounted, registered) =
46+
(VarinfoSet.add v accounted, registered)
47+
48+
let add_r v (accounted, registered) =
49+
(accounted, VarinfoSet.add v registered)
50+
51+
let remove_a v (accounted, registered) =
52+
(VarinfoSet.remove v accounted, registered)
53+
54+
let remove_r v (accounted, registered) =
55+
(accounted, VarinfoSet.remove v registered)
56+
end
57+
module C = Printable.Unit
58+
59+
(* We are context insensitive in this analysis *)
60+
let context ctx _ _ = ()
61+
let startcontext () = ()
62+
63+
64+
(** Determines whether an expression [e] is healthy, given a [state]. *)
65+
let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with
66+
(* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *)
67+
| AddrOf v
68+
| StartOf v
69+
| Lval v -> lval_accounted_for state v
70+
| BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2
71+
| Real e
72+
| Imag e
73+
| SizeOfE e
74+
| AlignOfE e
75+
| CastE (_,e)
76+
| UnOp (_,e,_) -> exp_accounted_for state e
77+
| SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> false
78+
| Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f
79+
and lval_accounted_for state = function
80+
| (Var v, _) ->
81+
(* Checks whether variable v is accounted for *) (*false*)
82+
if D.mem_a v state then true else let _ = M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v in false
83+
| _ ->
84+
(* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *)
85+
false
86+
87+
(* transfer functions *)
88+
89+
(** Handles assignment of [rval] to [lval]. *)
90+
let assign ctx (lval:lval) (rval:exp) : D.t =
91+
let state = ctx.local in
92+
match lval with
93+
| Var v,_ ->
94+
(* If lval is of type value, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *)
95+
if exp_accounted_for state rval then D.add_a v state
96+
else D.remove_a v state
97+
| _ -> state
98+
99+
(** Handles conditional branching yielding truth value [tv]. *)
100+
let branch ctx (exp:exp) (tv:bool) : D.t =
101+
(* Nothing needs to be done *)
102+
ctx.local
103+
104+
(** Handles going from start node of function [f] into the function body of [f].
105+
Meant to handle e.g. initializiation of local variables. *)
106+
let body ctx (f:fundec) : D.t =
107+
(* The (non-formals) locals are initially accounted for *)
108+
let state = ctx.local in
109+
List.fold_left (fun st v -> D.add_a v st) state f.sformals
110+
111+
(** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *)
112+
let return ctx (exp:exp option) (f:fundec) : D.t =
113+
let state = ctx.local in
114+
match exp with
115+
| Some e ->
116+
(* Checks that value returned is accounted for. *)
117+
(* Return_varinfo is used in place of a "real" variable. *)
118+
(* state *)
119+
if exp_accounted_for state e then D.add_a return_varinfo state
120+
else let _ = M.warn "Value returned might be garbage collected" in D.remove_a return_varinfo state
121+
| None -> state
122+
123+
(** For a function call "lval = f(args)" or "f(args)",
124+
[enter] returns a caller state, and the initial state of the callee.
125+
In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below)
126+
will compute the caller state after the function call, given the return state of the callee. *)
127+
let enter ctx (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list =
128+
let caller_state = ctx.local in
129+
(* Create list of (formal, actual_exp)*)
130+
(*
131+
let zipped = List.combine f.sformals args in
132+
(* TODO: For the initial callee_state, collect formal parameters where the actual is healthy. *)
133+
let callee_state = List.fold_left (fun ts (f,a) ->
134+
if exp_accounted_for caller_state a
135+
then D.add f ts (* TODO: Change accumulator ts here? *)
136+
else D.remove f ts)
137+
(D.bot ())
138+
zipped in
139+
*)
140+
(* TODO: Should this be checked with locals or formals, and how exactly? Likely with locals. *)
141+
let callee_state = caller_state in
142+
(* first component is state of caller, second component is state of callee *)
143+
[caller_state, callee_state]
144+
145+
(** For a function call "lval = f(args)" or "f(args)",
146+
computes the global environment state of the caller after the call.
147+
Argument [callee_local] is the state of [f] at its return node. *)
148+
let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t =
149+
(* Nothing needs to be done *)
150+
ctx.local
151+
152+
(** For a function call "lval = f(args)" or "f(args)",
153+
computes the state of the caller after assigning the return value from the call.
154+
Argument [callee_local] is the state of [f] at its return node. *)
155+
let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t =
156+
let caller_state = ctx.local in
157+
(* Records whether lval was accounted for. *) (* caller_state *)
158+
match lval with (* The variable returned is played by return_varinfo *)
159+
| Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state
160+
else let _ = M.warn "Returned value may be garbage-collected" in D.remove_a v caller_state
161+
| _ -> caller_state
162+
163+
(** For a call to a _special_ function f "lval = f(args)" or "f(args)",
164+
computes the caller state after the function call.
165+
For this analysis, source and sink functions will be considered _special_ and have to be treated here. *)
166+
let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t =
167+
let caller_state = ctx.local in
168+
(* TODO: Check if f is a sink / source and handle it appropriately *)
169+
(* To warn about a potential issue in the code, use M.warn. *)
170+
(* caller_state *)
171+
let desc = LibraryFunctions.find f in
172+
match desc.special arglist with
173+
(* TODO: Add a source function that registers variables and add the non-buggy Bagnall code to the test. *)
174+
| OCamlAlloc size_exp ->
175+
(* Garbage collection may trigger here and overwrite unregistered variables *)
176+
let _ = M.info "Garbage collection triggers" in (match lval with
177+
| Some (Var v, _) -> D.add_a v (D.after_gc caller_state)
178+
| _ ->
179+
(* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) D.empty ()
180+
)
181+
| _ ->
182+
List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to sinks/sources *)
183+
if is_source f then match lval with (* Assigning the source's result makes the variable accounted for. *)
184+
| Some (Var v, _) -> D.add_a v caller_state
185+
| _ -> caller_state
186+
else
187+
if is_sink f then (* Warns if unaccounted variables reach the function. Empties the state of unregistered variables. *)
188+
let _ = M.info "Garbage collection triggers" in match lval with
189+
| Some (Var v, _) -> D.add_a v (D.after_gc caller_state)
190+
| _ ->
191+
(* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *)
192+
D.after_gc caller_state
193+
else caller_state
194+
195+
(* You may leave these alone *)
196+
let startstate v = D.bot ()
197+
let threadenter ctx ~multiple lval f args = [D.top ()]
198+
let threadspawn ctx ~multiple lval f args fctx = ctx.local
199+
let exitstate v = D.top ()
200+
end
201+
202+
let _ =
203+
MCP.register_analysis (module Spec : MCPSpec)

src/config/options.schema.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,7 +1501,8 @@
15011501
"pcre",
15021502
"zlib",
15031503
"liblzma",
1504-
"legacy"
1504+
"legacy",
1505+
"ocaml"
15051506
]
15061507
},
15071508
"default": [
@@ -1513,7 +1514,8 @@
15131514
"linux-userspace",
15141515
"goblint",
15151516
"ncurses",
1516-
"legacy"
1517+
"legacy",
1518+
"ocaml"
15171519
]
15181520
}
15191521
},

src/util/library/libraryDesc.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ type math =
4747
type special =
4848
| Alloca of Cil.exp
4949
| Malloc of Cil.exp
50+
| OCamlAlloc of Cil.exp
5051
| Calloc of { count: Cil.exp; size: Cil.exp; }
5152
| Realloc of { ptr: Cil.exp; size: Cil.exp; }
5253
| Free of Cil.exp

src/util/library/libraryFunctions.ml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,13 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[
12421242
]
12431243
[@@coverage off]
12441244

1245+
let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[
1246+
("caml_copy_double", unknown [drop "nptr" []]);
1247+
(* Eeskuju: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *)
1248+
("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize);
1249+
]
1250+
[@@coverage off]
1251+
12451252
let libraries = Hashtbl.of_list [
12461253
("c", c_descs_list @ math_descs_list);
12471254
("posix", posix_descs_list);
@@ -1259,6 +1266,7 @@ let libraries = Hashtbl.of_list [
12591266
("zlib", zlib_descs_list);
12601267
("liblzma", liblzma_descs_list);
12611268
("legacy", legacy_libs_misc_list);
1269+
("ocaml", ocaml_descs_list)
12621270
]
12631271

12641272
let libraries =
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "pringo_LXM_copy" --set "mainfun[+]" "pringo_LXM_copy_correct" --set "mainfun[+]" "pringo_LXM_init_unboxed" --disable warn.imprecise --set "exp.extraspecials[+]" printInt
2+
3+
// Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered.
4+
5+
// putchar from the standard library is marked as a sink
6+
7+
#include <stdint.h>
8+
#include <string.h>
9+
#include <caml/mlvalues.h>
10+
#include <caml/alloc.h>
11+
#include <caml/memory.h>
12+
13+
/* A small definition of the LXM state so sizeof works - from AI */
14+
struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; };
15+
16+
/* Minimal macros to mimic expected behaviour */
17+
#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value))
18+
#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v))
19+
20+
#define CAMLparam1(x) __goblint_caml_param1(&x)
21+
#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well.
22+
23+
/*CAMLextern value caml_alloc_small (mlsize_t wosize, tag_t) __attribute__((__ocaml_sink__));*/
24+
25+
CAMLprim value pringo_LXM_copy(value v)
26+
{
27+
value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag);
28+
memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state));
29+
return res;
30+
}
31+
32+
CAMLprim value pringo_LXM_copy_correct(value v)
33+
{
34+
CAMLparam1(v);
35+
value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag);
36+
memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state));
37+
CAMLreturn(res);
38+
}
39+
40+
CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2,
41+
uint64_t i3, uint64_t i4)
42+
{
43+
value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag);
44+
struct LXM_state * st = LXM_val(v);
45+
st->a = i1 | 1; /* must be odd */
46+
st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */
47+
st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */
48+
st->s = i4;
49+
return v;
50+
}

0 commit comments

Comments
 (0)