Skip to content

Commit 807cbbc

Browse files
committed
Update integer types to include signed / unsigned ints.
Various updates to make the types accurately reflect the integer sizes specified in the original C types. Added some basic tests for Mach-O binary reading and a GitHub CI setup to run the tests.
1 parent 78bd21a commit 807cbbc

File tree

18 files changed

+2337
-1431
lines changed

18 files changed

+2337
-1431
lines changed

.github/workflows/build.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
3535
- name: Install dependencies
3636
run: |
37-
opam install . --deps-only --with-test
37+
opam install . --deps-only --with-test --with-dev-setup
3838
3939
- name: Build
4040
run: |
@@ -43,3 +43,7 @@ jobs:
4343
- name: Tests
4444
run: |
4545
opam exec -- dune build @runtest
46+
47+
- name: Format
48+
run: |
49+
opam exec -- dune build @fmt

.ocamlformat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
version = 0.27.0
2+
profile = conventional
3+
4+
ocaml-version = 5.2.0

dune-project

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
1-
(lang dune 3.0)
1+
(lang dune 3.9)
22
(name object)
33
(generate_opam_files true)
44

55
(license MIT)
6+
(maintainers "Tim McGilchrist <[email protected]>")
7+
(authors "Tim McGilchrist <[email protected]>")
8+
(source (github tmcgilchrist/object))
69

710
(package
811
(name object)
12+
(tags (elf object mach-o pe coff))
13+
(synopsis "A unified interface for reading and writing object file formats")
14+
(description "A unified interface for reading and writing object file formats")
915
(depends
10-
(ocaml (>= 4.08))
16+
(ocaml (>= 4.08))
1117
integers
12-
(cmdliner :with-test))
13-
(synopsis "A unified interface for reading and writing object file formats")
14-
(description "A unified interface for reading and writing object file formats"))
18+
(cmdliner :with-test)
19+
(alcotest :with-test)
20+
(ocamlformat (and :with-dev-setup (= 0.27.0)))))

object.opam

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,19 @@ opam-version: "2.0"
33
synopsis: "A unified interface for reading and writing object file formats"
44
description:
55
"A unified interface for reading and writing object file formats"
6+
maintainer: ["Tim McGilchrist <[email protected]>"]
7+
authors: ["Tim McGilchrist <[email protected]>"]
68
license: "MIT"
9+
tags: ["elf" "object" "mach-o" "pe" "coff"]
10+
homepage: "https://github.com/tmcgilchrist/object"
11+
bug-reports: "https://github.com/tmcgilchrist/object/issues"
712
depends: [
8-
"dune" {>= "3.0"}
13+
"dune" {>= "3.9"}
914
"ocaml" {>= "4.08"}
1015
"integers"
1116
"cmdliner" {with-test}
17+
"alcotest" {with-test}
18+
"ocamlformat" {with-dev-setup & = "0.27.0"}
1219
"odoc" {with-doc}
1320
]
1421
build: [
@@ -25,3 +32,7 @@ build: [
2532
"@doc" {with-doc}
2633
]
2734
]
35+
dev-repo: "git+https://github.com/tmcgilchrist/object.git"
36+
pin-depends: [
37+
["integers.dev" "git+https://github.com/tmcgilchrist/ocaml-integers.git#small-signed-integers-fixes"]
38+
]

object.opam.template

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pin-depends: [
2+
["integers.dev" "git+https://github.com/tmcgilchrist/ocaml-integers.git#small-signed-integers-fixes"]
3+
]

src/buffer.ml

Lines changed: 37 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,32 @@
11
(** Low-level buffer manipulation *)
22

3-
type t =
4-
(int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
5-
6-
type cursor = {
7-
buffer: t;
8-
mutable position: int;
9-
}
3+
type t = (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
4+
type cursor = { buffer : t; mutable position : int }
105

116
exception Invalid_format of string
7+
128
let invalid_format msg = raise (Invalid_format msg)
139

1410
let parse path =
15-
let fd = Unix.openfile path [Unix.O_RDONLY] 0 in
11+
let fd = Unix.openfile path [ Unix.O_RDONLY ] 0 in
1612
let len = Unix.lseek fd 0 Unix.SEEK_END in
1713
let t =
1814
Bigarray.array1_of_genarray
19-
(Unix.map_file fd Bigarray.int8_unsigned
20-
Bigarray.c_layout false [|len|]) in
15+
(Unix.map_file fd Bigarray.int8_unsigned Bigarray.c_layout false [| len |])
16+
in
2117
Unix.close fd;
2218
t
2319

2420
let size = Bigarray.Array1.dim
25-
26-
let cursor ?(at=0) buffer =
27-
{ buffer; position = at }
28-
29-
let seek t position =
30-
t.position <- position
21+
let cursor ?(at = 0) buffer = { buffer; position = at }
22+
let seek t position = t.position <- position
3123

3224
let ensure t count msg =
3325
(* Ensure position does not overflow before checking for buffer overflow. *)
3426
let new_pos = t.position + count in
35-
if (new_pos < 0) || (size t.buffer < new_pos) then
36-
invalid_format msg
27+
if new_pos < 0 || size t.buffer < new_pos then invalid_format msg
3728

3829
let advance t count = t.position <- t.position + count
39-
4030
let at_end t = size t.buffer = t.position
4131

4232
open Types
@@ -46,28 +36,28 @@ module Read = struct
4636
let u8 t : u8 =
4737
let result = t.buffer.{t.position} in
4838
advance t 1;
49-
result
39+
Unsigned.UInt8.of_int result
5040

5141
let s8 t : s8 =
5242
let result = t.buffer.{t.position} in
5343
advance t 1;
54-
if result > 0x7F
55-
then result lor ((-1) lsl 8)
56-
else result
44+
if result > 0x7F then Signed.Int8.of_int (result lor (-1 lsl 8))
45+
else Signed.Int8.of_int result
5746

5847
let u16 t : u16 =
59-
let result = t.buffer.{t.position} lor t.buffer.{t.position + 1} lsl 8 in
48+
let result = t.buffer.{t.position} lor (t.buffer.{t.position + 1} lsl 8) in
6049
advance t 2;
61-
result
50+
Unsigned.UInt16.of_int result
6251

6352
let u32 t : u32 =
64-
let result = t.buffer.{t.position}
65-
lor t.buffer.{t.position + 1} lsl 8
66-
lor t.buffer.{t.position + 2} lsl 16
67-
lor t.buffer.{t.position + 3} lsl 24
53+
let result =
54+
t.buffer.{t.position}
55+
lor (t.buffer.{t.position + 1} lsl 8)
56+
lor (t.buffer.{t.position + 2} lsl 16)
57+
lor (t.buffer.{t.position + 3} lsl 24)
6858
in
6959
advance t 4;
70-
result
60+
Unsigned.UInt32.of_int result
7161

7262
let u32be = u32
7363

@@ -79,38 +69,32 @@ module Read = struct
7969
result := logor !result (shift_left n (i * 8))
8070
done;
8171
advance t 8;
82-
!result
72+
Unsigned.UInt64.of_int64 !result
8373

8474
let i64 t : i64 =
8575
(* u64 are wrapped in an i64 and are actually signed. *)
86-
u64 t
76+
Unsigned.UInt64.to_int64 (u64 t)
8777

8878
let uleb128 t : u128 =
8979
let rec aux t shift acc =
90-
let x = u8 t in
80+
let x = u8 t |> Unsigned.UInt8.to_int in
9181
let acc = acc lor ((x land 0x7f) lsl shift) in
92-
if x land 0x80 = 0 then
93-
acc
94-
else
95-
aux t (shift + 7) acc
82+
if x land 0x80 = 0 then acc else aux t (shift + 7) acc
9683
in
9784
aux t 0 0
9885

9986
let sleb128 t : s128 =
10087
let rec aux t shift acc =
101-
let x = u8 t in
88+
let x = u8 t |> Unsigned.UInt8.to_int in
10289
let acc = acc lor ((x land 0x7f) lsl shift) in
10390
if x land 0x80 = 0 then
104-
if x land 0x40 = 0
105-
then acc
106-
else acc lor -(1 lsl (shift + 7))
107-
else
108-
aux t (shift + 7) acc
91+
if x land 0x40 = 0 then acc else acc lor -(1 lsl (shift + 7))
92+
else aux t (shift + 7) acc
10993
in
11094
aux t 0 0
11195

11296
let fixed_string t length =
113-
let {buffer; position} = t in
97+
let { buffer; position } = t in
11498
let result = Bytes.create length in
11599
for i = 0 to length - 1 do
116100
Bytes.set result i (Char.unsafe_chr buffer.{position + i})
@@ -119,30 +103,27 @@ module Read = struct
119103
Bytes.unsafe_to_string result
120104

121105
let rec scan_0 (b : t) ofs l i =
122-
if i >= l then
123-
None
124-
else if b.{ofs + i} = 0 then
125-
Some i
126-
else
127-
scan_0 b ofs l (i + 1)
106+
if i >= l then None
107+
else if b.{ofs + i} = 0 then Some i
108+
else scan_0 b ofs l (i + 1)
128109

129110
let zero_string t ?maxlen () =
130-
let maxlen = match maxlen with
111+
let maxlen =
112+
match maxlen with
131113
| None -> size t.buffer - t.position
132114
| Some maxlen -> maxlen
133115
in
134116
match scan_0 t.buffer t.position maxlen 0 with
135117
| None -> None
136118
| Some length ->
137-
let result = fixed_string t length in
138-
advance t 1;
139-
Some result
119+
let result = fixed_string t length in
120+
advance t 1;
121+
Some result
140122

141123
let buffer t length =
142124
let result = Bigarray.Array1.sub t.buffer t.position length in
143125
advance t length;
144126
result
145127
end
146128

147-
let sub t length =
148-
cursor (Read.buffer t length)
129+
let sub t length = cursor (Read.buffer t length)

src/buffer.mli

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,57 +2,56 @@ open Types
22

33
(** Low-level buffer manipulation backed by [Bigarray.Array1.t]. *)
44

5-
type t =
6-
(int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
5+
type t = (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
76

8-
(** Create buffer from filename. *)
97
val parse : string -> t
8+
(** Create buffer from filename. *)
109

1110
(* TODO
1211
Size of buffer remains int, because the size (aka dim) of
1312
Bigarray.Array1 is int, not int64. It should be enough in practice,
1413
as we will not be able to manipulate larger binaries anyway. *)
1514

15+
val size : t -> int
1616
(** Size of the buffer *)
17-
val size : t -> int
1817

19-
(** Minimal support for error reporting. FIXME: Exceptions as errors are disappointing. *)
2018
exception Invalid_format of string
19+
(** Minimal support for error reporting. FIXME: Exceptions as errors are
20+
disappointing. *)
21+
2122
val invalid_format : string -> 'a
23+
(** Raise [Invalid_format] exception with a msg. *)
2224

23-
type cursor = {
24-
buffer: t;
25-
mutable position: int;
26-
}
25+
type cursor = { buffer : t; mutable position : int }
2726

28-
val cursor : ?at:int -> t -> cursor
29-
val seek : cursor -> int -> unit
30-
val ensure : cursor -> int -> string -> unit
27+
val cursor : ?at:int -> t -> cursor
28+
val seek : cursor -> int -> unit
29+
val ensure : cursor -> int -> string -> unit
3130
val advance : cursor -> int -> unit
32-
val at_end : cursor -> bool
31+
val at_end : cursor -> bool
3332

34-
(** [sub t len] returns a fresh cursor pointing to the
35-
beginning of a sub-buffer of size [len] starting from [t], and
36-
advances [t] by [len] *)
37-
val sub : cursor -> int -> cursor
33+
val sub : cursor -> int -> cursor
34+
(** [sub t len] returns a fresh cursor pointing to the beginning of a sub-buffer
35+
of size [len] starting from [t], and advances [t] by [len] *)
3836

3937
module Read : sig
40-
val s8 : cursor -> s8
41-
val u8 : cursor -> u8
42-
val u16 : cursor -> u16
43-
val u32 : cursor -> u32
44-
val u32be : cursor -> u32
45-
val u64 : cursor -> u64
46-
val i64 : cursor -> i64
38+
val s8 : cursor -> s8
39+
val u8 : cursor -> u8
40+
val u16 : cursor -> u16
41+
val u32 : cursor -> u32
42+
val u32be : cursor -> u32
43+
val u64 : cursor -> u64
44+
val i64 : cursor -> i64
4745
val uleb128 : cursor -> u128
4846
val sleb128 : cursor -> s128
4947

50-
(** [fixed_string t len] reads a string of exactly [len] bytes from [t] *)
5148
val fixed_string : cursor -> int -> string
49+
(** [fixed_string t len] reads a string of exactly [len] bytes from [t] *)
5250

53-
(** [zero_string t ?maxlen ()] reads a zero-terminated string from [t],
54-
stopping at the first zero or when [maxlen] is reached, if it was provided. *)
5551
val zero_string : cursor -> ?maxlen:int -> unit -> string option
52+
(** [zero_string t ?maxlen ()] reads a zero-terminated string from [t],
53+
stopping at the first zero or when [maxlen] is reached, if it was
54+
provided. *)
5655

5756
val buffer : cursor -> int -> t
5857
end

0 commit comments

Comments
 (0)