Skip to content

Commit dc3d6f6

Browse files
authored
Merge pull request #5 from bglid/feature/b-plus-tree
Feature/min b plus tree
2 parents 2578b8d + 1a8e287 commit dc3d6f6

10 files changed

Lines changed: 280 additions & 7 deletions

File tree

lib/btree/btree.ml

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
(* Warning 69 (unused record field) is switched off for this declaration
   only and restored right after it. *)
[@@@warning "-69"]

(** Handle on a minimal B+ tree. *)
type t = {
  storage_m : Storage_manager.t;
      (** Storage manager through which the tree's blocks are read and
          written. *)
  key : Keys.t;  (** Type of the keys stored in the tree. *)
  mutable root : Nodes.t;  (** Root node. *)
  mutable root_num : int;
      (** Presumably the block number of the root node -- TODO confirm. *)
}

[@@@warning "+69"]
12+
13+
(** [serialize node block_size] lays [node] out in a fresh page of
    [block_size] bytes and returns that page. Nothing is written to disk.

    Page layout (byte offsets):
    - 0: node type tag (see [Nodes.serialize_node])
    - 4: parent
    - 8: current size
    - 12 onwards: [capacity] slots, each a 4-byte pointer followed by a key
      of [Keys.size_of_key node.key_type] bytes
    - [12 + capacity * pair_size]: the last pointer, [pointers.(capacity)];
      for a leaf this slot holds the sibling pointer.

    All [capacity] key slots and all [capacity + 1] pointer slots are
    written, whatever [node.cur_size] is. *)
let serialize (node : Nodes.t) (block_size : int) : Page.Page.t =
  let page = Page.Page.make ~block_size in
  (* Header: node type, parent, current size. *)
  Page.Page.set_int32 page 0 (Nodes.serialize_node node.node_t);
  Page.Page.set_int32 page 4 (Int32.of_int node.parent);
  Page.Page.set_int32 page 8 (Int32.of_int node.cur_size);
  (* A slot is a 4-byte pointer followed by the key bytes. *)
  let pair_size = 4 + Keys.size_of_key node.key_type in
  (* Keys sit 4 bytes into their slot, right after the slot's pointer. *)
  for i = 0 to node.capacity - 1 do
    let key_offset = 12 + (i * pair_size) + 4 in
    match node.keys.(i) with
    | Keys.Integer n -> Page.Page.set_int32 page key_offset n
    | Keys.Varchar s -> Page.Page.set_string_raw page key_offset s
  done;
  (* There is one more pointer than there are keys. Index [capacity] is the
     trailing pointer (the sibling pointer of a leaf), so this loop already
     covers it and no separate write is needed afterwards. *)
  for i = 0 to node.capacity do
    let pointer_offset = 12 + (i * pair_size) in
    Page.Page.set_int32 page pointer_offset (Int32.of_int node.pointers.(i))
  done;
  page
55+
56+
(** [get_num_keys block_size key_type] is the number of keys of type
    [key_type] that fit in a node stored in a block of [block_size] bytes. *)
let get_num_keys (block_size : int) (key_type : Keys.t) : int =
  (* 12 bytes of metadata plus 4 bytes for the final sibling pointer are
     set aside; what remains is divided into pointer + key slots. *)
  let reserved = 12 + 4 in
  let slot_size = 4 + Keys.size_of_key key_type in
  (block_size - reserved) / slot_size
59+
60+
(* Placeholder stored in pointer slots that do not hold a pointer read from
   a page ([deserialize] pre-fills its pointer array with it). *)
let unused_pointer_serial = 0xDDDDDDDD
61+
62+
(** [deserialize page key_type block_size] rebuilds the B+ tree node stored
    in [page].

    The node's capacity is derived from [block_size] and [key_type] with
    [get_num_keys]. Only the first [cur_size] keys and the first
    [cur_size + 1] pointers are read from the page; the remaining keys are
    [Keys.empty_key key_type] and the remaining pointers are
    [unused_pointer_serial]. For a leaf, the sibling pointer stored in the
    last slot is read into [pointers.(capacity)] as well.

    @raise Invalid_argument
      if the current size stored in the page is outside [0, capacity].
    @raise Failure if the node type tag is not recognised. *)
let deserialize (page : Page.Page.t) (key_type : Keys.t) (block_size : int) :
    Nodes.t =
  (* [serialize] stores pointers with [Int32.of_int], which wraps
     [unused_pointer_serial] (0xDDDDDDDD) to a negative int32. Reading it
     back with [Int32.to_int] alone would give -572662307, so that one bit
     pattern is mapped back to the sentinel. Every other value is converted
     exactly as before. *)
  let unused_raw = Int32.of_int unused_pointer_serial in
  let read_pointer offset =
    let raw = Page.Page.get_int32 page offset in
    if Int32.equal raw unused_raw then unused_pointer_serial
    else Int32.to_int raw
  in
  (* Header: node type at 0, parent at 4, current number of keys at 8. *)
  let node_type = Nodes.int32_to_node_t (Page.Page.get_int32 page 0) in
  let parent = Int32.to_int (Page.Page.get_int32 page 4) in
  let cur_size = Int32.to_int (Page.Page.get_int32 page 8) in
  let capacity = get_num_keys block_size key_type in
  (* A size outside the arrays' bounds can only come from a corrupt or
     mismatched page; report it instead of failing on an array index. *)
  if cur_size < 0 || cur_size > capacity then
    invalid_arg
      (Printf.sprintf "Btree.deserialize: current size %d outside [0, %d]"
         cur_size capacity);
  let keys = Array.init capacity (fun _ -> Keys.empty_key key_type) in
  let pointers = Array.make (capacity + 1) unused_pointer_serial in
  let pair_size = 4 + Keys.size_of_key key_type in
  for i = 0 to cur_size - 1 do
    (* Slot [i] is a 4-byte pointer immediately followed by the key. *)
    let pointer_offset = 12 + (i * pair_size) in
    let key_offset = pointer_offset + 4 in
    pointers.(i) <- read_pointer pointer_offset;
    keys.(i) <-
      (match key_type with
      | Keys.TVarchar n ->
          Keys.Varchar (Page.Page.get_string_raw page key_offset n)
      | Keys.TInteger -> Keys.Integer (Page.Page.get_int32 page key_offset))
  done;
  (* One more pointer follows the last key that is in use. *)
  pointers.(cur_size) <- read_pointer (12 + (cur_size * pair_size));
  (* The sibling pointer of a leaf lives in the last slot, index
     [capacity]. *)
  (match node_type with
  | Nodes.Leaf ->
      pointers.(capacity) <- read_pointer (12 + (capacity * pair_size))
  | Nodes.Internal -> ());
  {
    Nodes.node_t = node_type;
    parent;
    cur_size;
    keys;
    pointers;
    capacity;
    key_type;
  }
103+
104+
(* writing nodes to disk *)
105+
(** [write_node btree node n] serializes [node] into a page sized to the
    file manager's block size and stores it in block number [n] through the
    tree's storage manager. *)
let write_node (btree : t) (node : Nodes.t) (n : int) : unit =
  let { storage_m; _ } = btree in
  let page =
    serialize node (File_manager.get_blocksize storage_m.file_manager)
  in
  Storage_manager.update_block_num ~storage_m ~block_num:n ~page
109+
110+
(** [write_node_append btree node] serializes [node], appends the resulting
    page through the tree's storage manager and returns the block number of
    the appended block. *)
let write_node_append (btree : t) (node : Nodes.t) : int =
  let { storage_m; _ } = btree in
  let page =
    serialize node (File_manager.get_blocksize storage_m.file_manager)
  in
  Page.Block.block_num (Storage_manager.append ~storage_m ~page)
115+
116+
(* getting a block from the btree and deserialize it into a node *)
117+
(** [get_node btree p] fetches block number [p] through the tree's storage
    manager and deserializes it into a node, using the tree's key type and
    the file manager's block size. *)
let get_node (btree : t) (p : int) : Nodes.t =
  let { storage_m; key; _ } = btree in
  let block_size = File_manager.get_blocksize storage_m.file_manager in
  let page = Storage_manager.get_block ~storage_m ~block_num:p in
  deserialize page key block_size

lib/btree/btree.mli

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
type t
(** Struct for the B+ tree. *)

val serialize : Nodes.t -> int -> Page.Page.t
(** [serialize node block_size] lays the B+ tree node [node] out in a new
    [Page.Page.t] of [block_size] bytes. The page is only built in memory;
    use [write_node] or [write_node_append] to store it. *)

val get_num_keys : int -> Keys.t -> int
(** [get_num_keys block_size key_type] is the number of keys of type
    [key_type] that a node stored in a block of [block_size] bytes can
    hold. *)

val deserialize : Page.Page.t -> Keys.t -> int -> Nodes.t
(** [deserialize page key_type block_size] converts [page] back into a B+
    tree node. *)

val write_node : t -> Nodes.t -> int -> unit
(** [write_node btree node n] serializes [node] and updates block number [n]
    of the file via the storage manager. *)

val write_node_append : t -> Nodes.t -> int
(** [write_node_append btree node] serializes [node], appends it as a new
    block and returns the block number of that block. Used when creating a
    new node. *)

val get_node : t -> int -> Nodes.t
(** [get_node btree p] gets block number [p] from the B+ tree [btree] and
    deserializes it into a B+ tree node. *)

lib/btree/keys.ml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
(** Kind of key a tree stores. *)
type t =
  | TVarchar of int
      (** String key; the payload is the key's size in bytes (it is what
          [size_of_key] returns). *)
  | TInteger  (** 32-bit integer key, 4 bytes. *)
[@@deriving show]

(** A key value. *)
type value =
  | Varchar of string
  | Integer of Int32.t
[@@deriving show]

(* Three-way comparison shared by all the ordering predicates below, so the
   "same kind of key" rule is written once. Raises [Failure] when the two
   keys are not of the same kind. *)
let compare_keys (k1 : value) (k2 : value) : int =
  match (k1, k2) with
  | Integer n1, Integer n2 -> Int32.compare n1 n2
  | Varchar v1, Varchar v2 -> String.compare v1 v2
  | Integer _, Varchar _ | Varchar _, Integer _ -> failwith "Incompatible keys"

(** [less_than k1 k2] is [true] when [k1] is strictly smaller than [k2].
    @raise Failure if the keys are not of the same kind. *)
let less_than (k1 : value) (k2 : value) : bool = compare_keys k1 k2 < 0

(** [equals k1 k2] is [true] when both keys hold the same value.
    @raise Failure if the keys are not of the same kind. *)
let equals (k1 : value) (k2 : value) : bool = compare_keys k1 k2 = 0

(** [greater_than k1 k2] is [true] when [k1] is strictly greater than [k2].
    @raise Failure if the keys are not of the same kind. *)
let greater_than (k1 : value) (k2 : value) : bool = compare_keys k1 k2 > 0

(** [leq_than k1 k2] is [true] when [k1] is smaller than or equal to [k2].
    @raise Failure if the keys are not of the same kind. *)
let leq_than (k1 : value) (k2 : value) : bool = compare_keys k1 k2 <= 0

(** [greq_than k1 k2] is [true] when [k1] is greater than or equal to [k2].
    @raise Failure if the keys are not of the same kind. *)
let greq_than (k1 : value) (k2 : value) : bool = compare_keys k1 k2 >= 0

(** [string_of_key k] renders [k] as ["Integer: <n>"] or ["Varchar: <s>"]. *)
let string_of_key (k : value) : string =
  match k with
  (* %ld prints the int32 directly, without narrowing it to [int] first. *)
  | Integer n -> Printf.sprintf "Integer: %ld" n
  | Varchar v -> Printf.sprintf "Varchar: %s" v

(** [size_of_key key_type] is the number of bytes a key of type [key_type]
    occupies: the declared size for [TVarchar], 4 for [TInteger]. *)
let size_of_key (key_type : t) : int =
  match key_type with
  | TVarchar s -> s
  | TInteger -> 4

(** [empty_key key_type] is the placeholder key for [key_type]:
    [Int32.max_int] for [TInteger], and a string of [n] double-quote
    characters for [TVarchar n]. *)
let empty_key (key_type : t) : value =
  match key_type with
  (* NOTE(review): the filler character is a double quote; confirm this is
     intended rather than, say, '\000'. *)
  | TVarchar n -> Varchar (String.make n '"')
  | TInteger -> Integer Int32.max_int

lib/btree/keys.mli

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
type t =
  | TVarchar of int
  | TInteger
[@@deriving show]
(** Kind of key a tree stores. The payload of [TVarchar] is the size that
    [size_of_key] reports for it. *)

type value =
  | Varchar of string
  | Integer of Int32.t
[@@deriving show]
(** A key value. *)

val less_than : value -> value -> bool
(** [less_than k1 k2] is [true] when [k1] is strictly smaller than [k2].
    @raise Failure if the keys are not of the same kind. *)

val equals : value -> value -> bool
(** [equals k1 k2] is [true] when both keys hold the same value.
    @raise Failure if the keys are not of the same kind. *)

val leq_than : value -> value -> bool
(** [leq_than k1 k2] is [true] when [k1] is smaller than or equal to [k2].
    @raise Failure if the keys are not of the same kind. *)

val greater_than : value -> value -> bool
(** [greater_than k1 k2] is [true] when [k1] is strictly greater than [k2].
    @raise Failure if the keys are not of the same kind. *)

val greq_than : value -> value -> bool
(** [greq_than k1 k2] is [true] when [k1] is greater than or equal to [k2].
    @raise Failure if the keys are not of the same kind. *)

val string_of_key : value -> string
(** [string_of_key k] renders [k] as ["Integer: <n>"] or ["Varchar: <s>"]. *)

val size_of_key : t -> int
(** [size_of_key key_type] is the size of a key of type [key_type]: the
    declared size for [TVarchar], 4 for [TInteger]. *)

val empty_key : t -> value
(** [empty_key key_type] is the placeholder key for [key_type]:
    [Int32.max_int] for [TInteger], and a string of [n] double-quote
    characters for [TVarchar n]. *)

lib/btree/nodes.ml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
(** The two kinds of B+ tree node. *)
type node_type =
  | Leaf
  | Internal
[@@deriving show]

(* Warning 69 (unused record field) is switched off for this declaration
   only and restored right after it. *)
[@@@warning "-69"]

(** In-memory representation of a B+ tree node. *)
type t = {
  mutable node_t : node_type;  (** Leaf or internal. *)
  mutable parent : int;
  mutable cur_size : int;
  keys : Keys.value array;
  pointers : int array;
  capacity : int;  (** Max number of keys. *)
  key_type : Keys.t;
}
[@@deriving show]

[@@@warning "+69"]

(* Tags identifying the node kind in serialized form. They are written as
   int32 literals so the bit pattern is explicit and does not depend on
   [Int32.of_int] wrapping an out-of-range [int]. *)
let leaf_serial = 0xAAAAAAAAl
let internal_serial = 0xBBBBBBBBl

(** [serialize_node node_t] is the int32 tag that stands for [node_t]. *)
let serialize_node : node_type -> Int32.t = function
  | Leaf -> leaf_serial
  | Internal -> internal_serial

(** [int32_to_node_t i32] is the node kind whose tag is [i32].
    @raise Failure if [i32] is neither of the two known tags. *)
let int32_to_node_t (i32 : Int32.t) : node_type =
  if Int32.equal i32 leaf_serial then Leaf
  else if Int32.equal i32 internal_serial then Internal
  else failwith "WRONG i32!"

lib/btree/nodes.mli

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
(** Variants for the different kinds of node: a node is either a leaf or an
    internal node. *)
type node_type =
  | Leaf
  | Internal
[@@deriving show]

type t = {
  mutable node_t : node_type;
  mutable parent : int;
  mutable cur_size : int;
  keys : Keys.value array;
  pointers : int array;
  (* max num of keys *)
  capacity : int;
  key_type : Keys.t;
}
[@@deriving show]
(** Data structure for nodes. *)

val serialize_node : node_type -> Int32.t
(** [serialize_node node_t] is the int32 tag that stands for [node_t] in
    serialized form. *)

val int32_to_node_t : Int32.t -> node_type
(** [int32_to_node_t i32] is the node kind whose tag is [i32].
    @raise Failure if [i32] is not a known tag. *)

lib/file_manager/page.ml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module Block = struct
2-
type t = string * int
2+
type t = string * int [@@deriving show]
33

44
let file_name (filename, _) = filename
55
let block_num (_, blocknum) = blocknum
@@ -9,7 +9,7 @@ module Block = struct
99
end
1010

1111
module Page = struct
12-
type t = bytes
12+
type t = bytes [@@deriving show]
1313

1414
let make ~block_size = Bytes.make block_size '\000'
1515
let from_bytes b = b

lib/file_manager/page.mli

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
(** Blocks are representative of the Blocks on the Disk *)
22
module Block : sig
3-
type t
3+
type t [@@deriving show]
44

55
val file_name : t -> string
66
(** Return the file name upon giving the block*)
@@ -23,7 +23,7 @@ end
2323
(** Page is a module that allows us to buffer bytes in memory. It allows us to
2424
do work on the blocks before writing them back to disk*)
2525
module Page : sig
26-
type t
26+
type t [@@deriving show]
2727

2828
val make : block_size:int -> t
2929
(** Calls Bytes.make to init a block of size ~block_size and returns an init

lib/storage_manager/storage_manager.ml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
(* open File_manager *)
2-
31
(* Block 0 is for the freelist. If it's not in the freelist it's being used *)
42
(* NOTE: The 'head_page', is top of the freelist *)
53

lib/storage_manager/storage_manager.mli

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
(** This is what our B+ tree will interact with to interact with the file
22
manager *)
33

4-
type t
4+
type t = {
5+
file_manager : File_manager.t;
6+
storage_file : string;
7+
mutable head_page : Page.Page.t (* First page contains metadata *);
8+
}
59

610
val make : file_manager:File_manager.t -> storage_file:string -> t
711
(** Takes a file manager and a string to create a new storage manager *)

0 commit comments

Comments
 (0)