Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: all clean test fuzz bench-pack bench doc examples
.PHONY: all clean test fuzz bench-pack bench bench-inline doc examples

all:
dune build
Expand All @@ -13,6 +13,25 @@ bench-pack-with-trace-replay:

bench: bench-pack-with-trace-replay

# bench-inline: build and run the inline-contents benchmark, redirect its
# standard output to results.csv, then derive data files (generate_data.sh)
# and render three gnuplot charts, all inside test/irmin-pack/bench_inline/.
# Requires bash and gnuplot in addition to dune.
bench-inline:
@echo "Building inline contents benchmark..."
@dune build test/irmin-pack/bench_inline/main.exe
@echo "Running benchmark (this may take a few minutes)..."
@dune exec test/irmin-pack/bench_inline/main.exe -- run --all \
--contents 5000 --reads 500 --runs 3 \
> test/irmin-pack/bench_inline/results.csv
@echo "Generating data files..."
@cd test/irmin-pack/bench_inline && bash generate_data.sh results.csv
@echo "Generating plots..."
@cd test/irmin-pack/bench_inline && gnuplot bench_inline_plot.gp
@cd test/irmin-pack/bench_inline && gnuplot bench_inline_improvement.gp
@cd test/irmin-pack/bench_inline && gnuplot bench_inline_storage.gp
@echo "Done. Results in test/irmin-pack/bench_inline/"
@echo " - results.csv: Raw benchmark data"
@echo " - bench_inline_comparison.png: Latency comparison"
@echo " - bench_inline_improvement.png: Latency improvement"
@echo " - bench_inline_storage.png: Storage impact"

fuzz:
dune build @fuzz --no-buffer

Expand Down
8 changes: 7 additions & 1 deletion bench/irmin-pack/bench_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,13 @@ let with_progress_bar ~message ~n ~unit =
in
with_reporter ~config bar

module Conf = Irmin_tezos.Conf
module Conf = struct
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we changing this?

let entries = 32
let stable_hash = 256
let contents_length_header = None
let inode_child_order = `Seeded_hash
let forbid_empty_dir_persistence = true
end

module Schema = struct
open Irmin
Expand Down
204 changes: 204 additions & 0 deletions bench/irmin-pack/bench_inlined_contents.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
(*
* Copyright (c) 2018-2022 Tarides <contact@tarides.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)

(** Benchmark for comparing performance with and without inline contents.

This benchmark measures the time for:
- Writing trees with small contents (that would be inlined when enabled)
- Reading back those contents
- Storage size comparison *)

module Store = Irmin_tezos.Store

let info = Store.Info.empty

(* Timing utilities *)
(** [time_it name f] forces a full major GC, runs [f ()], prints
    ["<name>: <elapsed> ms"] on standard output and returns
    [(elapsed_ms, result_of_f)]. The elapsed time is taken from an
    [Mtime_clock] counter started just before [f] is called. *)
let time_it name f =
  Gc.full_major ();
  let counter = Mtime_clock.counter () in
  let result = f () in
  let elapsed_ns = Mtime.Span.to_float_ns (Mtime_clock.count counter) in
  let ms = elapsed_ns *. 1e-6 in
  Fmt.pr "%s: %.2f ms@." name ms;
  (ms, result)

(* Content generators *)
(** [small_content i] is ["v<i mod 1000>"] as bytes: 2-4 bytes for
    non-negative [i], will be inlined. *)
let small_content i = Printf.sprintf "v%d" (i mod 1000) |> Bytes.of_string

(** [medium_content i] is ["value_"] followed by [i] zero-padded to 6 digits,
    as bytes: 12 bytes for [0 <= i < 1_000_000], will be inlined. *)
let medium_content i = Printf.sprintf "value_%06d" i |> Bytes.of_string

(** [large_content i] is ["large_content_value_"] followed by [i] zero-padded
    to 10 digits, as bytes: 30 bytes, NOT inlined. *)
let large_content i =
  let text = Printf.sprintf "large_content_value_%010d" i in
  Bytes.of_string text

(** Which content generator a benchmark run uses: [Small] selects
    [small_content], [Medium] selects [medium_content] and [Large] selects
    [large_content]. *)
type content_size = Small | Medium | Large

(** [content_of_size size i] is the [i]-th value produced by the generator
    associated with [size]. *)
let content_of_size size i =
  let generate =
    match size with
    | Small -> small_content
    | Medium -> medium_content
    | Large -> large_content
  in
  generate i

(** [string_of_size size] is the lowercase name of [size], as used in the
    benchmark banner. *)
let string_of_size size =
  match size with Small -> "small" | Medium -> "medium" | Large -> "large"

(* Benchmark configuration *)
(* Parameters of one benchmark run. *)
type config = {
(* Number of entries added to the tree of every commit. *)
num_entries : int;
(* Number of commits created during the write phase. *)
num_commits : int;
(* Selects the generator used for the stored values. *)
content_size : content_size;
}

(* Default parameters (1000 entries, 10 commits, small contents). Not
   referenced elsewhere in this file, hence the leading underscore; the
   command-line options declare the same defaults themselves. *)
let _default_config =
{ num_entries = 1000; num_commits = 10; content_size = Small }

(* Run a single benchmark *)
(* Path of the [i]-th benchmark entry: [["dir<i/100>"; "file<i>"]], i.e. 100
   entries per directory. Shared by the write and read phases so that both
   always address exactly the same keys. *)
let entry_key i =
  [ Printf.sprintf "dir%d" (i / 100); Printf.sprintf "file%d" i ]

(** [run_benchmark ~sw ~fs ~inline_contents ~config root] runs one benchmark
    in a fresh store located at [root] (any existing directory is removed
    first).

    - Write phase: creates [config.num_commits] parentless commits, each with
      a tree of [config.num_entries] values produced by [content_of_size].
    - Read phase: looks up every entry in the tree of the most recently
      created commit (a no-op when no commit was created).

    Returns [(write_ms, read_ms, size)] where [size] is the value reported by
    [Bench_common.FSHelper.get_size root], measured before the repository is
    closed. The repository is closed on every exit path, including when a
    phase raises. *)
let run_benchmark ~sw ~fs ~inline_contents ~config root =
  Eio.Path.rmtree ~missing_ok:true root;
  let store_config =
    Irmin_pack.config ~sw ~fs ~fresh:true
      ~indexing_strategy:Irmin_pack.Indexing_strategy.minimal ~inline_contents
      root
  in
  let repo = Store.Repo.v store_config in
  (* Guarantee the repository is released even if a phase fails. *)
  Fun.protect ~finally:(fun () -> Store.Repo.close repo) @@ fun () ->
  (* Benchmark: Write phase *)
  let write_time, commits =
    time_it
      (Printf.sprintf "Write (%d commits x %d entries)" config.num_commits
         config.num_entries)
      (fun () ->
        let commits = ref [] in
        for commit_idx = 0 to config.num_commits - 1 do
          let rec add_entries tree i =
            if i >= config.num_entries then tree
            else
              let key = entry_key i in
              (* Offset by the commit index so values differ across commits. *)
              let value =
                content_of_size config.content_size
                  ((commit_idx * config.num_entries) + i)
              in
              add_entries (Store.Tree.add tree key value) (i + 1)
          in
          let tree = add_entries (Store.Tree.empty ()) 0 in
          let commit = Store.Commit.v repo ~parents:[] ~info tree in
          (* Prepending keeps the most recent commit at the head. *)
          commits := commit :: !commits
        done;
        !commits)
  in

  (* Benchmark: Read phase - read all contents from last commit *)
  let read_time, () =
    time_it (Printf.sprintf "Read (%d entries)" config.num_entries) (fun () ->
        match commits with
        | [] -> ()
        | last_commit :: _ ->
            let tree = Store.Commit.tree last_commit in
            for i = 0 to config.num_entries - 1 do
              (* Only the lookup cost matters; the value itself is dropped. *)
              ignore (Store.Tree.find tree (entry_key i) : _ option)
            done)
  in

  (* Get storage size *)
  let size = Bench_common.FSHelper.get_size root in
  (write_time, read_time, size)

(* Main benchmark runner *)
(** [run ~sw ~fs ~config] runs the benchmark twice, first without and then
    with inlined contents, in the stores [_bench/inline-no] and
    [_bench/inline-yes] under [fs], and prints a comparison of write time,
    read time and store size. Percentages are relative to the run without
    inlining. *)
let run ~sw ~fs ~config =
  let bench_root name = Eio.Path.(fs / "_bench" / name) in
  (* Relative change of [value] with respect to [baseline], in percent. *)
  let percent_change ~baseline value =
    (value -. baseline) /. baseline *. 100.0
  in

  Fmt.pr "@.=== Benchmark: %s contents, %d commits x %d entries ===@.@."
    (string_of_size config.content_size)
    config.num_commits config.num_entries;

  Fmt.pr "--- Without inlining ---@.";
  let base_write, base_read, base_size =
    run_benchmark ~sw ~fs ~inline_contents:false ~config
      (bench_root "inline-no")
  in

  Fmt.pr "@.--- With inlining ---@.";
  let inl_write, inl_read, inl_size =
    run_benchmark ~sw ~fs ~inline_contents:true ~config
      (bench_root "inline-yes")
  in

  (* Print comparison *)
  let write_delta = percent_change ~baseline:base_write inl_write in
  let read_delta = percent_change ~baseline:base_read inl_read in
  (* The size baseline is clamped to 1 to avoid dividing by zero. *)
  let size_delta =
    Float.of_int (inl_size - base_size)
    /. Float.of_int (max 1 base_size)
    *. 100.0
  in
  Fmt.pr "@.=== Comparison ===@.";
  Fmt.pr "Write time: %.2f ms (no inline) vs %.2f ms (inline) [%.1f%%]@."
    base_write inl_write write_delta;
  Fmt.pr "Read time: %.2f ms (no inline) vs %.2f ms (inline) [%.1f%%]@."
    base_read inl_read read_delta;
  Fmt.pr "Store size: %d MB (no inline) vs %d MB (inline) [%.1f%%]@."
    base_size inl_size size_delta

(* Command line interface *)
open Cmdliner

(** Command-line option [-n]/[--num-entries] (integer, default 1000). *)
let num_entries =
  let arg_info =
    Arg.info [ "n"; "num-entries" ] ~doc:"Number of entries per commit"
  in
  Arg.value (Arg.opt Arg.int 1000 arg_info)

(** Command-line option [-c]/[--num-commits] (integer, default 10). *)
let num_commits =
  let arg_info = Arg.info [ "c"; "num-commits" ] ~doc:"Number of commits" in
  Arg.value (Arg.opt Arg.int 10 arg_info)

(** Command-line option [-s]/[--size]: one of [small], [medium] or [large]
    (default [small]). *)
let content_size =
  let size_conv =
    Arg.enum [ ("small", Small); ("medium", Medium); ("large", Large) ]
  in
  let arg_info =
    Arg.info [ "s"; "size" ]
      ~doc:
        "Content size: small (2-4 bytes), medium (12 bytes), or large (30 \
         bytes)"
  in
  Arg.value (Arg.opt size_conv Small arg_info)

(** [main () num_entries num_commits content_size] is the command body: it
    starts the Eio event loop, makes sure the [_bench] directory exists under
    the current working directory and runs the comparison there. The store
    roots are resolved relative to the working directory, which is why [cwd]
    is what gets passed as [~fs]. *)
let main () num_entries num_commits content_size =
  Eio_main.run @@ fun env ->
  let cwd = Eio.Stdenv.cwd env in
  Eio.Switch.run @@ fun sw ->
  let config = { num_entries; num_commits; content_size } in
  (* Create benchmark directory *)
  Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 Eio.Path.(cwd / "_bench");
  run ~sw ~fs:cwd ~config

(** The [bench-inlined-contents] command: wires the three command-line
    options into [main]. *)
let cmd =
  let term =
    Term.(const main $ const () $ num_entries $ num_commits $ content_size)
  in
  let cmd_info =
    Cmd.info "bench-inlined-contents"
      ~doc:"Benchmark inline contents performance"
  in
  Cmd.v cmd_info term

(* Program entry point: evaluate the command and exit with its status code. *)
let () = Cmd.eval cmd |> exit
17 changes: 16 additions & 1 deletion bench/irmin-pack/dune
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,25 @@
(modules trace_stats)
(libraries cmdliner irmin_traces eio_main))

; Executable built from the single module bench_inlined_contents.ml,
; preprocessed with ppx_irmin.internal.
(executable
(name bench_inlined_contents)
(modules bench_inlined_contents)
(preprocess
(pps ppx_irmin.internal))
(libraries
irmin-pack
irmin-pack.unix
irmin-tezos
bench_common
cmdliner
eio_main
mtime
mtime.clock.os))

;; Require the executables to compile during tests

(rule
(alias runtest)
(package irmin-bench)
(deps main.exe tree.exe trace_stats.exe)
(deps main.exe tree.exe trace_stats.exe bench_inlined_contents.exe)
(action (progn)))
13 changes: 13 additions & 0 deletions bin/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; Disabled: debug executable requiring lavyek library
; (executable
; (public_name irmin-inlined)
; (name main)
; (package irmin-pack)
; (libraries unix eio eio_main irmin irmin-pack irmin-pack.unix lavyek)
; (preprocess
; (pps ppx_irmin.internal)))

; (env
; (dev
; (flags
; (:standard -warn-error -A))))
77 changes: 77 additions & 0 deletions bin/main.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
module Contents = Irmin.Contents.String

(* Configuration module passed to the [Irmin_pack_unix.KV] functor in this
   file.
   NOTE(review): the meaning of each field is defined by irmin-pack's
   configuration signature, not by this file; confirm against its
   documentation before changing any value. *)
module Conf = struct
let entries = 4
let stable_hash = 256
let contents_length_header = Some `Varint
let inode_child_order = `Seeded_hash
let forbid_empty_dir_persistence = true
end

(* Hard-coded on-disk location of the store used by this program. *)
let root = "/tmp/irmin-db"

(** [config ~fresh ~sw ~fs] is the irmin-pack configuration for the store at
    [root], resolved against [fs]. *)
let config ~fresh ~sw ~fs =
  let store_path = Eio.Path.(fs / root) in
  Irmin_pack.config ~fresh ~sw ~fs store_path

(* Store implementation used by this program: the [Irmin_pack_unix.KV]
   functor applied to [Conf], then instantiated with [Contents]. *)
module StoreMaker = Irmin_pack_unix.KV (Conf)
module Store = StoreMaker.Make (Contents)

(* Date attached to the next commit info; starts at 0 and is advanced by
   3600 on every call to [info]. *)
let date = ref 0L

(** [info ()] is a commit info with author ["foo"], message ["bar"] and the
    current value of [date]; [date] is then advanced by 3600 for the next
    call. *)
let info () =
  let timestamp = !date in
  date := Int64.add timestamp 3600L;
  Store.Info.v ~author:"foo" ~message:"bar" timestamp

(** [set sw fs] opens a fresh store, adds two bindings under ["a"] to the
    tree of the main branch, writes that tree back at the root path and
    closes the repository, printing a trace line before each store
    operation. *)
let set sw fs =
  let conf = config ~fresh:true ~sw ~fs in
  Fmt.pr "Store.Repo.v@.";
  let repo = Store.Repo.v conf in
  Fmt.pr "Store.main@.";
  let main = Store.main repo in
  let bindings =
    [
      ([ "a"; "b" ], "Hello");
      ([ "a"; "c" ], "!");
      (* ([ "a"; "d"; "e" ], "World");
         ([ "a"; "f" ], "!"); *)
    ]
  in
  (* Bindings are added in list order, starting from the branch's tree. *)
  let tree =
    List.fold_left
      (fun acc (path, value) -> Store.Tree.add acc path value)
      (Store.tree main) bindings
  in
  Store.set_tree_exn ~info main [] tree;
  Fmt.pr "Store.close@.";
  Store.Repo.close repo

(* let get sw fs =
let conf = config ~fresh:false ~sw ~fs in
Fmt.pr "Store.Repo.v@.";
let repo = Store.Repo.v conf in
Fmt.pr "Store.main@.";
let main = Store.main repo in
let value = "Hello" in
Fmt.pr "Store.get_exn %S@." value;
let s = Store.get main [ "a"; "b"; "c" ] in
assert (s = value);
let value = "World" in
Fmt.pr "Store.get_exn %S@." value;
let s = Store.get main [ "a"; "b"; "d" ] in
assert (s = value);
let value = "!" in
Fmt.pr "Store.get_exn %S@." value;
let s = Store.get main [ "a"; "e" ] in
assert (s = value);
Fmt.pr "Store.close@.";
Store.Repo.close repo *)

(* Program entry point: start the Eio event loop, configure terminal output
   and the log level, then run [set] against the process file system. *)
let () =
  Eio_main.run @@ fun env ->
  Eio.Switch.run @@ fun sw ->
  let fs = Eio.Stdenv.fs env in
  (* Configure the standard formatters once, forcing ANSI styling. *)
  Fmt_tty.setup_std_outputs ~style_renderer:`Ansi_tty ();
  (* Logs.set_reporter (Logs_fmt.reporter ()); *)
  Logs.(set_level @@ Some Debug);
  set sw fs
(* get sw fs *)

(*
"a" |-> "b" |-> "c" = "Hello"
| |-> "d" = "World"
|-> "e" = "!"
*)
1 change: 1 addition & 0 deletions doc/irmin-pack/design/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
- [Layered store](./layered_store.md), the design document for the first version of garbage collection (GC) that shipped with [Irmin 3.4](https://github.com/mirage/irmin/releases/tag/3.4.0).
- [Chunked suffix](./chunked_suffix.md), the design document for the GC's second version, which introduced a chunked suffix for disk space saving during a GC. Introduced in [Irmin 3.5](https://github.com/mirage/irmin/releases/tag/3.5.0).
- [Lower layer](./lower_layer.md), the design document for the GC's third phase, extending it to work for unlimited history stores by archiving instead of deleting data. Will be introduced in Irmin 3.7.
- [Inline contents](./inline_contents.md), the design document for embedding small content values directly within node entries to reduce storage overhead and improve read performance.
Loading
Loading