diff --git a/doc/changes/changed/14542.md b/doc/changes/changed/14542.md new file mode 100644 index 00000000000..6b51e232f7f --- /dev/null +++ b/doc/changes/changed/14542.md @@ -0,0 +1,6 @@ +- Extract the structural-hash accumulator into a new `Stdune.Hash` + module and route `Stdune.Tuple.T2.hash`, `Stdune.Tuple.T3.hash`, + `Stdune.List.hash`, `Stdune.Result.hash`, and `Resolve.error_hash` + through it. Hashing these structural shapes no longer allocates per + call. + (#14542, @robinbb) diff --git a/otherlibs/stdune/src/hash.ml b/otherlibs/stdune/src/hash.ml new file mode 100644 index 00000000000..ad60df786b3 --- /dev/null +++ b/otherlibs/stdune/src/hash.ml @@ -0,0 +1,5 @@ +type t = int + +let create () = 1 +let feed acc x = (acc * 31) + x +let hash t = t diff --git a/otherlibs/stdune/src/hash.mli b/otherlibs/stdune/src/hash.mli new file mode 100644 index 00000000000..64815a2d3e0 --- /dev/null +++ b/otherlibs/stdune/src/hash.mli @@ -0,0 +1,16 @@ +(** A non-allocating hash accumulator. + + Threads an [int] seed through a sequence of input ints with a + multiplicative combiner; designed for use by structural [hash] + functions on records, tuples, and lists that previously built a + temporary value to pass to [Stdlib.Hashtbl.hash]. + + Hash values produced by this module are stable within a single + process run but make no cross-process or cross-version guarantee; + use only as a [Hashtbl] / [Memo] bucket selector. *) + +type t + +val create : unit -> t +val feed : t -> int -> t +val hash : t -> int diff --git a/otherlibs/stdune/src/list.ml b/otherlibs/stdune/src/list.ml index 7ce0ea75ebc..04f7a231940 100644 --- a/otherlibs/stdune/src/list.ml +++ b/otherlibs/stdune/src/list.ml @@ -202,7 +202,14 @@ let rec equal eq xs ys = | _, _ -> false ;; -let hash f xs = Stdlib.Hashtbl.hash (map ~f xs) +let hash f xs = + let rec loop acc = function + | [] -> acc + | x :: xs -> loop (Hash.feed acc (f x)) xs + in + loop (Hash.create ()) xs |> Hash.hash +;; + let cons x xs = x :: xs (* copy&paste from [base] *) diff --git a/otherlibs/stdune/src/result.ml b/otherlibs/stdune/src/result.ml index 0ab43ca48f4..8440be5d5dc 100644 --- a/otherlibs/stdune/src/result.ml +++ b/otherlibs/stdune/src/result.ml @@ -134,11 +134,17 @@ module List = struct ;; end -let hash h1 h2 t = - Stdlib.Hashtbl.hash - (match t with - | Ok s -> h1 s - | Error e -> h2 e) +let hash h1 h2 = function + | Ok s -> + let acc = Hash.create () in + let acc = Hash.feed acc 0 in + let acc = Hash.feed acc (h1 s) in + Hash.hash acc + | Error e -> + let acc = Hash.create () in + let acc = Hash.feed acc 1 in + let acc = Hash.feed acc (h2 e) in + Hash.hash acc ;; let equal e1 e2 x y = diff --git a/otherlibs/stdune/src/stdune.ml b/otherlibs/stdune/src/stdune.ml index 5986ce6364c..92f00c0a556 100644 --- a/otherlibs/stdune/src/stdune.ml +++ b/otherlibs/stdune/src/stdune.ml @@ -20,6 +20,7 @@ module Exn_with_backtrace = Exn_with_backtrace module Filename = Filename module Filename_set = Filename_set module Format = Format +module Hash = Hash module Hashtbl = Hashtbl module Table = Table module Int = Int diff --git a/otherlibs/stdune/src/tuple.ml b/otherlibs/stdune/src/tuple.ml index 27bcdba6539..d1708c03697 100644 --- a/otherlibs/stdune/src/tuple.ml +++ b/otherlibs/stdune/src/tuple.ml @@ -3,7 +3,13 @@ module T2 = struct let to_dyn = Dyn.pair let equal f g (x1, y1) (x2, y2) = f x1 x2 && g y1 y2 - let hash f g (a, b) = Poly.hash (f a, g b) + + let hash f g (a, b) = + let acc = Hash.create () in + let acc = Hash.feed acc (f a) in + let acc = Hash.feed acc (g b) in + Hash.hash acc + ;; let compare f g (a1, b1) (a2, b2) = match f a1 a2 with @@ -18,7 +24,15 @@ module T3 = struct type ('a, 'b, 'c) t = 'a * 'b * 'c let to_dyn = Dyn.triple - let hash f g h (a, b, c) = Poly.hash (f a, g b, h c) + + let hash f g h (a, b, c) = + let acc = Hash.create () in + let acc = Hash.feed acc (f a) in + let acc = Hash.feed acc (g b) in + let acc = Hash.feed acc (h c) in + Hash.hash acc + ;; + let equal f g h (a1, b1, c1) (a2, b2, c2) = f a1 a2 && g b1 b2 && h c1 c2 let compare f g h (a1, b1, c1) (a2, b2, c2) = diff --git a/otherlibs/stdune/test/list_tests.ml b/otherlibs/stdune/test/list_tests.ml index c589fa1705d..f90daadd0d5 100644 --- a/otherlibs/stdune/test/list_tests.ml +++ b/otherlibs/stdune/test/list_tests.ml @@ -18,3 +18,28 @@ let%expect_test _ = intersperse [ "foo"; "bar"; "baz" ] ~sep:"sep"; [%expect {| [ "foo"; "sep"; "bar"; "sep"; "baz" ] |}] ;; + +let%expect_test "List.hash distinguishes length and identity" = + (* Bool.hash false = 0, so under any rolling hash that starts at 0, + [], [false], [false; false], ... would all collide. The current + implementation starts at 1, breaking that family of collisions. *) + let h = List.hash Bool.hash in + let print xs = Printf.printf "%d\n" (h xs) in + print []; + print [ false ]; + print [ false; false ]; + print [ false; false; false ]; + print [ true ]; + print [ true; false ]; + print [ false; true ]; + [%expect + {| + 1 + 31 + 961 + 29791 + 32 + 992 + 962 + |}] +;; diff --git a/otherlibs/stdune/test/tuple_tests.ml b/otherlibs/stdune/test/tuple_tests.ml new file mode 100644 index 00000000000..43447203ac5 --- /dev/null +++ b/otherlibs/stdune/test/tuple_tests.ml @@ -0,0 +1,36 @@ +open Stdune + +let%expect_test "Tuple.T2.hash distinguishes trivial inputs" = + (* Bool.hash false = 0, so a 2-tuple of all-zeros would collide with + ([], false), etc. if the combiner forgot to seed the accumulator. + The Hash module's [create ()] seed of 1 keeps these distinct. *) + let h = Tuple.T2.hash Bool.hash Bool.hash in + let print xy = Printf.printf "%d\n" (h xy) in + print (false, false); + print (false, true); + print (true, false); + print (true, true); + [%expect + {| + 961 + 962 + 992 + 993 + |}] +;; + +let%expect_test "Tuple.T3.hash distinguishes trivial inputs" = + let h = Tuple.T3.hash Bool.hash Bool.hash Bool.hash in + let print xyz = Printf.printf "%d\n" (h xyz) in + print (false, false, false); + print (false, false, true); + print (false, true, false); + print (true, false, false); + [%expect + {| + 29791 + 29792 + 29822 + 30752 + |}] +;; diff --git a/src/dune_rules/resolve.ml b/src/dune_rules/resolve.ml index 4cc87e67d5a..8ecd58e27e0 100644 --- a/src/dune_rules/resolve.ml +++ b/src/dune_rules/resolve.ml @@ -28,7 +28,10 @@ let error_equal { message; stack_frames } b = let equal f = Result.equal f error_equal let error_hash { message; stack_frames } = - Poly.hash (Poly.hash message, Stdune.List.map stack_frames ~f:Lazy.force) + let acc = Hash.create () in + let acc = Hash.feed acc (Poly.hash message) in + let acc = Hash.feed acc (List.hash (fun f -> Poly.hash (Lazy.force f)) stack_frames) in + Hash.hash acc ;; let to_dyn f t = diff --git a/src/source/opam_switch.ml b/src/source/opam_switch.ml index f476cf97e99..a02c41b8e38 100644 --- a/src/source/opam_switch.ml +++ b/src/source/opam_switch.ml @@ -21,8 +21,4 @@ include Repr.Poly (struct let repr = repr end) -let hash { root; switch } = - Tuple.T2.hash (Option.hash String.hash) String.hash (root, switch) -;; - let opam_switch_prefix_var_name = "OPAM_SWITCH_PREFIX"