Skip to content

Commit 969a443

Browse files
committed
Clean API
Exposes the skeleton through the root module and sets the pattern for defining more scoring algorithms
1 parent 49e0539 commit 969a443

9 files changed

Lines changed: 195 additions & 105 deletions

File tree

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ HRW (Highest Random Weight) is another name for rendezvous hashing, an alternati
99

1010
The most common library in the Elixir community for solving that problem is ExHashRing by Discord, which is battle-tested and highly performant. However, it requires starting and maintaining processes, and HRW does not. For smaller lists of nodes, `HRW.owner` (O(n)) or `HRW.owners` (O(n log n)) performs just fine and is completely stateless, requiring no setup when starting your app.
1111

12-
This library also comes with HRW.Skeleton which uses a clustering mechanism to go from O(n) to O(log n), with the trade-off that you need to create the struct with `HRW.Skeleton.build` and pass to each call of `HRW.Skeleton.owner`.
12+
For larger node sets, build a skeleton with `HRW.build` and pass it to `HRW.owner` to get O(log n) lookups. The skeleton is plain data — build it once, reuse it across calls.
1313

1414
Additionally, there's `HRW.Bounded` for when you want to control the distribution of keys across nodes to limit skew. Consistent hashing and rendezvous hashing algorithms can easily result in uneven distribution for smaller node counts, and `HRW.Bounded` lets you control that, assuming that you have the whole key set up front.
1515

@@ -21,11 +21,11 @@ HRW.owner("192.168.0.1", ["server1", "server2", "server3"])
2121
HRW.owners("192.168.0.1", ["server1", "server2", "server3"], 2)
2222
#=> ["server2", "server3"]
2323

24-
# HRW.Skeleton
25-
skeleton = HRW.Skeleton.build(["server1", "server2", "server3"])
26-
#=> #HRW.Skeleton<3 nodes, fanout: 3>
24+
# Skeleton-backed lookup for large node sets
25+
skeleton = HRW.build(["server1", "server2", "server3"])
26+
#=> #HRW.Skeleton<3 nodes, fanout: 3, scorer: %HRW{hash_fn: nil}>
2727

28-
HRW.Skeleton.owner("192.168.0.2", skeleton)
28+
HRW.owner("192.168.0.2", skeleton)
2929
#=> "server3"
3030

3131
# HRW.Bounded
@@ -39,11 +39,11 @@ tl;dr HRW performs similarly to ExHashRing on smaller node lists, but falls behi
3939

4040
Lookup latency on Apple M4 Pro / Elixir 1.19.5 / OTP 28.5, median per call:
4141

42-
| nodes | HRW.owner | HRW.Skeleton.owner | ExHashRing.find_node |
42+
| nodes | HRW.owner | HRW.owner (skeleton) | ExHashRing.find_node |
4343
|-------:|------------:|-------------------:|---------------------:|
4444
| 10 | 292 ns | 292 ns | 333 ns |
4545
| 100 | 2.67 µs | 875 ns | 375 ns |
4646
| 1,000 | 25.54 µs | 1.08 µs | 380 ns |
4747
| 10,000 | 253.58 µs | 1.38 µs | 420 ns |
4848

49-
Reproduce with `elixir benches/hrw.exs`.
49+
Reproduce with `elixir benches/bench.exs`.

benches/bench.exs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
Mix.install([
2+
{:hrw, path: Path.expand("..", __DIR__)},
3+
{:benchee, "~> 1.5"},
4+
{:ex_hash_ring, "~> 7.0"}
5+
])
6+
7+
alias ExHashRing.Ring
8+
9+
defmodule Bench do
10+
def run do
11+
setup = fn n ->
12+
nodes = Enum.map(1..n, &"node-#{&1}")
13+
{:ok, ring} = Ring.start_link()
14+
Ring.set_nodes(ring, nodes, :infinity)
15+
%{nodes: nodes, skeleton: HRW.build(nodes), ring: ring}
16+
end
17+
18+
Benchee.run(%{
19+
"HRW.owner" => fn %{nodes: nodes} -> HRW.owner("test", nodes) end,
20+
"HRW.owner (skeleton)" => fn %{skeleton: skeleton} -> HRW.owner("test", skeleton) end,
21+
"ExHashRing.Ring.find_node" => fn %{ring: ring} -> Ring.find_node(ring, "test") end
22+
}, inputs: %{
23+
"A: 10" => setup.(10),
24+
"B: 100" => setup.(100),
25+
"C: 1_000" => setup.(1_000),
26+
"D: 10_000" => setup.(10_000)
27+
})
28+
end
29+
end
30+
31+
Bench.run()

benches/hrw.exs

Lines changed: 0 additions & 25 deletions
This file was deleted.

lib/hrw.ex

Lines changed: 85 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,26 @@ defmodule HRW do
44
to a node out of a set in a way that stays stable when nodes are added or
55
removed.
66
7-
This module is stateless. For O(log n) lookups over large node sets, see
8-
`HRW.Skeleton`.
7+
This module is stateless. For O(log n) lookups over large node sets, build
8+
a skeleton with `build/2` and pass it to `owner/3`.
99
"""
10+
@behaviour HRW.Scorer
11+
12+
defstruct [:hash_fn]
13+
14+
@type t :: %__MODULE__{hash_fn: (term() -> integer()) | nil}
15+
16+
@doc """
17+
Default scorer. Hashes `{key, node}` with the struct's `hash_fn`, falling
18+
back to `:erlang.phash2/1` when `hash_fn` is `nil`.
19+
20+
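Implements `HRW.Scorer`. Called when `%HRW{}` is passed explicitly as `:scorer` to `owner/3` or `owners/4`, or when a skeleton's scorer has a custom `hash_fn`.
21+
Without a `:scorer` option (and for skeletons whose scorer is `%HRW{hash_fn: nil}`), lookups call `:erlang.phash2/1` directly, which produces identical scores.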
22+
"""
23+
@impl HRW.Scorer
24+
@spec score(t(), term(), term()) :: integer()
25+
def score(%__MODULE__{hash_fn: nil}, key, node), do: :erlang.phash2({key, node})
26+
def score(%__MODULE__{hash_fn: hash_fn}, key, node), do: hash_fn.({key, node})
1027

1128
@doc """
1229
Returns the node responsible for `key`.
@@ -15,32 +32,50 @@ defmodule HRW do
1532
1633
## Options
1734
18-
* `:hash_fn` - a function `term -> integer`. Defaults to `&:erlang.phash2/1`.
35+
* `:scorer` - scoring strategy struct. Defaults to `%HRW{}`. Ignored when
36+
the second argument is a skeleton — pass `:scorer` to `build/2` instead.
1937
2038
## Examples
2139
2240
iex> HRW.owner("192.168.0.1", ["server1", "server2", "server3"])
2341
"server2"
2442
43+
iex> skeleton = HRW.build(["server1", "server2", "server3"])
44+
iex> HRW.owner("192.168.0.2", skeleton)
45+
"server3"
2546
"""
26-
@spec owner(term(), [term()], keyword()) :: term()
27-
def owner(key, nodes, opts \\ []) do
28-
hash_fn = Keyword.get(opts, :hash_fn, &:erlang.phash2/1)
29-
30-
nodes
31-
|> Enum.uniq()
32-
|> Enum.sort()
33-
|> Enum.max_by(fn node ->
34-
hash_fn.({key, node})
35-
end)
47+
@spec owner(term(), [term()] | HRW.Skeleton.t(), keyword()) :: term()
48+
def owner(key, nodes_or_skeleton, opts \\ [])
49+
50+
def owner(key, %HRW.Skeleton{} = skeleton, _opts) do
51+
HRW.Skeleton.owner(key, skeleton)
52+
end
53+
54+
def owner(key, nodes, opts) do
55+
nodes =
56+
nodes
57+
|> Enum.sort()
58+
|> Enum.uniq()
59+
60+
if scorer = Keyword.get(opts, :scorer) do
61+
%mod{} = scorer
62+
63+
Enum.max_by(nodes, fn node ->
64+
mod.score(scorer, key, node)
65+
end)
66+
else
67+
Enum.max_by(nodes, fn node ->
68+
:erlang.phash2({key, node})
69+
end)
70+
end
3671
end
3772

3873
@doc """
3974
Returns the top `count` nodes responsible for `key`, in descending weight order.
4075
4176
## Options
4277
43-
* `:hash_fn` - a function `term -> integer`. Defaults to `&:erlang.phash2/1`.
78+
* `:scorer` - scoring strategy struct. Defaults to `%HRW{}`.
4479
4580
## Examples
4681
@@ -50,12 +85,42 @@ defmodule HRW do
5085
"""
5186
@spec owners(term(), [term()], non_neg_integer(), keyword()) :: [term()]
5287
def owners(key, nodes, count, opts \\ []) do
53-
hash_fn = Keyword.get(opts, :hash_fn, &:erlang.phash2/1)
88+
nodes =
89+
nodes
90+
|> Enum.sort()
91+
|> Enum.uniq()
92+
93+
if scorer = Keyword.get(opts, :scorer) do
94+
%mod{} = scorer
95+
96+
nodes
97+
|> Enum.sort_by(fn node -> mod.score(scorer, key, node) end, :desc)
98+
|> Enum.take(count)
99+
else
100+
nodes
101+
|> Enum.sort_by(fn node -> :erlang.phash2({key, node}) end, :desc)
102+
|> Enum.take(count)
103+
end
104+
end
54105

55-
nodes
56-
|> Enum.uniq()
57-
|> Enum.sort()
58-
|> Enum.sort_by(fn node -> hash_fn.({key, node}) end, :desc)
59-
|> Enum.take(count)
106+
@doc """
107+
Builds a skeleton from `nodes` for O(log n) lookups.
108+
109+
Pass the result to `owner/3`.
110+
111+
## Options
112+
113+
* `:fanout` - branching factor of the virtual tree. Defaults to `3`.
114+
* `:cluster_size` - target number of nodes per cluster. Defaults to `16`.
115+
* `:scorer` - scoring strategy struct. Defaults to `%HRW{}`.
116+
117+
## Examples
118+
119+
iex> HRW.build(["server1", "server2", "server3"])
120+
#HRW.Skeleton<3 nodes, fanout: 3, scorer: %HRW{hash_fn: nil}>
121+
"""
122+
@spec build([term()], keyword()) :: HRW.Skeleton.t()
123+
def build(nodes, opts \\ []) do
124+
HRW.Skeleton.build(nodes, opts)
60125
end
61126
end

lib/hrw/scorer.ex

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
defmodule HRW.Scorer do
2+
@moduledoc """
3+
Behaviour for HRW scoring strategies. Each variant module (`HRW`, future
4+
`HRW.Weighted`, etc.) defines a struct holding its configuration and
5+
implements `score/3` returning an integer score for a `(key, node)` pair.
6+
7+
Pass an instance via the `:scorer` option to `HRW.owner/3`, `HRW.owners/4`,
8+
or `HRW.build/2`. The highest-scoring node wins.
9+
"""
10+
11+
@callback score(scorer :: struct(), key :: term(), node :: term()) :: integer()
12+
end

lib/hrw/skeleton.ex

Lines changed: 39 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,22 @@
11
defmodule HRW.Skeleton do
22
@moduledoc """
3-
A skeleton-based variant of HRW that gives O(log n) lookups by grouping
4-
nodes into clusters and routing keys through a virtual tree.
3+
Internal data structure backing `HRW.build/2` and `HRW.owner/3` for
4+
O(log n) lookups. Nodes are grouped into clusters and routed through a
5+
virtual tree. Plain data, not a process.
56
6-
Build the skeleton once with `build/2`, then pass it to each `owner/3` call.
7-
The skeleton is plain data, not a process.
7+
Not intended for direct use — go through `HRW`.
88
"""
99

10-
defstruct [:clusters, :fanout, :levels]
10+
defstruct [:clusters, :fanout, :levels, :scorer]
1111

1212
@type t :: %__MODULE__{
1313
clusters: tuple(),
1414
fanout: pos_integer(),
15-
levels: non_neg_integer()
15+
levels: non_neg_integer(),
16+
scorer: struct() | nil
1617
}
1718

18-
@doc """
19-
Builds a skeleton from `nodes`.
20-
21-
## Options
22-
23-
* `:fanout` - branching factor of the virtual tree. Defaults to `3`.
24-
* `:cluster_size` - target number of nodes per cluster. Defaults to `16`.
25-
26-
## Examples
27-
28-
iex> HRW.Skeleton.build(["server1", "server2", "server3"])
29-
#HRW.Skeleton<3 nodes, fanout: 3>
30-
31-
"""
19+
@doc false
3220
@spec build([term()], keyword()) :: t()
3321
def build(nodes, opts \\ [])
3422

@@ -39,6 +27,7 @@ defmodule HRW.Skeleton do
3927
def build(nodes, opts) do
4028
fanout = Keyword.get(opts, :fanout, 3)
4129
size = Keyword.get(opts, :cluster_size, 16)
30+
scorer = Keyword.get(opts, :scorer, %HRW{})
4231

4332
cluster_list = chunk_redistribute(nodes, size)
4433
clusters = List.to_tuple(cluster_list)
@@ -48,46 +37,52 @@ defmodule HRW.Skeleton do
4837
%__MODULE__{
4938
clusters: clusters,
5039
fanout: fanout,
51-
levels: levels
40+
levels: levels,
41+
scorer: scorer
5242
}
5343
end
5444

55-
@doc """
56-
Returns the node responsible for `key` in the given skeleton.
57-
58-
## Options
59-
60-
* `:hash_fn` - a function `term -> integer`. Defaults to `&:erlang.phash2/1`.
45+
@doc false
46+
def owner(key, %__MODULE__{} = skeleton) do
47+
do_owner(key, skeleton, 0)
48+
end
6149

62-
## Examples
50+
# We take the fast path when scorer and hash_fn are not overridden.
51+
defp do_owner(key, %__MODULE__{clusters: {cluster}, scorer: %HRW{hash_fn: nil}}, _salt) do
52+
Enum.max_by(cluster, fn node -> :erlang.phash2({key, node}) end)
53+
end
6354

64-
iex> skeleton = HRW.Skeleton.build(["server1", "server2", "server3"])
65-
iex> HRW.Skeleton.owner("192.168.0.2", skeleton)
66-
"server3"
55+
defp do_owner(key, %__MODULE__{scorer: %HRW{hash_fn: nil}} = skeleton, salt) do
56+
index =
57+
Enum.reduce(0..(skeleton.levels - 1), 0, fn level, acc ->
58+
digit = Enum.max_by(0..(skeleton.fanout - 1), &:erlang.phash2({{key, salt, level}, &1}))
59+
acc * skeleton.fanout + digit
60+
end)
6761

68-
"""
69-
@spec owner(term(), t(), keyword()) :: term()
70-
def owner(key, %__MODULE__{} = skeleton, opts \\ []) do
71-
hash_fn = Keyword.get(opts, :hash_fn, &:erlang.phash2/1)
72-
do_owner(key, skeleton, 0, hash_fn)
62+
if index < tuple_size(skeleton.clusters) do
63+
cluster = elem(skeleton.clusters, index)
64+
Enum.max_by(cluster, fn node -> :erlang.phash2({{key, salt, index}, node}) end)
65+
else
66+
do_owner(key, skeleton, salt + 1)
67+
end
7368
end
7469

75-
defp do_owner(key, %__MODULE__{clusters: {cluster}}, _salt, hash_fn) do
76-
Enum.max_by(cluster, fn node -> hash_fn.({key, node}) end)
70+
defp do_owner(key, %__MODULE__{clusters: {cluster}, scorer: %mod{} = scorer}, _salt) do
71+
Enum.max_by(cluster, fn node -> mod.score(scorer, key, node) end)
7772
end
7873

79-
defp do_owner(key, skeleton, salt, hash_fn) do
74+
defp do_owner(key, %__MODULE__{scorer: %mod{} = scorer} = skeleton, salt) do
8075
index =
8176
Enum.reduce(0..(skeleton.levels - 1), 0, fn level, acc ->
82-
digit = Enum.max_by(0..(skeleton.fanout - 1), &hash_fn.({key, salt, level, &1}))
77+
digit = Enum.max_by(0..(skeleton.fanout - 1), &mod.score(scorer, {key, salt, level}, &1))
8378
acc * skeleton.fanout + digit
8479
end)
8580

8681
if index < tuple_size(skeleton.clusters) do
8782
cluster = elem(skeleton.clusters, index)
88-
Enum.max_by(cluster, fn node -> hash_fn.({key, salt, index, node}) end)
83+
Enum.max_by(cluster, fn node -> mod.score(scorer, {key, salt, index}, node) end)
8984
else
90-
do_owner(key, skeleton, salt + 1, hash_fn)
85+
do_owner(key, skeleton, salt + 1)
9186
end
9287
end
9388

@@ -117,13 +112,13 @@ defmodule HRW.Skeleton do
117112
end
118113

119114
defimpl Inspect, for: HRW.Skeleton do
120-
def inspect(%HRW.Skeleton{clusters: clusters, fanout: fanout}, _opts) do
115+
def inspect(%HRW.Skeleton{clusters: clusters, fanout: fanout, scorer: scorer}, _opts) do
121116
nodes =
122117
clusters
123118
|> Tuple.to_list()
124119
|> Enum.reduce(0, fn cluster, acc -> acc + length(cluster) end)
125120

126121
label = if nodes == 1, do: "node", else: "nodes"
127-
"#HRW.Skeleton<#{nodes} #{label}, fanout: #{fanout}>"
122+
"#HRW.Skeleton<#{nodes} #{label}, fanout: #{fanout}, scorer: #{inspect(scorer)}>"
128123
end
129124
end

0 commit comments

Comments
 (0)