Skip to content

Commit 5aac189

Browse files
authored
Merge pull request #38 from somdoron/main
add support for rangeByCompoundKeyPrefix
2 parents a124cd9 + 7269653 commit 5aac189

File tree

3 files changed

+258
-16
lines changed

3 files changed

+258
-16
lines changed

raft/src/main/scala/zio/raft/HMap.scala

Lines changed: 119 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ final case class HMap[M <: Tuple](private val m: TreeMap[Array[Byte], Any] =
129129
// Lower bound: [length][prefix] - all keys with this prefix start here
130130
val lowerBound = Array(prefixLength.toByte) ++ prefixBytes
131131

132+
val upperBound = computePrefixUpperBound(lowerBound)
133+
134+
(lowerBound, upperBound)
135+
136+
// Helper: computes the lexicographic upper bound for a prefix
137+
private[raft] def computePrefixUpperBound(prefixBytes: Array[Byte]): Array[Byte] =
132138
// Upper bound: Increment prefix bytes with carry propagation
133139
// Start from rightmost byte, find first byte that isn't 0xFF, increment it, zero rest
134140
val upperPrefixBytes = prefixBytes.clone()
@@ -137,24 +143,27 @@ final case class HMap[M <: Tuple](private val m: TreeMap[Array[Byte], Any] =
137143

138144
while carry && i >= 0 do
139145
if upperPrefixBytes(i) != 0xff.toByte then
140-
// Found a byte that is already 0xFF, will zero it and propagate carry (continue)
141-
if upperPrefixBytes(i) == 0xff.toByte then
142-
upperPrefixBytes(i) = 0.toByte
143-
else
144-
// Found a byte that is not 0xFF, increment it and stop carry
145-
upperPrefixBytes(i) = (upperPrefixBytes(i) + 1).toByte
146-
carry = false
146+
// Found a byte that is not 0xFF, increment it and stop carry
147+
upperPrefixBytes(i) = (upperPrefixBytes(i) + 1).toByte
148+
carry = false
149+
else
150+
// Found a byte that is 0xFF, will zero it and propagate carry (continue)
151+
upperPrefixBytes(i) = 0.toByte
147152
i -= 1
148153

149-
val upperBound =
150-
if carry then
151-
// All bytes were 0xFF - use next length value with empty prefix
152-
// This is lexicographically after all keys with current prefix length
153-
Array((prefixLength + 1).toByte)
154-
else
155-
Array(prefixLength.toByte) ++ upperPrefixBytes
154+
if carry then
155+
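// All bytes were 0xFF: the increment overflows, so return the prefix with a single 0x00 byte appended. NOTE(review): this bound is only greater than the prefix itself; any longer key starting with the prefix sorts at or after it and is excluded from an until-exclusive range.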
156+
Array.fill(prefixBytes.length)(0xff.toByte) ++ Array(0.toByte)
157+
else
158+
// Truncate any trailing zeros from the upperPrefixBytes for minimal upper bound representation
159+
var end = upperPrefixBytes.length
160+
while end > 0 && upperPrefixBytes(end - 1) == 0.toByte do
161+
end -= 1
156162

157-
(lowerBound, upperBound)
163+
if end != upperPrefixBytes.length then
164+
upperPrefixBytes.slice(0, end)
165+
else
166+
upperPrefixBytes
158167

159168
/** Retrieve a value for the given prefix and key.
160169
*
@@ -342,6 +351,100 @@ final case class HMap[M <: Tuple](private val m: TreeMap[Array[Byte], Any] =
342351
(logicalKey, v.asInstanceOf[ValueAt[M, P]])
343352
}
344353

354+
/** Iterates over every entry under prefix `P` whose encoded key starts with the encoding of `partial`.
  *
  * Intended for compound keys such as `(namespace, userId)`: pass the leading component together with a "zero"
  * trailing component (for example `(namespace, "")`) to fetch all entries that share that leading component,
  * whatever their trailing component is.
  *
  * The scan covers the physical key range `[fromKey, untilKey)`, where `fromKey` is
  * `[prefix length][prefix bytes] ++ kl.asBytes(partial)` and `untilKey` is `computePrefixUpperBound(fromKey)`.
  *
  * IMPORTANT: the `KeyLike` instance for the compound key type has to cooperate with this method:
  *   - Encoding: when the trailing component is "zero"/empty, `asBytes` must stop right after the leading
  *     component(s). It must NOT emit a length byte (or any other marker) for the empty tail. If it did, the
  *     encoded partial key would no longer be a byte-prefix of the full keys, and the computed range would not
  *     cover them.
  *   - Decoding: `fromBytes` should interpret a missing (truncated) trailing component as the "zero" value (empty
  *     string, 0, Nil, ...).
  *
  * Example `KeyLike` for `(String, String)` that omits the second component when it is empty:
  * {{{
  * given KeyLike[(String, String)] with
  *   def asBytes(key: (String, String)): Array[Byte] =
  *     val (first, second) = key
  *     val firstBytes = first.getBytes(StandardCharsets.UTF_8)
  *     if second.isEmpty then
  *       // Only encode the first component, omit the second part entirely
  *       Array(firstBytes.length.toByte) ++ firstBytes
  *     else
  *       val secondBytes = second.getBytes(StandardCharsets.UTF_8)
  *       Array(firstBytes.length.toByte) ++ firstBytes ++ Array(secondBytes.length.toByte) ++ secondBytes
  *
  *   def fromBytes(bytes: Array[Byte]): (String, String) =
  *     // Decode first component
  *     val len1 = bytes(0) & 0xff
  *     val first = new String(bytes.slice(1, 1 + len1), StandardCharsets.UTF_8)
  *     // If there are no more bytes, treat second as ""
  *     if bytes.length == 1 + len1 then (first, "")
  *     else
  *       val len2Pos = 1 + len1
  *       val len2 = bytes(len2Pos) & 0xff
  *       val second = new String(bytes.slice(len2Pos + 1, len2Pos + 1 + len2), StandardCharsets.UTF_8)
  *       (first, second)
  * }}}
  *
  * @tparam P
  *   The prefix (must be present in the schema)
  * @param partial
  *   The partial (compound) key: the leading component is provided and the trailing component is the "zero" value
  *   that `KeyLike[KeyAt[M, P]]` omits from the byte encoding
  * @return
  *   Iterator of (key, value) pairs whose encoded key lies in `[fromKey, untilKey)`
  *
  * @example
  *   {{{
  * type Schema = ("users", (String, String), UserData) *: EmptyTuple
  * val hmap = HMap.empty[Schema]
  *   .updated["users"](("region_1", "userA"), UserData(...))
  *   .updated["users"](("region_1", "userB"), UserData(...))
  *   .updated["users"](("region_2", "userC"), UserData(...))
  *
  * // With the KeyLike above, ("region_1", "") is encoded as just the length-prefixed bytes of "region_1"
  * // (no length/marker for the second field), so it is a byte-prefix of both region_1 keys.
  * hmap.rangeByCompoundKeyPrefix["users"](("region_1", "")) // Returns both userA and userB
  *   }}}
  */
def rangeByCompoundKeyPrefix[P <: String & Singleton: ValueOf](partial: KeyAt[M, P])(using
    c: Contains[M, P],
    kl: KeyLike[KeyAt[M, P]]
): Iterator[(KeyAt[M, P], ValueAt[M, P])] =
  val schemaPrefix = valueOf[P].getBytes(StandardCharsets.UTF_8)

  // Inclusive start of the scan: [prefix length][prefix bytes][encoded partial key]
  val fromKey = (schemaPrefix.length.toByte +: schemaPrefix) ++ kl.asBytes(partial)

  // Exclusive end of the scan: the upper bound computed from the whole start key
  val untilKey = computePrefixUpperBound(fromKey)

  m.range(fromKey, untilKey).iterator.map { entry =>
    val logicalKey = extractKey(entry._1)
    (logicalKey, entry._2.asInstanceOf[ValueAt[M, P]])
  }
447+
345448
/** Check if any entry in the specified prefix satisfies the predicate.
346449
*
347450
* Uses the underlying TreeMap's range.exists for efficient short-circuit evaluation. Stops as soon as it finds a
@@ -403,7 +506,7 @@ object HMap:
403506
*
404507
* Package-private (`private[raft]`): it is used internally by HMap and is explicitly referenced where needed, including from tests in the `raft` package.
405508
*/
406-
private given byteArrayOrdering: Ordering[Array[Byte]] =
509+
private[raft] given byteArrayOrdering: Ordering[Array[Byte]] =
  // Orders byte arrays by delegating to the JDK's unsigned array comparison.
  new Ordering[Array[Byte]]:
    def compare(left: Array[Byte], right: Array[Byte]): Int =
      java.util.Arrays.compareUnsigned(left, right)
408511

409512
/** Create an empty HMap with the given schema.

raft/src/test/scala/zio/raft/HMapPrefixRangeSpec.scala

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,5 +185,36 @@ object HMapPrefixRangeSpec extends ZIOSpecDefault:
185185
assertTrue(compareUnsigned(upper, lower) > 0) &&
186186
assertTrue(compareUnsigned(lower, upper) < 0)
187187
}
188+
189+
// Last byte is not 0xFF: only that byte is incremented and the length is unchanged.
test("computePrefixUpperBound works") {
  val prefixBytes = Array[Byte]('t'.toByte, 'e'.toByte, 's'.toByte, 't'.toByte)
  val hmap = HMap.empty[TestSchema]
  val upper = hmap.computePrefixUpperBound(prefixBytes)

  // A comparator only guarantees the sign of its result, so assert `> 0` rather than `== 1`.
  assertTrue(HMap.byteArrayOrdering.compare(upper, prefixBytes) > 0) &&
  assertTrue(upper.length == 4) &&
  assertTrue(upper.sameElements(Array[Byte]('t'.toByte, 'e'.toByte, 's'.toByte, 'u'.toByte)))
}
198+
199+
// Every byte is 0xFF: the result is the original prefix with a single 0x00 byte appended.
test("computePrefixUpperBound for max string 0xff") {
  val prefixBytes = Array[Byte](0xff.toByte, 0xff.toByte)
  val hmap = HMap.empty[TestSchema]
  val upper = hmap.computePrefixUpperBound(prefixBytes)

  // A comparator only guarantees the sign of its result, so assert `> 0` rather than `== 1`.
  assertTrue(HMap.byteArrayOrdering.compare(upper, prefixBytes) > 0) &&
  assertTrue(upper.length == 3) &&
  assertTrue(upper.sameElements(Array[Byte](0xff.toByte, 0xff.toByte, 0x00.toByte)))
}
208+
209+
// Trailing 0xFF wraps to 0x00 and carries into the previous byte; the trailing zero is then dropped.
test("truncate trailing zeros from computePrefixUpperBound") {
  val prefixBytes = Array[Byte](0x01.toByte, 0xFF.toByte)
  val hmap = HMap.empty[TestSchema]
  val upper = hmap.computePrefixUpperBound(prefixBytes)

  // A comparator only guarantees the sign of its result, so assert `> 0` rather than `== 1`.
  assertTrue(HMap.byteArrayOrdering.compare(upper, prefixBytes) > 0) &&
  assertTrue(upper.length == 1) &&
  assertTrue(upper.sameElements(Array[Byte](0x02.toByte)))
}
218+
188219
}
189220
end HMapPrefixRangeSpec
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package zio.raft
2+
3+
import zio.test.*
4+
import zio.test.Assertion.*
5+
import java.nio.charset.StandardCharsets
6+
7+
object HMapRangeByCompoundKeyPrefixSpec extends ZIOSpecDefault:

  /** Test-only `KeyLike` for `(String, String)` compound keys.
    *
    * Encoding: `[len(first)][first UTF-8]` followed by `[len(second)][second UTF-8]`; the second group is omitted
    * entirely when `second` is empty. Each length is stored in a single byte.
    *
    * Because the empty-tail encoding is a byte-prefix of every key with the same first component, all such keys
    * form one contiguous lexicographic range starting at `[len(first)][first UTF-8]`. `rangeByCompoundKeyPrefix`
    * relies on this and takes the end of that range from `computePrefixUpperBound`.
    */
  given HMap.KeyLike[(String, String)] with
    def asBytes(key: (String, String)): Array[Byte] =
      val (first, second) = key
      val firstBytes = first.getBytes(StandardCharsets.UTF_8)
      if second.isEmpty then
        // Empty tail: stop after the first component, no length byte for the second one
        Array(firstBytes.length.toByte) ++ firstBytes
      else
        val secondBytes = second.getBytes(StandardCharsets.UTF_8)
        Array(firstBytes.length.toByte) ++ firstBytes ++ Array(secondBytes.length.toByte) ++ secondBytes

    def fromBytes(bytes: Array[Byte]): (String, String) =
      val len1 = bytes(0) & 0xff
      val first = new String(bytes.slice(1, 1 + len1), StandardCharsets.UTF_8)
      // No bytes after the first component means the second component was empty
      if bytes.length == 1 + len1 then (first, "")
      else
        val len2Pos = 1 + len1
        val len2 = bytes(len2Pos) & 0xff
        val second = new String(bytes.slice(len2Pos + 1, len2Pos + 1 + len2), StandardCharsets.UTF_8)
        (first, second)

  type Schema = ("users", (String, String), Int) *: EmptyTuple

  def spec = suiteAll("HMap.rangeByCompoundKeyPrefix") {

    test("computePrefixUpperBound works for compound key prefix") {
      val keyLike = summon[HMap.KeyLike[(String, String)]]
      val hmap = HMap.empty[Schema]

      val lower = keyLike.asBytes(("r1", ""))
      val keyBytes = keyLike.asBytes(("r1", "a"))
      val siblingBytes = keyLike.asBytes(("r2", "x"))
      val upper = hmap.computePrefixUpperBound(lower)

      // The key must lie inside [lower, upper); a different first component must not.
      assertTrue(HMap.byteArrayOrdering.compare(lower, keyBytes) <= 0) &&
      assertTrue(HMap.byteArrayOrdering.compare(upper, keyBytes) > 0) &&
      assertTrue(HMap.byteArrayOrdering.compare(siblingBytes, upper) >= 0)
    }

    test("returns all entries that share the same first component only") {
      val hmap =
        HMap.empty[Schema]
          .updated["users"](("r1", "a"), 1)
          .updated["users"](("r1", "b"), 2)
          .updated["users"](("r1", "c"), 3)
          .updated["users"](("r2", "x"), 10)
          .updated["users"](("r3", "y"), 20)

      val results = hmap.rangeByCompoundKeyPrefix["users"](("r1", "")).toList
      val keys = results.map(_._1)
      val values = results.map(_._2)

      assertTrue(results.length == 3) &&
      assertTrue(keys.toSet == Set(("r1", "a"), ("r1", "b"), ("r1", "c"))) &&
      assertTrue(values.toSet == Set(1, 2, 3))
    }

    test("returns nothing when no key has the requested first component") {
      val hmap =
        HMap.empty[Schema]
          .updated["users"](("r1", "a"), 1)
          .updated["users"](("r2", "x"), 10)

      val results = hmap.rangeByCompoundKeyPrefix["users"](("zz", "")).toList

      assertTrue(results.isEmpty)
    }

    test("includes empty-second-component key and excludes other first components") {
      val hmap =
        HMap.empty[Schema]
          .updated["users"](("ns", ""), 0)
          .updated["users"](("ns", "k1"), 1)
          .updated["users"](("ns", "k2"), 2)
          .updated["users"](("ns2", ""), 100)
          .updated["users"](("ns2", "k3"), 101)

      val nsResults = hmap.rangeByCompoundKeyPrefix["users"](("ns", "")).toList
      val nsKeys = nsResults.map(_._1).toSet
      val nsValues = nsResults.map(_._2).toSet

      val ns2Results = hmap.rangeByCompoundKeyPrefix["users"](("ns2", "")).toList
      val ns2Keys = ns2Results.map(_._1).toSet
      val ns2Values = ns2Results.map(_._2).toSet

      assertTrue(nsKeys == Set(("ns", ""), ("ns", "k1"), ("ns", "k2"))) &&
      assertTrue(nsValues == Set(0, 1, 2)) &&
      assertTrue(ns2Keys == Set(("ns2", ""), ("ns2", "k3"))) &&
      assertTrue(ns2Values == Set(100, 101))
    }

    test("works with unicode in first component and multiple seconds") {
      val first = "régiön-𝟙" // unicode characters
      val hmap =
        HMap.empty[Schema]
          .updated["users"]((first, "α"), 5)
          .updated["users"]((first, "β"), 6)
          .updated["users"]((first, "γ"), 7)
          .updated["users"](("other", "δ"), 8)

      val results = hmap.rangeByCompoundKeyPrefix["users"]((first, "")).toList

      assertTrue(results.length == 3) &&
      assertTrue(results.map(_._1).toSet == Set((first, "α"), (first, "β"), (first, "γ"))) &&
      assertTrue(results.map(_._2).toSet == Set(5, 6, 7))
    }
  }
end HMapRangeByCompoundKeyPrefixSpec

0 commit comments

Comments
 (0)