@@ -80,21 +80,75 @@ import java.nio.charset.StandardCharsets
8080final case class HMap [M <: Tuple ](private val m : TreeMap [Array [Byte ], Any ] =
8181 TreeMap .empty(using HMap .byteArrayOrdering)):
8282
/** Builds the internal storage key for a logical key under prefix `P`.
  *
  * Layout: one byte holding the prefix length, followed by the prefix encoded as UTF-8, followed by the bytes that
  * the `KeyLike` instance produces for `key`.
  *
  * The prefix may be at most 254 bytes long: `prefixRange` falls back to `length + 1` as an upper bound, and that
  * value still has to fit in the single length byte.
  *
  * @param key
  *   the logical key to encode
  * @param kl
  *   converts the logical key to bytes
  * @tparam P
  *   the singleton string type naming the prefix
  * @return
  *   the length-prefixed internal key
  * @throws IllegalArgumentException
  *   (via `require`) if the encoded prefix is longer than 254 bytes
  */
private def fullKey[P <: String & Singleton: ValueOf](key: KeyAt[M, P])(using
    kl: KeyLike[KeyAt[M, P]]
): Array[Byte] =
  val encodedPrefix = valueOf[P].getBytes(StandardCharsets.UTF_8)
  // The key is encoded before the length check, so a failure in the KeyLike instance surfaces first.
  val encodedKey = kl.asBytes(key)

  require(encodedPrefix.length <= 254, s"Prefix '${valueOf[P]}' too long: ${encodedPrefix.length} bytes (max 254)")

  val lengthHeader = Array(encodedPrefix.length.toByte)
  Array.concat(lengthHeader, encodedPrefix, encodedKey)
97+
/** Decodes the logical key stored inside a full internal key.
  *
  * The first byte is read as an unsigned prefix length; that byte and the prefix bytes that follow it are dropped, and
  * whatever remains is handed to the `KeyLike` instance.
  *
  * @param fullKey
  *   the complete internal key, laid out as [length][prefix][key]; must be non-empty, since the first byte is read
  *   unconditionally
  * @param kl
  *   decodes the remaining key bytes
  * @tparam K
  *   the logical key type
  * @return
  *   the decoded logical key
  */
private def extractKey[K](fullKey: Array[Byte])(using kl: KeyLike[K]): K =
  // Header = 1 length byte + the prefix itself; mask to read the length as unsigned.
  val headerSize = 1 + (fullKey(0) & 0xFF)
  kl.fromBytes(fullKey.drop(headerSize))
113+
/** Computes the TreeMap range that covers every internal key stored under prefix `P`.
  *
  * Internal keys are laid out as [1 byte: prefix length] ++ [N bytes: prefix UTF-8] ++ [key bytes...].
  *
  *   - The lower bound (inclusive) is [length] ++ prefix.
  *   - The upper bound (exclusive) is [length] ++ successor(prefix), where the successor is the prefix with its
  *     rightmost non-0xFF byte incremented and every byte after it reset to zero.
  *   - If no byte can be incremented (empty prefix, or every byte is 0xFF), the upper bound is the single byte
  *     [length + 1].
  *
  * NOTE(review): this relies on `HMap.byteArrayOrdering` comparing bytes as unsigned values; that ordering is not
  * visible here, so confirm it. With a signed ordering, prefix lengths above 127 would sort incorrectly.
  *
  * @tparam P
  *   the singleton string type naming the prefix
  * @return
  *   `(lowerBound, upperBound)` with the lower bound inclusive and the upper bound exclusive
  */
private def prefixRange[P <: String & Singleton: ValueOf](): (Array[Byte], Array[Byte]) =
  val prefixBytes = valueOf[P].getBytes(StandardCharsets.UTF_8)
  val prefixLength = prefixBytes.length
  val maxByte = 0xff.toByte

  // Every key under this prefix starts with exactly these bytes.
  val lowerBound = Array(prefixLength.toByte) ++ prefixBytes

  // Rightmost position that can absorb the +1 without overflowing; -1 when there is none.
  // UTF-8 output never contains 0xFF, so for a non-empty prefix this is normally the last byte,
  // but the carry is handled in full so the bound stays correct for any byte content.
  val pivot = prefixBytes.lastIndexWhere(_ != maxByte)

  val upperBound =
    if pivot < 0 then
      // Nothing to increment: the next length value sorts after every key with the current length byte.
      Array((prefixLength + 1).toByte)
    else
      // Keep bytes before the pivot, bump the pivot, and reset the overflowed 0xFF tail to zero.
      val bumped = prefixBytes.take(pivot) :+ (prefixBytes(pivot) + 1).toByte
      Array(prefixLength.toByte) ++ bumped.padTo(prefixLength, 0.toByte)

  (lowerBound, upperBound)
99153
100154 /** Retrieve a value for the given prefix and key.
@@ -235,9 +289,7 @@ final case class HMap[M <: Tuple](private val m: TreeMap[Array[Byte], Any] =
235289
236290 // Use TreeMap's range for efficient iteration (O(log n + k) where k = results)
237291 m.range(lowerBound, upperBound).iterator.map { case (k, v) =>
238- // Extract key bytes (everything after prefix + separator)
239- val keyBytes = k.drop(lowerBound.length)
240- val logicalKey = kl.fromBytes(keyBytes)
292+ val logicalKey = extractKey(k)
241293 (logicalKey, v.asInstanceOf [ValueAt [M , P ]])
242294 }
243295
@@ -273,17 +325,15 @@ final case class HMap[M <: Tuple](private val m: TreeMap[Array[Byte], Any] =
273325 kl : KeyLike [KeyAt [M , P ]]
274326 ): Iterator [(KeyAt [M , P ], ValueAt [M , P ])] =
275327 val prefixBytes = valueOf[P ].getBytes(StandardCharsets .UTF_8 )
276- val separatorByte = 0x5c . toByte // backslash
277- val prefixWithSep = prefixBytes :+ separatorByte
328+ val prefixLength = Array (prefixBytes.length. toByte)
329+ val prefixWithLength = prefixLength ++ prefixBytes
278330
279- val fromKey = prefixWithSep ++ kl.asBytes(from)
280- val untilKey = prefixWithSep ++ kl.asBytes(until)
331+ val fromKey = prefixWithLength ++ kl.asBytes(from)
332+ val untilKey = prefixWithLength ++ kl.asBytes(until)
281333
282334 // TreeMap.range returns entries in [from, until) based on byte ordering
283335 m.range(fromKey, untilKey).iterator.map { case (k, v) =>
284- // Extract key bytes (everything after prefix + separator)
285- val keyBytes = k.drop(prefixWithSep.length)
286- val logicalKey = kl.fromBytes(keyBytes)
336+ val logicalKey = extractKey(k)
287337 (logicalKey, v.asInstanceOf [ValueAt [M , P ]])
288338 }
289339
@@ -318,8 +368,7 @@ final case class HMap[M <: Tuple](private val m: TreeMap[Array[Byte], Any] =
318368
319369 // Use TreeMap's range.exists directly for maximum efficiency
320370 m.range(lowerBound, upperBound).exists { case (k, v) =>
321- val keyBytes = k.drop(lowerBound.length)
322- val logicalKey = kl.fromBytes(keyBytes)
371+ val logicalKey = extractKey(k)
323372 predicate(logicalKey, v.asInstanceOf [ValueAt [M , P ]])
324373 }
325374
@@ -393,38 +442,40 @@ object HMap:
393442
/** Reads the prefix string back out of a full internal key.
  *
  * Full keys are laid out as [1 byte: prefix length] ++ [N bytes: prefix UTF-8] ++ [key bytes]. The first byte is
  * interpreted as an unsigned length, and that many following bytes are decoded as UTF-8.
  *
  * This is useful during deserialization, when the prefix decides which codec/typeclass applies to a key-value pair.
  *
  * @param fullKey
  *   the complete byte key from HMap internal storage
  * @return
  *   the prefix string, or `None` if the key is empty or too short to contain the prefix length it declares
  *
  * @example
  *   {{{
  * // During deserialization
  * rawMap.foreach { case (fullKey, value) =>
  *   HMap.extractPrefix(fullKey) match
  *     case Some(prefix) =>
  *       val codec = codecs.forPrefix(prefix) // Get codec for this prefix
  *       // decode value using codec
  *     case None =>
  *       // Invalid key format
  * }
  *   }}}
  */
def extractPrefix(fullKey: Array[Byte]): Option[String] =
  fullKey.headOption.flatMap { lengthByte =>
    // Mask so that lengths above 127 are not read as negative.
    val declaredLength = lengthByte & 0xff
    // The key must hold the length byte plus the whole prefix.
    Option.when(fullKey.length > declaredLength) {
      new String(fullKey, 1, declaredLength, StandardCharsets.UTF_8)
    }
  }
428479
429480 // ---------------------------------------------
430481 // Type-level machinery for compile-time schema validation
0 commit comments