Skip to content

Commit ea70392

Browse files
authored
Add NamespaceHasher (#22)
1 parent a5a193b commit ea70392

10 files changed

+122
-56
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

simd-r-drive-extensions/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "simd-r-drive-extensions"
33
authors = ["Jeremy Harris <[email protected]>"]
4-
version = "0.4.0-alpha.5"
4+
version = "0.4.0-alpha.6"
55
edition = "2021"
66
repository = "https://github.com/jzombie/rust-simd-r-drive"
77
description = "Storage extensions for SIMD R Drive."

simd-r-drive-extensions/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ mod storage_cache_ext;
1111
pub use storage_cache_ext::*;
1212

1313
mod constants;
14+
15+
pub mod namespace_hasher;
16+
pub use namespace_hasher::NamespaceHasher;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use simd_r_drive::storage_engine::digest::compute_hash;
2+
3+
/// A utility struct for namespacing keys using XXH3 hashing.
4+
///
5+
/// This ensures that keys are uniquely identified within a given namespace,
6+
/// even if they share the same suffix. By hashing both the namespace and key
7+
/// separately before combining them, it prevents unintended collisions.
8+
///
9+
/// # Example:
10+
/// ```
11+
/// use simd_r_drive_extensions::NamespaceHasher;
12+
///
13+
/// let hasher = NamespaceHasher::new(b"opt");
14+
/// let namespaced_key = hasher.namespace(b"my_key");
15+
/// assert_eq!(namespaced_key.len(), 16, "Namespaced key should be exactly 16 bytes");
16+
/// ```
17+
pub struct NamespaceHasher {
18+
prefix: u64,
19+
}
20+
21+
impl NamespaceHasher {
22+
/// Creates a new `NamespaceHasher` with a given prefix.
23+
///
24+
/// The prefix itself is hashed using XXH3 to ensure a unique namespace identifier.
25+
/// This avoids collisions between different namespaces while keeping the hashing fast.
26+
///
27+
/// # Arguments
28+
/// - `prefix`: A byte slice representing the namespace prefix.
29+
///
30+
/// # Returns
31+
/// - A `NamespaceHasher` instance with a precomputed prefix hash.
32+
#[inline]
33+
pub fn new(prefix: &[u8]) -> Self {
34+
Self {
35+
prefix: compute_hash(prefix),
36+
}
37+
}
38+
39+
/// Computes a namespaced key, returning it as a **16-byte vector**.
40+
///
41+
/// The final namespaced key is derived by:
42+
/// 1. Hashing the key on its own, independently of the namespace prefix.
43+
/// 2. Combining it with the precomputed namespace hash.
44+
/// 3. Returning the **concatenation of both hashes** as a **16-byte key**.
45+
///
46+
/// This ensures that:
47+
/// - **Different namespaces** yield different 8-byte key prefixes (barring a 64-bit hash collision).
48+
/// - **Keys within a namespace** stay distinct (barring a 64-bit hash collision).
49+
///
50+
/// # Arguments
51+
/// - `key`: A byte slice representing the key to be namespaced.
52+
///
53+
/// # Returns
54+
/// - A `Vec<u8>` containing the **16-byte** namespaced key (`8-byte prefix hash + 8-byte key hash`).
55+
#[inline]
56+
pub fn namespace(&self, key: &[u8]) -> Vec<u8> {
57+
let key_hash = compute_hash(key);
58+
59+
// Combine both hashes into a 16-byte buffer
60+
let mut buffer = Vec::with_capacity(16);
61+
buffer.extend_from_slice(&self.prefix.to_le_bytes()); // Prefix hash (8 bytes)
62+
buffer.extend_from_slice(&key_hash.to_le_bytes()); // Key hash (8 bytes)
63+
64+
buffer
65+
}
66+
}

simd-r-drive-extensions/src/storage_cache_ext.rs

+13-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
use crate::constants::TTL_PREFIX;
2-
use crate::utils::prefix_key;
2+
use crate::NamespaceHasher;
33
use serde::de::DeserializeOwned;
44
use serde::Serialize;
55
use simd_r_drive::DataStore;
66
use std::io::{self, ErrorKind};
7+
use std::sync::{Arc, OnceLock};
78
use std::time::{SystemTime, UNIX_EPOCH};
89

10+
static TTL_NAMESPACE_HASHER: OnceLock<Arc<NamespaceHasher>> = OnceLock::new();
11+
912
#[cfg(any(test, debug_assertions))]
1013
pub const TEST_TTL_PREFIX: &[u8] = TTL_PREFIX;
1114

@@ -54,7 +57,9 @@ impl StorageCacheExt for DataStore {
5457
value: &T,
5558
ttl_secs: u64,
5659
) -> io::Result<u64> {
57-
let key = &prefix_key(TTL_PREFIX, key);
60+
let namespace_hasher =
61+
TTL_NAMESPACE_HASHER.get_or_init(|| Arc::new(NamespaceHasher::new(TTL_PREFIX)));
62+
let namespaced_key = namespace_hasher.namespace(key);
5863

5964
let expiration_timestamp = SystemTime::now()
6065
.duration_since(UNIX_EPOCH)
@@ -67,13 +72,15 @@ impl StorageCacheExt for DataStore {
6772
.map_err(|_| io::Error::new(ErrorKind::InvalidData, "Serialization failed"))?;
6873
data.extend_from_slice(&serialized_value);
6974

70-
self.write(key, &data)
75+
self.write(&namespaced_key, &data)
7176
}
7277

7378
fn read_with_ttl<T: DeserializeOwned>(&self, key: &[u8]) -> Result<Option<T>, io::Error> {
74-
let key = &prefix_key(TTL_PREFIX, key);
79+
let namespace_hasher =
80+
TTL_NAMESPACE_HASHER.get_or_init(|| Arc::new(NamespaceHasher::new(TTL_PREFIX)));
81+
let namespaced_key = namespace_hasher.namespace(key);
7582

76-
match self.read(key) {
83+
match self.read(&namespaced_key) {
7784
Some(entry) => {
7885
let data = entry.as_slice();
7986

@@ -91,7 +98,7 @@ impl StorageCacheExt for DataStore {
9198
.as_secs();
9299

93100
if now >= expiration_timestamp {
94-
self.delete_entry(key.as_slice()).ok(); // Remove expired entry
101+
self.delete_entry(&namespaced_key).ok(); // Remove expired entry
95102
return Ok(None);
96103
}
97104

simd-r-drive-extensions/src/storage_option_ext.rs

+12-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
use crate::constants::{OPTION_PREFIX, OPTION_TOMBSTONE_MARKER};
2-
use crate::utils::prefix_key;
2+
use crate::NamespaceHasher;
33
use crate::{deserialize_option, serialize_option};
44
use serde::de::DeserializeOwned;
55
use serde::Serialize;
66
use simd_r_drive::DataStore;
77
use std::io::{self, ErrorKind};
8+
use std::sync::{Arc, OnceLock};
9+
10+
static OPTION_NAMESPACE_HASHER: OnceLock<Arc<NamespaceHasher>> = OnceLock::new();
811

912
#[cfg(any(test, debug_assertions))]
1013
pub const TEST_OPTION_TOMBSTONE_MARKER: [u8; 2] = OPTION_TOMBSTONE_MARKER;
@@ -124,16 +127,20 @@ pub trait StorageOptionExt {
124127
/// Implements `StorageOptionExt` for `DataStore`
125128
impl StorageOptionExt for DataStore {
126129
fn write_option<T: Serialize>(&self, key: &[u8], value: Option<&T>) -> io::Result<u64> {
127-
let key = &prefix_key(OPTION_PREFIX, key);
130+
let namespace_hasher =
131+
OPTION_NAMESPACE_HASHER.get_or_init(|| Arc::new(NamespaceHasher::new(OPTION_PREFIX)));
132+
let namespaced_key = namespace_hasher.namespace(key);
128133

129134
let serialized = serialize_option(value)?;
130-
self.write(key, &serialized)
135+
self.write(&namespaced_key, &serialized)
131136
}
132137

133138
fn read_option<T: DeserializeOwned>(&self, key: &[u8]) -> Result<Option<T>, io::Error> {
134-
let key = &prefix_key(OPTION_PREFIX, key);
139+
let namespace_hasher =
140+
OPTION_NAMESPACE_HASHER.get_or_init(|| Arc::new(NamespaceHasher::new(OPTION_PREFIX)));
141+
let namespaced_key = namespace_hasher.namespace(key);
135142

136-
match self.read(key) {
143+
match self.read(&namespaced_key) {
137144
Some(entry) => deserialize_option::<T>(entry.as_slice()),
138145
None => Err(io::Error::new(
139146
ErrorKind::NotFound,

simd-r-drive-extensions/src/utils.rs

-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,2 @@
11
pub mod option_serializer;
22
pub use option_serializer::{deserialize_option, serialize_option};
3-
4-
pub mod prefix_key;
5-
pub use prefix_key::prefix_key;

simd-r-drive-extensions/src/utils/prefix_key.rs

-27
This file was deleted.

simd-r-drive-extensions/tests/storage_cache_tests.rs

+9-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use serde::{Deserialize, Serialize};
22
use simd_r_drive::DataStore;
3-
use simd_r_drive_extensions::{utils::prefix_key, StorageCacheExt, TEST_TTL_PREFIX};
3+
use simd_r_drive_extensions::{NamespaceHasher, StorageCacheExt, TEST_TTL_PREFIX};
44
use std::io::ErrorKind;
55
use std::thread::sleep;
66
use std::time::Duration;
@@ -258,8 +258,10 @@ fn test_write_and_read_option_with_ttl() {
258258
fn test_ttl_prefix_is_applied() {
259259
let (_dir, storage) = create_temp_storage();
260260

261+
let namespace_hasher = NamespaceHasher::new(TEST_TTL_PREFIX);
262+
261263
let key = b"test_key";
262-
let prefixed_key = prefix_key(TEST_TTL_PREFIX, key);
264+
let namespaced_key = namespace_hasher.namespace(key);
263265
let test_value = TestData {
264266
id: 123,
265267
name: "Test Value".to_string(),
@@ -271,7 +273,7 @@ fn test_ttl_prefix_is_applied() {
271273
.expect("Failed to write with TTL");
272274

273275
// Ensure the prefixed key exists in storage
274-
let raw_data = storage.read(&prefixed_key);
276+
let raw_data = storage.read(&namespaced_key);
275277
assert!(
276278
raw_data.is_some(),
277279
"Expected data to be stored under the prefixed key"
@@ -306,14 +308,16 @@ fn test_ttl_prefixing_does_not_affect_regular_storage() {
306308
name: "Non-TTL Value".to_string(),
307309
};
308310

311+
let namespace_hasher = NamespaceHasher::new(TEST_TTL_PREFIX);
312+
309313
// Directly write without TTL
310314
storage
311315
.write(key, &bincode::serialize(&test_value).unwrap())
312316
.expect("Failed to write without TTL");
313317

314318
// Ensure reading from TTL-prefixed key fails (since it was not stored with TTL)
315-
let prefixed_key = prefix_key(TEST_TTL_PREFIX, key);
316-
let raw_data_prefixed = storage.read(&prefixed_key);
319+
let namespaced_key = namespace_hasher.namespace(key);
320+
let raw_data_prefixed = storage.read(&namespaced_key);
317321
assert!(
318322
raw_data_prefixed.is_none(),
319323
"No TTL-prefixed entry should exist for a non-TTL write"

simd-r-drive-extensions/tests/storage_option_tests.rs

+17-8
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ mod tests {
33
use serde::{Deserialize, Serialize};
44
use simd_r_drive::DataStore;
55
use simd_r_drive_extensions::{
6-
utils::prefix_key, StorageOptionExt, TEST_OPTION_PREFIX, TEST_OPTION_TOMBSTONE_MARKER,
6+
NamespaceHasher, StorageOptionExt, TEST_OPTION_PREFIX, TEST_OPTION_TOMBSTONE_MARKER,
77
};
88
use std::io::ErrorKind;
99
use tempfile::tempdir;
@@ -106,8 +106,11 @@ mod tests {
106106
"Entry should return None when tombstone is written"
107107
);
108108

109+
let namespace_hasher = NamespaceHasher::new(TEST_OPTION_PREFIX);
110+
let namespaced_key = namespace_hasher.namespace(key);
111+
109112
// Step 4: Ensure the entry still exists in storage (not fully deleted)
110-
let raw_entry = storage.read(&prefix_key(&TEST_OPTION_PREFIX, key));
113+
let raw_entry = storage.read(&namespaced_key);
111114
assert!(
112115
raw_entry.is_some(),
113116
"Entry should still exist in storage even after writing None"
@@ -219,8 +222,10 @@ mod tests {
219222
fn test_option_prefix_is_applied_for_some() {
220223
let (_dir, storage) = create_temp_storage();
221224

225+
let namespace_hasher = NamespaceHasher::new(TEST_OPTION_PREFIX);
226+
222227
let key = b"test_key_option";
223-
let prefixed_key = prefix_key(TEST_OPTION_PREFIX, key);
228+
let namespaced_key = namespace_hasher.namespace(key);
224229
let test_value = Some(TestData {
225230
id: 456,
226231
name: "Test Option Value".to_string(),
@@ -232,7 +237,7 @@ mod tests {
232237
.expect("Failed to write option");
233238

234239
// Ensure the prefixed key exists in storage
235-
let raw_data = storage.read(&prefixed_key);
240+
let raw_data = storage.read(&namespaced_key);
236241
assert!(
237242
raw_data.is_some(),
238243
"Expected data to be stored under the prefixed key"
@@ -260,16 +265,18 @@ mod tests {
260265
fn test_option_prefix_is_applied_for_none() {
261266
let (_dir, storage) = create_temp_storage();
262267

268+
let namespace_hasher = NamespaceHasher::new(TEST_OPTION_PREFIX);
269+
263270
let key = b"test_key_none";
264-
let prefixed_key = prefix_key(TEST_OPTION_PREFIX, key);
271+
let namespaced_key = namespace_hasher.namespace(key);
265272

266273
// Write `None`
267274
storage
268275
.write_option::<TestData>(key, None)
269276
.expect("Failed to write None with option handling");
270277

271278
// Ensure the prefixed key exists in storage (tombstone marker stored)
272-
let raw_data = storage.read(&prefixed_key);
279+
let raw_data = storage.read(&namespaced_key);
273280
assert!(
274281
raw_data.is_some(),
275282
"Expected tombstone marker to be stored under the prefixed key"
@@ -308,9 +315,11 @@ mod tests {
308315
.write(key, &bincode::serialize(&test_value).unwrap())
309316
.expect("Failed to write non-option value");
310317

318+
let namespace_hasher = NamespaceHasher::new(TEST_OPTION_PREFIX);
319+
311320
// Ensure reading from the option-prefixed key fails (since it was not stored as an option)
312-
let prefixed_key = prefix_key(TEST_OPTION_PREFIX, key);
313-
let raw_data_prefixed = storage.read(&prefixed_key);
321+
let namespaced_key = namespace_hasher.namespace(key);
322+
let raw_data_prefixed = storage.read(&namespaced_key);
314323
assert!(
315324
raw_data_prefixed.is_none(),
316325
"No option-prefixed entry should exist for a non-prefixed write"

0 commit comments

Comments
 (0)