Skip to content

Commit 8adf7d3

Browse files
[storage/index] Optimize Translator Usage (#843)
1 parent b2a17a7 commit 8adf7d3

3 files changed

Lines changed: 129 additions & 31 deletions

File tree

storage/src/index/mod.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,20 @@ mod storage;
1515
pub use storage::{Index, RemoveValueIterator, UpdateValueIterator, ValueIterator};
1616
pub mod translator;
1717

18-
use std::hash::Hash;
18+
use std::hash::{BuildHasher, Hash};
1919

2020
/// Translate keys into an internal representation used by `Index`.
2121
///
2222
/// # Warning
2323
///
24-
/// If invoking `transform` on keys results in many conflicts, the performance of `Index` will
25-
/// degrade substantially.
26-
pub trait Translator: Clone {
27-
type Key: Eq + Hash + Send + Sync + Clone;
24+
/// The output of `transform` is used as the key in a hash table. If the output is not uniformly
25+
/// distributed, the performance of [Index] will degrade substantially.
26+
pub trait Translator: Clone + BuildHasher {
27+
/// The type of the internal representation of keys.
28+
///
29+
/// Although `Translator` is a [BuildHasher], the `Key` type must still implement [Hash] for compatibility
30+
/// with the [std::collections::HashMap] used internally by [Index].
31+
type Key: Eq + Hash;
2832

2933
/// Transform a key into its internal representation.
3034
fn transform(&self, key: &[u8]) -> Self::Key;

storage/src/index/storage.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ use std::{
99
mem::swap,
1010
};
1111

12+
/// The initial capacity of the hashmap. This is a guess at the number of unique keys we will
13+
/// encounter. The hashmap will grow as needed, but this is a good starting point (covering
14+
/// the entire [super::translator::OneCap] range).
15+
const INITIAL_CAPACITY: usize = 256;
16+
1217
/// Each key is mapped to a `Record` that contains a linked list of potential values for the key.
1318
///
1419
/// In the common case of a single value associated with a key, the value is stored within the
@@ -205,7 +210,7 @@ impl<V> Record<V> {
205210
/// An index that maps translated keys to values.
206211
pub struct Index<T: Translator, V> {
207212
translator: T,
208-
map: HashMap<T::Key, Record<V>>,
213+
map: HashMap<T::Key, Record<V>, T>,
209214

210215
collisions: Counter,
211216
keys_pruned: Counter,
@@ -215,8 +220,8 @@ impl<T: Translator, V> Index<T, V> {
215220
/// Create a new index.
216221
pub fn init(context: impl Metrics, translator: T) -> Self {
217222
let s = Self {
218-
translator,
219-
map: HashMap::new(),
223+
translator: translator.clone(),
224+
map: HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, translator),
220225
collisions: Counter::default(),
221226
keys_pruned: Counter::default(),
222227
};

storage/src/index/translator.rs

Lines changed: 112 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,57 @@
1+
//! Primitive implementations of [Translator].
2+
13
use super::Translator;
4+
use std::hash::{BuildHasher, Hasher};
5+
6+
/// A “do-nothing” hasher for unsigned-integer keys (`u8`, `u16`, `u32`, `u64`).
7+
///
8+
/// [super::Index] typically stores keys that are **already hashed** (shortened by the [Translator]).
9+
/// Re-hashing them with SipHash (by [std::collections::HashMap]) would waste CPU, so we give `HashMap`
10+
/// this identity hasher instead:
11+
///
12+
/// * `write_u8`, `write_u16`, `write_u32` and `write_u64` each copy the input into an internal field;
13+
/// * `finish()` returns that value unchanged.
14+
///
15+
/// # Warning
16+
///
17+
/// This hasher is not suitable for general use. If the hasher is called over some type that is not
18+
/// `u8`, `u16`, `u32` or `u64`, it will panic.
19+
#[derive(Default, Clone)]
20+
pub struct UintIdentity {
21+
value: u64,
22+
}
23+
24+
impl Hasher for UintIdentity {
25+
#[inline]
26+
fn write(&mut self, _: &[u8]) {
27+
unimplemented!("we should only ever call type-specific write methods");
28+
}
29+
30+
#[inline]
31+
fn write_u8(&mut self, i: u8) {
32+
self.value = i as u64;
33+
}
34+
35+
#[inline]
36+
fn write_u16(&mut self, i: u16) {
37+
self.value = i as u64;
38+
}
39+
40+
#[inline]
41+
fn write_u32(&mut self, i: u32) {
42+
self.value = i as u64;
43+
}
44+
45+
#[inline]
46+
fn write_u64(&mut self, i: u64) {
47+
self.value = i;
48+
}
49+
50+
#[inline]
51+
fn finish(&self) -> u64 {
52+
self.value
53+
}
54+
}
255

356
/// Cap the key to a fixed length.
457
///
@@ -13,60 +66,96 @@ fn cap<const N: usize>(key: &[u8]) -> [u8; N] {
1366
capped[..len].copy_from_slice(&key[..len]);
1467
capped
1568
}
69+
1670
macro_rules! define_cap_translator {
17-
($name:ident, $size:expr) => {
18-
#[doc = concat!("A translator that caps the key to ", stringify!($size), " bytes.")]
19-
#[derive(Clone)]
71+
($name:ident, $size:expr, $int:ty) => {
72+
#[doc = concat!("Translator that caps the key to ", stringify!($size), " byte(s) and returns it packed in a ", stringify!($int), ".")]
73+
#[derive(Clone, Default)]
2074
pub struct $name;
2175

2276
impl Translator for $name {
23-
type Key = [u8; $size];
77+
// Minimal uint size for the key.
78+
type Key = $int;
2479

80+
#[inline]
2581
fn transform(&self, key: &[u8]) -> Self::Key {
26-
cap(key)
82+
let capped = cap::<$size>(key);
83+
<$int>::from_le_bytes(capped)
84+
}
85+
}
86+
87+
// Implement `BuildHasher` for the translator itself so it can be passed to `HashMap` as its hasher; it builds `UintIdentity`.
88+
impl BuildHasher for $name {
89+
type Hasher = UintIdentity;
90+
91+
#[inline]
92+
fn build_hasher(&self) -> Self::Hasher {
93+
UintIdentity::default()
2794
}
2895
}
2996
};
3097
}
3198

32-
define_cap_translator!(TwoCap, 2);
33-
define_cap_translator!(FourCap, 4);
34-
define_cap_translator!(EightCap, 8);
99+
// Define translators for different sizes.
100+
define_cap_translator!(OneCap, 1, u8);
101+
define_cap_translator!(TwoCap, 2, u16);
102+
define_cap_translator!(FourCap, 4, u32);
103+
define_cap_translator!(EightCap, 8, u64);
35104

36105
#[cfg(test)]
37106
mod tests {
38107
use super::*;
108+
use std::hash::Hasher;
109+
110+
#[test]
111+
fn test_one_cap() {
112+
let t = OneCap;
113+
assert_eq!(t.transform(b"").to_le_bytes(), [0]);
114+
assert_eq!(t.transform(b"a").to_le_bytes(), [b'a']);
115+
assert_eq!(t.transform(b"ab").to_le_bytes(), [b'a']);
116+
assert_eq!(t.transform(b"abc").to_le_bytes(), [b'a']);
117+
}
39118

40119
#[test]
41120
fn test_two_cap() {
42-
let translator = TwoCap;
43-
assert_eq!(translator.transform(b""), [0, 0]);
44-
assert_eq!(translator.transform(b"a"), [b'a', 0]);
45-
assert_eq!(translator.transform(b"ab"), [b'a', b'b']);
46-
assert_eq!(translator.transform(b"abc"), [b'a', b'b']);
121+
let t = TwoCap;
122+
assert_eq!(t.transform(b"").to_le_bytes(), [0, 0]);
123+
assert_eq!(t.transform(b"a").to_le_bytes(), [b'a', 0]);
124+
assert_eq!(t.transform(b"ab").to_le_bytes(), [b'a', b'b']);
125+
assert_eq!(t.transform(b"abc").to_le_bytes(), [b'a', b'b']);
47126
}
48127

49128
#[test]
50129
fn test_four_cap() {
51-
let translator = FourCap;
52-
assert_eq!(translator.transform(b""), [0, 0, 0, 0]);
53-
assert_eq!(translator.transform(b"a"), [b'a', 0, 0, 0]);
54-
assert_eq!(translator.transform(b"abcd"), [b'a', b'b', b'c', b'd']);
55-
assert_eq!(translator.transform(b"abcdef"), [b'a', b'b', b'c', b'd']);
130+
let t = FourCap;
131+
assert_eq!(t.transform(b"").to_le_bytes(), [0, 0, 0, 0]);
132+
assert_eq!(t.transform(b"a").to_le_bytes(), [b'a', 0, 0, 0]);
133+
assert_eq!(t.transform(b"abcd").to_le_bytes(), [b'a', b'b', b'c', b'd']);
134+
assert_eq!(
135+
t.transform(b"abcdef").to_le_bytes(),
136+
[b'a', b'b', b'c', b'd']
137+
);
56138
}
57139

58140
#[test]
59141
fn test_eight_cap() {
60-
let translator = EightCap;
61-
assert_eq!(translator.transform(b""), [0, 0, 0, 0, 0, 0, 0, 0]);
62-
assert_eq!(translator.transform(b"a"), [b'a', 0, 0, 0, 0, 0, 0, 0]);
142+
let t = EightCap;
143+
assert_eq!(t.transform(b"").to_le_bytes(), [0, 0, 0, 0, 0, 0, 0, 0]);
144+
assert_eq!(t.transform(b"a").to_le_bytes(), [b'a', 0, 0, 0, 0, 0, 0, 0]);
63145
assert_eq!(
64-
translator.transform(b"abcdefgh"),
146+
t.transform(b"abcdefgh").to_le_bytes(),
65147
[b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']
66148
);
67149
assert_eq!(
68-
translator.transform(b"abcdefghijk"),
150+
t.transform(b"abcdefghijk").to_le_bytes(),
69151
[b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']
70152
);
71153
}
154+
155+
#[test]
156+
#[should_panic(expected = "we should only ever call type-specific write methods")]
157+
fn identity_hasher_panics_on_write_slice() {
158+
let mut h = UintIdentity::default();
159+
h.write(b"not an int");
160+
}
72161
}

0 commit comments

Comments
 (0)