Skip to content

Commit bb4c6fe

Browse files
committed
Drop redundant LE normalization, add cross-platform hash stability test
- Remove to_le() calls from StableHasher — foldhash-portable's portable feature handles endianness internally. The double-conversion was causing wrong results on big-endian platforms.
- Keep usize/isize → u64/i64 widening (foldhash can't know pointer width).
- Add test_hash_stability: serialized filters with hardcoded expected bytes, verified on both x86_64 (64-bit LE) and PowerPC (32-bit BE) via cross.
- Add cross-platform CI job testing on powerpc, i686, and aarch64.
- Remove RandomBuildHasher (users pass foldhash types directly).
- Make StableBuildHasher pub(crate).
1 parent 8bf77a6 commit bb4c6fe

File tree

3 files changed

+67
-21
lines changed

3 files changed

+67
-21
lines changed

.github/workflows/ci.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,21 @@ jobs:
3636
- run: cargo test --no-default-features
3737
- run: cargo test --all-features
3838

39+
cross-platform-test:
40+
name: Cross-platform tests
41+
runs-on: ubuntu-latest
42+
strategy:
43+
matrix:
44+
target:
45+
- powerpc-unknown-linux-gnu # 32-bit big-endian
46+
- i686-unknown-linux-gnu # 32-bit little-endian
47+
- aarch64-unknown-linux-gnu # 64-bit little-endian (ARM)
48+
steps:
49+
- uses: actions/checkout@v4
50+
- uses: dtolnay/rust-toolchain@stable
51+
- run: cargo install cross
52+
- run: cross test --features serde --target ${{ matrix.target }} --lib
53+
3954
fuzz-tests:
4055
name: Fuzz tests
4156
runs-on: ubuntu-latest

src/lib.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3423,4 +3423,39 @@ mod tests {
34233423
let fps2: Vec<u64> = f2.fingerprints().collect();
34243424
assert_eq!(fps1, fps2);
34253425
}
3426+
3427+
#[cfg(feature = "serde")]
3428+
#[test]
3429+
fn test_hash_stability() {
3430+
// Build filters with known inputs and verify serialized bytes match
3431+
// hardcoded values. This catches any change in the hash algorithm or
3432+
// StableHasher normalization, and ensures cross-platform compatibility
3433+
// (the same bytes must be produced on 32-bit BE and 64-bit LE).
3434+
3435+
fn build_filter(items: &[usize], capacity: u64, fp_rate: f64) -> Vec<u8> {
3436+
let mut f = Filter::new(capacity, fp_rate).unwrap();
3437+
for &i in items {
3438+
f.insert(i).unwrap();
3439+
}
3440+
serde_cbor::to_vec(&f).unwrap()
3441+
}
3442+
3443+
// Small filter with a few usize items (exercises write_usize → write_u64 normalization)
3444+
let small = build_filter(&[1, 2, 3, 42, 100], 100, 0.01);
3445+
// Larger filter with sequential usize items
3446+
let seq = build_filter(&(0..50).collect::<Vec<_>>(), 100, 0.01);
3447+
// Filter with string items
3448+
let mut f = Filter::new(100, 0.01).unwrap();
3449+
for s in ["hello", "world", "foo", "bar"] {
3450+
f.insert(s).unwrap();
3451+
}
3452+
let strings = serde_cbor::to_vec(&f).unwrap();
3453+
3454+
// Hardcoded expected bytes — if these change, the hash algorithm or
3455+
// StableHasher normalization changed, breaking cross-platform compatibility.
3456+
// usize items exercise write_usize → write_u64 normalization (32-bit vs 64-bit).
3457+
assert_eq!(small, [165, 97, 98, 88, 154, 0, 0, 17, 0, 136, 0, 0, 0, 0, 0, 17, 0, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, 0, 0, 160, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224, 14, 0, 0, 158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 108, 5, 97, 113, 7, 97, 114, 7, 97, 109, 246]);
3458+
assert_eq!(seq, [165, 97, 98, 88, 154, 0, 6, 249, 12, 228, 11, 2, 36, 134, 6, 121, 13, 228, 11, 2, 68, 134, 0, 203, 30, 0, 0, 0, 0, 93, 0, 32, 162, 45, 167, 48, 91, 0, 189, 11, 0, 0, 0, 0, 192, 5, 0, 184, 14, 159, 164, 22, 224, 2, 0, 0, 0, 128, 35, 0, 0, 0, 0, 0, 0, 128, 0, 0, 184, 113, 1, 0, 11, 22, 0, 0, 0, 226, 0, 14, 74, 209, 193, 40, 72, 117, 144, 14, 74, 145, 195, 40, 72, 117, 144, 128, 110, 44, 15, 0, 0, 0, 0, 40, 224, 7, 0, 132, 1, 32, 0, 0, 0, 6, 220, 242, 126, 42, 0, 0, 0, 140, 1, 0, 0, 160, 11, 120, 1, 0, 0, 0, 0, 2, 0, 4, 1, 59, 128, 21, 224, 137, 121, 0, 0, 0, 0, 80, 6, 0, 120, 0, 0, 0, 0, 0, 0, 0, 0, 97, 108, 24, 50, 97, 113, 7, 97, 114, 7, 97, 109, 246]);
3459+
assert_eq!(strings, [165, 97, 98, 88, 154, 0, 32, 0, 0, 0, 0, 8, 0, 32, 32, 0, 0, 0, 0, 8, 0, 32, 0, 0, 0, 0, 72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 216, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 108, 4, 97, 113, 7, 97, 114, 7, 97, 109, 246]);
3460+
}
34263461
}

src/stable_hasher.rs

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
use std::hash::{BuildHasher, Hasher};
22

3-
/// Wrapper over a hasher that provides stable output across platforms
4-
/// Based on https://github.com/rust-lang/rust/blob/c0955a34bcb17f0b31d7b86522a520ebe7fa93ac/src/librustc_data_structures/stable_hasher.rs#L78-L166
3+
/// Wrapper over a hasher that provides stable output across platforms.
54
///
6-
/// To that end we always convert integers to little-endian format before
7-
/// hashing and the architecture dependent `isize` and `usize` types are
8-
/// extended to 64 bits if needed.
5+
/// The architecture dependent `isize` and `usize` types are extended to
6+
/// 64 bits if needed. The `portable` feature of foldhash-portable handles
7+
/// endianness normalization internally.
98
pub struct StableHasher {
109
state: foldhash_portable::quality::FoldHasher<'static>,
1110
}
@@ -17,7 +16,6 @@ impl StableHasher {
1716
state: foldhash_portable::quality::FixedState::with_seed(0).build_hasher(),
1817
}
1918
}
20-
2119
}
2220

2321
impl Hasher for StableHasher {
@@ -38,30 +36,29 @@ impl Hasher for StableHasher {
3836

3937
#[inline]
4038
fn write_u16(&mut self, i: u16) {
41-
self.state.write_u16(i.to_le());
39+
self.state.write_u16(i);
4240
}
4341

4442
#[inline]
4543
fn write_u32(&mut self, i: u32) {
46-
self.state.write_u32(i.to_le());
44+
self.state.write_u32(i);
4745
}
4846

4947
#[inline]
5048
fn write_u64(&mut self, i: u64) {
51-
self.state.write_u64(i.to_le());
49+
self.state.write_u64(i);
5250
}
5351

5452
#[inline]
5553
fn write_u128(&mut self, i: u128) {
56-
self.state.write_u128(i.to_le());
54+
self.state.write_u128(i);
5755
}
5856

5957
#[inline]
6058
fn write_usize(&mut self, i: usize) {
6159
// Always treat usize as u64 so we get the same results on 32 and 64 bit
62-
// platforms. This is important for symbol hashes when cross compiling,
63-
// for example.
64-
self.state.write_u64((i as u64).to_le());
60+
// platforms.
61+
self.state.write_u64(i as u64);
6562
}
6663

6764
#[inline]
@@ -71,30 +68,29 @@ impl Hasher for StableHasher {
7168

7269
#[inline]
7370
fn write_i16(&mut self, i: i16) {
74-
self.state.write_i16(i.to_le());
71+
self.state.write_i16(i);
7572
}
7673

7774
#[inline]
7875
fn write_i32(&mut self, i: i32) {
79-
self.state.write_i32(i.to_le());
76+
self.state.write_i32(i);
8077
}
8178

8279
#[inline]
8380
fn write_i64(&mut self, i: i64) {
84-
self.state.write_i64(i.to_le());
81+
self.state.write_i64(i);
8582
}
8683

8784
#[inline]
8885
fn write_i128(&mut self, i: i128) {
89-
self.state.write_i128(i.to_le());
86+
self.state.write_i128(i);
9087
}
9188

9289
#[inline]
9390
fn write_isize(&mut self, i: isize) {
9491
// Always treat isize as i64 so we get the same results on 32 and 64 bit
95-
// platforms. This is important for symbol hashes when cross compiling,
96-
// for example.
97-
self.state.write_i64((i as i64).to_le());
92+
// platforms.
93+
self.state.write_i64(i as i64);
9894
}
9995
}
10096

@@ -117,4 +113,4 @@ impl BuildHasher for StableBuildHasher {
117113
fn build_hasher(&self) -> StableHasher {
118114
StableHasher::new()
119115
}
120-
}
116+
}

0 commit comments

Comments
 (0)