Skip to content

Commit e7c956f

Browse files
authored
Merge pull request #105 from shepmaster/xxh3-128
Add XXH3 128-bit implementation
2 parents 725da3f + f382d8d commit e7c956f

33 files changed

+4154
-1938
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ jobs:
102102
runs-on: ubuntu-latest
103103

104104
env:
105-
IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64
105+
IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 xxhash3_128
106106
FEATURE_SET: random serialize std alloc
107107

108108
steps:

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ members = [
2525
#END-[workspace]
2626

2727
[features]
28-
default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "std"]
28+
default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std"]
2929

3030
random = ["dep:rand"]
3131

@@ -34,6 +34,7 @@ serialize = ["dep:serde"]
3434
xxhash32 = []
3535
xxhash64 = []
3636
xxhash3_64 = []
37+
xxhash3_128 = []
3738

3839
std = ["alloc"]
3940
alloc = []

comparison/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ harness = false
1414
criterion = { version = "0.5.1", features = [] }
1515
proptest = "1.5.0"
1616
rand = "0.8.5"
17-
twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "std"] }
17+
twox-hash = { path = "..", default-features = false, features = ["xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std"] }
1818
xx_hash-sys = { path = "../xx_hash-sys" }

comparison/README.md

+84-12
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ graphs are boring flat lines, so a table is used instead.
1919

2020
| Implementation | Throughput (GiB/s) |
2121
|----------------|--------------------|
22-
| Rust | 13.5 |
23-
| C | 13.5 |
22+
| Rust | 13.4 |
23+
| C | 13.4 |
2424

2525
## x86_64
2626

2727
| Implementation | Throughput (GiB/s) |
2828
|----------------|--------------------|
29-
| Rust | 16.5 |
30-
| C | 16.5 |
29+
| Rust | 16.7 |
30+
| C | 16.6 |
3131

3232

3333
## Streaming data
@@ -75,7 +75,6 @@ Compares the **time taken** to hash 0 to 32 bytes of data.
7575
/>
7676
</a>
7777

78-
7978
# xxHash3 (64-bit)
8079

8180
## Oneshot hashing
@@ -88,20 +87,21 @@ graphs are boring flat lines, so a table is used instead.
8887

8988
| Implementation | Throughput (GiB/s) |
9089
|----------------|--------------------|
91-
| Rust | 35.2 |
90+
| Rust | 35.0 |
9291
| C | 35.0 |
9392
| C (scalar) | 21.2 |
94-
| C (NEON) | 35.1 |
93+
| C (NEON) | 35.0 |
9594

9695
### x86_64
9796

9897
| Implementation | Throughput (GiB/s) |
9998
|----------------|--------------------|
100-
| Rust | 58.6 |
101-
| C | 25.0 |
102-
| C (scalar) | 7.5 |
99+
| Rust | 58.9 |
100+
| C | 25.1 |
101+
| C (scalar) | 7.6 |
103102
| C (SSE2) | 25.1 |
104-
| C (AVX2) | 57.8 |
103+
| C (AVX2) | 58.4 |
104+
105105

106106
## Streaming data
107107

@@ -150,6 +150,78 @@ cluttering the graph and wasting benchmarking time.
150150
/>
151151
</a>
152152

153+
# xxHash3 (128-bit)
154+
155+
## Oneshot hashing
156+
157+
Compares the **speed** of hashing an entire buffer of data in one
158+
function call. Data sizes from 256 KiB to 4 MiB are tested. These
159+
graphs are boring flat lines, so a table is used instead.

### aarch64
160+
161+
| Implementation | Throughput (GiB/s) |
162+
|----------------|--------------------|
163+
| Rust | 34.4 |
164+
| C | 34.8 |
165+
| C (scalar) | 21.3 |
166+
| C (NEON) | 34.6 |
167+
168+
### x86_64
169+
170+
| Implementation | Throughput (GiB/s) |
171+
|----------------|--------------------|
172+
| Rust | 58.3 |
173+
| C | 25.6 |
174+
| C (scalar) | 7.6 |
175+
| C (SSE2) | 25.5 |
176+
| C (AVX2) | 57.4 |
177+
178+
## Streaming data
179+
180+
Compares the **speed** of hashing a 1 MiB buffer of data split into
181+
various chunk sizes.
182+
183+
### aarch64
184+
185+
<a href="./results/xxhash3_128-streaming-aarch64.svg">
186+
<img
187+
src="./results/xxhash3_128-streaming-aarch64.svg"
188+
alt="xxHash3, 128-bit, streaming data, on an aarch64 processor"
189+
/>
190+
</a>
191+
192+
### x86_64
193+
194+
<a href="./results/xxhash3_128-streaming-x86_64.svg">
195+
<img
196+
src="./results/xxhash3_128-streaming-x86_64.svg"
197+
alt="xxHash3, 128-bit, streaming data, on an x86_64 processor"
198+
/>
199+
</a>
200+
201+
## Small amounts of data
202+
203+
Compares the **time taken** to hash 0 to 230 bytes of
204+
data. Representative samples are taken from similar times to avoid
205+
cluttering the graph and wasting benchmarking time.
206+
207+
### aarch64
208+
209+
<a href="./results/xxhash3_128-tiny_data-aarch64.svg">
210+
<img
211+
src="./results/xxhash3_128-tiny_data-aarch64.svg"
212+
alt="xxHash3, 128-bit, small data, on an aarch64 processor"
213+
/>
214+
</a>
215+
216+
### x86_64
217+
218+
<a href="./results/xxhash3_128-tiny_data-x86_64.svg">
219+
<img
220+
src="./results/xxhash3_128-tiny_data-x86_64.svg"
221+
alt="xxHash3, 128-bit, small data, on an x86_64 processor"
222+
/>
223+
</a>
224+
153225
# Benchmark machines
154226

155227
## Overview
@@ -159,7 +231,7 @@ cluttering the graph and wasting benchmarking time.
159231
| Apple M1 Max | 64 GiB | clang 16.0.0 |
160232
| AMD Ryzen 9 3950X | 32 GiB | cl.exe 19.41.34120 |
161233

162-
Tests were run with `rustc 1.81.0 (eeb90cda1 2024-09-04)`.
234+
Tests were run with `rustc 1.82.0 (f6e511eec 2024-10-15)`.
163235

164236
## Details
165237

comparison/benches/benchmark.rs

+198-1
Original file line numberDiff line numberDiff line change
@@ -314,4 +314,201 @@ mod xxhash3_64 {
314314
criterion_group!(benches, tiny_data, oneshot, streaming);
315315
}
316316

317-
criterion_main!(xxhash64::benches, xxhash3_64::benches);
317+
mod xxhash3_128 {
318+
use super::*;
319+
320+
fn tiny_data(c: &mut Criterion) {
321+
let (seed, data) = gen_data(240);
322+
let mut g = c.my_benchmark_group("xxhash3_128", "tiny_data");
323+
324+
// let categories = 0..=data.len();
325+
326+
// Visual inspection of all the data points showed these as
327+
// examples of their nearby neighbors.
328+
let categories = [
329+
0, 2, 6, 13, 25, 50, 80, 113, 135, 150, 165, 185, 200, 215, 230,
330+
];
331+
332+
for size in categories {
333+
let data = &data[..size];
334+
g.throughput(Throughput::Bytes(data.len() as _));
335+
336+
let id = format!("impl-c/size-{size:03}");
337+
g.bench_function(id, |b| {
338+
b.iter(|| c::XxHash3_128::oneshot_with_seed(seed, data))
339+
});
340+
341+
let id = format!("impl-c-scalar/size-{size:03}");
342+
g.bench_function(id, |b| {
343+
b.iter(|| c::scalar::XxHash3_128::oneshot_with_seed(seed, data))
344+
});
345+
346+
#[cfg(target_arch = "aarch64")]
347+
{
348+
let id = format!("impl-c-neon/size-{size:03}");
349+
g.bench_function(id, |b| {
350+
b.iter(|| c::neon::XxHash3_128::oneshot_with_seed(seed, data))
351+
});
352+
}
353+
354+
#[cfg(target_arch = "x86_64")]
355+
{
356+
let id = format!("impl-c-avx2/size-{size:03}");
357+
g.bench_function(id, |b| {
358+
b.iter(|| c::avx2::XxHash3_128::oneshot_with_seed(seed, data))
359+
});
360+
361+
let id = format!("impl-c-sse2/size-{size:03}");
362+
g.bench_function(id, |b| {
363+
b.iter(|| c::sse2::XxHash3_128::oneshot_with_seed(seed, data))
364+
});
365+
}
366+
367+
let id = format!("impl-rust/size-{size:03}");
368+
g.bench_function(id, |b| {
369+
b.iter(|| rust::XxHash3_128::oneshot_with_seed(seed, data))
370+
});
371+
}
372+
373+
g.finish();
374+
}
375+
376+
fn oneshot(c: &mut Criterion) {
377+
let (seed, data) = gen_data(BIG_DATA_SIZE);
378+
let mut g = c.my_benchmark_group("xxhash3_128", "oneshot");
379+
380+
for size in half_sizes(data.len()).take_while(|&s| s >= MIN_BIG_DATA_SIZE) {
381+
let data = &data[..size];
382+
g.throughput(Throughput::Bytes(data.len() as _));
383+
384+
let id = format!("impl-c/size-{size:07}");
385+
g.bench_function(id, |b| {
386+
b.iter(|| c::XxHash3_128::oneshot_with_seed(seed, data))
387+
});
388+
389+
let id = format!("impl-c-scalar/size-{size:07}");
390+
g.bench_function(id, |b| {
391+
b.iter(|| c::scalar::XxHash3_128::oneshot_with_seed(seed, data))
392+
});
393+
394+
#[cfg(target_arch = "aarch64")]
395+
{
396+
let id = format!("impl-c-neon/size-{size:07}");
397+
g.bench_function(id, |b| {
398+
b.iter(|| c::neon::XxHash3_128::oneshot_with_seed(seed, data))
399+
});
400+
}
401+
402+
#[cfg(target_arch = "x86_64")]
403+
{
404+
let id = format!("impl-c-avx2/size-{size:07}");
405+
g.bench_function(id, |b| {
406+
b.iter(|| c::avx2::XxHash3_128::oneshot_with_seed(seed, data))
407+
});
408+
409+
let id = format!("impl-c-sse2/size-{size:07}");
410+
g.bench_function(id, |b| {
411+
b.iter(|| c::sse2::XxHash3_128::oneshot_with_seed(seed, data))
412+
});
413+
}
414+
415+
let id = format!("impl-rust/size-{size:07}");
416+
g.bench_function(id, |b| {
417+
b.iter(|| rust::XxHash3_128::oneshot_with_seed(seed, data))
418+
});
419+
}
420+
421+
g.finish();
422+
}
423+
424+
fn streaming(c: &mut Criterion) {
425+
let mut g = c.my_benchmark_group("xxhash3_128", "streaming");
426+
427+
let size = 1024 * 1024;
428+
let (seed, data) = gen_data(size);
429+
430+
for chunk_size in half_sizes(size) {
431+
let chunks = data.chunks(chunk_size).collect::<Vec<_>>();
432+
433+
g.throughput(Throughput::Bytes(size as _));
434+
435+
let id = format!("impl-c/size-{size:07}/chunk_size-{chunk_size:07}");
436+
g.bench_function(id, |b| {
437+
b.iter(|| {
438+
let mut hasher = c::XxHash3_128::with_seed(seed);
439+
for chunk in &chunks {
440+
hasher.write(chunk);
441+
}
442+
hasher.finish()
443+
})
444+
});
445+
446+
let id = format!("impl-c-scalar/size-{size:07}/chunk_size-{chunk_size:07}");
447+
g.bench_function(id, |b| {
448+
b.iter(|| {
449+
let mut hasher = c::scalar::XxHash3_128::with_seed(seed);
450+
for chunk in &chunks {
451+
hasher.write(chunk);
452+
}
453+
hasher.finish()
454+
})
455+
});
456+
457+
#[cfg(target_arch = "aarch64")]
458+
{
459+
let id = format!("impl-c-neon/size-{size:07}/chunk_size-{chunk_size:07}");
460+
g.bench_function(id, |b| {
461+
b.iter(|| {
462+
let mut hasher = c::neon::XxHash3_128::with_seed(seed);
463+
for chunk in &chunks {
464+
hasher.write(chunk);
465+
}
466+
hasher.finish()
467+
})
468+
});
469+
}
470+
471+
#[cfg(target_arch = "x86_64")]
472+
{
473+
let id = format!("impl-c-avx2/size-{size:07}/chunk_size-{chunk_size:07}");
474+
g.bench_function(id, |b| {
475+
b.iter(|| {
476+
let mut hasher = c::avx2::XxHash3_128::with_seed(seed);
477+
for chunk in &chunks {
478+
hasher.write(chunk);
479+
}
480+
hasher.finish()
481+
})
482+
});
483+
484+
let id = format!("impl-c-sse2/size-{size:07}/chunk_size-{chunk_size:07}");
485+
g.bench_function(id, |b| {
486+
b.iter(|| {
487+
let mut hasher = c::sse2::XxHash3_128::with_seed(seed);
488+
for chunk in &chunks {
489+
hasher.write(chunk);
490+
}
491+
hasher.finish()
492+
})
493+
});
494+
}
495+
496+
let id = format!("impl-rust/size-{size:07}/chunk_size-{chunk_size:07}");
497+
g.bench_function(id, |b| {
498+
b.iter(|| {
499+
let mut hasher = rust::XxHash3_128::with_seed(seed);
500+
for chunk in &chunks {
501+
hasher.write(chunk);
502+
}
503+
hasher.finish_128()
504+
})
505+
});
506+
}
507+
508+
g.finish();
509+
}
510+
511+
criterion_group!(benches, tiny_data, oneshot, streaming);
512+
}
513+
514+
criterion_main!(xxhash64::benches, xxhash3_64::benches, xxhash3_128::benches);

comparison/generate-graph.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ cpus = c(aarch64 = "Apple M1 Max", x86_64 = "AMD Ryzen 9 3950X")
4545

4646
common_theme = theme(legend.position = "inside", legend.position.inside = c(0.8, 0.2), plot.margin = unit(c(0.1, 1, 0.1, 0.1), 'cm'))
4747

48-
for (algo in c("xxhash64", "xxhash3_64")) {
48+
for (algo in c("xxhash64", "xxhash3_64", "xxhash3_128")) {
4949
message("# ", algo)
5050

5151
algo_data = data[data$algo == algo,]

0 commit comments

Comments
 (0)