diff --git a/Cargo.toml b/Cargo.toml
index b8ff99f..e0ae395 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,7 +16,8 @@ license = "MIT"
 
 [features]
 compile_failure = []
-unstable = []
+
+__internal_expose_string_pool = []
 
 [dependencies]
 peresil = "0.3.0"
diff --git a/compatibility-tests/benchmarks/Cargo.toml b/compatibility-tests/benchmarks/Cargo.toml
new file mode 100644
index 0000000..42d13e3
--- /dev/null
+++ b/compatibility-tests/benchmarks/Cargo.toml
@@ -0,0 +1,17 @@
+# This exists so that our Rust version choice doesn't affect our
+# benchmarking tool choice.
+
+[package]
+name = "benchmarks"
+version = "0.1.0"
+edition = "2024"
+publish = false
+
+[dependencies]
+sxd-document = { path = "../..", features = ["__internal_expose_string_pool"] }
+
+criterion = "0.6.0"
+
+[[bench]]
+name = "string_pool"
+harness = false
diff --git a/compatibility-tests/benchmarks/benches/string_pool.rs b/compatibility-tests/benchmarks/benches/string_pool.rs
new file mode 100644
index 0000000..c94b2c9
--- /dev/null
+++ b/compatibility-tests/benchmarks/benches/string_pool.rs
@@ -0,0 +1,71 @@
+use criterion::{criterion_group, criterion_main, Criterion, Throughput};
+use sxd_document::__internal::StringPool;
+
+/// Benchmarks interning the string `"hello"` into a pool created once, outside `b.iter`.
+fn single_string(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_string");
+
+    let original = "hello";
+
+    group.throughput(Throughput::Bytes(original.len() as u64));
+    group.bench_function("single_string", |b| {
+        let pool = StringPool::new();
+        b.iter(|| pool.intern(original));
+    });
+
+    group.finish();
+}
+
+/// Benchmarks interning 1000 distinct strings (`str0str` through `str999str`).
+///
+/// The pool is created once, outside `b.iter`, and reused by every
+/// invocation of the measured closure.
+fn many_unique_string(c: &mut Criterion) {
+    let mut group = c.benchmark_group("many_unique_string");
+
+    let strings: Vec<_> = (0..1000).map(|i| format!("str{i}str")).collect();
+    let total_len = strings.iter().map(|s| s.len()).sum::<usize>();
+
+    group.throughput(Throughput::Bytes(total_len as u64));
+    group.bench_function("many_unique_string", |b| {
+        let pool = StringPool::new();
+
+        b.iter(|| {
+            for s in &strings {
+                pool.intern(s);
+            }
+        });
+    });
+
+    group.finish();
+}
+
+/// Benchmarks interning 1000 strings that cycle through only 100 distinct
+/// values (`i % 100`).
+fn many_repeated_string(c: &mut Criterion) {
+    let mut group = c.benchmark_group("many_repeated_string");
+
+    let strings: Vec<_> = (0..1000).map(|i| format!("str{}str", i % 100)).collect();
+    let total_len = strings.iter().map(|s| s.len()).sum::<usize>();
+
+    group.throughput(Throughput::Bytes(total_len as u64));
+    group.bench_function("many_repeated_string", |b| {
+        let pool = StringPool::new();
+
+        b.iter(|| {
+            for s in &strings {
+                pool.intern(s);
+            }
+        });
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    single_string,
+    many_unique_string,
+    many_repeated_string,
+);
+criterion_main!(benches);
diff --git a/compatibility-tests/benchmarks/src/lib.rs b/compatibility-tests/benchmarks/src/lib.rs
new file mode 100644
index 0000000..e69de29
diff --git a/src/lib.rs b/src/lib.rs
index e4de3db..d16a394 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -49,7 +49,6 @@
 //! Try to leverage the type system as much as possible.
 
 #![deny(rust_2018_idioms)]
-#![cfg_attr(feature = "unstable", feature(test))]
 
 #[macro_use]
 extern crate peresil;
@@ -68,6 +67,14 @@ pub mod parser;
 pub mod thindom;
 pub mod writer;
 
+/// Re-exports `StringPool` for the benchmark crate in
+/// `compatibility-tests/benchmarks`; only compiled when the
+/// `__internal_expose_string_pool` feature is enabled.
+#[cfg(feature = "__internal_expose_string_pool")]
+pub mod __internal {
+    pub use super::string_pool::StringPool;
+}
+
 pub use crate::str::XmlChar;
 
 static XML_NS_PREFIX: &str = "xml";
diff --git a/src/string_pool.rs b/src/string_pool.rs
index 41b0e26..330e701 100644
--- a/src/string_pool.rs
+++ b/src/string_pool.rs
@@ -275,45 +275,3 @@ mod test {
         assert_eq!(interned.as_ptr(), ptr_val);
     }
 }
-
-#[cfg(feature = "unstable")]
-mod bench {
-    extern crate test;
-
-    use self::test::Bencher;
-
-    use super::StringPool;
-
-    #[bench]
-    fn single_string(b: &mut Bencher) {
-        let s = StringPool::new();
-        b.iter(|| s.intern("hello"));
-        b.bytes = "hello".len() as u64;
-    }
-
-    #[bench]
-    fn many_unique_string(b: &mut Bencher) {
-        let s = StringPool::new();
-
-        let strings: Vec<String> = (0..1000).map(|i| format!("str{}str", i)).collect();
-        b.iter(|| {
-            for ss in strings.iter() {
-                s.intern(ss);
-            }
-        });
-        b.bytes = strings.iter().fold(0, |a, s| a + s.len()) as u64;
-    }
-
-    #[bench]
-    fn many_repeated_string(b: &mut Bencher) {
-        let s = StringPool::new();
-
-        let strings: Vec<String> = (0..1000).map(|i| format!("str{}str", i % 100)).collect();
-        b.iter(|| {
-            for ss in strings.iter() {
-                s.intern(ss);
-            }
-        });
-        b.bytes = strings.iter().fold(0, |a, s| a + s.len()) as u64;
-    }
-}