Skip to content

Commit 1d08600

Browse files
committed
expand benchmarks taken from ada-url/ada
1 parent 911bbbf commit 1d08600

File tree

7 files changed

+748
-18
lines changed

7 files changed

+748
-18
lines changed

Cargo.toml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,31 @@ name = "parse"
2222
path = "bench/parse.rs"
2323
harness = false
2424

25+
[[bench]]
26+
name = "bbc"
27+
path = "bench/bbc.rs"
28+
harness = false
29+
30+
[[bench]]
31+
name = "search_params"
32+
path = "bench/search_params.rs"
33+
harness = false
34+
35+
[[bench]]
36+
name = "ipv4"
37+
path = "bench/ipv4.rs"
38+
harness = false
39+
40+
[[bench]]
41+
name = "scheme"
42+
path = "bench/scheme.rs"
43+
harness = false
44+
45+
[[bench]]
46+
name = "wpt"
47+
path = "bench/wpt.rs"
48+
harness = false
49+
2550
[features]
2651
default = ["std"]
2752
# pass `cpp_set_stdlib("c++")` to `cc`

bench/bbc.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
2+
use std::hint::black_box;
3+
use ada_url::Url;
4+
5+
/// Sample of real-world asset URLs taken from the BBC homepage.
///
/// Intended to mirror `url_examples` in the C++ `bbc_bench.cpp` benchmark.
const BBC_URLS: &[&str] = &[
    "https://static.files.bbci.co.uk/orbit/737a4ee2bed596eb65afc4d2ce9af568/js/polyfills.js",
    "https://static.files.bbci.co.uk/orbit/737a4ee2bed596eb65afc4d2ce9af568/css/orbit-v5-ltr.min.css",
    "https://static.files.bbci.co.uk/orbit/737a4ee2bed596eb65afc4d2ce9af568/js/require.min.js",
    "https://static.files.bbci.co.uk/fonts/reith/2.512/BBCReithSans_W_Rg.woff2",
    "https://nav.files.bbci.co.uk/searchbox/c8bfe8595e453f2b9483fda4074e9d15/css/box.css",
    "https://static.files.bbci.co.uk/cookies/d3bb303e79f041fec95388e04f84e716/cookie-banner/cookie-library.bundle.js",
    "https://static.files.bbci.co.uk/account/id-cta/597/style/id-cta.css",
    "https://gn-web-assets.api.bbc.com/wwhp/20220908-1153-091014d07889c842a7bdc06e00fa711c9e04f049/responsive/css/old-ie.min.css",
    "https://gn-web-assets.api.bbc.com/wwhp/20220908-1153-091014d07889c842a7bdc06e00fa711c9e04f049/modules/vendor/bower/modernizr/modernizr.js",
];
18+
19+
fn total_bytes() -> u64 {
20+
BBC_URLS.iter().map(|u| u.len() as u64).sum()
21+
}
22+
23+
/// Parse BBC URLs and get href — matches BBC_BasicBench_AdaURL_href in bbc_bench.cpp.
24+
pub fn bbc_basic_bench_ada_url_href(c: &mut Criterion) {
25+
let mut group = c.benchmark_group("BBC_BasicBench_AdaURL_href");
26+
group.throughput(Throughput::Bytes(total_bytes()));
27+
group.bench_function("ada_url", |b| {
28+
b.iter(|| {
29+
let mut href_size = 0usize;
30+
for &url in BBC_URLS {
31+
if let Ok(parsed) = Url::parse(black_box(url), None) {
32+
href_size += parsed.href().len();
33+
}
34+
}
35+
black_box(href_size)
36+
})
37+
});
38+
group.bench_function("url", |b| {
39+
b.iter(|| {
40+
let mut href_size = 0usize;
41+
for &url in BBC_URLS {
42+
if let Ok(parsed) = black_box(url).parse::<url::Url>() {
43+
href_size += parsed.as_str().len();
44+
}
45+
}
46+
black_box(href_size)
47+
})
48+
});
49+
group.finish();
50+
}
51+
52+
/// Check if BBC URLs can be parsed — matches BBC_BasicBench_AdaURL_CanParse in bbc_bench.cpp.
53+
pub fn bbc_basic_bench_ada_url_can_parse(c: &mut Criterion) {
54+
let mut group = c.benchmark_group("BBC_BasicBench_AdaURL_CanParse");
55+
group.throughput(Throughput::Bytes(total_bytes()));
56+
group.bench_function("ada_url", |b| {
57+
b.iter(|| {
58+
let mut success = 0usize;
59+
for &url in BBC_URLS {
60+
if Url::can_parse(black_box(url), None) {
61+
success += 1;
62+
}
63+
}
64+
black_box(success)
65+
})
66+
});
67+
group.finish();
68+
}
69+
70+
criterion_group!(
71+
benches,
72+
bbc_basic_bench_ada_url_href,
73+
bbc_basic_bench_ada_url_can_parse
74+
);
75+
criterion_main!(benches);

bench/ipv4.rs

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
use ada_url::Url;
2+
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
3+
use std::hint::black_box;
4+
use std::num::Wrapping;
5+
6+
/// URLs whose host is an IPv4 address written with hexadecimal, octal,
/// shortened, or single-number notation rather than plain dotted decimal.
///
/// Intended to mirror `kIpv4NonDecimalUrls` in `bench_ipv4.cpp`.
const IPV4_NON_DECIMAL_URLS: &[&str] = &[
    "http://0x7f.0x0.0x0.0x1",
    "http://0177.000.000.001",
    "http://0x7f.1.2.03",
    "http://0x7f.000.00.000",
    "http://000.000.000.000",
    "http://0x.0x.0x.0x",
    "http://0300.0250.0001.0001",
    "http://0xc0.0xa8.0x01.0x01",
    "http://3232235777",
    "http://0xc0a80101",
    "http://030052000401",
    "http://127.1",
    "http://127.0.1",
    "http://0x7f.1",
    "http://0177.1",
    "http://0300.0xa8.1.1",
    "http://192.168.0x1.01",
    "http://0x0.0x0.0x0.0x0",
    "http://0.0.0.0x0",
    "http://022.022.022.022",
    "http://0x12.0x12.0x12.0x12",
    "http://0xff.0xff.0xff.0xff",
    "http://0377.0377.0377.0377",
    "http://4294967295",
    "http://0xffffffff",
    "http://0x00.0x00.0x00.0x00",
    "http://00000.00000.00000.00000",
    "http://1.0x2.03.4",
    "http://0x1.2.0x3.4",
    "http://0.01.0x02.3",
];
39+
40+
/// URLs with ordinary DNS host names (no IP-address hosts).
///
/// Intended to mirror `kDnsFallbackUrls` in `bench_ipv4.cpp`.
const DNS_FALLBACK_URLS: &[&str] = &[
    "http://example.com",
    "http://www.google.com",
    "http://localhost",
    "http://foo.bar",
    "http://github.com",
    "http://microsoft.com",
    "http://aws.amazon.com",
    "http://adaparser.com",
    "http://www.wikipedia.org",
    "http://www.apple.com",
    "http://www.amazon.com",
    "http://www.facebook.com",
    "http://www.twitter.com",
    "http://www.instagram.com",
    "http://www.linkedin.com",
    "http://www.reddit.com",
    "http://www.netflix.com",
    "http://www.youtube.com",
    "http://www.bing.com",
    "http://www.yahoo.com",
];
63+
64+
/// Small xorshift64 pseudo-random generator for reproducible benchmark data.
///
/// The output depends only on the seed, so every run builds the same workload.
///
/// NOTE(review): an earlier comment compared this to `std::mt19937` (seed 42) in
/// `bench_ipv4.cpp`. This is a different algorithm, so the generated values are
/// not expected to equal the ones produced by the C++ benchmark.
struct Xorshift64 {
    state: Wrapping<u64>,
}

impl Xorshift64 {
    /// State substituted when seed mixing produces zero.
    const NONZERO_FALLBACK: u64 = 0x9e3779b97f4a7c15;

    /// Creates a generator whose initial state is `seed` passed through a
    /// SplitMix64-style mixing step.
    ///
    /// The mixing step is a bijection, so exactly one seed maps to a zero
    /// state. xorshift can never leave zero (every output would be 0), so that
    /// single case is replaced with a fixed non-zero constant. All other seeds
    /// keep the state the mixing step produced.
    fn new(seed: u64) -> Self {
        let mut s = Wrapping(seed);
        s += Wrapping(0x9e3779b97f4a7c15u64);
        s = (s ^ (s >> 30)) * Wrapping(0xbf58476d1ce4e5b9u64);
        s = (s ^ (s >> 27)) * Wrapping(0x94d049bb133111ebu64);
        s ^= s >> 31;
        if s.0 == 0 {
            s = Wrapping(Self::NONZERO_FALLBACK);
        }
        Self { state: s }
    }

    /// Advances the generator one step (shifts 13, 7, 17) and returns the new state.
    fn next_u64(&mut self) -> u64 {
        let mut x = self.state;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.state = x;
        x.0
    }

    /// Returns the low 8 bits of the next 64-bit output.
    fn next_octet(&mut self) -> u8 {
        (self.next_u64() & 0xff) as u8
    }
}
94+
95+
/// Generate 5000 random decimal IPv4 URLs — matches GetDecimalWorkload in bench_ipv4.cpp.
96+
fn generate_decimal_ipv4(count: usize) -> Vec<String> {
97+
let mut rng = Xorshift64::new(42);
98+
(0..count)
99+
.map(|_| {
100+
format!(
101+
"http://{}.{}.{}.{}",
102+
rng.next_octet(),
103+
rng.next_octet(),
104+
rng.next_octet(),
105+
rng.next_octet()
106+
)
107+
})
108+
.collect()
109+
}
110+
111+
/// Build permutation with Knuth shuffle, using a fixed seed — matches make_permutation.
112+
fn make_permutation(count: usize, seed: u64) -> Vec<usize> {
113+
let mut order: Vec<usize> = (0..count).collect();
114+
if count < 2 {
115+
return order;
116+
}
117+
let mut rng = Xorshift64::new(seed);
118+
for i in (1..count).rev() {
119+
let j = (rng.next_u64() as usize) % (i + 1);
120+
order.swap(i, j);
121+
}
122+
order
123+
}
124+
125+
/// Build coprime strides — matches make_strides in bench_ipv4.cpp.
126+
fn make_strides(count: usize) -> Vec<usize> {
127+
let mut strides = Vec::new();
128+
if count > 1 {
129+
let limit = count.min(100);
130+
for s in 1..limit {
131+
if gcd(s, count) == 1 {
132+
strides.push(s);
133+
}
134+
}
135+
}
136+
if strides.is_empty() {
137+
strides.push(1);
138+
}
139+
strides
140+
}
141+
142+
/// Greatest common divisor by Euclid's algorithm; `gcd(x, 0)` is `x`.
fn gcd(a: usize, b: usize) -> usize {
    if b == 0 { a } else { gcd(b, a % b) }
}
150+
151+
/// Core benchmark runner — matches run_benchmark in bench_ipv4.cpp.
152+
fn run_benchmark(c: &mut Criterion, group_name: &str, urls: &[String]) {
153+
if urls.is_empty() {
154+
return;
155+
}
156+
let bytes: u64 = urls.iter().map(|u| u.len() as u64).sum();
157+
let count = urls.len();
158+
159+
let order = make_permutation(count, 0x12345678);
160+
let strides = make_strides(count);
161+
162+
let mut group = c.benchmark_group(group_name);
163+
group.throughput(Throughput::Bytes(bytes));
164+
165+
let mut iter = 0usize;
166+
group.bench_function("ada_url", |b| {
167+
b.iter(|| {
168+
let stride = strides[iter % strides.len()];
169+
let mut pos = iter % count;
170+
let mut success = 0usize;
171+
for _ in 0..count {
172+
let result = Url::parse(black_box(urls[order[pos]].as_str()), None);
173+
if result.is_ok() {
174+
success += 1;
175+
}
176+
pos += stride;
177+
if pos >= count {
178+
pos -= count;
179+
}
180+
}
181+
black_box(success);
182+
iter = iter.wrapping_add(1);
183+
})
184+
});
185+
group.finish();
186+
}
187+
188+
fn run_benchmark_static(c: &mut Criterion, group_name: &str, urls: &[&str]) {
189+
if urls.is_empty() {
190+
return;
191+
}
192+
let bytes: u64 = urls.iter().map(|u| u.len() as u64).sum();
193+
let count = urls.len();
194+
195+
let order = make_permutation(count, 0x12345678);
196+
let strides = make_strides(count);
197+
198+
let mut group = c.benchmark_group(group_name);
199+
group.throughput(Throughput::Bytes(bytes));
200+
201+
let mut iter = 0usize;
202+
group.bench_function("ada_url", |b| {
203+
b.iter(|| {
204+
let stride = strides[iter % strides.len()];
205+
let mut pos = iter % count;
206+
let mut success = 0usize;
207+
for _ in 0..count {
208+
let result = Url::parse(black_box(urls[order[pos]]), None);
209+
if result.is_ok() {
210+
success += 1;
211+
}
212+
pos += stride;
213+
if pos >= count {
214+
pos -= count;
215+
}
216+
}
217+
black_box(success);
218+
iter = iter.wrapping_add(1);
219+
})
220+
});
221+
group.finish();
222+
}
223+
224+
/// Benchmark decimal IPv4 URL parsing — matches Bench_IPv4_Decimal_AdaURL in bench_ipv4.cpp.
225+
pub fn bench_ipv4_decimal(c: &mut Criterion) {
226+
let urls = generate_decimal_ipv4(5000);
227+
run_benchmark(c, "Bench_IPv4_Decimal", &urls);
228+
}
229+
230+
/// Benchmark non-decimal IPv4 URL parsing — matches Bench_IPv4_NonDecimal_AdaURL in bench_ipv4.cpp.
231+
pub fn bench_ipv4_non_decimal(c: &mut Criterion) {
232+
// Repeat fixed non-decimal set to 5000 entries, matching GetNonDecimalWorkload
233+
let src_len = IPV4_NON_DECIMAL_URLS.len();
234+
let urls: Vec<&str> = (0..5000).map(|i| IPV4_NON_DECIMAL_URLS[i % src_len]).collect();
235+
run_benchmark_static(c, "Bench_IPv4_NonDecimal", &urls);
236+
}
237+
238+
/// Benchmark DNS hostname URL parsing — matches Bench_DNS_AdaURL in bench_ipv4.cpp.
239+
pub fn bench_dns(c: &mut Criterion) {
240+
// Repeat fixed DNS set to 2000 entries, matching GetDnsWorkload fallback
241+
let src_len = DNS_FALLBACK_URLS.len();
242+
let urls: Vec<&str> = (0..2000).map(|i| DNS_FALLBACK_URLS[i % src_len]).collect();
243+
run_benchmark_static(c, "Bench_DNS", &urls);
244+
}
245+
246+
criterion_group!(benches, bench_ipv4_decimal, bench_ipv4_non_decimal, bench_dns);
247+
criterion_main!(benches);

0 commit comments

Comments
 (0)