Skip to content

Commit 8cda201

Browse files
committed
make improvements
1 parent 1e8931d commit 8cda201

File tree

16 files changed, with 12,643 additions and 4,348 deletions.

examples/bench_debug.rs

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
use std::hint::black_box;
2+
use std::time::Instant;
3+
4+
/// Benchmark inputs, visited in order by every benchmark in `main`.
///
/// Order is significant: `main` splits this list after the seventh entry and
/// times the two halves separately (labelled "fast-path" and "slow-path").
/// The seven `https` URLs must therefore stay ahead of the three trailing
/// entries: a `postgresql` URL with credentials and a port, a bare IPv4 host,
/// and a bracketed IPv6 host.
const URLS: &[&str] = &[
    "https://www.google.com/webhp?hl=en&ictx=2",
    "https://en.wikipedia.org/wiki/Dog#Roles_with_humans",
    "https://www.tiktok.com/@aguyandagolden/video/7133277734310038830",
    "https://business.twitter.com/en/help/troubleshooting/how-twitter-ads-work.html?ref=web",
    "https://images-na.ssl-images-amazon.com/images/I/41Gc3C8UysL.css?AUI",
    "https://www.reddit.com/?after=t3_zvz1ze",
    "https://www.reddit.com/login/?dest=https%3A%2F%2Fwww.reddit.com%2F",
    "postgresql://other:pass@localhost:5432/db",
    "http://192.168.1.1",
    "http://[2606:4700::1111]",
];
16+
17+
/// Times `f` and prints the mean wall-clock cost of one call.
///
/// `f` is first called 5000 times untimed as a warm-up, then `iters` times
/// between two `Instant` readings. The mean is written to stdout as
/// `"{label}: {ns} ns/iter"`, rounded to whole nanoseconds.
///
/// When `iters` is zero there is nothing to average (the mean would be
/// `0 / 0`, i.e. `NaN`), so `f` is not called at all and a "skipped" line is
/// printed instead.
fn bench(label: &str, iters: u32, f: impl Fn()) {
    // Untimed calls made before the measured loop.
    const WARMUP_ITERS: u32 = 5000;

    if iters == 0 {
        println!("{}: skipped (0 iterations)", label);
        return;
    }

    // Warm up
    for _ in 0..WARMUP_ITERS {
        f();
    }

    let start = Instant::now();
    for _ in 0..iters {
        f();
    }
    let elapsed = start.elapsed();

    // `u32 -> f64` is lossless; `u128 -> f64` may round, but only for
    // durations far beyond anything a benchmark run produces.
    let ns = elapsed.as_nanos() as f64 / f64::from(iters);
    println!("{}: {:.0} ns/iter", label, ns);
}
29+
30+
fn main() {
31+
let n = 300_000u32;
32+
33+
bench("parse (all 10)", n, || {
34+
for url in URLS {
35+
let _ = ada_url::Url::parse(black_box(url), None);
36+
}
37+
});
38+
39+
// Isolate fast-path vs slow-path
40+
bench("parse (first 7 fast-path)", n, || {
41+
for url in &URLS[..7] {
42+
let _ = ada_url::Url::parse(black_box(url), None);
43+
}
44+
});
45+
46+
bench("parse (3 slow-path)", n, || {
47+
for url in &URLS[7..] {
48+
let _ = ada_url::Url::parse(black_box(url), None);
49+
}
50+
});
51+
52+
bench("can_parse (all 10)", n, || {
53+
for url in URLS {
54+
let _ = ada_url::Url::can_parse(black_box(url), None);
55+
}
56+
});
57+
}

examples/check_idna.rs

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,24 @@ fn main() {
66
let arr = v.as_array().unwrap();
77
let mut wrong = 0;
88
for elem in arr.iter() {
9-
if elem.is_string() { continue; }
9+
if elem.is_string() {
10+
continue;
11+
}
1012
let o = elem.as_object().unwrap();
1113
let input = o["input"].as_str().unwrap_or("");
1214
let expected = o.get("output").unwrap();
1315
let result = Idna::ascii(input);
1416
if expected.is_null() {
15-
if !result.is_empty() { eprintln!("SHOULD_FAIL {:?} -> {:?}", input, result); wrong+=1; }
17+
if !result.is_empty() {
18+
eprintln!("SHOULD_FAIL {:?} -> {:?}", input, result);
19+
wrong += 1;
20+
}
1621
} else {
1722
let exp = expected.as_str().unwrap();
18-
if result != exp { eprintln!("WRONG {:?}: got={:?} exp={:?}", input, result, exp); wrong+=1; }
23+
if result != exp {
24+
eprintln!("WRONG {:?}: got={:?} exp={:?}", input, result, exp);
25+
wrong += 1;
26+
}
1927
}
2028
}
2129
eprintln!("Total wrong: {}", wrong);

src/checkers.rs

Lines changed: 38 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -66,9 +66,8 @@ pub fn is_ipv4(input: &str) -> bool {
6666
let last_char = b[b.len() - 1];
6767

6868
// Quick filter: the last character must be a decimal digit, a-f, or 'x'
69-
let possible = last_char.is_ascii_digit()
70-
|| matches!(last_char, b'a'..=b'f')
71-
|| last_char == b'x';
69+
let possible =
70+
last_char.is_ascii_digit() || matches!(last_char, b'a'..=b'f') || last_char == b'x';
7271
if !possible {
7372
return false;
7473
}
@@ -119,34 +118,54 @@ pub fn is_ipv4(input: &str) -> bool {
119118
const PATH_SIG_TABLE: [u8; 256] = {
120119
let mut t = [0u8; 256];
121120
// Needs encoding: C0 controls (0x00-0x1F), DEL (0x7F), high bytes (0x80-0xFF)
122-
let mut i = 0usize; while i <= 0x1F { t[i] |= 0x01; i += 1; }
123-
let mut i = 0x7Fusize; while i < 256 { t[i] |= 0x01; i += 1; }
121+
let mut i = 0usize;
122+
while i <= 0x1F {
123+
t[i] |= 0x01;
124+
i += 1;
125+
}
126+
let mut i = 0x7Fusize;
127+
while i < 256 {
128+
t[i] |= 0x01;
129+
i += 1;
130+
}
124131
// Needs encoding: specific printable ASCII chars
125132
let enc: &[u8] = b" \"#<>?^`{|}";
126133

127-
let mut i = 0; while i < enc.len() { t[enc[i] as usize] |= 0x01; i += 1; }
134+
let mut i = 0;
135+
while i < enc.len() {
136+
t[enc[i] as usize] |= 0x01;
137+
i += 1;
138+
}
128139
// Special flags
129140
t[b'\\' as usize] |= 0x02; // backslash
130-
t[b'.' as usize] |= 0x04; // dot
131-
t[b'%' as usize] |= 0x08; // percent
141+
t[b'.' as usize] |= 0x04; // dot
142+
t[b'%' as usize] |= 0x08; // percent
132143
t
133144
};
134145

135-
/// Compute a path-signature byte via an unrolled table lookup — branch-free.
136-
/// Returns a bitmask of the flags above.
146+
/// Compute a path-signature byte via Ada's exact 8-at-a-time unrolled lookup.
147+
///
148+
/// Ada C++ uses `for (; i + 7 < size; i += 8)` — we match that exactly.
137149
pub fn path_signature(input: &str) -> u8 {
138150
let b = input.as_bytes();
139151
let mut acc = 0u8;
140152
let mut i = 0;
141-
// Unrolled 4-at-a-time — same as Ada C++ style
142-
while i + 4 <= b.len() {
143-
acc |= PATH_SIG_TABLE[b[i] as usize]
144-
| PATH_SIG_TABLE[b[i+1] as usize]
145-
| PATH_SIG_TABLE[b[i+2] as usize]
146-
| PATH_SIG_TABLE[b[i+3] as usize];
147-
i += 4;
148-
}
149-
while i < b.len() { acc |= PATH_SIG_TABLE[b[i] as usize]; i += 1; }
153+
// 8-at-a-time — Ada C++ uses this exact unroll factor
154+
while i + 8 <= b.len() {
155+
acc |= PATH_SIG_TABLE[b[i] as usize]
156+
| PATH_SIG_TABLE[b[i + 1] as usize]
157+
| PATH_SIG_TABLE[b[i + 2] as usize]
158+
| PATH_SIG_TABLE[b[i + 3] as usize]
159+
| PATH_SIG_TABLE[b[i + 4] as usize]
160+
| PATH_SIG_TABLE[b[i + 5] as usize]
161+
| PATH_SIG_TABLE[b[i + 6] as usize]
162+
| PATH_SIG_TABLE[b[i + 7] as usize];
163+
i += 8;
164+
}
165+
while i < b.len() {
166+
acc |= PATH_SIG_TABLE[b[i] as usize];
167+
i += 1;
168+
}
150169
acc
151170
}
152171

src/helpers.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ use crate::checkers::{
1717
};
1818
use crate::scheme::SchemeType;
1919
use crate::unicode::{
20-
is_c0_control_or_space, is_double_dot_path_segment,
21-
is_single_dot_path_segment, percent_encode_append,
20+
is_c0_control_or_space, is_double_dot_path_segment, is_single_dot_path_segment,
21+
percent_encode_append,
2222
};
2323

2424
// ---------------------------------------------------------------------------
@@ -66,6 +66,7 @@ pub fn remove_ascii_tab_or_newline(s: &mut String) {
6666
/// Trim leading and trailing C0 control characters and ASCII space.
6767
/// Returns a `&str` slice into the original — **zero allocation**.
6868
#[inline]
69+
#[allow(dead_code)]
6970
pub fn trim_c0_whitespace(s: &str) -> &str {
7071
let start = s
7172
.as_bytes()

src/idna.rs

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,9 +37,13 @@ mod tests {
3737

3838
#[test]
3939
#[cfg(feature = "std")]
40-
fn unicode_works() { assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); }
40+
fn unicode_works() {
41+
assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca");
42+
}
4143

4244
#[test]
4345
#[cfg(feature = "std")]
44-
fn ascii_works() { assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); }
46+
fn ascii_works() {
47+
assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca");
48+
}
4549
}

0 commit comments

Comments
 (0)