Skip to content

Commit 40b6bd5

Browse files
committed
Implement normalize_url helper
1 parent 6f083cc commit 40b6bd5

2 files changed

Lines changed: 217 additions & 3 deletions

File tree

lib/asimov-module/Cargo.toml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,19 @@ default = ["all", "std"]
1919
all = ["cli", "tracing", "serde"]
2020
cli = ["std", "dep:clientele", "clientele?/clap"]
2121
std = [
22+
"clientele?/std",
2223
"dep:asimov-env",
2324
"dep:getenv",
24-
"clientele?/std",
2525
"dogma/std",
2626
"getenv?/std",
27-
"serde?/std",
27+
"iri-string/std",
2828
"serde_json?/std",
29+
"serde?/std",
2930
"slab/std",
3031
"thiserror/std",
31-
"tracing?/std",
3232
"tracing-subscriber?/fmt",
3333
"tracing-subscriber?/std",
34+
"tracing?/std",
3435
"url/std",
3536
]
3637
unstable = []
@@ -47,6 +48,7 @@ secrecy.workspace = true
4748
slab.workspace = true
4849
thiserror.workspace = true
4950
url.workspace = true
51+
iri-string = { version = "0.7", default-features = false, features = ["alloc"] }
5052

5153
# Optional dependencies:
5254
asimov-env = { workspace = true, optional = true }

lib/asimov-module/src/url.rs

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// This is free and unencumbered software released into the public domain.
2+
3+
use core::fmt::Write;
4+
use iri_string::types::IriReferenceString;
5+
use std::string::String;
6+
7+
pub fn normalize_url(url: &str) -> Result<String, iri_string::types::CreationError<String>> {
8+
let iri = IriReferenceString::try_from(url)
9+
.or_else(|_| IriReferenceString::try_from(url.replace(" ", "%20")))?;
10+
11+
let mut out: String = String::new();
12+
13+
// default `file:` scheme
14+
let scheme = iri.scheme_str().unwrap_or("file");
15+
write!(&mut out, "{scheme}:").unwrap();
16+
17+
if let Some(auth) = iri.authority_str() {
18+
write!(&mut out, "//{auth}").unwrap();
19+
}
20+
21+
let path = iri.path_str();
22+
23+
if scheme == "file"
24+
&& let Some(rest) = path.strip_prefix("~/")
25+
{
26+
let home_dir = std::env::home_dir().expect("unable to determine home directory");
27+
let path2 = home_dir.join(rest);
28+
let path3 = std::path::absolute(&path2).unwrap_or(path2);
29+
let path4 = path3.to_str().unwrap_or(path);
30+
31+
write!(&mut out, "{}", path4).unwrap();
32+
} else if scheme == "file" && !path.starts_with('/') {
33+
let cur_dir = std::env::current_dir().expect("unable to determine current directory");
34+
let path2 = cur_dir.join(path);
35+
let path3 = std::path::absolute(&path2).unwrap_or(path2);
36+
let path4 = path3.to_str().unwrap_or(path);
37+
38+
write!(&mut out, "{}", path4).unwrap();
39+
} else if iri.authority_str().is_some() && path.is_empty() {
40+
write!(&mut out, "/").unwrap();
41+
} else {
42+
write!(&mut out, "{path}").unwrap();
43+
}
44+
45+
if let Some(query) = iri.query() {
46+
write!(&mut out, "?{query}").unwrap()
47+
}
48+
49+
if let Some(fraq) = iri.fragment() {
50+
write!(&mut out, "#{fraq}").unwrap()
51+
}
52+
53+
Ok(out)
54+
}
55+
56+
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    /// Checks `normalize_url` against platform-independent URLs, then against
    /// file-path inputs whose expected output depends on the host platform,
    /// the home directory, and the current working directory.
    #[test]
    fn url_normalization() {
        // (input, expected output); these do not depend on the environment.
        let cases = [
            ("https://example.org", "https://example.org/"),
            ("https://example.org/", "https://example.org/"),
            ("http://example.com/path", "http://example.com/path"),
            (
                "https://user:pass@example.org:8080/path?query=value#fragment",
                "https://user:pass@example.org:8080/path?query=value#fragment",
            ),
            ("near://testnet/123456789", "near://testnet/123456789"),
            (
                "ftp://files.example.com/file.txt",
                "ftp://files.example.com/file.txt",
            ),
            ("ws://localhost:3000/socket", "ws://localhost:3000/socket"),
            ("mailto:user@example.com", "mailto:user@example.com"),
            (
                "https://example.org/path with spaces",
                "https://example.org/path%20with%20spaces",
            ),
            (
                "https://example.org/path+with+plus",
                "https://example.org/path+with+plus",
            ),
            (
                "https://example.org/path%20already%20encoded",
                "https://example.org/path%20already%20encoded",
            ),
            (
                "https://example.org/?q=test&foo=bar",
                "https://example.org/?q=test&foo=bar",
            ),
            (
                "https://example.org/page#section1",
                "https://example.org/page#section1",
            ),
            (
                "https://example.org/search?q=hello world",
                "https://example.org/search?q=hello%20world",
            ),
            (
                "data:text/plain;base64,SGVsbG8=",
                "data:text/plain;base64,SGVsbG8=",
            ),
            ("tel:+1-555-123-4567", "tel:+1-555-123-4567"),
            ("urn:isbn:1234567890", "urn:isbn:1234567890"),
        ];

        for (input, want) in cases {
            assert_eq!(normalize_url(input).unwrap(), want, "input: {:?}", input);
        }

        #[cfg(unix)]
        {
            // Rooted paths gain the `file:` scheme and are otherwise kept.
            let cases = [
                ("/file with spaces.txt", "file:/file%20with%20spaces.txt"),
                ("/file+with+pluses.txt", "file:/file+with+pluses.txt"),
            ];

            for (input, want) in cases {
                assert_eq!(normalize_url(input).unwrap(), want, "input: {:?}", input);
            }

            // Skipped when the environment has no home directory.
            if let Some(home_dir) = std::env::home_dir() {
                let home_dir = home_dir.display().to_string();

                let input = "~/path/to/file.txt";
                let want = "file:".to_string() + &home_dir + "/path/to/file.txt";
                assert_eq!(
                    normalize_url(input).unwrap(),
                    want,
                    "`~/` should be expanded to the home directory, input: {:?}",
                    input
                );
            }

            let cur_dir = std::env::current_dir().unwrap().display().to_string();

            // (input, expected suffix after the current directory, reason)
            let cases = [
                (
                    "path/to/file.txt",
                    "/path/to/file.txt",
                    "relative path should be appended to the current directory",
                ),
                (
                    "../path/./file.txt",
                    "/../path/file.txt",
                    "relative path should be appended to the current directory",
                ),
                (
                    "another-type-of-a-string",
                    "/another-type-of-a-string",
                    "non-path-looking input should be treated as a file in current directory",
                ),
            ];

            for (input, suffix, why) in cases {
                let want = "file:".to_string() + &cur_dir + suffix;
                assert_eq!(
                    normalize_url(input).unwrap(),
                    want,
                    "{}, input: {:?}",
                    why,
                    input
                );
            }

            // TODO: add a case asserting that the output is URL-encoded, e.g.
            // that "hello\\ world!" normalizes to
            // `file:<cur_dir>/hello%5C%20world!`.
        }

        #[cfg(windows)]
        {
            // NOTE(review): these cases expect a drive-qualified
            // `file:///<drive>:/...` form for rooted inputs; confirm that
            // `normalize_url` produces it on Windows.
            let cwd = std::env::current_dir().unwrap();
            let drive = cwd.to_str().unwrap().chars().next().unwrap();
            let cases = [
                (
                    "/file with spaces.txt",
                    format!("file:///{drive}:/file%20with%20spaces.txt"),
                ),
                (
                    "/file+with+pluses.txt",
                    format!("file:///{drive}:/file+with+pluses.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_eq!(normalize_url(input).unwrap(), want, "input: {:?}", input);
            }
        }
    }
}

0 commit comments

Comments
 (0)