Skip to content

Commit 9ce6e12

Browse files
committed
fix(html): percent-encode unsafe characters in symbol page paths
Symbol names that come from string literals (e.g. `obj["a/b"]` or a property named `'"><img src=x onerror=alert(1)>'`) can contain characters that are reserved on common filesystems (Windows forbids `<>:"/\|?*`) or that have special meaning in a URL. Such a name was used verbatim as the file-name component of the generated page, producing an invalid path that crashed `deno doc --html` on Windows, and an unescaped link that could 404 or inject markup into the surrounding attribute. Percent-encode the unsafe characters when building both the written file name and every link to it, so they stay consistent. Ordinary identifier names are unaffected (the encoder borrows without allocating).
1 parent 64a834e commit 9ce6e12

7 files changed

Lines changed: 149 additions & 15 deletions

src/html/mod.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,8 +1401,11 @@ pub fn generate(
14011401
Some(short_path),
14021402
);
14031403

1404-
let file_name =
1405-
format!("{}/~/{}.html", short_path.path, symbol_group_ctx.name);
1404+
let file_name = format!(
1405+
"{}/~/{}.html",
1406+
short_path.path,
1407+
util::sanitize_symbol_path_part(&symbol_group_ctx.name)
1408+
);
14061409

14071410
let page_ctx = pages::SymbolPageCtx {
14081411
html_head_ctx,
@@ -1426,8 +1429,11 @@ pub fn generate(
14261429
let redirect =
14271430
serde_json::json!({ "kind": "redirect", "path": href });
14281431

1429-
let file_name =
1430-
format!("{}/~/{}.html", short_path.path, current_symbol);
1432+
let file_name = format!(
1433+
"{}/~/{}.html",
1434+
short_path.path,
1435+
util::sanitize_symbol_path_part(&current_symbol)
1436+
);
14311437

14321438
vec![(file_name, ctx.render("pages/redirect", &redirect))]
14331439
}
@@ -1616,8 +1622,11 @@ where
16161622
continue;
16171623
}
16181624

1619-
let file_name =
1620-
format!("{}/~/{}.json", short_path.path, symbol_group_ctx.name);
1625+
let file_name = format!(
1626+
"{}/~/{}.json",
1627+
short_path.path,
1628+
util::sanitize_symbol_path_part(&symbol_group_ctx.name)
1629+
);
16211630
if !emitted_keys.insert(file_name.clone()) {
16221631
continue;
16231632
}
@@ -1667,8 +1676,11 @@ where
16671676
continue;
16681677
}
16691678

1670-
let file_name =
1671-
format!("{}/~/{}.json", short_path.path, current_symbol);
1679+
let file_name = format!(
1680+
"{}/~/{}.json",
1681+
short_path.path,
1682+
util::sanitize_symbol_path_part(&current_symbol)
1683+
);
16721684
if !emitted_keys.insert(file_name.clone()) {
16731685
continue;
16741686
}

src/html/util.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,11 @@ pub fn href_path_resolve(
402402
symbol: target_symbol,
403403
..
404404
} => {
405-
format!("{backs}./{}/~/{target_symbol}.html", target_file.path)
405+
format!(
406+
"{backs}./{}/~/{}.html",
407+
target_file.path,
408+
sanitize_symbol_path_part(target_symbol)
409+
)
406410
}
407411
UrlResolveKind::File { file: target_file } => {
408412
format!("{backs}./{}/index.html", target_file.path)
@@ -955,3 +959,57 @@ pub fn slugify(name: &str) -> String {
955959
.replace_all(&name.to_lowercase(), "")
956960
.replace(' ', "-")
957961
}
962+
963+
/// Whether a character is unsafe inside the file-name component of a symbol
964+
/// page, either because it is reserved on common filesystems (Windows forbids
965+
/// `<>:"/\|?*`) or because it has special meaning in a URL path segment (`#`,
966+
/// `?`, `%`, whitespace, …).
967+
fn is_symbol_path_unsafe(c: char) -> bool {
968+
c.is_control()
969+
|| c.is_whitespace()
970+
|| matches!(
971+
c,
972+
'"'
973+
| '#'
974+
| '%'
975+
| '<'
976+
| '>'
977+
| '?'
978+
| '`'
979+
| '{'
980+
| '}'
981+
| '|'
982+
| '\\'
983+
| '^'
984+
| '*'
985+
| ':'
986+
| '/'
987+
)
988+
}
989+
990+
/// Percent-encode any character of a symbol name that is unsafe to use as the
991+
/// file-name component of its generated page. Symbol names are normally plain
992+
/// identifiers, for which this is a borrow-only no-op; but a symbol whose name
993+
/// comes from a string literal (e.g. `obj["a/b"]` or `'"><img …>'`) can contain
994+
/// characters that would otherwise produce an invalid file path or a broken —
995+
/// and potentially HTML-injecting — link. The same encoding is applied wherever
996+
/// such a path is built (the written file name and every link to it) so they
997+
/// stay consistent. See issue #724.
998+
pub(crate) fn sanitize_symbol_path_part(name: &str) -> Cow<'_, str> {
999+
if !name.chars().any(is_symbol_path_unsafe) {
1000+
return Cow::Borrowed(name);
1001+
}
1002+
1003+
let mut out = String::with_capacity(name.len());
1004+
let mut buf = [0u8; 4];
1005+
for c in name.chars() {
1006+
if is_symbol_path_unsafe(c) {
1007+
for b in c.encode_utf8(&mut buf).as_bytes() {
1008+
out.push_str(&format!("%{b:02X}"));
1009+
}
1010+
} else {
1011+
out.push(c);
1012+
}
1013+
}
1014+
Cow::Owned(out)
1015+
}

tests/html_test.rs

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ async fn html_doc_files_multiple() {
352352
"./~/Enum2.html",
353353
"./~/Foo.bar.html",
354354
"./~/Foo.html",
355-
"./~/Foo.prototype.\"><img src=x onerror=alert(1)>.html",
355+
"./~/Foo.prototype.%22%3E%3Cimg%20src=x%20onerror=alert(1)%3E.html",
356356
"./~/Foo.prototype.[Symbol.iterator].html",
357357
"./~/Foo.prototype.foo.html",
358358
"./~/Foo.prototype.getter.html",
@@ -697,6 +697,70 @@ async fn parse_file(path: &std::path::Path) -> ParseOutput {
697697
parse_source(&content).await
698698
}
699699

700+
// Regression test for https://github.com/denoland/deno_doc/issues/724:
701+
// a member whose name comes from a string literal can contain characters that
702+
// are invalid in a file name (Windows reserves `<>:"/\|?*`) or that break a
703+
// URL. The generated page path must be filesystem- and URL-safe, and the link
704+
// to it must use the same encoded form so it still resolves.
705+
#[tokio::test]
706+
async fn html_symbol_name_unsafe_chars_in_path() {
707+
let source = r#"
708+
export class Foo {
709+
"a/b<c>": number = 0;
710+
}
711+
"#;
712+
713+
let ctx = GenerateCtx::create_basic(
714+
GenerateOptions {
715+
package_name: None,
716+
main_entrypoint: None,
717+
href_resolver: Arc::new(EmptyResolver),
718+
usage_composer: Some(Arc::new(EmptyResolver)),
719+
rewrite_map: None,
720+
category_docs: None,
721+
disable_search: false,
722+
symbol_redirect_map: None,
723+
default_symbol_map: None,
724+
markdown_renderer: comrak::create_renderer(None, None, None),
725+
markdown_stripper: Arc::new(comrak::strip),
726+
head_inject: None,
727+
id_prefix: None,
728+
diff_only: false,
729+
},
730+
parse_source(source).await,
731+
None,
732+
)
733+
.unwrap();
734+
735+
let files = generate(ctx).unwrap();
736+
737+
// No generated file path may contain characters that are reserved on common
738+
// filesystems or that need escaping in a URL.
739+
for name in files.keys() {
740+
assert!(
741+
!name.contains(['"', '<', '>', '|', '?', '*', '\\', ' ']),
742+
"generated file path is not filesystem/URL safe: {name}"
743+
);
744+
}
745+
746+
// The member still gets a page, under a percent-encoded name
747+
// (`a/b<c>` -> `a%2Fb%3Cc%3E`).
748+
assert!(
749+
files
750+
.keys()
751+
.any(|k| k.ends_with("/~/Foo.prototype.a%2Fb%3Cc%3E.html")),
752+
"expected a percent-encoded page for the member, got: {:?}",
753+
files.keys().collect::<Vec<_>>()
754+
);
755+
756+
// No rendered page may contain the raw (unencoded) link, which would both
757+
// 404 and inject markup into the surrounding attribute.
758+
assert!(
759+
files.values().all(|content| !content.contains("a/b<c>")),
760+
"a rendered page contains an unsafe, unencoded symbol link"
761+
);
762+
}
763+
700764
#[tokio::test]
701765
async fn diff_kind_change() {
702766
let test_dir = std::env::current_dir()

tests/snapshots/html_test__html_doc_files_multiple-23.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ Properties</h2></div><div class="space-y-8"><div class="anchorable docEntry" id=
555555
</defs>
556556
</svg>
557557
</a>
558-
<a class="font-bold font-lg link" href="..&#x2F;.&#x2F;.&#x2F;~&#x2F;Foo.prototype.&quot;&gt;&lt;img src=x onerror=alert(1)&gt;.html">"&gt;&lt;img src=x onerror=alert(1)&gt;</a><span class="font-medium text-stone-500 dark:text-stone-200"><span>: <span class="td-kw">number</span></span></span>
558+
<a class="font-bold font-lg link" href="..&#x2F;.&#x2F;.&#x2F;~&#x2F;Foo.prototype.%22%3E%3Cimg%20src=x%20onerror=alert(1)%3E.html">"&gt;&lt;img src=x onerror=alert(1)&gt;</a><span class="font-medium text-stone-500 dark:text-stone-200"><span>: <span class="td-kw">number</span></span></span>
559559
</code>
560560
</div></div></div>
561561
<div class="anchorable docEntry" id="property_foo">

tests/snapshots/html_test__html_doc_files_multiple-73.snap

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/snapshots/html_test__symbol_group.snap

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ expression: files
216216
{
217217
"name_prefix": null,
218218
"name": "\"&gt;&lt;img src=x onerror=alert(1)&gt;",
219-
"name_href": "../././~/Foo.prototype.\"><img src=x onerror=alert(1)>.html",
219+
"name_href": "../././~/Foo.prototype.%22%3E%3Cimg%20src=x%20onerror=alert(1)%3E.html",
220220
"content": "<span>: <span class=\"td-kw\">number</span></span>",
221221
"anchor": {
222222
"id": "property_&quot;&gt;&lt;img-src=x-onerror=alert(1)&gt;"
@@ -7841,7 +7841,7 @@ expression: files
78417841
},
78427842
{
78437843
"name": "\"><img src=x onerror=alert(1)>",
7844-
"href": "../././~/Foo.prototype.\"><img src=x onerror=alert(1)>.html"
7844+
"href": "../././~/Foo.prototype.%22%3E%3Cimg%20src=x%20onerror=alert(1)%3E.html"
78457845
}
78467846
]
78477847
},

tests/snapshots/html_test__symbol_search.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ expression: search_index
212212
"name": "Foo.prototype.\"&gt;&lt;img src=x onerror=alert(1)&gt;",
213213
"file": ".",
214214
"doc": "",
215-
"url": "././~/Foo.prototype.\"><img src=x onerror=alert(1)>.html",
215+
"url": "././~/Foo.prototype.%22%3E%3Cimg%20src=x%20onerror=alert(1)%3E.html",
216216
"deprecated": false
217217
},
218218
{

0 commit comments

Comments
 (0)