Skip to content

Commit 5bf7890

Browse files
authored
fix: recognize code-fence language when the directive uses a comma (#806)
fix: recognize language when code-fence directive uses a comma

A code block whose info string separates the language from a directive with a comma (e.g. ```ts, no-eval```) was not syntax-highlighted. Markdown parsers split the info string at the first whitespace, so the language token arrived as `ts,` with the comma attached, which failed the example-language check and was passed verbatim to the syntax highlighter. Add a `language_token` helper that strips directive separators (whitespace and commas) and use it both when processing hidden example lines and when handing the language to the highlighter, so `ts,`, `ts, no-eval` and `ts,no-eval` are all treated as `ts`.
1 parent 867e43c commit 5bf7890

2 files changed

Lines changed: 56 additions & 3 deletions

File tree

src/html/comrak.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ impl SyntaxHighlighterAdapter for ComrakHighlightWrapperAdapter {
212212
lang: Option<&str>,
213213
code: &str,
214214
) -> std::io::Result<()> {
215+
// The markdown parser hands us the info string up to the first whitespace,
216+
// so a comma-separated directive such as `ts, no-eval` arrives as `ts,`.
217+
// Normalize it to the bare language token so both hidden-line processing and
218+
// the downstream highlighter recognize it.
219+
let lang = lang.map(crate::util::example_code::language_token);
220+
215221
// Resolve any "hidden" lines (rustdoc style): they are removed from the
216222
// displayed code but kept in the copyable form so copied snippets still
217223
// run. Non-example languages are left untouched.
@@ -303,4 +309,17 @@ mod tests {
303309
assert!(displayed.contains("# shown"));
304310
assert!(!displayed.contains("## shown"));
305311
}
312+
313+
#[test]
fn comma_in_info_string_is_treated_as_example() {
    // Regression guard: with a comma-separated directive (`ts, no-eval`) the
    // language reaches the highlighter as `ts,`. It still has to count as a
    // `ts` example, otherwise the `# `-prefixed hidden line is never processed.
    let rendered = render("```ts, no-eval\n# const x = 1;\nconsole.log(x);\n```");

    // Text up to the first closing `</code>` tag is checked as the displayed form.
    let visible = rendered.split("</code>").next().unwrap();
    assert!(visible.contains("console.log(x);"));
    assert!(!visible.contains("const x = 1;"));

    // The hidden line must still be present somewhere in the output so the
    // full snippet remains copyable.
    assert!(rendered.contains("const x = 1;"));
}
306325
}

src/util/example_code.rs

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@
1212
//!
1313
//! See <https://doc.rust-lang.org/rustdoc/write-documentation/documentation-tests.html#hiding-portions-of-the-example>.
1414
15+
/// Extracts the language token from a fenced code block's info string.
///
/// The info string may carry additional attributes after the language, written
/// either space-separated (`ts ignore`) or comma-separated (`ts, no-eval` /
/// `ts,no-eval`). Only the leading token denotes the language. Markdown parsers
/// typically split the info string at the first whitespace, which leaves a
/// trailing comma attached (`ts,`); splitting on commas as well as whitespace
/// recovers the bare language (`ts`).
///
/// Leading whitespace is skipped before the token is taken, so a padded info
/// string such as `"  ts ignore"` still yields `ts` rather than an empty token.
///
/// Returns an empty string when `lang` is empty or contains only whitespace.
/// The returned slice borrows from `lang`; nothing is allocated.
pub fn language_token(lang: &str) -> &str {
    lang.trim_start()
        // Both whitespace and commas terminate the language token.
        .split(|c: char| c.is_whitespace() || c == ',')
        .next()
        // `split` always yields at least one item, so this is only a fallback.
        .unwrap_or("")
}
29+
1530
/// Whether a fenced code block's language denotes a runnable JavaScript or
1631
/// TypeScript example.
1732
///
@@ -53,9 +68,7 @@ pub fn process_example_code(
5368
lang: Option<&str>,
5469
code: &str,
5570
) -> Option<ExampleCode> {
56-
// The info string may carry additional attributes (e.g. `ts ignore`); only
57-
// the first token denotes the language.
58-
let lang = lang.unwrap_or("").split_whitespace().next().unwrap_or("");
71+
let lang = language_token(lang.unwrap_or(""));
5972
if !is_example_lang(lang) {
6073
return None;
6174
}
@@ -187,6 +200,27 @@ mod tests {
187200
assert_eq!(displayed(Some("ts ignore"), code).unwrap(), "shown;\n");
188201
}
189202

203+
#[test]
fn language_token_strips_directive_separators() {
    // Each pair is (info string, expected language token).
    let cases = [
        ("ts", "ts"),
        ("ts no-eval", "ts"),
        // A comma-separated directive: markdown parsers leave the comma
        // attached to the first token, so the input arrives as `ts,`.
        ("ts,", "ts"),
        ("ts, no-eval", "ts"),
        ("ts,no-eval", "ts"),
        ("", ""),
    ];

    for &(info, expected) in &cases {
        assert_eq!(language_token(info), expected, "info string: {:?}", info);
    }
}
214+
215+
#[test]
fn handles_comma_separated_info_string() {
    // Regression test: a comma in the directive (`ts, no-eval`) must not
    // prevent the language from being recognized as a runnable example, so the
    // `# `-prefixed line is dropped from the displayed output in both spellings.
    let code = "# hidden;\nshown;\n";

    for &info in &["ts,", "ts, no-eval"] {
        assert_eq!(displayed(Some(info), code).unwrap(), "shown;\n");
    }
}
223+
190224
#[test]
191225
fn handles_missing_trailing_newline() {
192226
let code = "# hidden;\nshown;";

0 commit comments

Comments
 (0)