Skip to content

Commit 6bb998b

Browse files
committed
fix textdecoder labels
1 parent 26271b7 commit 6bb998b

File tree

2 files changed

+111
-11
lines changed

2 files changed

+111
-11
lines changed

core/runtime/src/text/mod.rs

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,51 @@ pub enum Encoding {
3434
Utf16Be,
3535
}
3636

37+
/// Labels that `resolve_text_decoder_label` maps to [`Encoding::Utf8`].
///
/// Entries are stored lowercase; lookups compare ASCII case-insensitively.
const UTF_8_LABELS: &[&str] = &[
    "unicode-1-1-utf-8",
    "unicode11utf8",
    "unicode20utf8",
    "utf-8",
    "utf8",
    "x-unicode20utf8",
];

/// Labels that `resolve_text_decoder_label` maps to [`Encoding::Utf16Be`].
const UTF_16BE_LABELS: &[&str] = &["unicodefffe", "utf-16be"];

/// Labels that `resolve_text_decoder_label` maps to [`Encoding::Utf16Le`].
///
/// The endianness-neutral spellings (`utf-16`, `unicode`, `ucs-2`, ...) live
/// here, so a bare `utf-16` label selects little endian.
const UTF_16LE_LABELS: &[&str] = &[
    "csunicode",
    "iso-10646-ucs-2",
    "ucs-2",
    "unicode",
    "unicodefeff",
    "utf-16",
    "utf-16le",
];
57+
58+
#[inline]
59+
fn resolve_text_decoder_label(label: &str) -> Option<Encoding> {
60+
let label = label.trim_matches(['\u{0009}', '\u{000A}', '\u{000C}', '\u{000D}', '\u{0020}']);
61+
62+
if UTF_8_LABELS
63+
.iter()
64+
.any(|supported| label.eq_ignore_ascii_case(supported))
65+
{
66+
Some(Encoding::Utf8)
67+
} else if UTF_16LE_LABELS
68+
.iter()
69+
.any(|supported| label.eq_ignore_ascii_case(supported))
70+
{
71+
Some(Encoding::Utf16Le)
72+
} else if UTF_16BE_LABELS
73+
.iter()
74+
.any(|supported| label.eq_ignore_ascii_case(supported))
75+
{
76+
Some(Encoding::Utf16Be)
77+
} else {
78+
None
79+
}
80+
}
81+
3782
/// The [`TextDecoder`][mdn] class represents a decoder for a specific method, that is
3883
/// a specific character encoding, like `utf-8`.
3984
///
@@ -62,17 +107,12 @@ impl TextDecoder {
62107
let ignore_bom = options.and_then(|o| o.ignore_bom).unwrap_or(false);
63108

64109
let encoding = match encoding {
65-
Some(enc) => match enc.to_std_string_lossy().as_str() {
66-
"utf-8" => Encoding::Utf8,
67-
// Default encoding is Little Endian.
68-
"utf-16" | "utf-16le" => Encoding::Utf16Le,
69-
"utf-16be" => Encoding::Utf16Be,
70-
e => {
71-
return Err(
72-
js_error!(RangeError: "The given encoding '{}' is not supported.", e),
73-
);
74-
}
75-
},
110+
Some(enc) => {
111+
let label = enc.to_std_string_lossy();
112+
resolve_text_decoder_label(&label).ok_or_else(
113+
|| js_error!(RangeError: "The given encoding '{}' is not supported.", label),
114+
)?
115+
}
76116
None => Encoding::default(),
77117
};
78118

core/runtime/src/text/tests.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,66 @@ fn decoder_bom_ignore_bom_false(encoding: &'static str, bytes: &'static [u8]) {
288288
);
289289
}
290290

291+
#[test_case("UTF-8", "utf-8"; "uppercase utf8")]
292+
#[test_case(" utf-8 ", "utf-8"; "spaced utf8")]
293+
#[test_case("\nutf-16\t", "utf-16le"; "spaced utf16")]
294+
#[test_case("UTF-16BE", "utf-16be"; "uppercase utf16be")]
295+
#[test_case("utf8", "utf-8"; "utf8 alias")]
296+
#[test_case("Unicode-1-1-UTF-8", "utf-8"; "unicode alias")]
297+
#[test_case("csUnicode", "utf-16le"; "csunicode alias")]
298+
#[test_case(" unicodefeff ", "utf-16le"; "unicodefeff alias")]
299+
#[test_case("UnicodeFFFE", "utf-16be"; "unicodefffe alias")]
300+
fn decoder_normalizes_supported_labels(label: &'static str, expected: &'static str) {
301+
let context = &mut Context::default();
302+
text::register(None, context).unwrap();
303+
304+
run_test_actions_with(
305+
[
306+
TestAction::run(format!(
307+
r#"
308+
const d = new TextDecoder({label:?});
309+
encoding = d.encoding;
310+
"#
311+
)),
312+
TestAction::inspect_context(move |context| {
313+
let encoding = context
314+
.global_object()
315+
.get(js_str!("encoding"), context)
316+
.unwrap();
317+
assert_eq!(encoding.as_string(), Some(JsString::from(expected)));
318+
}),
319+
],
320+
context,
321+
);
322+
}
323+
324+
/// A label that is still unknown once surrounding whitespace is removed
/// (`" utf-32 "`) must make the `TextDecoder` constructor throw a `RangeError`.
#[test]
fn decoder_rejects_unsupported_label_after_normalization() {
    let context = &mut Context::default();
    text::register(None, context).unwrap();

    // The script records on the global `threw` whether a RangeError was caught.
    let attempt = TestAction::run(indoc! {r#"
        threw = false;
        try {
            new TextDecoder(" utf-32 ");
        } catch (e) {
            threw = e instanceof RangeError;
        }
    "#});

    let verify = TestAction::inspect_context(|context| {
        let threw = context
            .global_object()
            .get(js_str!("threw"), context)
            .unwrap();
        assert_eq!(threw.as_boolean(), Some(true));
    });

    run_test_actions_with([attempt, verify], context);
}
350+
291351
#[test]
292352
fn decoder_ignore_bom_getter() {
293353
let context = &mut Context::default();

0 commit comments

Comments
 (0)