Skip to content

Commit 99fa1bb

Browse files
authored
tr: fix possible panic on an invalid UTF-8 set sequence (uutils#10791)
1 parent 2a45733 commit 99fa1bb

File tree

4 files changed

+36
-7
lines changed

4 files changed

+36
-7
lines changed

src/uu/tr/locales/en-US.ftl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ tr-error-write-error = write error
2323
# Warning messages
2424
tr-warning-unescaped-backslash = warning: an unescaped backslash at end of string is not portable
2525
tr-warning-ambiguous-octal-escape = the ambiguous octal escape \{ $origin_octal } is being interpreted as the 2-byte sequence \0{ $actual_octal_tail }, { $outstand_char }
26+
tr-warning-invalid-utf8 = invalid utf8 sequence
2627
2728
# Sequence parsing error messages
2829
tr-error-missing-char-class-name = missing character class name '[::]'

src/uu/tr/locales/fr-FR.ftl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ tr-error-write-error = erreur d'écriture
2424
tr-warning-unescaped-backslash = avertissement : une barre oblique inverse non échappée à la fin de la chaîne n'est pas portable
2525
tr-warning-ambiguous-octal-escape = l'échappement octal ambigu \{ $origin_octal } est en cours
2626
d'interprétation comme la séquence de 2 octets \0{ $actual_octal_tail }, { $outstand_char }
27+
tr-warning-invalid-utf8 = séquence UTF-8 non valide
2728
2829
# Messages d'erreur d'analyse de séquence
2930
tr-error-missing-char-class-name = nom de classe de caractères manquant '[::]'

src/uu/tr/src/operation.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -379,13 +379,16 @@ impl Sequence {
379379
let str_to_parse = std::str::from_utf8(out).unwrap();
380380
let result = u8::from_str_radix(str_to_parse, 8).ok();
381381
if result.is_none() {
382-
let origin_octal: &str = std::str::from_utf8(input).unwrap();
383-
let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap();
384-
let outstand_char: char = char::from_u32(input[2] as u32).unwrap();
385-
show_warning!(
386-
"{}",
387-
translate!("tr-warning-ambiguous-octal-escape", "origin_octal" => origin_octal, "actual_octal_tail" => actual_octal_tail, "outstand_char" => outstand_char)
388-
);
382+
if let Ok(origin_octal) = std::str::from_utf8(input) {
383+
let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap();
384+
let outstand_char: char = char::from_u32(input[2] as u32).unwrap();
385+
show_warning!(
386+
"{}",
387+
translate!("tr-warning-ambiguous-octal-escape", "origin_octal" => origin_octal, "actual_octal_tail" => actual_octal_tail, "outstand_char" => outstand_char)
388+
);
389+
} else {
390+
show_warning!("{}", translate!("tr-warning-invalid-utf8"));
391+
}
389392
}
390393
result
391394
},

tests/by-util/test_tr.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,6 +1544,30 @@ fn test_non_digit_repeat() {
15441544
.stderr_only("tr: invalid repeat count 'c' in [c*n] construct\n");
15451545
}
15461546

1547+
/// Regression test for `tr -d` sets in which an out-of-range octal escape (`\501`) is followed
/// by further bytes: once by a valid UTF-8 byte and once by a byte that is not valid UTF-8.
#[test]
1548+
// NOTE(review): presumably Unix-only because `OsStr::from_bytes` comes from the Unix
// `OsStrExt` trait — confirm against the file's imports.
#[cfg(unix)]
1549+
fn test_octal_escape_ambiguous_followed_by_non_utf8() {
1550+
// Valid UTF-8 set: this case does not trigger the panic.
// `\501` is reported as an ambiguous octal escape; the expected output shows that
// '(' (octal 050), '1' and 'a' are all deleted from "(1a)", leaving only ")".
1551+
let set1 = OsStr::from_bytes(b"\\501a");
1552+
new_ucmd!()
1553+
.arg("-d")
1554+
.arg(set1)
1555+
.pipe_in("(1a)")
1556+
.succeeds()
1557+
.stderr_contains("warning: the ambiguous octal escape")
1558+
.stdout_is(")");
1559+
1560+
// Invalid UTF-8 set: the escape is followed by the raw byte 0xff.
// A user is not supposed to pass such a set, but
1561+
// the command should still be robust against it: it must succeed and emit a warning.
1562+
let set1 = OsStr::from_bytes(b"\\501\xff");
1563+
new_ucmd!()
1564+
.arg("-d")
1565+
.arg(set1)
1566+
.pipe_in([b'(', b'1', 0xff, b')'])
1567+
.succeeds()
1568+
.stderr_contains("warning: invalid utf8 sequence");
1569+
}
1570+
15471571
#[cfg(target_os = "linux")]
15481572
#[test]
15491573
fn test_failed_write_is_reported() {

0 commit comments

Comments
 (0)