Skip to content

Commit 9bada21

Browse files
committed
fix: ReDoS in CSS tokenizer ident rule
A second instance of the same backtracking pattern: `{unicode}`'s `[0-9A-Fa-f]{1,6}` admits six match lengths per escape position, and {nmchar} appears under `*` in {name}. When the `{ident}\({w}` rule fails (no `(` after an identifier-shaped prefix), the engine backtracks through `{nmchar}*` for 6**N parses. Payload `\aaaaaa\aaaaaa...X` triggers it: at n=8 it takes 330ms, at n=10 it takes 11.4s. Wrap the body alternations of {nmchar} and {nmstart} in atomic groups, mirroring the prior STRING-rule fix. Each nmchar/nmstart match is locked once committed, so the outer `{nmchar}*` can release whole iterations but cannot try alternative inner consumption of the {1,6} hex run. Add a benchmark test asserting linear time, similar to previous. ref: GHSA-c4rq-3m3g-8wgx
1 parent 807f6ee commit 9bada21

3 files changed

Lines changed: 24 additions & 5 deletions

File tree

lib/nokogiri/css/tokenizer.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# frozen_string_literal: true
12
#--
23
# DO NOT MODIFY!!!!
34
# This file is automatically generated by rex 1.0.8
@@ -62,13 +63,13 @@ def _next_token
6263
when (text = @ss.scan(/has\([\s]*/))
6364
action { [:HAS, text] }
6465

65-
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
66+
when (text = @ss.scan(/-?(?>[_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))(?>[_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
6667
action { [:FUNCTION, text] }
6768

68-
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
69+
when (text = @ss.scan(/-?(?>[_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))(?>[_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
6970
action { [:IDENT, text] }
7071

71-
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
72+
when (text = @ss.scan(/\#(?>[_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
7273
action { [:HASH, text] }
7374

7475
when (text = @ss.scan(/[\s]*~=[\s]*/))

lib/nokogiri/css/tokenizer.rex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ macro
1111
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
1212

1313
escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
14-
nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
15-
nmstart ([_A-Za-z]|{nonascii}|{escape})
14+
nmchar (?>[_A-Za-z0-9-]|{nonascii}|{escape})
15+
nmstart (?>[_A-Za-z]|{nonascii}|{escape})
1616
name {nmstart}{nmchar}*
1717
ident -?{name}
1818
charref {nmchar}+

test/css/bench_tokenizer.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,22 @@ class TestBenchCSSTokenizer < Nokogiri::TestBenchmark
3737
end
3838
end
3939
end
40+
41+
# The function-call rule {ident}\({w} requires `(` after an identifier.
42+
# If the `(` is missing and the ident-shaped prefix contains many
43+
# `\<6-hex>` escapes, the engine backtracks through the {1,6}
44+
# ambiguity inside `{nmchar}*` for 6**N parses.
45+
describe "css ident tokenizer (function-rule failure ambiguity)" do
46+
bench_range { bench_linear(50_000, 300_000, 50_000) }
47+
48+
bench_performance_linear("redos in function rule", 0.99) do |n|
49+
Timeout.timeout(5) do
50+
payload = ('\\aaaaaa' * n) + "X"
51+
1000.times do
52+
Nokogiri::CSS.xpath_for(payload)
53+
rescue Nokogiri::CSS::SyntaxError
54+
end
55+
end
56+
end
57+
end
4058
end

0 commit comments

Comments
 (0)