Skip to content

Commit

Permalink
Merge pull request #1839 from Shopify/lexer-parse-error-with-utf8
Browse files Browse the repository at this point in the history
raise syntax error from lexer parser with UTF-8 character
  • Loading branch information
ggmichaelgo authored Oct 30, 2024
2 parents 1943441 + 8a9f33a commit c626dfa
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
11 changes: 9 additions & 2 deletions lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def tokenize

break if @ss.eos?

start_pos = @ss.pos
peeked = @ss.peek_byte

if (special = SPECIAL_TABLE[peeked])
Expand All @@ -196,7 +197,7 @@ def tokenize
@output << found
@ss.scan_byte
else
raise SyntaxError, "Unexpected character #{peeked.chr}"
raise_syntax_error(start_pos)
end
elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
@ss.scan_byte
Expand All @@ -217,14 +218,20 @@ def tokenize
[type, t]
end
else
raise SyntaxError, "Unexpected character #{peeked.chr}"
raise_syntax_error(start_pos)
end
end
end
# rubocop:enable Metrics/BlockNesting

@output << EOS
end

# Rewinds the scanner to +start_pos+ and raises a SyntaxError that names
# the character found at that position.
#
# @param start_pos [Integer] scanner position (as reported by +@ss.pos+)
#   of the offending character
# @raise [SyntaxError] always
def raise_syntax_error(start_pos)
  @ss.pos = start_pos
  # getch consumes one whole character rather than one byte, so a
  # multi-byte UTF-8 character is reported intact in the message.
  offending_char = @ss.getch
  raise SyntaxError, "Unexpected character #{offending_char}"
end
end

# Select the lexer implementation: Lexer2 when this StringScanner provides
# the scan_byte instance method, Lexer1 otherwise.
Lexer = StringScanner.instance_methods.include?(:scan_byte) ? Lexer2 : Lexer1
Expand Down
11 changes: 11 additions & 0 deletions test/unit/lexer_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,15 @@ def test_greater_than_two_digits
tokens = Lexer.new("foo > 12").tokenize
assert_equal([[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]], tokens)
end

# Tokenizing input that ends in a multi-byte UTF-8 character must raise a
# SyntaxError whose message contains the whole character.
def test_error_with_utf8_character
  error = assert_raises(SyntaxError) { Lexer.new("1 < 1Ø").tokenize }

  assert_equal('Liquid syntax error: Unexpected character Ø', error.message)
end
end

0 comments on commit c626dfa

Please sign in to comment.