Skip to content

Commit

Permalink
Merge pull request #1838 from Shopify/quirky-lexer-parsing
Browse files Browse the repository at this point in the history
fix parsing quirky incomplete expressions
  • Loading branch information
ggmichaelgo authored Oct 30, 2024
2 parents f6ffc37 + ffce6de commit 8e40f80
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 35 deletions.
6 changes: 3 additions & 3 deletions lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def tokenize
@output << DOTDOT
elsif special == DASH
# Special case for negative numbers
if NUMBER_TABLE[@ss.peek_byte]
if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte]
@ss.pos -= 1
@output << [:number, @ss.scan(NUMBER_LITERAL)]
else
Expand All @@ -192,15 +192,15 @@ def tokenize
end
elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
@ss.scan_byte
if (found = sub_table[@ss.peek_byte])
if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
@output << found
@ss.scan_byte
else
raise_syntax_error(start_pos)
end
elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
@ss.scan_byte
if (found = sub_table[@ss.peek_byte])
if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
@output << found
@ss.scan_byte
else
Expand Down
12 changes: 12 additions & 0 deletions test/integration/parsing_quirks_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,16 @@ def test_lookup_on_var_with_literal_name
# An identifier that merely starts with the keyword "contains" must lex as a
# plain id, not as the `contains` comparison operator.
def test_contains_in_id
  template = '{% if containsallshipments == true %} YES {% endif %}'
  assert_template_result(' YES ', template, { 'containsallshipments' => true })
end

# A dangling token after a complete assignment must not crash the lexer in
# lax mode; the assignment before it should still take effect.
def test_incomplete_expression
  with_error_mode(:lax) do
    %w(- > < = ! 1 a).each do |trailing|
      assert_template_result("false", "{% liquid assign foo = false #{trailing}\n%}{{ foo }}")
    end
  end
end
end # ParsingQuirksTest
98 changes: 66 additions & 32 deletions test/unit/lexer_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,88 +6,109 @@ class LexerUnitTest < Minitest::Test
include Liquid

# Single- and double-quoted string literals lex to :string tokens.
# (Removed diff residue: the pre-refactor `Lexer.new(...).tokenize` lines
# duplicated the helper-based assertion below.)
def test_strings
  assert_equal(
    [[:string, %('this is a test""')], [:string, %("wat 'lol'")], [:end_of_string]],
    tokenize(%( 'this is a test""' "wat 'lol'")),
  )
end

# Integer literals lex to :number tokens (value kept as a string).
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_integer
  assert_equal(
    [[:id, 'hi'], [:number, '50'], [:end_of_string]],
    tokenize('hi 50'),
  )
end

# Float literals also lex to :number tokens.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_float
  assert_equal(
    [[:id, 'hi'], [:number, '5.0'], [:end_of_string]],
    tokenize('hi 5.0'),
  )
end

# Comparison operators, including the word operator `contains`, lex to
# :comparison tokens. (Removed duplicated pre-refactor assertion.)
def test_comparison
  assert_equal(
    [[:comparison, '=='], [:comparison, '<>'], [:comparison, 'contains'], [:end_of_string]],
    tokenize('== <> contains '),
  )
end

# Operators need no surrounding whitespace to be recognized.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_comparison_without_whitespace
  assert_equal(
    [[:number, '1'], [:comparison, '>'], [:number, '0'], [:end_of_string]],
    tokenize('1>0'),
  )
end

# A dash directly before digits is part of a negative number literal,
# not a standalone :dash token. (Removed duplicated pre-refactor assertion.)
def test_comparison_with_negative_number
  assert_equal(
    [[:number, '1'], [:comparison, '>'], [:number, '-1'], [:end_of_string]],
    tokenize('1>-1'),
  )
end

# Malformed two-character operators must raise Liquid::SyntaxError.
# (Removed duplicated pre-refactor `Lexer.new(...).tokenize` calls — each
# assert_raises block contained both the old and new invocation.)
def test_raise_for_invalid_comparison
  assert_raises(SyntaxError) do
    tokenize('1>!1')
  end

  assert_raises(SyntaxError) do
    tokenize('1=<1')
  end

  assert_raises(SyntaxError) do
    tokenize('1!!1')
  end
end

# Single-character punctuation lexes to its dedicated token type.
# (Removed duplicated pre-refactor assertions left over from the diff.)
def test_specials
  assert_equal(
    [[:pipe, '|'], [:dot, '.'], [:colon, ':'], [:end_of_string]],
    tokenize('| .:'),
  )

  assert_equal(
    [[:open_square, '['], [:comma, ','], [:close_square, ']'], [:end_of_string]],
    tokenize('[,]'),
  )
end

# Identifiers may end in `?`; a leading digit splits into number + id.
# (Removed duplicated pre-refactor assertions left over from the diff.)
def test_fancy_identifiers
  assert_equal([[:id, 'hi'], [:id, 'five?'], [:end_of_string]], tokenize('hi five?'))

  assert_equal([[:number, '2'], [:id, 'foo'], [:end_of_string]], tokenize('2foo'))
end

# Newlines and tabs between tokens are skipped like spaces.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_whitespace
  assert_equal(
    [[:id, 'five'], [:pipe, '|'], [:comparison, '=='], [:end_of_string]],
    tokenize("five|\n\t =="),
  )
end

# A character with no token type raises Liquid::SyntaxError.
# (Removed duplicated pre-refactor `Lexer.new("%").tokenize` call.)
def test_unexpected_character
  assert_raises(SyntaxError) do
    tokenize("%")
  end
end

# A negative number literal after a filter colon lexes as one :number token.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_negative_numbers
  assert_equal(
    [[:id, 'foo'], [:pipe, '|'], [:id, 'default'], [:colon, ":"], [:number, '-1'], [:end_of_string]],
    tokenize("foo | default: -1"),
  )
end

# A multi-digit number after `>` lexes as a single :number token.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_greater_than_two_digits
  assert_equal(
    [[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]],
    tokenize("foo > 12"),
  )
end

def test_error_with_utf8_character
error = assert_raises(SyntaxError) do
Lexer.new("1 < 1Ø").tokenize
tokenize("1 < 1Ø")
end

assert_equal(
Expand All @@ -99,7 +120,20 @@ def test_error_with_utf8_character
# `contains` used as an attribute name after a dot lexes as an :id, not a
# :comparison. (Removed diff residue: the old `Lexer.new(...)` argument line
# sat alongside the new `tokenize(...)` line, giving assert_equal a stray
# third argument.)
def test_contains_as_attribute_name
  assert_equal(
    [[:id, "a"], [:dot, "."], [:id, "contains"], [:dot, "."], [:id, "b"], [:end_of_string]],
    tokenize("a.contains.b"),
  )
end

# A trailing operator or literal at end of input must lex cleanly instead of
# raising when the scanner peeks past the final byte.
def test_tokenize_incomplete_expression
  {
    "false -" => [[:id, "false"], [:dash, "-"], [:end_of_string]],
    "false <" => [[:id, "false"], [:comparison, "<"], [:end_of_string]],
    "false >" => [[:id, "false"], [:comparison, ">"], [:end_of_string]],
    "false 1" => [[:id, "false"], [:number, "1"], [:end_of_string]],
  }.each do |markup, expected|
    assert_equal(expected, tokenize(markup))
  end
end

private

# Lexes +input+ with Liquid::Lexer and returns the full token array,
# terminated by [:end_of_string].
def tokenize(input)
Lexer.new(input).tokenize
end
end

0 comments on commit 8e40f80

Please sign in to comment.