From f6a3e25e2ea21f1014e46ec97e8446f77e870d47 Mon Sep 17 00:00:00 2001
From: Michael Go
Date: Tue, 29 Oct 2024 17:06:02 -0300
Subject: [PATCH 1/3] fix parsing of quirky incomplete expressions

---
 lib/liquid/lexer.rb                     |  6 +++---
 test/integration/parsing_quirks_test.rb | 12 ++++++++++++
 test/unit/lexer_unit_test.rb            |  7 +++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb
index 61ee24a9d..d02c24e67 100644
--- a/lib/liquid/lexer.rb
+++ b/lib/liquid/lexer.rb
@@ -181,7 +181,7 @@ def tokenize
             @output << DOTDOT
           elsif special == DASH
             # Special case for negative numbers
-            if NUMBER_TABLE[@ss.peek_byte]
+            if !@ss.eos? && NUMBER_TABLE[@ss.peek_byte]
               @ss.pos -= 1
               @output << [:number, @ss.scan(NUMBER_LITERAL)]
             else
@@ -192,7 +192,7 @@ def tokenize
           end
         elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if (found = sub_table[@ss.peek_byte])
+          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
            @output << found
             @ss.scan_byte
           else
@@ -200,7 +200,7 @@ def tokenize
           end
         elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if (found = sub_table[@ss.peek_byte])
+          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
            @output << found
             @ss.scan_byte
           else
diff --git a/test/integration/parsing_quirks_test.rb b/test/integration/parsing_quirks_test.rb
index 744936c97..48f914635 100644
--- a/test/integration/parsing_quirks_test.rb
+++ b/test/integration/parsing_quirks_test.rb
@@ -131,4 +131,16 @@ def test_lookup_on_var_with_literal_name
   def test_contains_in_id
     assert_template_result(' YES ', '{% if containsallshipments == true %} YES {% endif %}', { 'containsallshipments' => true })
   end
+
+  def test_incomplete_expression
+    with_error_mode(:lax) do
+      assert_template_result("false", "{% liquid assign foo = false -\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false >\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false <\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false =\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false !\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false 1\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false a\n%}{{ foo }}")
+    end
+  end
 end # ParsingQuirksTest
diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb
index 5aa6066fa..54ed0b50d 100644
--- a/test/unit/lexer_unit_test.rb
+++ b/test/unit/lexer_unit_test.rb
@@ -102,4 +102,11 @@ def test_contains_as_attribute_name
       Lexer.new("a.contains.b").tokenize,
     )
   end
+
+  def test_tokenize_incomplete_expression
+    assert_equal([[:id, "false"], [:dash, "-"], [:end_of_string]], Lexer.new("false -").tokenize)
+    assert_equal([[:id, "false"], [:comparison, "<"], [:end_of_string]], Lexer.new("false <").tokenize)
+    assert_equal([[:id, "false"], [:comparison, ">"], [:end_of_string]], Lexer.new("false >").tokenize)
+    assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], Lexer.new("false 1").tokenize)
+  end
 end
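Why the guards in this first patch are needed: StringScanner#peek_byte returns nil once the scanner has consumed all of its input, and the lexer's jump tables are indexed by byte value, so an expression that ends at an operator, such as "false -", turned the table lookup into an index-by-nil and raised instead of tokenizing. A minimal sketch of the failure mode and of the guard, using a hypothetical stand-in for NUMBER_TABLE (the real table lives in lib/liquid/lexer.rb):

    require "strscan"

    # Hypothetical stand-in for the lexer's byte-indexed jump tables:
    # true for bytes that may start a number literal.
    number_table = Array.new(256, false)
    ("0".."9").each { |digit| number_table[digit.ord] = true }

    ss = StringScanner.new("-")
    ss.scan_byte          # consume the dash; the scanner is now at end of input
    peeked = ss.peek_byte # => nil, nothing is left to peek at

    begin
      number_table[peeked] # Array#[] refuses a nil index
    rescue TypeError => e
      puts e.message       # no implicit conversion from nil to integer
    end

    # The guard added in this patch short-circuits before the lookup:
    p(!ss.eos? && number_table[ss.peek_byte]) # => false, no raise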
From f00670cb01340dfd602a7fa6a22bf3b9c906d446 Mon Sep 17 00:00:00 2001
From: Michael Go
Date: Tue, 29 Oct 2024 17:11:35 -0300
Subject: [PATCH 2/3] refactor lexer unit test

---
 test/unit/lexer_unit_test.rb | 95 +++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 34 deletions(-)

diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb
index 54ed0b50d..7f8457395 100644
--- a/test/unit/lexer_unit_test.rb
+++ b/test/unit/lexer_unit_test.rb
@@ -6,83 +6,104 @@ class LexerUnitTest < Minitest::Test
   include Liquid
 
   def test_strings
-    tokens = Lexer.new(%( 'this is a test""' "wat 'lol'")).tokenize
-    assert_equal([[:string, %('this is a test""')], [:string, %("wat 'lol'")], [:end_of_string]], tokens)
+    assert_equal(
+      [[:string, %('this is a test""')], [:string, %("wat 'lol'")], [:end_of_string]],
+      tokenize(%( 'this is a test""' "wat 'lol'")),
+    )
   end
 
   def test_integer
-    tokens = Lexer.new('hi 50').tokenize
-    assert_equal([[:id, 'hi'], [:number, '50'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'hi'], [:number, '50'], [:end_of_string]],
+      tokenize('hi 50'),
+    )
   end
 
   def test_float
-    tokens = Lexer.new('hi 5.0').tokenize
-    assert_equal([[:id, 'hi'], [:number, '5.0'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'hi'], [:number, '5.0'], [:end_of_string]],
+      tokenize('hi 5.0'),
+    )
   end
 
   def test_comparison
-    tokens = Lexer.new('== <> contains ').tokenize
-    assert_equal([[:comparison, '=='], [:comparison, '<>'], [:comparison, 'contains'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:comparison, '=='], [:comparison, '<>'], [:comparison, 'contains'], [:end_of_string]],
+      tokenize('== <> contains '),
+    )
   end
 
   def test_comparison_without_whitespace
-    tokens = Lexer.new('1>0').tokenize
-    assert_equal([[:number, '1'], [:comparison, '>'], [:number, '0'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:number, '1'], [:comparison, '>'], [:number, '0'], [:end_of_string]],
+      tokenize('1>0'),
+    )
   end
 
   def test_comparison_with_negative_number
-    tokens = Lexer.new('1>-1').tokenize
-    assert_equal([[:number, '1'], [:comparison, '>'], [:number, '-1'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:number, '1'], [:comparison, '>'], [:number, '-1'], [:end_of_string]],
+      tokenize('1>-1'),
+    )
   end
 
   def test_raise_for_invalid_comparison
     assert_raises(SyntaxError) do
-      Lexer.new('1>!1').tokenize
+      tokenize('1>!1')
     end
 
     assert_raises(SyntaxError) do
-      Lexer.new('1=<1').tokenize
+      tokenize('1=<1')
     end
 
     assert_raises(SyntaxError) do
-      Lexer.new('1!!1').tokenize
+      tokenize('1!!1')
     end
   end
 
   def test_specials
-    tokens = Lexer.new('| .:').tokenize
-    assert_equal([[:pipe, '|'], [:dot, '.'], [:colon, ':'], [:end_of_string]], tokens)
-    tokens = Lexer.new('[,]').tokenize
-    assert_equal([[:open_square, '['], [:comma, ','], [:close_square, ']'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:pipe, '|'], [:dot, '.'], [:colon, ':'], [:end_of_string]],
+      tokenize('| .:'),
+    )
+
+    assert_equal(
+      [[:open_square, '['], [:comma, ','], [:close_square, ']'], [:end_of_string]],
+      tokenize('[,]'),
+    )
   end
 
   def test_fancy_identifiers
-    tokens = Lexer.new('hi five?').tokenize
-    assert_equal([[:id, 'hi'], [:id, 'five?'], [:end_of_string]], tokens)
+    assert_equal([[:id, 'hi'], [:id, 'five?'], [:end_of_string]], tokenize('hi five?'))
 
-    tokens = Lexer.new('2foo').tokenize
-    assert_equal([[:number, '2'], [:id, 'foo'], [:end_of_string]], tokens)
+    assert_equal([[:number, '2'], [:id, 'foo'], [:end_of_string]], tokenize('2foo'))
   end
 
   def test_whitespace
-    tokens = Lexer.new("five|\n\t ==").tokenize
-    assert_equal([[:id, 'five'], [:pipe, '|'], [:comparison, '=='], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'five'], [:pipe, '|'], [:comparison, '=='], [:end_of_string]],
+      tokenize("five|\n\t =="),
+    )
   end
 
   def test_unexpected_character
     assert_raises(SyntaxError) do
-      Lexer.new("%").tokenize
+      tokenize("%")
     end
   end
 
   def test_negative_numbers
-    tokens = Lexer.new("foo | default: -1").tokenize
-    assert_equal([[:id, 'foo'], [:pipe, '|'], [:id, 'default'], [:colon, ":"], [:number, '-1'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'foo'], [:pipe, '|'], [:id, 'default'], [:colon, ":"], [:number, '-1'], [:end_of_string]],
+      tokenize("foo | default: -1"),
+    )
   end
 
   def test_greater_than_two_digits
-    tokens = Lexer.new("foo > 12").tokenize
-    assert_equal([[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]],
+      tokenize("foo > 12"),
+    )
   end
 
   def test_error_with_utf8_character
@@ -104,9 +125,15 @@ def test_contains_as_attribute_name
   end
 
   def test_tokenize_incomplete_expression
-    assert_equal([[:id, "false"], [:dash, "-"], [:end_of_string]], Lexer.new("false -").tokenize)
-    assert_equal([[:id, "false"], [:comparison, "<"], [:end_of_string]], Lexer.new("false <").tokenize)
-    assert_equal([[:id, "false"], [:comparison, ">"], [:end_of_string]], Lexer.new("false >").tokenize)
-    assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], Lexer.new("false 1").tokenize)
+    assert_equal([[:id, "false"], [:dash, "-"], [:end_of_string]], tokenize("false -"))
+    assert_equal([[:id, "false"], [:comparison, "<"], [:end_of_string]], tokenize("false <"))
+    assert_equal([[:id, "false"], [:comparison, ">"], [:end_of_string]], tokenize("false >"))
+    assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], tokenize("false 1"))
+  end
+
+  private
+
+  def tokenize(input)
+    Lexer.new(input).tokenize
   end
 end
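The refactor above leaves the lexer untouched; the last patch then revisits the guard itself. StringScanner#peek_byte already reports end of input by returning nil, so capturing the peeked byte and testing it for truthiness covers the end-of-input check and feeds the table lookup in one expression. A side-by-side sketch of the two equivalent guards, again with a hypothetical byte-indexed table:

    require "strscan"

    # Hypothetical stand-in for one of the lexer's comparison sub-tables.
    sub_table = Array.new(256)
    sub_table["=".ord] = [:comparison, ">="]

    ss = StringScanner.new("") # scanner already at end of input

    # PATCH 1 style: ask the scanner explicitly before peeking.
    found = !ss.eos? && sub_table[ss.peek_byte]
    # => false

    # PATCH 3 style: the captured nil makes the whole condition falsy
    # without ever indexing the table.
    found = (peeked_byte = ss.peek_byte) && sub_table[peeked_byte]
    # => nil

Both forms are falsy at end of input; the second also drops the extra eos?/peek_byte pair of scanner calls on the hot tokenize path.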
From ffce6de8bb484c660b88dbdb0c726867da541c84 Mon Sep 17 00:00:00 2001
From: Michael Go
Date: Wed, 30 Oct 2024 11:53:54 -0300
Subject: [PATCH 3/3] avoid using StringScanner#eos?

---
 lib/liquid/lexer.rb          | 6 +++---
 test/unit/lexer_unit_test.rb | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb
index d02c24e67..b9e5443c1 100644
--- a/lib/liquid/lexer.rb
+++ b/lib/liquid/lexer.rb
@@ -181,7 +181,7 @@ def tokenize
             @output << DOTDOT
           elsif special == DASH
             # Special case for negative numbers
-            if !@ss.eos? && NUMBER_TABLE[@ss.peek_byte]
+            if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte]
               @ss.pos -= 1
               @output << [:number, @ss.scan(NUMBER_LITERAL)]
             else
@@ -192,7 +192,7 @@ def tokenize
           end
         elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
+          if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
            @output << found
             @ss.scan_byte
           else
@@ -200,7 +200,7 @@ def tokenize
           end
         elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
+          if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
            @output << found
             @ss.scan_byte
           else
diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb
index 7f8457395..26a25b629 100644
--- a/test/unit/lexer_unit_test.rb
+++ b/test/unit/lexer_unit_test.rb
@@ -108,7 +108,7 @@ def test_greater_than_two_digits
 
   def test_error_with_utf8_character
     error = assert_raises(SyntaxError) do
-      Lexer.new("1 < 1Ø").tokenize
+      tokenize("1 < 1Ø")
     end
 
     assert_equal(
@@ -120,7 +120,7 @@ def test_error_with_utf8_character
 
   def test_contains_as_attribute_name
     assert_equal(
       [[:id, "a"], [:dot, "."], [:id, "contains"], [:dot, "."], [:id, "b"], [:end_of_string]],
-      Lexer.new("a.contains.b").tokenize,
+      tokenize("a.contains.b"),
     )
   end
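End to end, the series makes quirky incomplete expressions tokenize cleanly and, in lax error mode, render instead of raising. A quick check mirroring the new tests, assuming the gem is built from this branch:

    require "liquid"

    # The token stream now ends at the dangling operator.
    p Liquid::Lexer.new("false -").tokenize
    # => [[:id, "false"], [:dash, "-"], [:end_of_string]]

    # In lax mode the trailing junk after the assign is ignored.
    template = Liquid::Template.parse(
      "{% liquid assign foo = false >\n%}{{ foo }}",
      error_mode: :lax,
    )
    puts template.render # => "false"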