Skip to content

Commit

Permalink
Merge pull request #1838 from Shopify/quirky-lexer-parsing
Browse files Browse the repository at this point in the history
fix parsing quirky incomplete expressions
  • Loading branch information
ggmichaelgo authored Oct 30, 2024
2 parents f6ffc37 + ffce6de commit 8e40f80
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 35 deletions.
6 changes: 3 additions & 3 deletions lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def tokenize
@output << DOTDOT
elsif special == DASH
# Special case for negative numbers
if NUMBER_TABLE[@ss.peek_byte]
if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte]
@ss.pos -= 1
@output << [:number, @ss.scan(NUMBER_LITERAL)]
else
Expand All @@ -192,15 +192,15 @@ def tokenize
end
elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
@ss.scan_byte
if (found = sub_table[@ss.peek_byte])
if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
@output << found
@ss.scan_byte
else
raise_syntax_error(start_pos)
end
elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
@ss.scan_byte
if (found = sub_table[@ss.peek_byte])
if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
@output << found
@ss.scan_byte
else
Expand Down
12 changes: 12 additions & 0 deletions test/integration/parsing_quirks_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,16 @@ def test_lookup_on_var_with_literal_name
# An identifier that merely starts with the keyword "contains" must lex as a
# plain id, not as the `contains` comparison operator.
def test_contains_in_id
  template = '{% if containsallshipments == true %} YES {% endif %}'
  assert_template_result(' YES ', template, { 'containsallshipments' => true })
end

# A dangling token after a complete assignment must not crash the lexer in
# lax mode; the assignment before it should still take effect.
def test_incomplete_expression
  with_error_mode(:lax) do
    %w(- > < = ! 1 a).each do |trailing|
      assert_template_result("false", "{% liquid assign foo = false #{trailing}\n%}{{ foo }}")
    end
  end
end
end # ParsingQuirksTest
98 changes: 66 additions & 32 deletions test/unit/lexer_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,88 +6,109 @@ class LexerUnitTest < Minitest::Test
include Liquid

# Single- and double-quoted string literals lex to :string tokens.
# (Removed diff residue: the pre-refactor `Lexer.new(...).tokenize` lines
# duplicated the helper-based assertion below.)
def test_strings
  assert_equal(
    [[:string, %('this is a test""')], [:string, %("wat 'lol'")], [:end_of_string]],
    tokenize(%( 'this is a test""' "wat 'lol'")),
  )
end

# Integer literals lex to :number tokens (value kept as a string).
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_integer
  assert_equal(
    [[:id, 'hi'], [:number, '50'], [:end_of_string]],
    tokenize('hi 50'),
  )
end

# Float literals also lex to :number tokens.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_float
  assert_equal(
    [[:id, 'hi'], [:number, '5.0'], [:end_of_string]],
    tokenize('hi 5.0'),
  )
end

# Comparison operators, including the word operator `contains`, lex to
# :comparison tokens. (Removed duplicated pre-refactor assertion.)
def test_comparison
  assert_equal(
    [[:comparison, '=='], [:comparison, '<>'], [:comparison, 'contains'], [:end_of_string]],
    tokenize('== <> contains '),
  )
end

# Operators need no surrounding whitespace to be recognized.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_comparison_without_whitespace
  assert_equal(
    [[:number, '1'], [:comparison, '>'], [:number, '0'], [:end_of_string]],
    tokenize('1>0'),
  )
end

# A dash directly before digits is part of a negative number literal,
# not a standalone :dash token. (Removed duplicated pre-refactor assertion.)
def test_comparison_with_negative_number
  assert_equal(
    [[:number, '1'], [:comparison, '>'], [:number, '-1'], [:end_of_string]],
    tokenize('1>-1'),
  )
end

# Malformed two-character operators must raise Liquid::SyntaxError.
# (Removed duplicated pre-refactor `Lexer.new(...).tokenize` calls — each
# assert_raises block contained both the old and new invocation.)
def test_raise_for_invalid_comparison
  assert_raises(SyntaxError) do
    tokenize('1>!1')
  end

  assert_raises(SyntaxError) do
    tokenize('1=<1')
  end

  assert_raises(SyntaxError) do
    tokenize('1!!1')
  end
end

# Single-character punctuation lexes to its dedicated token type.
# (Removed duplicated pre-refactor assertions left over from the diff.)
def test_specials
  assert_equal(
    [[:pipe, '|'], [:dot, '.'], [:colon, ':'], [:end_of_string]],
    tokenize('| .:'),
  )

  assert_equal(
    [[:open_square, '['], [:comma, ','], [:close_square, ']'], [:end_of_string]],
    tokenize('[,]'),
  )
end

# Identifiers may end in `?`; a leading digit splits into number + id.
# (Removed duplicated pre-refactor assertions left over from the diff.)
def test_fancy_identifiers
  assert_equal([[:id, 'hi'], [:id, 'five?'], [:end_of_string]], tokenize('hi five?'))

  assert_equal([[:number, '2'], [:id, 'foo'], [:end_of_string]], tokenize('2foo'))
end

# Newlines and tabs between tokens are skipped like spaces.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_whitespace
  assert_equal(
    [[:id, 'five'], [:pipe, '|'], [:comparison, '=='], [:end_of_string]],
    tokenize("five|\n\t =="),
  )
end

# A character with no token type raises Liquid::SyntaxError.
# (Removed duplicated pre-refactor `Lexer.new("%").tokenize` call.)
def test_unexpected_character
  assert_raises(SyntaxError) do
    tokenize("%")
  end
end

# A negative number literal after a filter colon lexes as one :number token.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_negative_numbers
  assert_equal(
    [[:id, 'foo'], [:pipe, '|'], [:id, 'default'], [:colon, ":"], [:number, '-1'], [:end_of_string]],
    tokenize("foo | default: -1"),
  )
end

# A multi-digit number after `>` lexes as a single :number token.
# (Removed duplicated pre-refactor assertion left over from the diff.)
def test_greater_than_two_digits
  assert_equal(
    [[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]],
    tokenize("foo > 12"),
  )
end

def test_error_with_utf8_character
error = assert_raises(SyntaxError) do
Lexer.new("1 < 1Ø").tokenize
tokenize("1 < 1Ø")
end

assert_equal(
Expand All @@ -99,7 +120,20 @@ def test_error_with_utf8_character
# `contains` used as an attribute name after a dot lexes as an :id, not a
# :comparison. (Removed diff residue: the old `Lexer.new(...)` argument line
# sat alongside the new `tokenize(...)` line, giving assert_equal a stray
# third argument.)
def test_contains_as_attribute_name
  assert_equal(
    [[:id, "a"], [:dot, "."], [:id, "contains"], [:dot, "."], [:id, "b"], [:end_of_string]],
    tokenize("a.contains.b"),
  )
end

# A trailing operator or literal at end of input must lex cleanly instead of
# raising when the scanner peeks past the final byte.
def test_tokenize_incomplete_expression
  {
    "false -" => [[:id, "false"], [:dash, "-"], [:end_of_string]],
    "false <" => [[:id, "false"], [:comparison, "<"], [:end_of_string]],
    "false >" => [[:id, "false"], [:comparison, ">"], [:end_of_string]],
    "false 1" => [[:id, "false"], [:number, "1"], [:end_of_string]],
  }.each do |markup, expected|
    assert_equal(expected, tokenize(markup))
  end
end

private

# Lexes +input+ with Liquid::Lexer and returns the full token array,
# terminated by [:end_of_string].
def tokenize(input)
Lexer.new(input).tokenize
end
end

0 comments on commit 8e40f80

Please sign in to comment.