From f6a3e25e2ea21f1014e46ec97e8446f77e870d47 Mon Sep 17 00:00:00 2001
From: Michael Go
Date: Tue, 29 Oct 2024 17:06:02 -0300
Subject: [PATCH 1/3] fix parsing of quirky incomplete expressions

---
 lib/liquid/lexer.rb                     |  6 +++---
 test/integration/parsing_quirks_test.rb | 12 ++++++++++++
 test/unit/lexer_unit_test.rb            |  7 +++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb
index 61ee24a9d..d02c24e67 100644
--- a/lib/liquid/lexer.rb
+++ b/lib/liquid/lexer.rb
@@ -181,7 +181,7 @@ def tokenize
             @output << DOTDOT
           elsif special == DASH
             # Special case for negative numbers
-            if NUMBER_TABLE[@ss.peek_byte]
+            if !@ss.eos? && NUMBER_TABLE[@ss.peek_byte]
               @ss.pos -= 1
               @output << [:number, @ss.scan(NUMBER_LITERAL)]
             else
@@ -192,7 +192,7 @@ def tokenize
           end
         elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if (found = sub_table[@ss.peek_byte])
+          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
            @output << found
             @ss.scan_byte
           else
@@ -200,7 +200,7 @@ def tokenize
           end
         elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if (found = sub_table[@ss.peek_byte])
+          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
            @output << found
             @ss.scan_byte
           else
diff --git a/test/integration/parsing_quirks_test.rb b/test/integration/parsing_quirks_test.rb
index 744936c97..48f914635 100644
--- a/test/integration/parsing_quirks_test.rb
+++ b/test/integration/parsing_quirks_test.rb
@@ -131,4 +131,16 @@ def test_lookup_on_var_with_literal_name
   def test_contains_in_id
     assert_template_result(' YES ', '{% if containsallshipments == true %} YES {% endif %}', { 'containsallshipments' => true })
   end
+
+  def test_incomplete_expression
+    with_error_mode(:lax) do
+      assert_template_result("false", "{% liquid assign foo = false -\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false >\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false <\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false =\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false !\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false 1\n%}{{ foo }}")
+      assert_template_result("false", "{% liquid assign foo = false a\n%}{{ foo }}")
+    end
+  end
 end # ParsingQuirksTest
diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb
index 5aa6066fa..54ed0b50d 100644
--- a/test/unit/lexer_unit_test.rb
+++ b/test/unit/lexer_unit_test.rb
@@ -102,4 +102,11 @@ def test_contains_as_attribute_name
       Lexer.new("a.contains.b").tokenize,
     )
   end
+
+  def test_tokenize_incomplete_expression
+    assert_equal([[:id, "false"], [:dash, "-"], [:end_of_string]], Lexer.new("false -").tokenize)
+    assert_equal([[:id, "false"], [:comparison, "<"], [:end_of_string]], Lexer.new("false <").tokenize)
+    assert_equal([[:id, "false"], [:comparison, ">"], [:end_of_string]], Lexer.new("false >").tokenize)
+    assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], Lexer.new("false 1").tokenize)
+  end
 end
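Why the guards in this first patch are needed: StringScanner#peek_byte returns nil once the scanner has consumed all of its input, and the lexer's jump tables are indexed by byte value, so an expression that ends at an operator, such as "false -", turned the table lookup into an index-by-nil and raised instead of tokenizing. A minimal sketch of the failure mode and of the guard, using a hypothetical stand-in for NUMBER_TABLE (the real table lives in lib/liquid/lexer.rb):

    require "strscan"

    # Hypothetical stand-in for the lexer's byte-indexed jump tables:
    # true for bytes that may start a number literal.
    number_table = Array.new(256, false)
    ("0".."9").each { |digit| number_table[digit.ord] = true }

    ss = StringScanner.new("-")
    ss.scan_byte          # consume the dash; the scanner is now at end of input
    peeked = ss.peek_byte # => nil, nothing is left to peek at

    begin
      number_table[peeked] # Array#[] refuses a nil index
    rescue TypeError => e
      puts e.message       # no implicit conversion from nil to integer
    end

    # The guard added in this patch short-circuits before the lookup:
    p(!ss.eos? && number_table[ss.peek_byte]) # => false, no raise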
From f00670cb01340dfd602a7fa6a22bf3b9c906d446 Mon Sep 17 00:00:00 2001
From: Michael Go
Date: Tue, 29 Oct 2024 17:11:35 -0300
Subject: [PATCH 2/3] refactor lexer unit test

---
 test/unit/lexer_unit_test.rb | 95 +++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 34 deletions(-)

diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb
index 54ed0b50d..7f8457395 100644
--- a/test/unit/lexer_unit_test.rb
+++ b/test/unit/lexer_unit_test.rb
@@ -6,83 +6,104 @@ class LexerUnitTest < Minitest::Test
   include Liquid
 
   def test_strings
-    tokens = Lexer.new(%( 'this is a test""' "wat 'lol'")).tokenize
-    assert_equal([[:string, %('this is a test""')], [:string, %("wat 'lol'")], [:end_of_string]], tokens)
+    assert_equal(
+      [[:string, %('this is a test""')], [:string, %("wat 'lol'")], [:end_of_string]],
+      tokenize(%( 'this is a test""' "wat 'lol'")),
+    )
   end
 
   def test_integer
-    tokens = Lexer.new('hi 50').tokenize
-    assert_equal([[:id, 'hi'], [:number, '50'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'hi'], [:number, '50'], [:end_of_string]],
+      tokenize('hi 50'),
+    )
   end
 
   def test_float
-    tokens = Lexer.new('hi 5.0').tokenize
-    assert_equal([[:id, 'hi'], [:number, '5.0'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'hi'], [:number, '5.0'], [:end_of_string]],
+      tokenize('hi 5.0'),
+    )
   end
 
   def test_comparison
-    tokens = Lexer.new('== <> contains ').tokenize
-    assert_equal([[:comparison, '=='], [:comparison, '<>'], [:comparison, 'contains'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:comparison, '=='], [:comparison, '<>'], [:comparison, 'contains'], [:end_of_string]],
+      tokenize('== <> contains '),
+    )
   end
 
   def test_comparison_without_whitespace
-    tokens = Lexer.new('1>0').tokenize
-    assert_equal([[:number, '1'], [:comparison, '>'], [:number, '0'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:number, '1'], [:comparison, '>'], [:number, '0'], [:end_of_string]],
+      tokenize('1>0'),
+    )
   end
 
   def test_comparison_with_negative_number
-    tokens = Lexer.new('1>-1').tokenize
-    assert_equal([[:number, '1'], [:comparison, '>'], [:number, '-1'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:number, '1'], [:comparison, '>'], [:number, '-1'], [:end_of_string]],
+      tokenize('1>-1'),
+    )
   end
 
   def test_raise_for_invalid_comparison
     assert_raises(SyntaxError) do
-      Lexer.new('1>!1').tokenize
+      tokenize('1>!1')
     end
 
     assert_raises(SyntaxError) do
-      Lexer.new('1=<1').tokenize
+      tokenize('1=<1')
     end
 
     assert_raises(SyntaxError) do
-      Lexer.new('1!!1').tokenize
+      tokenize('1!!1')
     end
   end
 
   def test_specials
-    tokens = Lexer.new('| .:').tokenize
-    assert_equal([[:pipe, '|'], [:dot, '.'], [:colon, ':'], [:end_of_string]], tokens)
-    tokens = Lexer.new('[,]').tokenize
-    assert_equal([[:open_square, '['], [:comma, ','], [:close_square, ']'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:pipe, '|'], [:dot, '.'], [:colon, ':'], [:end_of_string]],
+      tokenize('| .:'),
+    )
+
+    assert_equal(
+      [[:open_square, '['], [:comma, ','], [:close_square, ']'], [:end_of_string]],
+      tokenize('[,]'),
+    )
   end
 
   def test_fancy_identifiers
-    tokens = Lexer.new('hi five?').tokenize
-    assert_equal([[:id, 'hi'], [:id, 'five?'], [:end_of_string]], tokens)
+    assert_equal([[:id, 'hi'], [:id, 'five?'], [:end_of_string]], tokenize('hi five?'))
 
-    tokens = Lexer.new('2foo').tokenize
-    assert_equal([[:number, '2'], [:id, 'foo'], [:end_of_string]], tokens)
+    assert_equal([[:number, '2'], [:id, 'foo'], [:end_of_string]], tokenize('2foo'))
   end
 
   def test_whitespace
-    tokens = Lexer.new("five|\n\t ==").tokenize
-    assert_equal([[:id, 'five'], [:pipe, '|'], [:comparison, '=='], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'five'], [:pipe, '|'], [:comparison, '=='], [:end_of_string]],
+      tokenize("five|\n\t =="),
+    )
   end
 
   def test_unexpected_character
     assert_raises(SyntaxError) do
-      Lexer.new("%").tokenize
+      tokenize("%")
     end
   end
 
   def test_negative_numbers
-    tokens = Lexer.new("foo | default: -1").tokenize
-    assert_equal([[:id, 'foo'], [:pipe, '|'], [:id, 'default'], [:colon, ":"], [:number, '-1'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'foo'], [:pipe, '|'], [:id, 'default'], [:colon, ":"], [:number, '-1'], [:end_of_string]],
+      tokenize("foo | default: -1"),
+    )
   end
 
   def test_greater_than_two_digits
-    tokens = Lexer.new("foo > 12").tokenize
-    assert_equal([[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]], tokens)
+    assert_equal(
+      [[:id, 'foo'], [:comparison, '>'], [:number, '12'], [:end_of_string]],
+      tokenize("foo > 12"),
+    )
   end
 
   def test_error_with_utf8_character
@@ -104,9 +125,15 @@ def test_contains_as_attribute_name
   end
 
   def test_tokenize_incomplete_expression
-    assert_equal([[:id, "false"], [:dash, "-"], [:end_of_string]], Lexer.new("false -").tokenize)
-    assert_equal([[:id, "false"], [:comparison, "<"], [:end_of_string]], Lexer.new("false <").tokenize)
-    assert_equal([[:id, "false"], [:comparison, ">"], [:end_of_string]], Lexer.new("false >").tokenize)
-    assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], Lexer.new("false 1").tokenize)
+    assert_equal([[:id, "false"], [:dash, "-"], [:end_of_string]], tokenize("false -"))
+    assert_equal([[:id, "false"], [:comparison, "<"], [:end_of_string]], tokenize("false <"))
+    assert_equal([[:id, "false"], [:comparison, ">"], [:end_of_string]], tokenize("false >"))
+    assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], tokenize("false 1"))
+  end
+
+  private
+
+  def tokenize(input)
+    Lexer.new(input).tokenize
   end
 end
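The refactor above leaves the lexer untouched; the last patch then revisits the guard itself. StringScanner#peek_byte already reports end of input by returning nil, so capturing the peeked byte and testing it for truthiness covers the end-of-input check and feeds the table lookup in one expression. A side-by-side sketch of the two equivalent guards, again with a hypothetical byte-indexed table:

    require "strscan"

    # Hypothetical stand-in for one of the lexer's comparison sub-tables.
    sub_table = Array.new(256)
    sub_table["=".ord] = [:comparison, ">="]

    ss = StringScanner.new("") # scanner already at end of input

    # PATCH 1 style: ask the scanner explicitly before peeking.
    found = !ss.eos? && sub_table[ss.peek_byte]
    # => false

    # PATCH 3 style: the captured nil makes the whole condition falsy
    # without ever indexing the table.
    found = (peeked_byte = ss.peek_byte) && sub_table[peeked_byte]
    # => nil

Both forms are falsy at end of input; the second also drops the extra eos?/peek_byte pair of scanner calls on the hot tokenize path.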
From ffce6de8bb484c660b88dbdb0c726867da541c84 Mon Sep 17 00:00:00 2001
From: Michael Go
Date: Wed, 30 Oct 2024 11:53:54 -0300
Subject: [PATCH 3/3] avoid using StringScanner#eos?

---
 lib/liquid/lexer.rb          | 6 +++---
 test/unit/lexer_unit_test.rb | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb
index d02c24e67..b9e5443c1 100644
--- a/lib/liquid/lexer.rb
+++ b/lib/liquid/lexer.rb
@@ -181,7 +181,7 @@ def tokenize
             @output << DOTDOT
           elsif special == DASH
             # Special case for negative numbers
-            if !@ss.eos? && NUMBER_TABLE[@ss.peek_byte]
+            if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte]
               @ss.pos -= 1
               @output << [:number, @ss.scan(NUMBER_LITERAL)]
             else
@@ -192,7 +192,7 @@ def tokenize
           end
         elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
+          if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
            @output << found
             @ss.scan_byte
           else
@@ -200,7 +200,7 @@ def tokenize
           end
         elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
           @ss.scan_byte
-          if !@ss.eos? && (found = sub_table[@ss.peek_byte])
+          if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte])
            @output << found
             @ss.scan_byte
           else
diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb
index 7f8457395..26a25b629 100644
--- a/test/unit/lexer_unit_test.rb
+++ b/test/unit/lexer_unit_test.rb
@@ -108,7 +108,7 @@ def test_greater_than_two_digits
 
   def test_error_with_utf8_character
     error = assert_raises(SyntaxError) do
-      Lexer.new("1 < 1Ø").tokenize
+      tokenize("1 < 1Ø")
     end
 
     assert_equal(
@@ -120,7 +120,7 @@ def test_error_with_utf8_character
 
   def test_contains_as_attribute_name
     assert_equal(
       [[:id, "a"], [:dot, "."], [:id, "contains"], [:dot, "."], [:id, "b"], [:end_of_string]],
-      Lexer.new("a.contains.b").tokenize,
+      tokenize("a.contains.b"),
     )
   end
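End to end, the series makes quirky incomplete expressions tokenize cleanly and, in lax error mode, render instead of raising. A quick check mirroring the new tests, assuming the gem is built from this branch:

    require "liquid"

    # The token stream now ends at the dangling operator.
    p Liquid::Lexer.new("false -").tokenize
    # => [[:id, "false"], [:dash, "-"], [:end_of_string]]

    # In lax mode the trailing junk after the assign is ignored.
    template = Liquid::Template.parse(
      "{% liquid assign foo = false >\n%}{{ foo }}",
      error_mode: :lax,
    )
    puts template.render # => "false"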