diff --git a/.github/workflows/liquid.yml b/.github/workflows/liquid.yml index 63d966621..cb1087695 100644 --- a/.github/workflows/liquid.yml +++ b/.github/workflows/liquid.yml @@ -11,10 +11,10 @@ jobs: strategy: matrix: entry: - - { ruby: 2.7, allowed-failure: false } # minimum supported + - { ruby: 3.0, allowed-failure: false } # minimum supported - { ruby: 3.2, allowed-failure: false } - { ruby: 3.3, allowed-failure: false } # latest - - { ruby: ruby-head, allowed-failure: true } + - { ruby: ruby-head, allowed-failure: false } name: Test Ruby ${{ matrix.entry.ruby }} steps: - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index 9029bf307..29d622d4b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ pkg .rvmrc .bundle .byebug_history +Gemfile.lock diff --git a/.ruby-version b/.ruby-version index 15a279981..a0891f563 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -3.3.0 +3.3.4 diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 61acc767d..000000000 --- a/Gemfile.lock +++ /dev/null @@ -1,75 +0,0 @@ -GIT - remote: https://github.com/Shopify/liquid-c.git - revision: 5a786af7284df55e013ea20551c4b688d02e8326 - ref: main - specs: - liquid-c (4.2.0) - liquid (>= 5.0.1) - -PATH - remote: . 
- specs: - liquid (5.6.0.alpha) - -GEM - remote: https://rubygems.org/ - specs: - ast (2.4.2) - base64 (0.2.0) - benchmark-ips (2.13.0) - json (2.7.2) - language_server-protocol (3.17.0.3) - memory_profiler (1.0.1) - minitest (5.22.3) - parallel (1.24.0) - parser (3.3.0.5) - ast (~> 2.4.1) - racc - racc (1.7.3) - rainbow (3.1.1) - rake (13.2.1) - regexp_parser (2.9.0) - rexml (3.2.6) - rubocop (1.61.0) - json (~> 2.3) - language_server-protocol (>= 3.17.0) - parallel (~> 1.10) - parser (>= 3.3.0.2) - rainbow (>= 2.2.2, < 4.0) - regexp_parser (>= 1.8, < 3.0) - rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.30.0, < 2.0) - ruby-progressbar (~> 1.7) - unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.31.2) - parser (>= 3.3.0.4) - rubocop-performance (1.19.1) - rubocop (>= 1.7.0, < 2.0) - rubocop-ast (>= 0.4.0) - rubocop-shopify (2.12.0) - rubocop (~> 1.44) - ruby-progressbar (1.13.0) - stackprof (0.2.26) - terminal-table (3.0.2) - unicode-display_width (>= 1.1.1, < 3) - unicode-display_width (2.5.0) - -PLATFORMS - ruby - -DEPENDENCIES - base64 - benchmark-ips - liquid! - liquid-c! - memory_profiler - minitest - rake (~> 13.0) - rubocop (~> 1.61.0) - rubocop-performance - rubocop-shopify (~> 2.12.0) - stackprof - terminal-table - -BUNDLED WITH - 2.5.7 diff --git a/Rakefile b/Rakefile index 889fc722e..82588eda1 100755 --- a/Rakefile +++ b/Rakefile @@ -81,6 +81,14 @@ namespace :benchmark do task :strict do ruby "./performance/benchmark.rb strict" end + + desc "Run unit benchmarks" + task :unit do + Dir["./performance/unit/*_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end + end end namespace :profile do diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 4ce2bc7b9..9c392fad5 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -1,8 +1,9 @@ # frozen_string_literal: true require "strscan" + module Liquid - class Lexer + class Lexer1 SPECIALS = { '|' => :pipe, '.' 
# Byte-driven expression lexer.
#
# Instead of trying a list of regexes at every position, it peeks at the next
# byte and uses array lookup tables (indexed by byte value) to decide what to
# scan. It relies on StringScanner#peek_byte / #scan_byte, so it can only be
# used with a strscan that provides them.
#
# The token stream is meant to match the regex-based lexer's: an array of
# [type, text] pairs terminated by [:end_of_string].
class Lexer2
  CLOSE_ROUND = [:close_round, ")"].freeze
  CLOSE_SQUARE = [:close_square, "]"].freeze
  COLON = [:colon, ":"].freeze
  COMMA = [:comma, ","].freeze
  COMPARISON_NOT_EQUAL = [:comparison, "!="].freeze
  # Misspelled name from the first version; kept so existing references resolve.
  COMPARISION_NOT_EQUAL = COMPARISON_NOT_EQUAL
  COMPARISON_CONTAINS = [:comparison, "contains"].freeze
  COMPARISON_EQUAL = [:comparison, "=="].freeze
  COMPARISON_GREATER_THAN = [:comparison, ">"].freeze
  COMPARISON_GREATER_THAN_OR_EQUAL = [:comparison, ">="].freeze
  COMPARISON_LESS_THAN = [:comparison, "<"].freeze
  COMPARISON_LESS_THAN_OR_EQUAL = [:comparison, "<="].freeze
  COMPARISON_NOT_EQUAL_ALT = [:comparison, "<>"].freeze
  CONTAINS = /contains(?=\s)/
  DASH = [:dash, "-"].freeze
  DOT = [:dot, "."].freeze
  DOTDOT = [:dotdot, ".."].freeze
  DOT_ORD = ".".ord
  DOUBLE_STRING_LITERAL = /"[^\"]*"/
  EOS = [:end_of_string].freeze
  IDENTIFIER = /[a-zA-Z_][\w-]*\??/
  NUMBER_LITERAL = /-?\d+(\.\d+)?/
  OPEN_ROUND = [:open_round, "("].freeze
  OPEN_SQUARE = [:open_square, "["].freeze
  PIPE = [:pipe, "|"].freeze
  QUESTION = [:question, "?"].freeze
  RUBY_WHITESPACE = [" ", "\t", "\r", "\n", "\f"].freeze
  SINGLE_STRING_LITERAL = /'[^\']*'/
  # Same lookahead the CONTAINS pattern uses: one whitespace character.
  WHITESPACE = /\s/
  WHITESPACE_OR_NOTHING = /\s*/

  # First byte of a comparison operator => table of second bytes => token.
  # The whitespace entries for "<" and ">" consume the following whitespace
  # byte together with the operator.
  COMPARISON_JUMP_TABLE = [].tap do |table|
    table["=".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_EQUAL
      sub_table.freeze
    end
    table["!".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_NOT_EQUAL
      sub_table.freeze
    end
    table["<".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL
      sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT
      RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_LESS_THAN }
      sub_table.freeze
    end
    table[">".ord] = [].tap do |sub_table|
      sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL
      RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_GREATER_THAN }
      sub_table.freeze
    end
    table.freeze
  end

  # Operators that are complete on their own. Used when the byte after "<" or
  # ">" does not extend the operator (a digit, a quote, end of input, ...).
  SINGLE_COMPARISON_TABLE = [].tap do |table|
    table["<".ord] = COMPARISON_LESS_THAN
    table[">".ord] = COMPARISON_GREATER_THAN
    table.freeze
  end

  # First byte => [token type, pattern to scan with].
  NEXT_MATCHER_JUMP_TABLE = [].tap do |table|
    "a".upto("z") do |c|
      table[c.ord] = [:id, IDENTIFIER].freeze
      table[c.upcase.ord] = [:id, IDENTIFIER].freeze
    end
    table["_".ord] = [:id, IDENTIFIER].freeze

    "0".upto("9") do |c|
      table[c.ord] = [:number, NUMBER_LITERAL].freeze
    end
    # Never consulted by #tokenize: "-" is found in SPECIAL_TABLE first, and
    # negative numbers are handled there.
    table["-".ord] = [:number, NUMBER_LITERAL].freeze

    table["'".ord] = [:string, SINGLE_STRING_LITERAL].freeze
    table["\"".ord] = [:string, DOUBLE_STRING_LITERAL].freeze
    table.freeze
  end

  # Single-byte punctuation => token.
  SPECIAL_TABLE = [].tap do |table|
    table["|".ord] = PIPE
    table[".".ord] = DOT
    table[":".ord] = COLON
    table[",".ord] = COMMA
    table["[".ord] = OPEN_SQUARE
    table["]".ord] = CLOSE_SQUARE
    table["(".ord] = OPEN_ROUND
    table[")".ord] = CLOSE_ROUND
    table["?".ord] = QUESTION
    table["-".ord] = DASH
    table.freeze
  end

  # Bytes of the ASCII digits => true.
  NUMBER_TABLE = [].tap do |table|
    "0".upto("9") do |c|
      table[c.ord] = true
    end
    table.freeze
  end

  # @param input [String] the expression source to tokenize
  def initialize(input)
    @ss = StringScanner.new(input)
  end

  # Splits the input into tokens.
  #
  # @return [Array<Array>] [type, text] pairs followed by [:end_of_string]
  # @raise [SyntaxError] when a character cannot start any token
  def tokenize
    @output = []

    until @ss.eos?
      @ss.skip(WHITESPACE_OR_NOTHING)

      break if @ss.eos?

      peeked = @ss.peek_byte

      if (special = SPECIAL_TABLE[peeked])
        @ss.scan_byte
        # nil when the special character was the last byte of the input;
        # must not be used as an array index.
        following = @ss.peek_byte

        if special == DOT && following == DOT_ORD
          # ".." range operator
          @ss.scan_byte
          @output << DOTDOT
        elsif special == DASH && following && NUMBER_TABLE[following]
          # Negative number: step back onto the "-" and scan it with the digits.
          @ss.pos -= 1
          @output << [:number, @ss.scan(NUMBER_LITERAL)]
        else
          @output << special
        end
      elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
        @ss.scan_byte
        following = @ss.peek_byte

        if following && (found = sub_table[following])
          @output << found
          @ss.scan_byte
        elsif (single = SINGLE_COMPARISON_TABLE[peeked])
          # "<" / ">" directly followed by an operand or by end of input.
          # The following byte is left for the next iteration.
          @output << single
        else
          # A lone "=" or "!" is not an operator.
          raise SyntaxError, "Unexpected character #{peeked.chr}"
        end
      else
        type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

        if type && (t = @ss.scan(pattern))
          # "contains" is an operator only when whitespace follows it;
          # otherwise it is an ordinary identifier.
          @output << if type == :id && t == "contains" && @ss.match?(WHITESPACE)
            COMPARISON_CONTAINS
          else
            [type, t]
          end
        else
          # Nothing has been consumed yet, so getch yields the whole offending
          # character rather than only its first byte.
          raise SyntaxError, "Unexpected character #{@ss.getch}"
        end
      end
    end

    @output << EOS
  end
end
# frozen_string_literal: true

# Unit benchmark for expression lexing.
#
# First checks that Liquid::Lexer1 and Liquid::Lexer2 produce the same tokens
# for every sample expression (aborting on the first difference), then
# measures the tokenizing throughput of both with benchmark-ips.

require "benchmark/ips"

require 'liquid'

# Turn YJIT on only where this Ruby can do so at runtime; the constant or the
# method may be missing on other builds and versions.
RubyVM::YJIT.enable if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable)

# Lexer2 is built on StringScanner#scan_byte; without it there is nothing to
# compare, so skip instead of failing with NoMethodError inside the lexer.
unless StringScanner.method_defined?(:scan_byte)
  warn "Skipping lexer benchmark: StringScanner#scan_byte is not available"
  exit
end

EXPRESSIONS = [
  "foo[1..2].baz",
  "12.0",
  "foo.bar.based",
  "21 - 62",
  "foo.bar.baz",
  "foo > 12",
  "foo < 12",
  "foo <= 12",
  "foo >= 12",
  "foo <> 12",
  "foo == 12",
  "foo != 12",
  "foo contains 12",
  "foo contains 'bar'",
  "foo != 'bar'",
  "'foo' contains 'bar'",
  '234089',
  "foo | default: -1",
].freeze

# Parity check: timings are only meaningful if both lexers agree.
EXPRESSIONS.each do |expr|
  lexer_1_result = Liquid::Lexer1.new(expr).tokenize
  lexer_2_result = Liquid::Lexer2.new(expr).tokenize

  next if lexer_1_result == lexer_2_result

  warn "Lexer1 and Lexer2 results are different for expression: #{expr}"
  warn "expected: #{lexer_1_result}"
  warn "got: #{lexer_2_result}"
  abort
end

Benchmark.ips do |x|
  x.config(time: 10, warmup: 5)

  x.report("Liquid::Lexer1#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer1.new(expr)
      l.tokenize
    end
  end

  x.report("Liquid::Lexer2#tokenize") do
    EXPRESSIONS.each do |expr|
      l = Liquid::Lexer2.new(expr)
      l.tokenize
    end
  end

  x.compare!
end