Commit

Make Liquid::C::Tokenizer#shift private so it is only used for testing (#140)

The method is made private since it won't be supported for liquid template (de)serialization.
dylanahsmith authored Jan 7, 2021
1 parent bd5ef13 commit 83e41df
Showing 3 changed files with 43 additions and 40 deletions.
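
As a quick illustration (not part of the commit), here is a minimal Ruby sketch of the effect of the change, assuming liquid-c is built and enabled; the exact NoMethodError message depends on the Ruby version:

    require "liquid/c"

    tokenizer = Liquid::C::Tokenizer.new("{{ greeting }}", 1, false)

    # Direct calls now fail, since shift is registered with rb_define_private_method.
    begin
      tokenizer.shift
    rescue NoMethodError => e
      puts e.message # e.g. private method `shift' called for an instance of Liquid::C::Tokenizer
    end

    # Tests can still exercise the internal token representation explicitly:
    tokenizer.send(:shift) # => next raw token string, or nil once the source is exhausted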
ext/liquid_c/tokenizer.c (2 changes: 1 addition & 1 deletion)

@@ -287,12 +287,12 @@ void liquid_define_tokenizer()

rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3);
rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);

// For testing the internal token representation.
rb_define_private_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
}

lib/liquid/c.rb (52 changes: 27 additions & 25 deletions)

@@ -41,22 +41,18 @@ class Tokenizer
end
end

Liquid::Tokenizer.class_eval do
def self.new(source, line_numbers = false, line_number: nil, for_liquid_tag: false)
source = source.to_s
if Liquid::C.enabled && source.bytesize <= Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE
source = source.encode(Encoding::UTF_8)
Liquid::C::Tokenizer.new(source, line_number || (line_numbers ? 1 : 0), for_liquid_tag)
Liquid::Raw.class_eval do
alias_method :ruby_parse, :parse

def parse(tokenizer)
if parse_context.liquid_c_nodes_disabled?
ruby_parse(tokenizer)
else
super
c_parse(tokenizer)
end
end
end

Liquid::Raw.class_eval do
alias_method :ruby_parse, :parse
end

Liquid::ParseContext.class_eval do
class << self
attr_accessor :liquid_c_nodes_disabled
@@ -73,34 +73,42 @@ def new_block_body
end
end

alias_method :ruby_new_tokenizer, :new_tokenizer

def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false)
unless liquid_c_nodes_disabled?
source = source.to_s
if source.bytesize <= Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE
source = source.encode(Encoding::UTF_8)
return Liquid::C::Tokenizer.new(source, start_line_number || 0, for_liquid_tag)
else
@liquid_c_nodes_disabled = true
end
end

ruby_new_tokenizer(source, start_line_number: start_line_number, for_liquid_tag: for_liquid_tag)
end

# @api private
def liquid_c_nodes_disabled?
# Liquid::Profiler exposes the internal parse tree that we don't want to build when
# parsing with liquid-c, so consider liquid-c to be disabled when using it.
# Also, some templates are parsed before the profiler is running, on which case we
# provide the `disable_liquid_c_nodes` option to enable the Ruby AST to be produced
# so the profiler can use it on future runs.
@liquid_c_nodes_disabled ||= !Liquid::C.enabled || @template_options[:profile] ||
return @liquid_c_nodes_disabled if defined?(@liquid_c_nodes_disabled)
@liquid_c_nodes_disabled = !Liquid::C.enabled || @template_options[:profile] ||
@template_options[:disable_liquid_c_nodes] || self.class.liquid_c_nodes_disabled
end

# @api private
attr_writer :liquid_c_nodes_disabled
end

module Liquid
module C
module DocumentClassPatch
def parse(tokenizer, parse_context)
if tokenizer.is_a?(Liquid::C::Tokenizer)
if tokenizer.is_a?(Liquid::C::Tokenizer) && parse_context[:bug_compatible_whitespace_trimming]
# Temporary to test rollout of the fix for this bug
if parse_context[:bug_compatible_whitespace_trimming]
tokenizer.bug_compatible_whitespace_trimming!
end
else
# Liquid::Tokenizer.new may return a Liquid::Tokenizer if the source is too large
# to be supported, so indicate in the parse context that the liquid VM won't be used
parse_context.liquid_c_nodes_disabled = true
tokenizer.bug_compatible_whitespace_trimming!
end
super
end
@@ -237,12 +241,10 @@ def enabled=(value)
Liquid::Context.send(:alias_method, :evaluate, :c_evaluate)
Liquid::Context.send(:alias_method, :find_variable, :c_find_variable_kwarg)
Liquid::Context.send(:alias_method, :strict_variables=, :c_strict_variables=)
Liquid::Raw.send(:alias_method, :parse, :c_parse)
else
Liquid::Context.send(:alias_method, :evaluate, :ruby_evaluate)
Liquid::Context.send(:alias_method, :find_variable, :ruby_find_variable)
Liquid::Context.send(:alias_method, :strict_variables=, :ruby_strict_variables=)
Liquid::Raw.send(:alias_method, :parse, :ruby_parse)
end
end
end
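As a usage sketch (not part of the commit) of the patched ParseContext#new_tokenizer above, assuming liquid-c is enabled; MAX_SOURCE_BYTE_SIZE is internal to the extension and is referenced here only to force the fallback path:

    require "liquid/c"

    parse_context = Liquid::ParseContext.new

    # Small sources get the C tokenizer and keep liquid-c node building enabled.
    small = parse_context.new_tokenizer("{{ x }}")
    small.class                            # => Liquid::C::Tokenizer
    parse_context.liquid_c_nodes_disabled? # => false

    # Oversized sources fall back to the pure-Ruby tokenizer and disable
    # liquid-c nodes for the rest of this parse.
    huge = parse_context.new_tokenizer("a" * (Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE + 1))
    huge.class                             # => Liquid::Tokenizer
    parse_context.liquid_c_nodes_disabled? # => true

    # Profiled parses take the Ruby path as well, since the profiler needs the Ruby parse tree.
    Liquid::ParseContext.new(profile: true).liquid_c_nodes_disabled? # => true
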
test/unit/tokenizer_test.rb (29 changes: 15 additions & 14 deletions)

@@ -4,8 +4,8 @@

class TokenizerTest < Minitest::Test
def test_tokenizer_nil
tokenizer = Liquid::Tokenizer.new(nil)
assert_nil(tokenizer.shift)
tokenizer = new_tokenizer(nil)
assert_nil(tokenizer.send(:shift))
end

def test_tokenize_strings
@@ -58,11 +58,11 @@ def test_utf8_encoded_source

def test_utf8_compatible_source
source = String.new('ascii', encoding: Encoding::ASCII)
tokenizer = Liquid::Tokenizer.new(source)
output = tokenizer.shift
tokenizer = new_tokenizer(source)
output = tokenizer.send(:shift)
assert_equal(Encoding::UTF_8, output.encoding)
assert_equal(source, output)
assert_nil(tokenizer.shift)
assert_nil(tokenizer.send(:shift))
end

def test_non_utf8_compatible_source
@@ -84,26 +84,27 @@ def test_source_too_large
assert_match(/Source too large, max \d+ bytes/, err.message)

# ruby patch fallback
liquid_c_tokenizer = Liquid::Tokenizer.new(max_length_source)
parse_context = Liquid::ParseContext.new
liquid_c_tokenizer = parse_context.new_tokenizer(max_length_source)
assert_instance_of(Liquid::C::Tokenizer, liquid_c_tokenizer)
fallback_tokenizer = Liquid::Tokenizer.new(too_large_source)
assert_instance_of(Liquid::Tokenizer, fallback_tokenizer)
refute(parse_context.liquid_c_nodes_disabled?)

# Document.parse patch parse context update
parse_context = Liquid::ParseContext.new
refute(parse_context.liquid_c_nodes_disabled?)
Liquid::Document.parse(liquid_c_tokenizer, parse_context)
refute(parse_context.liquid_c_nodes_disabled?)
Liquid::Document.parse(fallback_tokenizer, parse_context)
fallback_tokenizer = parse_context.new_tokenizer(too_large_source)
assert_instance_of(Liquid::Tokenizer, fallback_tokenizer)
assert_equal(true, parse_context.liquid_c_nodes_disabled?)
end

private

def new_tokenizer(source, parse_context: Liquid::ParseContext.new)
parse_context.new_tokenizer(source)
end

def tokenize(source, for_liquid_tag: false, trimmed: false)
tokenizer = Liquid::C::Tokenizer.new(source, 1, for_liquid_tag)
tokens = []
while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.shift)
while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.send(:shift))
tokens << t
end
tokens
