Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Proposal] Scanning Tokenizer with Improved String support #1174

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion lib/liquid/tokenizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,84 @@ def tokenize

return @source.split("\n") if @for_liquid_tag

tokens = @source.split(TemplateParser)
tokens = tokenize_new(@source)
# tokens = @source.split(TemplateParser)

# removes the rogue empty element at the beginning of the array
tokens.shift if tokens[0]&.empty?

tokens
end

# Delimiter strings the scanning tokenizer reacts to.
T_TAG_OPEN = "{%"
T_VAR_OPEN = "{{"
T_SIN_QUOT = "'"
T_DOU_QUOT = '"'
T_TAG_CLOS = "%}"
T_VAR_CLOS = "}}"
T_VAR_CLO2 = "}" # a lone brace is also accepted as a variable terminator

# Scanner states. The numeric order matters: `state <= S_VAR` is used to mean
# "not inside a quoted string", the only situation in which `{%` / `{{` may
# start a new token.
S_NIL = 0     # outside any tag or variable
S_TAG = 1     # inside a tag, not in a string
S_VAR = 2     # inside a variable, not in a string
S_TAG_SIN = 3 # inside a single-quoted string within a tag
S_TAG_DOU = 4 # inside a double-quoted string within a tag
S_VAR_SIN = 5 # inside a single-quoted string within a variable
S_VAR_DOU = 6 # inside a double-quoted string within a variable

# quote character => { current state => state after seeing that quote }.
# A quote opens a string from the plain tag/variable state and closes it only
# from the string state opened by the same kind of quote; every other
# (quote, state) pair is absent, meaning the quote is ordinary text there.
QUOTE_TRANSITIONS = {
  T_SIN_QUOT => {
    S_TAG => S_TAG_SIN, S_TAG_SIN => S_TAG,
    S_VAR => S_VAR_SIN, S_VAR_SIN => S_VAR,
  }.freeze,
  T_DOU_QUOT => {
    S_TAG => S_TAG_DOU, S_TAG_DOU => S_TAG,
    S_VAR => S_VAR_DOU, S_VAR_DOU => S_VAR,
  }.freeze,
}.freeze

# Splits a template into raw-text, tag (`{% ... %}`) and variable
# (`{{ ... }}` or `{{ ... }`) tokens, treating delimiters that appear inside
# single- or double-quoted strings within a tag/variable as plain text.
#
# An opening delimiter seen while already inside an unquoted tag/variable
# flushes the unfinished token and starts a new one.
#
# @param source [String] template source to scan
# @return [Array<String>] tokens in source order. The text accumulated before
#   every opening delimiter is emitted even when it is empty (so a source that
#   starts with a tag yields a leading ""); trailing text is emitted only when
#   non-empty. An empty source yields [].
def tokenize_new(source)
  tokens = []
  state = S_NIL
  buffer = +""

  source.split(/({%|{{|"|'|}}|%}|})/).each do |piece|
    opened_state =
      if piece == T_TAG_OPEN then S_TAG
      elsif piece == T_VAR_OPEN then S_VAR
      end

    if opened_state && state <= S_VAR
      # Flush whatever came before and begin a new tag/variable token.
      tokens << buffer
      buffer = +piece
      state = opened_state
      next
    end

    # Everything else is appended in place; `<<` avoids re-copying the whole
    # buffer for every piece the way `+=` does.
    buffer << piece

    if (toggled = QUOTE_TRANSITIONS.dig(piece, state))
      state = toggled
    elsif (state == S_TAG && piece == T_TAG_CLOS) ||
          (state == S_VAR && (piece == T_VAR_CLOS || piece == T_VAR_CLO2))
      tokens << buffer
      buffer = +""
      state = S_NIL
    end
  end

  tokens << buffer unless buffer.empty?
  tokens
end
end
end
19 changes: 19 additions & 0 deletions test/integration/tags/assign_tag_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# frozen_string_literal: true

require 'test_helper'

# Integration tests for the `assign` tag, covering quoted string values that
# themselves contain tag delimiters.
class AssignTagTest < Minitest::Test
  include Liquid

  # A plain quoted string is assigned and rendered back.
  def test_assign
    template = "{% assign foo = 'monkey' %}{{ foo }}"
    assert_template_result('monkey', template)
  end

  # A quoted string containing a complete `{% ... %}` sequence is rendered verbatim.
  def test_string_with_end_tag
    template = "{% assign string = '{% quoted %}' %}{{ string }}"
    assert_template_result("{% quoted %}", template)
  end

  # A quoted `{% endraw %}` is assigned as text and rendered back.
  def test_liquid_issue_701
    template = "{% assign endraw = '{% endraw %}' %} contents: _{{endraw}}_"
    assert_template_result(" contents: _{% endraw %}_", template)
  end
end
8 changes: 7 additions & 1 deletion test/integration/tags/raw_tag_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ def test_open_tag_in_raw
assert_template_result ' Foobar {% {% {% ', '{% raw %} Foobar {% {% {% {% endraw %}'
assert_template_result ' test {% raw %} {% endraw %}', '{% raw %} test {% raw %} {% {% endraw %}endraw %}'
assert_template_result ' Foobar {{ invalid 1', '{% raw %} Foobar {{ invalid {% endraw %}{{ 1 }}'
assert_template_result ' Foobar {{ invalid 12', '{% raw %} Foobar {{ invalid {% endraw %}{{ 1 }}{{ 2 }}'
assert_template_result ' Foobar {{ invalid 1', '{% raw %} Foobar {{ invalid {% endraw %}{{ 1 }}'
end

# A tag nested inside a variable inside a `raw` block is rendered verbatim.
def test_nested_tag_in_raw
  expected = '{{ {% test %} }}'
  template = '{% raw %}{{ {% test %} }}{% endraw %}'
  assert_template_result(expected, template)
end

# Checks, via assert_match_syntax_error, the error message patterns expected
# for unterminated or malformed `raw` tags.
def test_invalid_raw
assert_match_syntax_error(/tag was never closed/, '{% raw %} foo')
# NOTE(review): the next two assertions apply different message patterns to
# the same template; they look like the removed and added sides of a diff
# hunk — confirm which one belongs in the final file.
assert_match_syntax_error(/Valid syntax/, '{% raw } foo {% endraw %}')
assert_match_syntax_error(/was not properly terminated/, '{% raw } foo {% endraw %}')
assert_match_syntax_error(/Valid syntax/, '{% raw } foo %}{% endraw %}')
end
end
5 changes: 5 additions & 0 deletions test/integration/template_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -361,4 +361,9 @@ def test_using_range_literal_works_as_expected
result = t.render('x' => 1, 'y' => 5)
assert_equal '12345', result
end

# A quoted "{}" renders as "{}" both directly in a variable and after being
# assigned in a tag.
def test_curly_braces
  [
    "{{ '{}' }}",
    "{% assign test = '{}' %}{{ test }}",
  ].each do |template|
    assert_template_result("{}", template)
  end
end
end
17 changes: 17 additions & 0 deletions test/integration/variable_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,21 @@ def test_multiline_variable
# A Symbol value bound to a variable is rendered as its text.
def test_render_symbol
  template = '{{ foo }}'
  assert_template_result('bar', template, 'foo' => :bar)
end

# Single curly braces inside quoted filter arguments do not end the variable.
def test_quoted_single_curly_braces
  template = "{{ variable | prepend: '{' | append: '}' }}"
  assert_template_result("{user}", template, 'variable' => 'user')
end

# A single-quoted JSON-like string containing nested braces (including a
# closing "}}") is rendered unchanged.
def test_string_with_curly_brackets
  expected = '{ "key": { "nested": "value" }}'
  template = "{{ '#{expected}' }}"
  assert_template_result(expected, template)
end

# Variable delimiters inside quoted strings are treated as text, so they can
# be passed to and replaced by a filter.
def test_liquid_issue_344
  template = "{{ 'blah {{ yy }}' | replace: '{{', 'xx' }}"
  assert_template_result("blah xx yy }}", template)
end

# A lone "}" inside quoted strings does not terminate the variable.
def test_liquid_issue_213
  template = "{{ 'blah}' | remove: '}' }}"
  assert_template_result("blah", template)
end
end
61 changes: 61 additions & 0 deletions test/unit/tokenizer_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,67 @@ def test_calculate_line_numbers_per_token_with_profiling
assert_equal [1, 1, 3], tokenize_line_numbers(" {{\n funk \n}} ")
end

# Pins the exact token arrays expected for tags and variables whose quoted
# strings contain opening/closing delimiters, for both quote styles and for
# variables terminated by a single "}".
def test_tokenize_quirks
# Bare tag, with and without surrounding text.
assert_equal ['{%comment%}'], tokenize('{%comment%}')
assert_equal [' ', '{%comment%}', ' '], tokenize(' {%comment%} ')

# Tags whose double-quoted strings contain delimiters.
assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ')
assert_equal [' ', '{% "{% comment" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "{% comment" %} {% endcomment %} ')
assert_equal [' ', '{% "{% comment %}" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "{% comment %}" %} {% endcomment %} ')
assert_equal [' ', '{% "comment %}" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "comment %}" %} {% endcomment %} ')
assert_equal [' ', '{% "{{ comment" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "{{ comment" %} {% endcomment %} ')
assert_equal [' ', '{% "{{ comment }}" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "{{ comment }}" %} {% endcomment %} ')
assert_equal [' ', '{% "comment }}" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "comment }}" %} {% endcomment %} ')
assert_equal [' ', '{% "comment }" %}', ' ', '{% endcomment %}', ' '], tokenize(' {% "comment }" %} {% endcomment %} ')

# Tags whose single-quoted strings contain delimiters.
assert_equal [" ", "{%comment%}", " ", "{%endcomment%}", " "], tokenize(" {%comment%} {%endcomment%} ")
assert_equal [" ", "{% '{% comment' %}", " ", "{% endcomment %}", " "], tokenize(" {% '{% comment' %} {% endcomment %} ")
assert_equal [" ", "{% '{% comment %}' %}", " ", "{% endcomment %}", " "], tokenize(" {% '{% comment %}' %} {% endcomment %} ")
assert_equal [" ", "{% 'comment %}' %}", " ", "{% endcomment %}", " "], tokenize(" {% 'comment %}' %} {% endcomment %} ")
assert_equal [" ", "{% '{{ comment' %}", " ", "{% endcomment %}", " "], tokenize(" {% '{{ comment' %} {% endcomment %} ")
assert_equal [" ", "{% '{{ comment }}' %}", " ", "{% endcomment %}", " "], tokenize(" {% '{{ comment }}' %} {% endcomment %} ")
assert_equal [" ", "{% 'comment }}' %}", " ", "{% endcomment %}", " "], tokenize(" {% 'comment }}' %} {% endcomment %} ")
assert_equal [" ", "{% 'comment }' %}", " ", "{% endcomment %}", " "], tokenize(" {% 'comment }' %} {% endcomment %} ")

# Variables closed by "}}" whose double-quoted strings contain delimiters.
assert_equal [' ', '{{comment}}', ' ', '{{endcomment}}', ' '], tokenize(' {{comment}} {{endcomment}} ')
assert_equal [' ', '{{ "{{ comment" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "{{ comment" }} {{ endcomment }} ')
assert_equal [' ', '{{ "{{ comment }}" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "{{ comment }}" }} {{ endcomment }} ')
assert_equal [' ', '{{ "comment }}" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "comment }}" }} {{ endcomment }} ')
assert_equal [' ', '{{ "{{ comment" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "{{ comment" }} {{ endcomment }} ')
assert_equal [' ', '{{ "{{ comment }}" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "{{ comment }}" }} {{ endcomment }} ')
assert_equal [' ', '{{ "comment }}" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "comment }}" }} {{ endcomment }} ')
assert_equal [' ', '{{ "comment }" }}', ' ', '{{ endcomment }}', ' '], tokenize(' {{ "comment }" }} {{ endcomment }} ')

# Variables closed by "}}" whose single-quoted strings contain delimiters.
assert_equal [" ", "{{comment}}", " ", "{{endcomment}}", " "], tokenize(" {{comment}} {{endcomment}} ")
assert_equal [" ", "{{ '{% comment' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ '{% comment' }} {{ endcomment }} ")
assert_equal [" ", "{{ '{% comment }}' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ '{% comment }}' }} {{ endcomment }} ")
assert_equal [" ", "{{ 'comment }}' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ 'comment }}' }} {{ endcomment }} ")
assert_equal [" ", "{{ '{{ comment' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ '{{ comment' }} {{ endcomment }} ")
assert_equal [" ", "{{ '{{ comment }}' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ '{{ comment }}' }} {{ endcomment }} ")
assert_equal [" ", "{{ 'comment }}' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ 'comment }}' }} {{ endcomment }} ")
assert_equal [" ", "{{ 'comment }' }}", " ", "{{ endcomment }}", " "], tokenize(" {{ 'comment }' }} {{ endcomment }} ")

# Variables closed by a single "}" with double-quoted strings.
assert_equal [' ', '{{comment}', ' ', '{{endcomment}', ' '], tokenize(' {{comment} {{endcomment} ')
assert_equal [' ', '{{ "{% comment" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "{% comment" } {{ endcomment } ')
assert_equal [' ', '{{ "{% comment }" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "{% comment }" } {{ endcomment } ')
assert_equal [' ', '{{ "comment }" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "comment }" } {{ endcomment } ')
assert_equal [' ', '{{ "{{ comment" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "{{ comment" } {{ endcomment } ')
assert_equal [' ', '{{ "{{ comment }}" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "{{ comment }}" } {{ endcomment } ')
assert_equal [' ', '{{ "comment }}" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "comment }}" } {{ endcomment } ')
assert_equal [' ', '{{ "comment }" }', ' ', '{{ endcomment }', ' '], tokenize(' {{ "comment }" } {{ endcomment } ')

# Variables closed by a single "}" with single-quoted strings.
assert_equal [" ", "{{comment}", " ", "{{endcomment}", " "], tokenize(" {{comment} {{endcomment} ")
assert_equal [" ", "{{ '{{ comment' }", " ", "{{ endcomment }", " "], tokenize(" {{ '{{ comment' } {{ endcomment } ")
assert_equal [" ", "{{ '{{ comment }' }", " ", "{{ endcomment }", " "], tokenize(" {{ '{{ comment }' } {{ endcomment } ")
assert_equal [" ", "{{ 'comment }' }", " ", "{{ endcomment }", " "], tokenize(" {{ 'comment }' } {{ endcomment } ")
assert_equal [" ", "{{ '{{ comment' }", " ", "{{ endcomment }", " "], tokenize(" {{ '{{ comment' } {{ endcomment } ")
assert_equal [" ", "{{ '{{ comment }}' }", " ", "{{ endcomment }", " "], tokenize(" {{ '{{ comment }}' } {{ endcomment } ")
assert_equal [" ", "{{ 'comment }}' }", " ", "{{ endcomment }", " "], tokenize(" {{ 'comment }}' } {{ endcomment } ")
assert_equal [" ", "{{ 'comment }' }", " ", "{{ endcomment }", " "], tokenize(" {{ 'comment }' } {{ endcomment } ")

# Both quote styles in one variable, each quoting closing braces.
assert_equal ['{{funk | replace: "}", \'}}\' }}'], tokenize('{{funk | replace: "}", \'}}\' }}')
end

private

def tokenize(source)
Expand Down
6 changes: 6 additions & 0 deletions test/unit/variable_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ def test_variable_lookup_interface
assert_equal ['b', 'c'], lookup.lookups
end

# Closing braces inside quoted filter arguments are parsed as plain string
# arguments of the filter.
def test_filters_with_properly_quoted_curlies
  markup = "hello | replace: \"}\", '}}'"
  variable = create_variable(markup)
  expected_filters = [['replace', ['}', '}}']]]

  assert_equal(VariableLookup.new('hello'), variable.name)
  assert_equal(expected_filters, variable.filters)
end

private

def create_variable(markup, options = {})
Expand Down