Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branches Proof Of Concept #174

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ instruments*.trace
*.cpu
*.object
*.dSYM
.vscode/
vendor/
20 changes: 20 additions & 0 deletions dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: liquid-c

type: ruby

up:
- ruby: 3.1.1
- bundler

commands:
console:
desc: 'start a console'
run: bin/console
run:
desc: 'start the application'
run: bin/run
test:
syntax:
argument: file
optional: args...
run: bin/testunit
143 changes: 127 additions & 16 deletions ext/liquid_c/block.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#include "liquid.h"
#include "block.h"
#include "intutil.h"
#include "tokenizer.h"
#include "stringutil.h"
#include "vm.h"
#include "variable.h"
#include "context.h"
#include "parse_context.h"
#include "expression.h"
#include "vm_assembler.h"
#include <stdio.h>

Expand All @@ -15,24 +15,14 @@ static ID
intern_raise_missing_tag_terminator,
intern_is_blank,
intern_parse,
intern_new,
intern_square_brackets,
intern_unknown_tag_in_liquid_tag,
intern_ivar_nodelist;

static VALUE tag_registry;
static VALUE variable_placeholder = Qnil;

typedef struct tag_markup {
VALUE name;
VALUE markup;
} tag_markup_t;

typedef struct parse_context {
tokenizer_t *tokenizer;
VALUE tokenizer_obj;
VALUE ruby_obj;
} parse_context_t;

static void ensure_body_compiled(const block_body_t *body)
{
if (!body->compiled) {
Expand Down Expand Up @@ -192,7 +182,6 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_
case TOKEN_TAG:
{
const char *start = token.str_trimmed, *end = token.str_trimmed + token.len_trimmed;

// Imitate \s*(\w+)\s*(.*)? regex
const char *name_start = read_while(start, end, rb_isspace);
const char *name_end = read_while(name_start, end, is_id);
Expand Down Expand Up @@ -222,11 +211,28 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_
break;
}

VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding);
VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name);

const char *markup_start = read_while(name_end, end, rb_isspace);
VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding);
VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding);

if (name_len == 2 && strncmp(name_start, "if", 2) == 0) {
unknown_tag = parse_if_tag(markup, body, parse_context);
if (unknown_tag.name != Qnil) {
goto loop_break;
}
render_score_increment += 1;
body->as.intermediate.blank = false;
Comment on lines +223 to +224
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what these two lines do, so I might be updating them incorrectly.

break;
} else if (
Copy link
Member Author

@damnMeddlingKid damnMeddlingKid Feb 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of scrappy; it needs a better abstraction.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking of something along the lines of #96 so we could pick the right parsing handler from ruby land.

(name_len == 5 && strncmp(name_start, "elsif", 5) == 0)
||(name_len == 4 && strncmp(name_start, "else", 4) == 0)
|| (name_len == 5 && strncmp(name_start, "endif", 5) == 0)
) {
unknown_tag = (tag_markup_t) { tag_name, markup };
goto loop_break;
}

VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name);

if (tag_class == Qnil) {
unknown_tag = (tag_markup_t) { tag_name, markup };
Expand Down Expand Up @@ -262,6 +268,110 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_
return unknown_tag;
}

/*
 * Build a Liquid::Condition from the markup of an `if`/`elsif` tag.
 *
 * The markup is parsed as one expression, optionally followed by a comparison
 * operator and a second expression.
 *
 * Returns Qnil when `markup` is nil. Otherwise returns the result of
 * Liquid::Condition.new(left) when no comparison operator follows the first
 * expression, or Liquid::Condition.new(left, operator, right) when one does.
 *
 * TODO: only a single binary comparison is parsed; this needs to be extended
 * to support more conditions. Anything after the second operand is not
 * examined here.
 */
VALUE parse_single_binary_comparison(VALUE markup)
{
    if (NIL_P(markup)) {
        return Qnil;
    }

    StringValue(markup);
    char *markup_begin = RSTRING_PTR(markup);
    char *markup_end = markup_begin + RSTRING_LEN(markup);

    parser_t parser;
    init_parser(&parser, markup_begin, markup_end);

    VALUE left = internal_expression_parse(&parser);
    lexer_token_t comparison = parser_consume(&parser, TOKEN_COMPARISON);

    // A zero token type means no comparison operator was consumed: the
    // condition consists of the left-hand expression alone.
    if (!comparison.type) {
        return rb_funcall(cLiquidCondition, intern_new, 1, left);
    }

    VALUE operator_str = rb_enc_str_new(comparison.val, comparison.val_end - comparison.val, utf8_encoding);
    VALUE right = internal_expression_parse(&parser);
    return rb_funcall(cLiquidCondition, intern_new, 3, left, operator_str, right);
}

tag_markup_t parse_if_tag(VALUE markup, block_body_t *body, parse_context_t *parse_context) {
/*
1 parse expression into condition object
2 push OP_EVAL with condition object
3 push OP_BRANCH_UNLESS with placeholder address
4 recursively parse body
5 on else/elsif
- push OP_BRANCH with placeholder address, this will make previous blocks jump to endif once they are done
- resolve the address for the previous OP_BRANCH_UNLESS
6 on endif resolve the address for any OP_BRANCH/OP_BRANCH_UNLESS
*/
vm_assembler_t* body_code = body->as.intermediate.code;
VALUE condition_obj = parse_single_binary_comparison(markup);
vm_assembler_add_op_with_constant(body_code, condition_obj, OP_EVAL_CONDITION);

ptrdiff_t exit_branches[10];
Copy link
Member Author

@damnMeddlingKid damnMeddlingKid Feb 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This array is used to keep track of all the branches that need to jump to "endif". This needs to use a dynamically sized list here, something like cbuffer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The offsets get stored with the instruction in 16 bits. assuming this is ok for a jump offset, otherwise need to store them somewhere else.

ptrdiff_t* exit_start = exit_branches;
ptrdiff_t* exit_end = exit_branches;

ptrdiff_t open_branch = vm_assembler_open_branch(body_code, OP_BRANCH_UNLESS);
ptrdiff_t jump;

tag_markup_t unknown_tag;

while(true) {
unknown_tag = internal_block_body_parse(body, parse_context);

if(unknown_tag.name != Qnil) {
StringValue(unknown_tag.name);
char *name_start = RSTRING_PTR(unknown_tag.name);
int name_len = RSTRING_LEN(unknown_tag.name);

if (name_len == 4 && strncmp(name_start, "else", 4) == 0) {
// Unconditionally branch to endif for the previous block
*exit_end++ = vm_assembler_open_branch(body_code, OP_BRANCH);

// Calculate the offset that would jump to here, this is where the <if> jumps to if it fails the condition.
jump = (ptrdiff_t) (body_code->instructions.data_end - body_code->instructions.data);
Copy link
Member Author

@damnMeddlingKid damnMeddlingKid Feb 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calculating indices since cbuffer can realloc.

jump = jump - open_branch - 1;

// Resolve the open branch from the <if> with the calculated offset.
vm_assembler_close_branch(body_code, open_branch, jump);
open_branch = -1;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using -1 as a sentinel value to indicate no open branch.

} else if(name_len == 5 && strncmp(name_start, "elsif", 5) == 0) {
// Unconditionally branch to endif for the previous block
*exit_end++ = vm_assembler_open_branch(body_code, OP_BRANCH);

// Calculate the offset that would jump to here, this is where the <if> jumps to if it fails the condition.
jump = (ptrdiff_t) (body_code->instructions.data_end - body_code->instructions.data);
jump = jump - open_branch - 1;

// Resolve the open branch from the <if> with the calculated offset.
vm_assembler_close_branch(body_code, open_branch, jump);
open_branch = -1;

// Start a new condition eval and branch for the elsif.
condition_obj = parse_single_binary_comparison(unknown_tag.markup);
vm_assembler_add_op_with_constant(body_code, condition_obj, OP_EVAL_CONDITION);
open_branch = vm_assembler_open_branch(body_code, OP_BRANCH_UNLESS);
} else if(name_len == 5 && strncmp(name_start, "endif", 5) == 0) {
ptrdiff_t jump_dest = (ptrdiff_t) (body_code->instructions.data_end - body_code->instructions.data);

// Resolve an open branch from an if/elsif.
if(open_branch != -1) {
jump = jump_dest - open_branch - 1;
vm_assembler_close_branch(body_code, open_branch, jump);
}

// Resolve all the open uncoditional branches.
while(exit_start < exit_end) {
jump = jump_dest - *exit_start - 1;
vm_assembler_close_branch(body_code, *exit_start, jump);
exit_start++;
}

return (tag_markup_t) { Qnil, Qnil };
} else {
return unknown_tag;
}
}
}
}

static void ensure_intermediate(block_body_t *body)
{
if (body->compiled) {
Expand Down Expand Up @@ -537,6 +647,7 @@ void liquid_define_block_body(void)
intern_raise_missing_tag_terminator = rb_intern("raise_missing_tag_terminator");
intern_is_blank = rb_intern("blank?");
intern_parse = rb_intern("parse");
intern_new = rb_intern("new");
intern_square_brackets = rb_intern("[]");
intern_unknown_tag_in_liquid_tag = rb_intern("unknown_tag_in_liquid_tag");
intern_ivar_nodelist = rb_intern("@nodelist");
Expand Down
13 changes: 13 additions & 0 deletions ext/liquid_c/block.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#if !defined(LIQUID_BLOCK_H)
#define LIQUID_BLOCK_H

#include "tokenizer.h"
#include "document_body.h"
#include "vm_assembler_pool.h"

Expand All @@ -23,7 +24,19 @@ typedef struct block_body {
} as;
} block_body_t;

/*
 * State handed to the block body parsing functions
 * (e.g. parse_if_tag takes a parse_context_t pointer).
 */
typedef struct parse_context {
/* Tokenizer the template tokens are read from. */
tokenizer_t *tokenizer;
/* NOTE(review): presumably the Ruby object that owns `tokenizer` - confirm. */
VALUE tokenizer_obj;
/* NOTE(review): presumably the Ruby-side parse context object - confirm. */
VALUE ruby_obj;
} parse_context_t;

/*
 * A tag that the block body parser did not handle itself, returned to the
 * caller. Both fields are Qnil when there is no such tag.
 */
typedef struct tag_markup {
/* Tag name as a Ruby string (e.g. "else", "endif"), or Qnil. */
VALUE name;
/* Text following the tag name as a Ruby string, or Qnil. */
VALUE markup;
} tag_markup_t;

void liquid_define_block_body(void);
tag_markup_t parse_if_tag(VALUE markup, block_body_t *body, parse_context_t *parse_context);

static inline uint8_t *block_body_instructions_ptr(block_body_header_t *body)
{
Expand Down
2 changes: 1 addition & 1 deletion ext/liquid_c/expression.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ VALUE expression_new(VALUE klass, expression_t **expression_ptr)
return obj;
}

static VALUE internal_expression_parse(parser_t *p)
VALUE internal_expression_parse(parser_t *p)
{
if (p->cur.type == TOKEN_EOS)
return Qnil;
Expand Down
1 change: 1 addition & 0 deletions ext/liquid_c/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ extern const rb_data_type_t expression_data_type;

void liquid_define_expression(void);

VALUE internal_expression_parse(parser_t *p);
VALUE expression_new(VALUE klass, expression_t **expression_ptr);
VALUE expression_evaluate(VALUE self, VALUE context);
VALUE internal_expression_evaluate(expression_t *expression, VALUE context);
Expand Down
3 changes: 2 additions & 1 deletion ext/liquid_c/extconf.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# frozen_string_literal: true

require "mkmf"
$CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
$CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g"
append_cflags("-fvisibility=hidden")
# In Ruby 2.6 and earlier, the Ruby headers did not have struct timespec defined
RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] if ENV['CC']
valid_headers = RbConfig::CONFIG["host_os"] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7")
pedantic = !ENV["LIQUID_C_PEDANTIC"].to_s.empty?
if pedantic && valid_headers
Expand Down
5 changes: 4 additions & 1 deletion ext/liquid_c/liquid.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ID id_call;
ID id_compile_evaluate;
ID id_ivar_line_number;

VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody, cLiquidCondition;
VALUE cLiquidVariableLookup, cLiquidRangeLookup;
VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;

Expand Down Expand Up @@ -70,6 +70,9 @@ RUBY_FUNC_EXPORTED void Init_liquid_c(void)
cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
rb_global_variable(&cLiquidBlockBody);

cLiquidCondition = rb_const_get(mLiquid, rb_intern("Condition"));
rb_global_variable(&cLiquidCondition);

cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
rb_global_variable(&cLiquidVariableLookup);

Expand Down
2 changes: 1 addition & 1 deletion ext/liquid_c/liquid.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ extern ID id_call;
extern ID id_compile_evaluate;
extern ID id_ivar_line_number;

extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody, cLiquidCondition;
extern VALUE cLiquidVariableLookup, cLiquidRangeLookup;
extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
extern rb_encoding *utf8_encoding;
Expand Down
33 changes: 33 additions & 0 deletions ext/liquid_c/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,36 @@ static VALUE vm_render_until_error(VALUE uncast_args)
resource_limits_increment_write_score(vm->context.resource_limits, output);
break;
}

case OP_EVAL_CONDITION:
{
constant_index = (ip[0] << 8) | ip[1];
constant = constants[constant_index];
ip += 2;
VALUE condition_eval = rb_funcall(constant, id_evaluate, 1, vm->context.self);
vm_stack_push(vm, RTEST(condition_eval));
break;
}

case OP_BRANCH_UNLESS:
{
VALUE condition_truthy = vm_stack_pop(vm);
if(!condition_truthy) {
constant_index = (ip[0] << 8) | ip[1];
ip += constant_index;
break;
}
ip += 2;
break;
}

case OP_BRANCH:
{
constant_index = (ip[0] << 8) | ip[1];
ip += constant_index;
break;
}

case OP_JUMP_FWD_W:
{
size_t size = bytes_to_uint24(ip);
Expand Down Expand Up @@ -465,6 +495,9 @@ void liquid_vm_next_instruction(const uint8_t **ip_ptr)
ip++;
break;

case OP_BRANCH:
case OP_BRANCH_UNLESS:
case OP_EVAL_CONDITION:
case OP_BUILTIN_FILTER:
case OP_PUSH_INT16:
case OP_PUSH_CONST:
Expand Down
Loading