From 624cec10c6a7844b1c32f37902efea6cfbba6e3b Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 30 Nov 2020 11:40:59 -0500 Subject: [PATCH 01/18] Implement deserialize --- ext/liquid_c/block.c | 187 +++++++++++++++++++------ ext/liquid_c/block.h | 5 + ext/liquid_c/document.c | 21 +++ ext/liquid_c/document.h | 7 + ext/liquid_c/document_body.c | 61 ++++++-- ext/liquid_c/document_body.h | 33 ++++- ext/liquid_c/liquid.c | 6 + ext/liquid_c/parse_context.c | 7 +- ext/liquid_c/parse_context.h | 3 + ext/liquid_c/serialize_parse_context.c | 76 ++++++++++ ext/liquid_c/serialize_parse_context.h | 22 +++ ext/liquid_c/tag_markup.c | 30 ++++ ext/liquid_c/tag_markup.h | 11 ++ ext/liquid_c/template.c | 52 +++++++ ext/liquid_c/template.h | 6 + lib/liquid/c.rb | 8 +- test/unit/template_test.rb | 22 +++ 17 files changed, 496 insertions(+), 61 deletions(-) create mode 100644 ext/liquid_c/document.c create mode 100644 ext/liquid_c/document.h create mode 100644 ext/liquid_c/serialize_parse_context.c create mode 100644 ext/liquid_c/serialize_parse_context.h create mode 100644 ext/liquid_c/template.c create mode 100644 ext/liquid_c/template.h create mode 100644 test/unit/template_test.rb diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 5eab0835..bf04ae6c 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -7,6 +7,7 @@ #include "variable.h" #include "context.h" #include "parse_context.h" +#include "serialize_parse_context.h" #include "vm_assembler.h" #include "tag_markup.h" #include @@ -44,6 +45,9 @@ static void block_body_mark(void *ptr) if (body->compiled) { document_body_entry_mark(&body->as.compiled.document_body_entry); rb_gc_mark(body->as.compiled.nodelist); + } else if (body->from_serialize) { + document_body_entry_mark(&body->as.serialize.document_body_entry); + rb_gc_mark(body->as.serialize.parse_context); } else { rb_gc_mark(body->as.intermediate.parse_context); if (body->as.intermediate.vm_assembler_pool) @@ -56,7 +60,7 @@ static void 
block_body_mark(void *ptr) static void block_body_free(void *ptr) { block_body_t *body = ptr; - if (!body->compiled) { + if (!body->compiled && !body->from_serialize) { // Free the assembler instead of recycling it because the vm_assembler_pool may have been GC'd vm_assembler_pool_free_assembler(body->as.intermediate.code); } @@ -67,7 +71,7 @@ static size_t block_body_memsize(const void *ptr) { const block_body_t *body = ptr; if (!ptr) return 0; - if (body->compiled) { + if (body->compiled || body->from_serialize) { return sizeof(block_body_t); } else { return sizeof(block_body_t) + vm_assembler_alloc_memsize(body->as.intermediate.code); @@ -88,6 +92,7 @@ static VALUE block_body_allocate(VALUE klass) VALUE obj = TypedData_Make_Struct(klass, block_body_t, &block_body_data_type, body); body->compiled = false; + body->from_serialize = false; body->obj = obj; body->tags = c_buffer_init(); body->as.intermediate.blank = true; @@ -104,18 +109,24 @@ static VALUE block_body_initialize(VALUE self, VALUE parse_context) block_body_t *body; BlockBody_Get_Struct(self, body); - body->as.intermediate.parse_context = parse_context; - - if (parse_context_document_body_initialized_p(parse_context)) { - body->as.intermediate.vm_assembler_pool = parse_context_get_vm_assembler_pool(parse_context); + if (is_serialize_parse_context_p(parse_context)) { + body->from_serialize = true; + body->as.serialize.document_body_entry = document_body_entry_init(); + body->as.serialize.parse_context = parse_context; } else { - parse_context_init_document_body(parse_context); - body->as.intermediate.root = true; - body->as.intermediate.vm_assembler_pool = parse_context_init_vm_assembler_pool(parse_context); - } + body->as.intermediate.parse_context = parse_context; + + if (parse_context_document_body_initialized_p(parse_context)) { + body->as.intermediate.vm_assembler_pool = parse_context_get_vm_assembler_pool(parse_context); + } else { + parse_context_init_document_body(parse_context); + 
body->as.intermediate.root = true; + body->as.intermediate.vm_assembler_pool = parse_context_init_vm_assembler_pool(parse_context); + } - body->as.intermediate.code = vm_assembler_pool_alloc_assembler(body->as.intermediate.vm_assembler_pool); - vm_assembler_add_leave(body->as.intermediate.code); + body->as.intermediate.code = vm_assembler_pool_alloc_assembler(body->as.intermediate.vm_assembler_pool); + vm_assembler_add_leave(body->as.intermediate.code); + } return Qnil; } @@ -139,6 +150,22 @@ static void block_body_push_tag_markup(block_body_t *body, VALUE parse_context, parse_context_set_parent_tag(parse_context, tag_markup); } +static void ensure_intermediate(block_body_t *body) +{ + if (body->compiled) { + rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is already compiled"); + } +} + +static void ensure_intermediate_not_parsing(block_body_t *body) +{ + ensure_intermediate(body); + + if (body->as.intermediate.code->parsing) { + rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is in a incompletely parsed state"); + } +} + static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *parse_context) { tokenizer_t *tokenizer = parse_context->tokenizer; @@ -258,7 +285,7 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars } VALUE tag_markup = tag_markup_new(token_start_line_number, tag_name, markup, false); - block_body_push_tag_markup(body, parse_context->ruby_obj, tag_markup); + parse_context_set_parent_tag(parse_context->ruby_obj, tag_markup); VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, tag_name, markup, parse_context->tokenizer_obj, parse_context->ruby_obj); @@ -271,11 +298,12 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars if (tokenizer->raw_tag_body) { if (tokenizer->raw_tag_body_len) { vm_assembler_add_write_raw(body->as.intermediate.code, tokenizer->raw_tag_body, - tokenizer->raw_tag_body_len); + tokenizer->raw_tag_body_len); } tokenizer->raw_tag_body = NULL; 
tokenizer->raw_tag_body_len = 0; } else { + vm_assembler_write_tag(body->as.intermediate.code, tag_markup); block_body_add_node(body, new_tag); } @@ -291,23 +319,80 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars return unknown_tag; } -static void ensure_intermediate(block_body_t *body) +typedef struct block_body_yield_tag_args { + block_body_t *body; + serialize_parse_context_t *serialize_context; + tag_markup_header_t *current_tag; +} block_body_yield_tag_args_t; + +static VALUE block_body_try_yield_tag(VALUE uncast_args) { - if (body->compiled) { - rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is already compiled"); - } + block_body_yield_tag_args_t *args = (block_body_yield_tag_args_t *)uncast_args; + tag_markup_header_t *current_tag = args->current_tag; + + serialize_parse_context_enter_tag(args->serialize_context, current_tag); + VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); + VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); + return rb_yield_values(2, tag_name, markup); } -static void ensure_intermediate_not_parsing(block_body_t *body) +static VALUE block_body_rescue_yield_tag(VALUE uncast_args, VALUE exception) +{ + block_body_yield_tag_args_t *args = (block_body_yield_tag_args_t *)uncast_args; + + serialize_parse_context_exit_tag(args->serialize_context, &args->body->as.serialize.document_body_entry, + args->current_tag); + rb_exc_raise(exception); +} + +static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj) { + assert(body->from_serialize); + assert(is_serialize_parse_context_p(parse_context_obj)); + ensure_intermediate(body); + if (body->as.serialize.parse_context != parse_context_obj) { + rb_raise(rb_eArgError, "Liquid::C::BlockBody#parse called with different parse context"); + } - if (body->as.intermediate.code->parsing) { - rb_raise(rb_eRuntimeError, 
"Liquid::C::BlockBody is in a incompletely parsed state"); + serialize_parse_context_t *serialize_context; + SerializeParseContext_Get_Struct(parse_context_obj, serialize_context); + + body->as.serialize.document_body_entry = serialize_context->current_entry; + + tag_markup_header_t *current_tag = serialize_context->current_tag; + while (current_tag) { + bool tag_unknown = TAG_UNKNOWN_P(current_tag); + + if (tag_unknown) { + block_body_yield_tag_args_t yield_args = { + .body = body, + .serialize_context = serialize_context, + .current_tag = current_tag + }; + return rb_rescue(block_body_try_yield_tag, (VALUE)&yield_args, block_body_rescue_yield_tag, (VALUE)&yield_args); + } else { + VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); + VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); + + VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name); + assert(RTEST(tag_class)); + + serialize_parse_context_enter_tag(serialize_context, current_tag); + VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, + tag_name, markup, tokenizer_obj, parse_context_obj); + serialize_parse_context_exit_tag(serialize_context, &body->as.serialize.document_body_entry, current_tag); + + c_buffer_write_ruby_value(&body->tags, new_tag); + } + + current_tag = tag_markup_get_next_tag(&body->as.serialize.document_body_entry, current_tag); } + + return rb_yield_values(2, Qnil, Qnil); } -static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj) +static VALUE block_body_parse_from_source(VALUE self, block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj) { parse_context_t parse_context = { .parent_tag = parse_context_get_parent_tag(parse_context_obj), @@ -315,8 +400,6 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte .ruby_obj = parse_context_obj, }; Tokenizer_Get_Struct(tokenizer_obj, parse_context.tokenizer); 
- block_body_t *body; - BlockBody_Get_Struct(self, body); ensure_intermediate_not_parsing(body); if (body->as.intermediate.parse_context != parse_context_obj) { @@ -333,6 +416,10 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte tag_name = tag_markup_get_tag_name(unknown_tag); markup = tag_markup_get_markup(unknown_tag); block_body_push_tag_markup(body, parse_context_obj, unknown_tag); + + if (RTEST(parse_context.parent_tag)) { + tag_markup_set_block_body(parse_context.parent_tag, self, body); + } } VALUE block_ret = rb_yield_values(2, tag_name, markup); @@ -345,6 +432,18 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte return block_ret; } +static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj) +{ + block_body_t *body; + BlockBody_Get_Struct(self, body); + + if (body->from_serialize) { + return block_body_parse_from_serialize(body, tokenizer_obj, parse_context_obj); + } else { + return block_body_parse_from_source(self, body, tokenizer_obj, parse_context_obj); + } +} + static VALUE block_body_freeze(VALUE self) { @@ -353,26 +452,32 @@ static VALUE block_body_freeze(VALUE self) if (body->compiled) return Qnil; - VALUE parse_context = body->as.intermediate.parse_context; - VALUE document_body = parse_context_get_document_body(parse_context); - - bool root = body->as.intermediate.root; - - vm_assembler_pool_t *assembler_pool = body->as.intermediate.vm_assembler_pool; - vm_assembler_t *assembler = body->as.intermediate.code; - bool blank = body->as.intermediate.blank; - uint32_t render_score = body->as.intermediate.render_score; - vm_assembler_t *code = body->as.intermediate.code; body->compiled = true; - body->as.compiled.nodelist = Qundef; - document_body_write_block_body(document_body, blank, render_score, code, &body->as.compiled.document_body_entry); - vm_assembler_pool_recycle_assembler(assembler_pool, assembler); - if (root) { - 
parse_context_remove_document_body(parse_context); - parse_context_remove_vm_assembler_pool(parse_context); + if (body->from_serialize) { + body->as.compiled.nodelist = Qundef; + } else { + VALUE parse_context = body->as.intermediate.parse_context; + VALUE document_body = parse_context_get_document_body(parse_context); + + bool root = body->as.intermediate.root; + + vm_assembler_pool_t *assembler_pool = body->as.intermediate.vm_assembler_pool; + vm_assembler_t *assembler = body->as.intermediate.code; + bool blank = body->as.intermediate.blank; + uint32_t render_score = body->as.intermediate.render_score; + vm_assembler_t *code = body->as.intermediate.code; + body->as.compiled.nodelist = Qundef; + document_body_write_block_body(document_body, blank, render_score, code, &body->as.compiled.document_body_entry); + vm_assembler_pool_recycle_assembler(assembler_pool, assembler); + + if (root) { + parse_context_remove_document_body(parse_context); + parse_context_remove_vm_assembler_pool(parse_context); + } } + rb_call_super(0, NULL); return Qnil; @@ -410,6 +515,8 @@ static VALUE block_body_remove_blank_strings(VALUE self) block_body_t *body; BlockBody_Get_Struct(self, body); + if (body->from_serialize) return Qnil; + ensure_intermediate_not_parsing(body); if (!body->as.intermediate.blank) { diff --git a/ext/liquid_c/block.h b/ext/liquid_c/block.h index 6e538bb2..1aceeb81 100644 --- a/ext/liquid_c/block.h +++ b/ext/liquid_c/block.h @@ -6,6 +6,7 @@ typedef struct block_body { bool compiled; + bool from_serialize; VALUE obj; c_buffer_t tags; @@ -14,6 +15,10 @@ typedef struct block_body { document_body_entry_t document_body_entry; VALUE nodelist; } compiled; + struct { + document_body_entry_t document_body_entry; + VALUE parse_context; + } serialize; struct { VALUE parse_context; vm_assembler_pool_t *vm_assembler_pool; diff --git a/ext/liquid_c/document.c b/ext/liquid_c/document.c new file mode 100644 index 00000000..ea91f46b --- /dev/null +++ b/ext/liquid_c/document.c @@ 
-0,0 +1,21 @@ +#include +#include "liquid.h" +#include "document.h" +#include "parse_context.h" +#include "document_body.h" + +static ID id_parse; +static VALUE cLiquidDocument; + +VALUE document_parse(VALUE tokenizer, VALUE parse_context) +{ + return rb_funcall(cLiquidDocument, id_parse, 2, tokenizer, parse_context); +} + +void liquid_define_document() +{ + id_parse = rb_intern("parse"); + + cLiquidDocument = rb_const_get(mLiquid, rb_intern("Document")); + rb_global_variable(&cLiquidDocument); +} diff --git a/ext/liquid_c/document.h b/ext/liquid_c/document.h new file mode 100644 index 00000000..2ed41573 --- /dev/null +++ b/ext/liquid_c/document.h @@ -0,0 +1,7 @@ +#ifndef LIQUID_DOCUMENT_H +#define LIQUID_DOCUMENT_H + +void liquid_define_document(); +VALUE document_parse(VALUE tokenizer, VALUE parse_context); + +#endif diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 74648160..8398124d 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -11,19 +11,28 @@ static void document_body_mark(void *ptr) { document_body_t *body = ptr; rb_gc_mark(body->constants); + if (!body->mutable) { + rb_gc_mark(body->as.immutable.serialize_str); + } } static void document_body_free(void *ptr) { document_body_t *body = ptr; - c_buffer_free(&body->buffer); + if (body->mutable) { + c_buffer_free(&body->as.mutable.buffer); + } xfree(body); } static size_t document_body_memsize(const void *ptr) { const document_body_t *body = ptr; - return sizeof(document_body_t) + c_buffer_size(&body->buffer); + size_t size = sizeof(document_body_t); + if (body->mutable) { + size += c_buffer_size(&body->as.mutable.buffer); + } + return size; } const rb_data_type_t document_body_data_type = { @@ -32,23 +41,34 @@ const rb_data_type_t document_body_data_type = { NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY }; -static VALUE document_body_allocate(VALUE klass) +#define DocumentBody_Get_Struct(obj, sval) TypedData_Get_Struct(obj, document_body_t, 
&document_body_data_type, sval) + +VALUE document_body_new_mutable_instance() { document_body_t *body; - - VALUE obj = TypedData_Make_Struct(klass, document_body_t, &document_body_data_type, body); + VALUE obj = TypedData_Make_Struct(cLiquidCDocumentBody, document_body_t, &document_body_data_type, body); body->self = obj; body->constants = rb_ary_new(); - body->buffer = c_buffer_init(); + body->mutable = true; + body->as.mutable.buffer = c_buffer_init(); return obj; } -#define DocumentBody_Get_Struct(obj, sval) TypedData_Get_Struct(obj, document_body_t, &document_body_data_type, sval) - -VALUE document_body_new_instance() +VALUE document_body_new_immutable_instance(VALUE constants, VALUE serialize_str, const char *data) { - return rb_class_new_instance(0, NULL, cLiquidCDocumentBody); + assert(BUILTIN_TYPE(constants) == T_ARRAY); + assert(BUILTIN_TYPE(serialize_str) == T_STRING); + + document_body_t *body; + VALUE obj = TypedData_Make_Struct(cLiquidCDocumentBody, document_body_t, &document_body_data_type, body); + body->self = obj; + body->constants = constants; + body->mutable = false; + body->as.immutable.serialize_str = serialize_str; + body->as.immutable.data = data; + + return obj; } static void document_body_write_tag_markup(document_body_t *body, VALUE tag_markup_obj, bool last) @@ -90,11 +110,12 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor { document_body_t *body; DocumentBody_Get_Struct(self, body); + assert(body->mutable); - c_buffer_zero_pad_for_alignment(&body->buffer, alignof(block_body_header_t)); + c_buffer_zero_pad_for_alignment(&body->as.mutable.buffer, alignof(block_body_header_t)); entry->body = body; - entry->buffer_offset = c_buffer_size(&body->buffer); + entry->buffer_offset = c_buffer_size(&body->as.mutable.buffer); size_t instructions_byte_size = c_buffer_size(&code->instructions); size_t header_and_instructions_size = sizeof(block_body_header_t) + instructions_byte_size; @@ -137,7 +158,7 @@ VALUE 
document_body_dump(document_body_t *body, uint32_t entrypoint_block_offset { assert(BUILTIN_TYPE(body->constants) == T_ARRAY); - uint32_t buffer_len = (uint32_t)c_buffer_size(&body->buffer); + uint32_t buffer_len = (uint32_t)c_buffer_size(&body->as.mutable.buffer); VALUE constants = rb_marshal_dump(body->constants, Qnil); uint32_t constants_len = (uint32_t)RSTRING_LEN(constants); @@ -154,16 +175,26 @@ VALUE document_body_dump(document_body_t *body, uint32_t entrypoint_block_offset }; rb_str_cat(str, (const char *)&header, sizeof(document_body_header_t)); - rb_str_cat(str, (const char *)body->buffer.data, buffer_len); + rb_str_cat(str, (const char *)body->as.mutable.buffer.data, buffer_len); rb_str_append(str, constants); return str; } +void document_body_setup_entry_for_header(VALUE self, uint32_t offset, document_body_entry_t *entry) +{ + document_body_t *body; + DocumentBody_Get_Struct(self, body); + + entry->body = body; + entry->buffer_offset = offset; +} + + void liquid_define_document_body() { cLiquidCDocumentBody = rb_define_class_under(mLiquidC, "DocumentBody", rb_cObject); rb_global_variable(&cLiquidCDocumentBody); - rb_define_alloc_func(cLiquidCDocumentBody, document_body_allocate); + rb_undef_alloc_func(cLiquidCDocumentBody); } diff --git a/ext/liquid_c/document_body.h b/ext/liquid_c/document_body.h index 7dcf43ed..a7519fe2 100644 --- a/ext/liquid_c/document_body.h +++ b/ext/liquid_c/document_body.h @@ -23,7 +23,16 @@ typedef struct block_body_header { typedef struct document_body { VALUE self; VALUE constants; - c_buffer_t buffer; + bool mutable; + union { + struct { + c_buffer_t buffer; + } mutable; + struct { + VALUE serialize_str; + const char *data; + } immutable; + } as; } document_body_t; /* Bump this version every time a backwards incompatible change has been made in the serialization headers. 
@@ -45,19 +54,35 @@ typedef struct document_body_entry { } document_body_entry_t; void liquid_define_document_body(); -VALUE document_body_new_instance(); +VALUE document_body_new_mutable_instance(); +VALUE document_body_new_immutable_instance(VALUE constants, VALUE serialize_str, const char *data); void document_body_write_block_body(VALUE self, bool blank, uint32_t render_score, vm_assembler_t *code, document_body_entry_t *entry); VALUE document_body_dump(document_body_t *body, uint32_t entrypoint_block_offset); +void document_body_setup_entry_for_header(VALUE self, uint32_t offset, document_body_entry_t *entry); + +static inline document_body_entry_t document_body_entry_init() +{ + return (document_body_entry_t) { NULL, 0 }; +} static inline void document_body_entry_mark(document_body_entry_t *entry) { + if (!entry->body) return; + rb_gc_mark(entry->body->self); - rb_gc_mark(entry->body->constants); + + if (!entry->body->mutable) { + rb_gc_mark(entry->body->as.immutable.serialize_str); + } } static inline block_body_header_t *document_body_get_block_body_header_ptr(const document_body_entry_t *entry) { - return (block_body_header_t *)(entry->body->buffer.data + entry->buffer_offset); + if (entry->body->mutable) { + return (block_body_header_t *)(entry->body->as.mutable.buffer.data + entry->buffer_offset); + } else { + return (block_body_header_t *)(entry->body->as.immutable.data + entry->buffer_offset); + } } static inline const VALUE *document_body_get_constants_ptr(const document_body_entry_t *entry) diff --git a/ext/liquid_c/liquid.c b/ext/liquid_c/liquid.c index 094cb19d..a1a5c7c0 100644 --- a/ext/liquid_c/liquid.c +++ b/ext/liquid_c/liquid.c @@ -6,11 +6,14 @@ #include "raw.h" #include "resource_limits.h" #include "expression.h" +#include "template.h" +#include "document.h" #include "document_body.h" #include "block.h" #include "tag_markup.h" #include "context.h" #include "parse_context.h" +#include "serialize_parse_context.h" #include "variable_lookup.h" 
#include "vm_assembler_pool.h" #include "vm.h" @@ -83,11 +86,14 @@ RUBY_FUNC_EXPORTED void Init_liquid_c(void) liquid_define_resource_limits(); liquid_define_expression(); liquid_define_variable(); + liquid_define_template(); + liquid_define_document(); liquid_define_document_body(); liquid_define_block_body(); liquid_define_tag_markup(); liquid_define_context(); liquid_define_parse_context(); + liquid_define_serialize_parse_context(); liquid_define_variable_lookup(); liquid_define_vm_assembler_pool(); liquid_define_vm_assembler(); diff --git a/ext/liquid_c/parse_context.c b/ext/liquid_c/parse_context.c index 5e36fb99..55033bf8 100644 --- a/ext/liquid_c/parse_context.c +++ b/ext/liquid_c/parse_context.c @@ -1,6 +1,7 @@ #include "parse_context.h" #include "document_body.h" +VALUE cLiquidParseContext; static ID id_document_body, id_vm_assembler_pool, id_parent_tag; bool parse_context_document_body_initialized_p(VALUE self) @@ -12,7 +13,7 @@ void parse_context_init_document_body(VALUE self) { assert(!parse_context_document_body_initialized_p(self)); - VALUE document_body = document_body_new_instance(); + VALUE document_body = document_body_new_mutable_instance(); rb_ivar_set(self, id_document_body, document_body); } @@ -72,9 +73,13 @@ void parse_context_set_parent_tag(VALUE self, VALUE tag_header) rb_ivar_set(self, id_parent_tag, tag_header); } + void liquid_define_parse_context() { id_document_body = rb_intern("document_body"); id_vm_assembler_pool = rb_intern("vm_assembler_pool"); id_parent_tag = rb_intern("parent_tag"); + + cLiquidParseContext = rb_const_get(mLiquid, rb_intern("ParseContext")); + rb_global_variable(&cLiquidParseContext); } diff --git a/ext/liquid_c/parse_context.h b/ext/liquid_c/parse_context.h index e980c420..2f5740af 100644 --- a/ext/liquid_c/parse_context.h +++ b/ext/liquid_c/parse_context.h @@ -4,6 +4,9 @@ #include #include #include "vm_assembler_pool.h" +#include "tag_markup.h" + +extern VALUE cLiquidParseContext; void 
liquid_define_parse_context(); bool parse_context_document_body_initialized_p(VALUE self); diff --git a/ext/liquid_c/serialize_parse_context.c b/ext/liquid_c/serialize_parse_context.c new file mode 100644 index 00000000..cd0e930b --- /dev/null +++ b/ext/liquid_c/serialize_parse_context.c @@ -0,0 +1,76 @@ +#include +#include "serialize_parse_context.h" +#include "liquid.h" +#include "parse_context.h" + +static VALUE cLiquidCSerializeParseContext; +static ID id_initialize; + +static void serialize_parse_context_mark(void *ptr) +{ + serialize_parse_context_t *serialize_context = ptr; + rb_gc_mark(serialize_context->document_body); +} + +static void serialize_parse_context_free(void *ptr) +{ + xfree(ptr); +} + +static size_t serialize_parse_context_memsize(const void *ptr) +{ + return sizeof(serialize_parse_context_t); +} + +const rb_data_type_t serialize_parse_context_data_type = { + "liquid_serialize_parse_context", + { serialize_parse_context_mark, serialize_parse_context_free, serialize_parse_context_memsize, }, + NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY +}; + +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header) +{ + VALUE obj; + serialize_parse_context_t *serialize_context; + + obj = TypedData_Make_Struct(cLiquidCSerializeParseContext, serialize_parse_context_t, + &serialize_parse_context_data_type, serialize_context); + assert(header->entrypoint_block_index < header->buffer_len); + serialize_context->document_body = document_body; + document_body_setup_entry_for_header(document_body, header->entrypoint_block_index, + &serialize_context->current_entry); + serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, NULL); + + // Call initialize method of parent class + rb_funcall(obj, id_initialize, 0); + + return obj; +} + +bool is_serialize_parse_context_p(VALUE self) +{ + return CLASS_OF(self) == cLiquidCSerializeParseContext; +} + +void serialize_parse_context_enter_tag(serialize_parse_context_t 
*serialize_context, tag_markup_header_t *tag) +{ + serialize_context->current_entry.buffer_offset = tag->block_body_offset; + serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, NULL); +} + +void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, + tag_markup_header_t *tag) +{ + assert(serialize_context->current_entry.body == entry->body); + serialize_context->current_entry = *entry; + serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, tag); +} + +void liquid_define_serialize_parse_context() +{ + id_initialize = rb_intern("initialize"); + + cLiquidCSerializeParseContext = rb_define_class_under(mLiquidC, "SerializeParseContext", cLiquidParseContext); + rb_global_variable(&cLiquidCSerializeParseContext); + rb_undef_alloc_func(cLiquidCSerializeParseContext); +} diff --git a/ext/liquid_c/serialize_parse_context.h b/ext/liquid_c/serialize_parse_context.h new file mode 100644 index 00000000..c7648296 --- /dev/null +++ b/ext/liquid_c/serialize_parse_context.h @@ -0,0 +1,22 @@ +#ifndef LIQUID_SERIALIZE_PARSE_CONTEXT_H +#define LIQUID_SERIALIZE_PARSE_CONTEXT_H + +#include "document_body.h" +#include "tag_markup.h" + +typedef struct serialize_parse_context { + VALUE document_body; + document_body_entry_t current_entry; + tag_markup_header_t *current_tag; +} serialize_parse_context_t; + +extern const rb_data_type_t serialize_parse_context_data_type; +#define SerializeParseContext_Get_Struct(obj, sval) TypedData_Get_Struct(obj, serialize_parse_context_t, &serialize_parse_context_data_type, sval) + +void liquid_define_serialize_parse_context(); +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header); /* full prototype so callers' arguments are type-checked */ +bool is_serialize_parse_context_p(VALUE self); +void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag); +void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, 
document_body_entry_t *entry, tag_markup_header_t *tag); + +#endif diff --git a/ext/liquid_c/tag_markup.c b/ext/liquid_c/tag_markup.c index a484f04a..39bb3fd9 100644 --- a/ext/liquid_c/tag_markup.c +++ b/ext/liquid_c/tag_markup.c @@ -66,6 +66,36 @@ void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *b tag->block_body = block_body; } +tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_markup_header_t *current_tag) +{ + // Should only be used for (deserialized) immutable document body + assert(!entry->body->mutable); + + if (BUFFER_OFFSET_UNDEF_P(entry->buffer_offset)) { + return NULL; + } + + block_body_header_t *header = document_body_get_block_body_header_ptr(entry); + + tag_markup_header_t *next_tag; + if (current_tag) { + assert(current_tag >= (tag_markup_header_t *)((char *)header + header->tags_offset)); + next_tag = (tag_markup_header_t *)((char *)current_tag + current_tag->total_len); + } else { + next_tag = (tag_markup_header_t *)((char *)header + header->tags_offset); + } + + tag_markup_header_t *tags_end = (tag_markup_header_t *)((char *)header + header->tags_offset + header->tags_bytes); + + if (next_tag < tags_end) { + assert((unsigned long)tags_end - (unsigned long)next_tag > sizeof(tag_markup_header_t)); + return next_tag; + } else { // End of tags have been reached + assert(next_tag == tags_end); + return NULL; + } +} + void liquid_define_tag_markup() { cLiquidCTagMarkup = rb_define_class_under(mLiquidC, "TagMarkup", rb_cObject); diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index 8443509f..c1076ca0 100644 --- a/ext/liquid_c/tag_markup.h +++ b/ext/liquid_c/tag_markup.h @@ -33,5 +33,16 @@ VALUE tag_markup_new(uint32_t line_number, VALUE tag_name, VALUE markup, bool un VALUE tag_markup_get_tag_name(VALUE self); VALUE tag_markup_get_markup(VALUE self); void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *block_body); +tag_markup_header_t 
*tag_markup_get_next_tag(document_body_entry_t *entry, tag_markup_header_t *current_tag); + +static inline char *tag_markup_header_name(tag_markup_header_t *header) +{ + return ((char *)header) + header->tag_name_offset; +} + +static inline char *tag_markup_header_markup(tag_markup_header_t *header) +{ + return ((char *)header) + header->markup_offset; +} #endif diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c new file mode 100644 index 00000000..a7565010 --- /dev/null +++ b/ext/liquid_c/template.c @@ -0,0 +1,52 @@ +#include +#include "liquid.h" +#include "document.h" +#include "document_body.h" +#include "serialize_parse_context.h" +#include "tokenizer.h" + +static ID id_ivar_root, id_configure_options; + +static VALUE marshal_load_constants(const char *str, size_t len) +{ + VALUE str_obj = rb_str_new_static(str, len); + VALUE constants = rb_marshal_load(str_obj); + if (!RB_TYPE_P(constants, T_ARRAY)) { /* RB_TYPE_P is safe for immediates (nil, Fixnum); BUILTIN_TYPE is not */ + rb_raise(rb_eArgError, "expected constants to be an array"); + } + return constants; +} + +static VALUE template_load(VALUE self, VALUE source, VALUE options) +{ + rb_funcall(self, id_configure_options, 1, options); + + Check_Type(source, T_STRING); + source = rb_str_dup_frozen(source); + const char *data = RSTRING_PTR(source); + + document_body_header_t *header = (document_body_header_t *)data; + + assert(RSTRING_LEN(source) >= header->buffer_offset + header->buffer_len); /* the whole body buffer must lie inside source */ + const char *body_data = data + header->buffer_offset; + + assert(RSTRING_LEN(source) >= header->constants_offset + header->constants_len); + VALUE constants = marshal_load_constants(data + header->constants_offset, header->constants_len); + + VALUE document_body = document_body_new_immutable_instance(constants, source, body_data); + + VALUE parse_context = serialize_parse_context_new(document_body, header); + + rb_ivar_set(self, id_ivar_root, document_parse(Qnil, parse_context)); + + return self; +} + +void liquid_define_template() +{ + id_ivar_root = rb_intern("@root"); 
+ id_configure_options = rb_intern("configure_options"); + + VALUE cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template")); + rb_define_method(cLiquidTemplate, "load", template_load, 2); +} diff --git a/ext/liquid_c/template.h b/ext/liquid_c/template.h new file mode 100644 index 00000000..0b8e40ad --- /dev/null +++ b/ext/liquid_c/template.h @@ -0,0 +1,6 @@ +#ifndef LIQUID_TEMPLATE_H +#define LIQUID_TEMPLATE_H + +void liquid_define_template(); + +#endif diff --git a/lib/liquid/c.rb b/lib/liquid/c.rb index 0f670732..acd102c4 100644 --- a/lib/liquid/c.rb +++ b/lib/liquid/c.rb @@ -92,7 +92,7 @@ module Liquid module C module DocumentClassPatch def parse(tokenizer, parse_context) - if tokenizer.is_a?(Liquid::C::Tokenizer) + if tokenizer.is_a?(Liquid::C::Tokenizer) || tokenizer.nil? # Temporary to test rollout of the fix for this bug if parse_context[:bug_compatible_whitespace_trimming] tokenizer.bug_compatible_whitespace_trimming! @@ -110,6 +110,12 @@ def parse(tokenizer, parse_context) end Liquid::Template.class_eval do + class << self + def load(source, options = {}) + new.load(source, options) + end + end + def dump @root.dump end diff --git a/test/unit/template_test.rb b/test/unit/template_test.rb new file mode 100644 index 00000000..24afeec6 --- /dev/null +++ b/test/unit/template_test.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require 'test_helper' + +class TemplateTest < MiniTest::Test + def test_serialize + assert_equal('hello world', dump_load_eval('hello world')) + assert_equal('hello world', dump_load_eval('{% assign greeting = "hello" %}{{ greeting }} world')) + assert_equal('hello world', dump_load_eval('{% raw %}hello {% endraw %}world')) + assert_equal('hello world', + dump_load_eval('{% if test %}goodbye {% else %}hello {% endif %}world', 'test' => false)) + assert_equal('hello world', dump_load_eval('{% if true %}hello {% endif %}{% if true %}world{% endif %}')) + assert_equal('123', dump_load_eval('{% for i in (1..10) %}{{i}}{% if i == 
3 %}{% break %}{% endif %}{% endfor %}')) + end + + private + + def dump_load_eval(source, assigns = {}) + serialize = Liquid::Template.parse(source).dump + Liquid::Template.load(serialize).render!(assigns) + end +end From fde0cc23af5090f120e3c2e74df4ca800051e4b8 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 7 Jan 2021 11:33:40 -0500 Subject: [PATCH 02/18] Raise unknown tag in deserialization --- ext/liquid_c/block.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index bf04ae6c..f915dd32 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -19,10 +19,12 @@ static ID intern_parse, intern_square_brackets, intern_unknown_tag_in_liquid_tag, - intern_ivar_nodelist; + intern_ivar_nodelist, + intern_raise_unknown_tag; static VALUE tag_registry; static VALUE variable_placeholder = Qnil; +static VALUE cLiquidBlock; typedef struct parse_context { tokenizer_t *tokenizer; @@ -376,7 +378,9 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name); - assert(RTEST(tag_class)); + if (!RTEST(tag_class)) { + return rb_funcall(cLiquidBlock, intern_raise_unknown_tag, 4, tag_name, Qnil, Qnil, parse_context_obj); + } serialize_parse_context_enter_tag(serialize_context, current_tag); VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, @@ -706,6 +710,7 @@ void liquid_define_block_body() intern_square_brackets = rb_intern("[]"); intern_unknown_tag_in_liquid_tag = rb_intern("unknown_tag_in_liquid_tag"); intern_ivar_nodelist = rb_intern("@nodelist"); + intern_raise_unknown_tag = rb_intern("raise_unknown_tag"); tag_registry = rb_funcall(cLiquidTemplate, rb_intern("tags"), 0); rb_global_variable(&tag_registry); @@ -713,6 +718,9 @@ void liquid_define_block_body() VALUE cLiquidCBlockBody = 
rb_define_class_under(mLiquidC, "BlockBody", rb_cObject); rb_define_alloc_func(cLiquidCBlockBody, block_body_allocate); + cLiquidBlock = rb_const_get(mLiquid, rb_intern("Block")); + rb_global_variable(&cLiquidBlock); + rb_define_method(cLiquidCBlockBody, "initialize", block_body_initialize, 1); rb_define_method(cLiquidCBlockBody, "parse", block_body_parse, 2); rb_define_method(cLiquidCBlockBody, "freeze", block_body_freeze, 0); From 116920de8db516e1d28d2f7f8a815a6c45ef988d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 11:13:06 -0500 Subject: [PATCH 03/18] Remove unnecessary offset fields in tag_markup_header_t Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/tag_markup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index c1076ca0..988833ef 100644 --- a/ext/liquid_c/tag_markup.h +++ b/ext/liquid_c/tag_markup.h @@ -37,12 +37,12 @@ tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_m static inline char *tag_markup_header_name(tag_markup_header_t *header) { - return ((char *)header) + header->tag_name_offset; + return (char *)&header[1]; } static inline char *tag_markup_header_markup(tag_markup_header_t *header) { - return ((char *)header) + header->markup_offset; + return tag_markup_header_name(header) + header->tag_name_len; } #endif From ce6892449af0be88a93741dcbb37997c3ddcdfbc Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 11:14:20 -0500 Subject: [PATCH 04/18] Rename tags to tag_markups on the vm_assembler_t to avoid confusion Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index f915dd32..5186cf15 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -305,7 +305,7 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars tokenizer->raw_tag_body = NULL; 
tokenizer->raw_tag_body_len = 0; } else { - vm_assembler_write_tag(body->as.intermediate.code, tag_markup); + vm_assembler_write_tag_markup(body->as.intermediate.code, tag_markup); block_body_add_node(body, new_tag); } From 3ff9c152b7b60c486f2dc26c9b67ba4abde30fc2 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 11:30:36 -0500 Subject: [PATCH 05/18] Remove serialize_parse_context.current_tag and split tag_markup_get_next_tag Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/block.c | 4 ++-- ext/liquid_c/serialize_parse_context.h | 1 - ext/liquid_c/tag_markup.c | 25 ++++++++++--------------- ext/liquid_c/tag_markup.h | 3 ++- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 5186cf15..855b5df1 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -362,7 +362,7 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer body->as.serialize.document_body_entry = serialize_context->current_entry; - tag_markup_header_t *current_tag = serialize_context->current_tag; + tag_markup_header_t *current_tag = tag_markup_get_first_tag(&serialize_context->current_entry); while (current_tag) { bool tag_unknown = TAG_UNKNOWN_P(current_tag); @@ -390,7 +390,7 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer c_buffer_write_ruby_value(&body->tags, new_tag); } - current_tag = tag_markup_get_next_tag(&body->as.serialize.document_body_entry, current_tag); + current_tag = tag_markup_get_next_tag(current_tag); } return rb_yield_values(2, Qnil, Qnil); diff --git a/ext/liquid_c/serialize_parse_context.h b/ext/liquid_c/serialize_parse_context.h index c7648296..04f17feb 100644 --- a/ext/liquid_c/serialize_parse_context.h +++ b/ext/liquid_c/serialize_parse_context.h @@ -7,7 +7,6 @@ typedef struct serialize_parse_context { VALUE document_body; document_body_entry_t current_entry; - tag_markup_header_t *current_tag; } serialize_parse_context_t; extern 
const rb_data_type_t serialize_parse_context_data_type; diff --git a/ext/liquid_c/tag_markup.c b/ext/liquid_c/tag_markup.c index 39bb3fd9..71830607 100644 --- a/ext/liquid_c/tag_markup.c +++ b/ext/liquid_c/tag_markup.c @@ -66,7 +66,7 @@ void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *b tag->block_body = block_body; } -tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_markup_header_t *current_tag) +tag_markup_header_t *tag_markup_get_first_tag(document_body_entry_t *entry) { // Should only be used for (deserialized) immutable document body assert(!entry->body->mutable); @@ -77,23 +77,18 @@ tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_m block_body_header_t *header = document_body_get_block_body_header_ptr(entry); - tag_markup_header_t *next_tag; - if (current_tag) { - assert(current_tag >= (tag_markup_header_t *)((char *)header + header->tags_offset)); - next_tag = (tag_markup_header_t *)((char *)current_tag + current_tag->total_len); - } else { - next_tag = (tag_markup_header_t *)((char *)header + header->tags_offset); - } + if (!header->first_tag_offset) + return NULL; - tag_markup_header_t *tags_end = (tag_markup_header_t *)((char *)header + header->tags_offset + header->tags_bytes); + return (tag_markup_header_t *)((char *)header + header->first_tag_offset); +} - if (next_tag < tags_end) { - assert((unsigned long)tags_end - (unsigned long)next_tag > sizeof(tag_markup_header_t)); - return next_tag; - } else { // End of tags have been reached - assert(next_tag == tags_end); +tag_markup_header_t *tag_markup_get_next_tag(tag_markup_header_t *current_tag) +{ + if (!current_tag->next_tag_offset) return NULL; - } + + return (tag_markup_header_t *)((char *)current_tag + current_tag->next_tag_offset); } void liquid_define_tag_markup() diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index 988833ef..93046037 100644 --- a/ext/liquid_c/tag_markup.h +++ 
b/ext/liquid_c/tag_markup.h @@ -33,7 +33,8 @@ VALUE tag_markup_new(uint32_t line_number, VALUE tag_name, VALUE markup, bool un VALUE tag_markup_get_tag_name(VALUE self); VALUE tag_markup_get_markup(VALUE self); void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *block_body); -tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_markup_header_t *current_tag); +tag_markup_header_t *tag_markup_get_first_tag(document_body_entry_t *entry); +tag_markup_header_t *tag_markup_get_next_tag(tag_markup_header_t *current_tag); static inline char *tag_markup_header_name(tag_markup_header_t *header) { From 84301b542d3fc3394938253f98dfa8741d366b89 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 13:53:02 -0500 Subject: [PATCH 06/18] Remove block_body_parse_from_serialize exception handler Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/block.c | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 855b5df1..df90a054 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -321,32 +321,6 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars return unknown_tag; } -typedef struct block_body_yield_tag_args { - block_body_t *body; - serialize_parse_context_t *serialize_context; - tag_markup_header_t *current_tag; -} block_body_yield_tag_args_t; - -static VALUE block_body_try_yield_tag(VALUE uncast_args) -{ - block_body_yield_tag_args_t *args = (block_body_yield_tag_args_t *)uncast_args; - tag_markup_header_t *current_tag = args->current_tag; - - serialize_parse_context_enter_tag(args->serialize_context, current_tag); - VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); - VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); - return rb_yield_values(2, tag_name, markup); -} - -static VALUE 
block_body_rescue_yield_tag(VALUE uncast_args, VALUE exception) -{ - block_body_yield_tag_args_t *args = (block_body_yield_tag_args_t *)uncast_args; - - serialize_parse_context_exit_tag(args->serialize_context, &args->body->as.serialize.document_body_entry, - args->current_tag); - rb_exc_raise(exception); -} - static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj) { assert(body->from_serialize); @@ -367,12 +341,12 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer bool tag_unknown = TAG_UNKNOWN_P(current_tag); if (tag_unknown) { - block_body_yield_tag_args_t yield_args = { - .body = body, - .serialize_context = serialize_context, - .current_tag = current_tag - }; - return rb_rescue(block_body_try_yield_tag, (VALUE)&yield_args, block_body_rescue_yield_tag, (VALUE)&yield_args); + serialize_parse_context_enter_tag(serialize_context, current_tag); + + VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); + VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); + + return rb_yield_values(2, tag_name, markup); } else { VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); From 997b2137c168d6327bdf933aca270707ec1fcdfa Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 13:53:44 -0500 Subject: [PATCH 07/18] Fix rebase conflicts Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/document_body.c | 10 +++++----- ext/liquid_c/serialize_parse_context.c | 7 ++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 8398124d..653b7416 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -79,10 +79,10 @@ static void 
document_body_write_tag_markup(document_body_t *body, VALUE tag_mark uint32_t tag_name_len = (uint32_t)RSTRING_LEN(tag_markup->tag_name); uint32_t markup_len = (uint32_t)RSTRING_LEN(tag_markup->markup); uint32_t total_len = sizeof(tag_markup_header_t) + tag_name_len + markup_len; - assert(c_buffer_size(&body->buffer) % alignof(tag_markup_header_t) == 0); - tag_markup_header_t *header = c_buffer_extend_for_write(&body->buffer, total_len); + assert(c_buffer_size(&body->as.mutable.buffer) % alignof(tag_markup_header_t) == 0); + tag_markup_header_t *header = c_buffer_extend_for_write(&body->as.mutable.buffer, total_len); if (!last) { - total_len += (uint32_t)c_buffer_zero_pad_for_alignment(&body->buffer, alignof(tag_markup_header_t)); + total_len += (uint32_t)c_buffer_zero_pad_for_alignment(&body->as.mutable.buffer, alignof(tag_markup_header_t)); } char *name = (char *)&header[1]; @@ -119,7 +119,7 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor size_t instructions_byte_size = c_buffer_size(&code->instructions); size_t header_and_instructions_size = sizeof(block_body_header_t) + instructions_byte_size; - block_body_header_t *buf_block_body = c_buffer_extend_for_write(&body->buffer, header_and_instructions_size); + block_body_header_t *buf_block_body = c_buffer_extend_for_write(&body->as.mutable.buffer, header_and_instructions_size); uint8_t *instructions = (uint8_t *)&buf_block_body[1]; buf_block_body->flags = 0; @@ -141,7 +141,7 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tag_markups) / sizeof(VALUE)); if (tags_len > 0) { buf_block_body->first_tag_offset = (uint32_t)header_and_instructions_size; - buf_block_body->first_tag_offset += (uint32_t)c_buffer_zero_pad_for_alignment(&body->buffer, alignof(tag_markup_header_t)); + buf_block_body->first_tag_offset += (uint32_t)c_buffer_zero_pad_for_alignment(&body->as.mutable.buffer, 
alignof(tag_markup_header_t)); uint32_t i; for (i = 0; i < tags_len - 1; i++) { diff --git a/ext/liquid_c/serialize_parse_context.c b/ext/liquid_c/serialize_parse_context.c index cd0e930b..ad18f3b7 100644 --- a/ext/liquid_c/serialize_parse_context.c +++ b/ext/liquid_c/serialize_parse_context.c @@ -35,11 +35,10 @@ VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *h obj = TypedData_Make_Struct(cLiquidCSerializeParseContext, serialize_parse_context_t, &serialize_parse_context_data_type, serialize_context); - assert(header->entrypoint_block_index < header->buffer_len); + assert(header->entrypoint_block_offset < header->buffer_len); serialize_context->document_body = document_body; - document_body_setup_entry_for_header(document_body, header->entrypoint_block_index, + document_body_setup_entry_for_header(document_body, header->entrypoint_block_offset, &serialize_context->current_entry); - serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, NULL); // Call initialize method of parent class rb_funcall(obj, id_initialize, 0); @@ -55,7 +54,6 @@ bool is_serialize_parse_context_p(VALUE self) void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag) { serialize_context->current_entry.buffer_offset = tag->block_body_offset; - serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, NULL); } void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, @@ -63,7 +61,6 @@ void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_conte { assert(serialize_context->current_entry.body == entry->body); serialize_context->current_entry = *entry; - serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, tag); } void liquid_define_serialize_parse_context() From 2d1aabc047968c917741994e59044a971e866362 Mon Sep 17 00:00:00 2001 From: 
Peter Zhu Date: Fri, 8 Jan 2021 14:06:25 -0500 Subject: [PATCH 08/18] Add Liquid::C::DeserializationError --- ext/liquid_c/block.c | 10 ++-------- ext/liquid_c/liquid.c | 5 ++++- ext/liquid_c/liquid.h | 2 +- ext/liquid_c/template.c | 4 ++++ lib/liquid/c.rb | 3 ++- lib/liquid/c/errors.rb | 7 +++++++ 6 files changed, 20 insertions(+), 11 deletions(-) create mode 100644 lib/liquid/c/errors.rb diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index df90a054..9fbb31cd 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -19,12 +19,10 @@ static ID intern_parse, intern_square_brackets, intern_unknown_tag_in_liquid_tag, - intern_ivar_nodelist, - intern_raise_unknown_tag; + intern_ivar_nodelist; static VALUE tag_registry; static VALUE variable_placeholder = Qnil; -static VALUE cLiquidBlock; typedef struct parse_context { tokenizer_t *tokenizer; @@ -353,7 +351,7 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name); if (!RTEST(tag_class)) { - return rb_funcall(cLiquidBlock, intern_raise_unknown_tag, 4, tag_name, Qnil, Qnil, parse_context_obj); + rb_raise(cLiquidCDeserializationError, "cannot find known tag `%"PRIsVALUE"`", tag_name); } serialize_parse_context_enter_tag(serialize_context, current_tag); @@ -684,7 +682,6 @@ void liquid_define_block_body() intern_square_brackets = rb_intern("[]"); intern_unknown_tag_in_liquid_tag = rb_intern("unknown_tag_in_liquid_tag"); intern_ivar_nodelist = rb_intern("@nodelist"); - intern_raise_unknown_tag = rb_intern("raise_unknown_tag"); tag_registry = rb_funcall(cLiquidTemplate, rb_intern("tags"), 0); rb_global_variable(&tag_registry); @@ -692,9 +689,6 @@ void liquid_define_block_body() VALUE cLiquidCBlockBody = rb_define_class_under(mLiquidC, "BlockBody", rb_cObject); rb_define_alloc_func(cLiquidCBlockBody, block_body_allocate); - cLiquidBlock = rb_const_get(mLiquid, rb_intern("Block")); - 
rb_global_variable(&cLiquidBlock); - rb_define_method(cLiquidCBlockBody, "initialize", block_body_initialize, 1); rb_define_method(cLiquidCBlockBody, "parse", block_body_parse, 2); rb_define_method(cLiquidCBlockBody, "freeze", block_body_freeze, 0); diff --git a/ext/liquid_c/liquid.c b/ext/liquid_c/liquid.c index a1a5c7c0..335869ed 100644 --- a/ext/liquid_c/liquid.c +++ b/ext/liquid_c/liquid.c @@ -28,7 +28,7 @@ ID id_ivar_line_number; VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody; VALUE cLiquidVariableLookup, cLiquidRangeLookup; -VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError; +VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError, cLiquidCDeserializationError; rb_encoding *utf8_encoding; int utf8_encoding_index; @@ -65,6 +65,9 @@ RUBY_FUNC_EXPORTED void Init_liquid_c(void) cMemoryError = rb_const_get(mLiquid, rb_intern("MemoryError")); rb_global_variable(&cMemoryError); + cLiquidCDeserializationError = rb_const_get(mLiquidC, rb_intern("DeserializationError")); + rb_global_variable(&cLiquidCDeserializationError); + cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable")); rb_global_variable(&cLiquidVariable); diff --git a/ext/liquid_c/liquid.h b/ext/liquid_c/liquid.h index 8c9f223b..ca3468ee 100644 --- a/ext/liquid_c/liquid.h +++ b/ext/liquid_c/liquid.h @@ -14,7 +14,7 @@ extern ID id_ivar_line_number; extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody; extern VALUE cLiquidVariableLookup, cLiquidRangeLookup; -extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError; +extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError, cLiquidCDeserializationError; extern rb_encoding *utf8_encoding; extern int utf8_encoding_index; diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c index a7565010..abf20bc2 100644 --- a/ext/liquid_c/template.c +++ b/ext/liquid_c/template.c @@ -27,6 +27,10 @@ static VALUE template_load(VALUE self, VALUE source, VALUE options) 
document_body_header_t *header = (document_body_header_t *)data; + if (header->version != DOCUMENT_BODY_CURRENT_VERSION) { + rb_raise(cLiquidCDeserializationError, "Incompatible serialization versions, expected %u but got %u\n", DOCUMENT_BODY_CURRENT_VERSION, header->version); + } + assert(RSTRING_LEN(source) >= header->buffer_offset + header->buffer_offset); const char *body_data = data + header->buffer_offset; diff --git a/lib/liquid/c.rb b/lib/liquid/c.rb index acd102c4..29cc4a34 100644 --- a/lib/liquid/c.rb +++ b/lib/liquid/c.rb @@ -1,7 +1,8 @@ # frozen_string_literal: true -require 'liquid/c/version' require 'liquid' +require 'liquid/c/version' +require 'liquid/c/errors' require 'liquid_c' require 'liquid/c/compile_ext' diff --git a/lib/liquid/c/errors.rb b/lib/liquid/c/errors.rb new file mode 100644 index 00000000..0b3c67c1 --- /dev/null +++ b/lib/liquid/c/errors.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module Liquid + module C + DeserializationError = Class.new(Liquid::Error) + end +end From 3bf7485b5ce7734e907fdd2ad89659414e68e3a2 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 14:20:08 -0500 Subject: [PATCH 09/18] Pass options to SerializeParseContext --- ext/liquid_c/serialize_parse_context.c | 4 ++-- ext/liquid_c/serialize_parse_context.h | 2 +- ext/liquid_c/template.c | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ext/liquid_c/serialize_parse_context.c b/ext/liquid_c/serialize_parse_context.c index ad18f3b7..277338ff 100644 --- a/ext/liquid_c/serialize_parse_context.c +++ b/ext/liquid_c/serialize_parse_context.c @@ -28,7 +28,7 @@ const rb_data_type_t serialize_parse_context_data_type = { NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY }; -VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header) +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header, VALUE options) { VALUE obj; serialize_parse_context_t *serialize_context; @@ -41,7 +41,7 @@ VALUE 
serialize_parse_context_new(VALUE document_body, document_body_header_t *h &serialize_context->current_entry); // Call initialize method of parent class - rb_funcall(obj, id_initialize, 0); + rb_funcall(obj, id_initialize, 1, options); return obj; } diff --git a/ext/liquid_c/serialize_parse_context.h b/ext/liquid_c/serialize_parse_context.h index 04f17feb..53003694 100644 --- a/ext/liquid_c/serialize_parse_context.h +++ b/ext/liquid_c/serialize_parse_context.h @@ -13,7 +13,7 @@ extern const rb_data_type_t serialize_parse_context_data_type; #define SerializeParseContext_Get_Struct(obj, sval) TypedData_Get_Struct(obj, serialize_parse_context_t, &serialize_parse_context_data_type, sval) void liquid_define_serialize_parse_context(); -VALUE serialize_parse_context_new(); +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header, VALUE options); bool is_serialize_parse_context_p(VALUE self); void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag); void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, tag_markup_header_t *tag); diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c index abf20bc2..d4e693f1 100644 --- a/ext/liquid_c/template.c +++ b/ext/liquid_c/template.c @@ -19,9 +19,9 @@ static VALUE marshal_load_constants(const char *str, size_t len) static VALUE template_load(VALUE self, VALUE source, VALUE options) { - rb_funcall(self, id_configure_options, 1, options); - Check_Type(source, T_STRING); + Check_Type(options, T_HASH); + source = rb_str_dup_frozen(source); const char *data = RSTRING_PTR(source); @@ -39,7 +39,8 @@ static VALUE template_load(VALUE self, VALUE source, VALUE options) VALUE document_body = document_body_new_immutable_instance(constants, source, body_data); - VALUE parse_context = serialize_parse_context_new(document_body, header); + VALUE parse_context = 
serialize_parse_context_new(document_body, header, options); + rb_funcall(self, id_configure_options, 1, parse_context); rb_ivar_set(self, id_ivar_root, document_parse(Qnil, parse_context)); From 378ae55ac42fd53b799aa60b4bd80140e415bbb0 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 14:21:19 -0500 Subject: [PATCH 10/18] Refactor source to serialized_data --- ext/liquid_c/template.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c index d4e693f1..51fc93b0 100644 --- a/ext/liquid_c/template.c +++ b/ext/liquid_c/template.c @@ -17,13 +17,13 @@ static VALUE marshal_load_constants(const char *str, size_t len) return constants; } -static VALUE template_load(VALUE self, VALUE source, VALUE options) +static VALUE template_load(VALUE self, VALUE serialized_data, VALUE options) { - Check_Type(source, T_STRING); + Check_Type(serialized_data, T_STRING); Check_Type(options, T_HASH); - source = rb_str_dup_frozen(source); - const char *data = RSTRING_PTR(source); + serialized_data = rb_str_dup_frozen(serialized_data); + const char *data = RSTRING_PTR(serialized_data); document_body_header_t *header = (document_body_header_t *)data; @@ -31,13 +31,13 @@ static VALUE template_load(VALUE self, VALUE source, VALUE options) rb_raise(cLiquidCDeserializationError, "Incompatible serialization versions, expected %u but got %u\n", DOCUMENT_BODY_CURRENT_VERSION, header->version); } - assert(RSTRING_LEN(source) >= header->buffer_offset + header->buffer_offset); + assert(RSTRING_LEN(serialized_data) >= header->buffer_offset + header->buffer_offset); const char *body_data = data + header->buffer_offset; - assert(RSTRING_LEN(source) >= header->constants_offset + header->constants_len); + assert(RSTRING_LEN(serialized_data) >= header->constants_offset + header->constants_len); VALUE constants = marshal_load_constants(data + header->constants_offset, header->constants_len); - VALUE document_body = 
document_body_new_immutable_instance(constants, source, body_data); + VALUE document_body = document_body_new_immutable_instance(constants, serialized_data, body_data); VALUE parse_context = serialize_parse_context_new(document_body, header, options); rb_funcall(self, id_configure_options, 1, parse_context); From 1a83ab7edc5a63223a1f1a9a580c6bdedf5d19bd Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 14:23:48 -0500 Subject: [PATCH 11/18] Add assertion --- ext/liquid_c/template.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c index 51fc93b0..2e5f05b0 100644 --- a/ext/liquid_c/template.c +++ b/ext/liquid_c/template.c @@ -31,6 +31,7 @@ static VALUE template_load(VALUE self, VALUE serialized_data, VALUE options) rb_raise(cLiquidCDeserializationError, "Incompatible serialization versions, expected %u but got %u\n", DOCUMENT_BODY_CURRENT_VERSION, header->version); } + assert(RSTRING_LEN(serialized_data) >= (long)sizeof(*header)); assert(RSTRING_LEN(serialized_data) >= header->buffer_offset + header->buffer_offset); const char *body_data = data + header->buffer_offset; From 532d60fcf7f9e752a8e82747189457c91f9d8648 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 14:58:24 -0500 Subject: [PATCH 12/18] Add test for serialization with line numbers --- test/unit/template_test.rb | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/test/unit/template_test.rb b/test/unit/template_test.rb index 24afeec6..b9597ad9 100644 --- a/test/unit/template_test.rb +++ b/test/unit/template_test.rb @@ -13,10 +13,32 @@ def test_serialize assert_equal('123', dump_load_eval('{% for i in (1..10) %}{{i}}{% if i == 3 %}{% break %}{% endif %}{% endfor %}')) end + def test_serialize_with_line_numbers + template = <<-LIQUID + Hello, + + {{ errors.standard_error }} will raise a standard error. 
+ LIQUID + + expected = <<-TEXT + Hello, + + Liquid error (line 3): standard error will raise a standard error. + TEXT + + error_drop_klass = Class.new(Liquid::Drop) do + def standard_error + raise Liquid::StandardError, 'standard error' + end + end + + assert_equal(expected, dump_load_eval(template, { 'errors' => error_drop_klass.new }, { line_numbers: true })) + end + private - def dump_load_eval(source, assigns = {}) - serialize = Liquid::Template.parse(source).dump - Liquid::Template.load(serialize).render!(assigns) + def dump_load_eval(source, assigns = {}, options = {}) + serialize = Liquid::Template.parse(source, options).dump + Liquid::Template.load(serialize).render(assigns) end end From a65f10e85ea7d73da23c36bb68c951e2f4ddadba Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:05:26 -0500 Subject: [PATCH 13/18] Run liquid integration tests for serialization --- Rakefile | 5 ++++- lib/liquid/c.rb | 2 +- test/integration_test.rb | 29 +++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Rakefile b/Rakefile index 23508205..ec4be2e4 100644 --- a/Rakefile +++ b/Rakefile @@ -45,6 +45,7 @@ namespace :test do env_vars.each { |key, value| ENV[key] = value } task.invoke ensure + env_vars.each { |key, _| ENV[key] = nil } old_env_values.each { |key, value| ENV[key] = value } task.reenable end @@ -55,9 +56,11 @@ namespace :test do task :strict, &integration_test_with_env.call('LIQUID_PARSER_MODE' => 'strict') + task :serialization, &integration_test_with_env.call('LIQUID_C_TEST_SERIALIZE' => '1') + task :without_vm, &integration_test_with_env.call('LIQUID_C_DISABLE_VM' => 'true') - task all: [:lax, :strict, :without_vm] + task all: [:lax, :strict, :serialization, :without_vm] end end diff --git a/lib/liquid/c.rb b/lib/liquid/c.rb index 29cc4a34..b98225a3 100644 --- a/lib/liquid/c.rb +++ b/lib/liquid/c.rb @@ -95,7 +95,7 @@ module DocumentClassPatch def parse(tokenizer, parse_context) if 
tokenizer.is_a?(Liquid::C::Tokenizer) || tokenizer.nil? # Temporary to test rollout of the fix for this bug - if parse_context[:bug_compatible_whitespace_trimming] + if tokenizer && parse_context[:bug_compatible_whitespace_trimming] tokenizer.bug_compatible_whitespace_trimming! end else diff --git a/test/integration_test.rb b/test/integration_test.rb index 5fb56a15..f1c637d6 100644 --- a/test/integration_test.rb +++ b/test/integration_test.rb @@ -11,6 +11,35 @@ Liquid::ParseContext.liquid_c_nodes_disabled = true end +if ENV['LIQUID_C_TEST_SERIALIZE'] + puts "-- Liquid-C serialization" + + module SkipSerializeFailingTestsSetup + SERIALIZE_SKIPPED_TESTS = %w( + ErrorHandlingTest#test_warning_line_numbers + ErrorHandlingTest#test_warnings + ForTagTest#test_instrument_for_offset_continue + ) + + def setup + skip if SERIALIZE_SKIPPED_TESTS.include?("#{class_name}##{name}") + super + end + end + + Minitest::Test.prepend(SkipSerializeFailingTestsSetup) + + Liquid::Template.singleton_class.class_eval do + alias_method :original_parse, :parse + + def parse(source, options = {}) + template = original_parse(source, options) + return template if template.root.parse_context.liquid_c_nodes_disabled? 
+ Liquid::Template.load(template.dump, options) + end + end +end + test_files = FileList[File.join(liquid_test_dir, 'integration/**/*_test.rb')] test_files << File.join(liquid_test_dir, 'unit/tokenizer_unit_test.rb') test_files.each do |test_file| From 1cf94cab66d2ec4a4224366cac681e5ef0ba2d3f Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:29:24 -0500 Subject: [PATCH 14/18] Let SerializeParseContext behave like ParseContext after deserialization has completed --- ext/liquid_c/block.c | 4 ++-- ext/liquid_c/serialize_parse_context.c | 23 ++++++++++++++++++----- ext/liquid_c/serialize_parse_context.h | 6 ++++-- ext/liquid_c/template.c | 5 ++++- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 9fbb31cd..3e960b32 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -109,7 +109,7 @@ static VALUE block_body_initialize(VALUE self, VALUE parse_context) block_body_t *body; BlockBody_Get_Struct(self, body); - if (is_serialize_parse_context_p(parse_context)) { + if (is_parse_context_for_serialize(parse_context)) { body->from_serialize = true; body->as.serialize.document_body_entry = document_body_entry_init(); body->as.serialize.parse_context = parse_context; @@ -322,7 +322,7 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj) { assert(body->from_serialize); - assert(is_serialize_parse_context_p(parse_context_obj)); + assert(is_parse_context_for_serialize(parse_context_obj)); ensure_intermediate(body); if (body->as.serialize.parse_context != parse_context_obj) { diff --git a/ext/liquid_c/serialize_parse_context.c b/ext/liquid_c/serialize_parse_context.c index 277338ff..915e4dfa 100644 --- a/ext/liquid_c/serialize_parse_context.c +++ b/ext/liquid_c/serialize_parse_context.c @@ -28,14 +28,16 @@ const rb_data_type_t 
serialize_parse_context_data_type = { NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY }; -VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header, VALUE options) +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header, VALUE options, + serialize_parse_context_t **serialize_context_ptr_ptr) { VALUE obj; - serialize_parse_context_t *serialize_context; obj = TypedData_Make_Struct(cLiquidCSerializeParseContext, serialize_parse_context_t, - &serialize_parse_context_data_type, serialize_context); + &serialize_parse_context_data_type, *serialize_context_ptr_ptr); + serialize_parse_context_t *serialize_context = *serialize_context_ptr_ptr; assert(header->entrypoint_block_offset < header->buffer_len); + serialize_context->deserialize_complete = false; serialize_context->document_body = document_body; document_body_setup_entry_for_header(document_body, header->entrypoint_block_offset, &serialize_context->current_entry); @@ -46,19 +48,30 @@ VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *h return obj; } -bool is_serialize_parse_context_p(VALUE self) +bool is_parse_context_for_serialize(VALUE self) { - return CLASS_OF(self) == cLiquidCSerializeParseContext; + if (CLASS_OF(self) == cLiquidCSerializeParseContext) { + serialize_parse_context_t *serialize_context; + SerializeParseContext_Get_Struct(self, serialize_context); + + return !serialize_context->deserialize_complete; + } + + return false; } void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag) { + assert(!serialize_context->deserialize_complete); + serialize_context->current_entry.buffer_offset = tag->block_body_offset; } void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, tag_markup_header_t *tag) { + assert(!serialize_context->deserialize_complete); + assert(serialize_context->current_entry.body == entry->body); 
serialize_context->current_entry = *entry; } diff --git a/ext/liquid_c/serialize_parse_context.h b/ext/liquid_c/serialize_parse_context.h index 53003694..80563634 100644 --- a/ext/liquid_c/serialize_parse_context.h +++ b/ext/liquid_c/serialize_parse_context.h @@ -5,6 +5,7 @@ #include "tag_markup.h" typedef struct serialize_parse_context { + bool deserialize_complete; VALUE document_body; document_body_entry_t current_entry; } serialize_parse_context_t; @@ -13,8 +14,9 @@ extern const rb_data_type_t serialize_parse_context_data_type; #define SerializeParseContext_Get_Struct(obj, sval) TypedData_Get_Struct(obj, serialize_parse_context_t, &serialize_parse_context_data_type, sval) void liquid_define_serialize_parse_context(); -VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header, VALUE options); -bool is_serialize_parse_context_p(VALUE self); +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header, VALUE options, + serialize_parse_context_t **serialize_context_ptr_ptr); +bool is_parse_context_for_serialize(VALUE self); void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag); void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, tag_markup_header_t *tag); diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c index 2e5f05b0..54a00544 100644 --- a/ext/liquid_c/template.c +++ b/ext/liquid_c/template.c @@ -40,11 +40,14 @@ static VALUE template_load(VALUE self, VALUE serialized_data, VALUE options) VALUE document_body = document_body_new_immutable_instance(constants, serialized_data, body_data); - VALUE parse_context = serialize_parse_context_new(document_body, header, options); + serialize_parse_context_t *serialize_context; + VALUE parse_context = serialize_parse_context_new(document_body, header, options, &serialize_context); rb_funcall(self, id_configure_options, 1, parse_context); 
rb_ivar_set(self, id_ivar_root, document_parse(Qnil, parse_context)); + serialize_context->deserialize_complete = true; + return self; } From 8ed1eda2a465d5e7d6be6ccdcce3eba60953fb2d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:30:44 -0500 Subject: [PATCH 15/18] Keep track of the current tag in the SerializeParseContext --- ext/liquid_c/block.c | 8 ++++++-- ext/liquid_c/serialize_parse_context.c | 3 +++ ext/liquid_c/serialize_parse_context.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 3e960b32..2b75ea7b 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -334,7 +334,7 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer body->as.serialize.document_body_entry = serialize_context->current_entry; - tag_markup_header_t *current_tag = tag_markup_get_first_tag(&serialize_context->current_entry); + tag_markup_header_t *current_tag = serialize_context->current_tag; while (current_tag) { bool tag_unknown = TAG_UNKNOWN_P(current_tag); @@ -344,7 +344,11 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); - return rb_yield_values(2, tag_name, markup); + VALUE ret = rb_yield_values(2, tag_name, markup); + if (BUFFER_OFFSET_UNDEF_P(current_tag->block_body_offset)) { + serialize_parse_context_exit_tag(serialize_context, &body->as.serialize.document_body_entry, current_tag); + } + return ret; } else { VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); diff --git a/ext/liquid_c/serialize_parse_context.c b/ext/liquid_c/serialize_parse_context.c index 915e4dfa..2dd6d1be 100644 --- 
a/ext/liquid_c/serialize_parse_context.c +++ b/ext/liquid_c/serialize_parse_context.c @@ -41,6 +41,7 @@ VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *h serialize_context->document_body = document_body; document_body_setup_entry_for_header(document_body, header->entrypoint_block_offset, &serialize_context->current_entry); + serialize_context->current_tag = tag_markup_get_first_tag(&serialize_context->current_entry); // Call initialize method of parent class rb_funcall(obj, id_initialize, 1, options); @@ -65,6 +66,7 @@ void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_cont assert(!serialize_context->deserialize_complete); serialize_context->current_entry.buffer_offset = tag->block_body_offset; + serialize_context->current_tag = tag_markup_get_first_tag(&serialize_context->current_entry); } void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, @@ -74,6 +76,7 @@ void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_conte assert(serialize_context->current_entry.body == entry->body); serialize_context->current_entry = *entry; + serialize_context->current_tag = tag_markup_get_next_tag(tag); } void liquid_define_serialize_parse_context() diff --git a/ext/liquid_c/serialize_parse_context.h b/ext/liquid_c/serialize_parse_context.h index 80563634..34983407 100644 --- a/ext/liquid_c/serialize_parse_context.h +++ b/ext/liquid_c/serialize_parse_context.h @@ -8,6 +8,7 @@ typedef struct serialize_parse_context { bool deserialize_complete; VALUE document_body; document_body_entry_t current_entry; + tag_markup_header_t *current_tag; } serialize_parse_context_t; extern const rb_data_type_t serialize_parse_context_data_type; From 10e9294349e7274ebbfa03e57e076a213960d928 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:30:57 -0500 Subject: [PATCH 16/18] Set the line numbers during deserialize --- ext/liquid_c/block.c | 4 ++++ 1 
file changed, 4 insertions(+) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 2b75ea7b..523b9b43 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -338,6 +338,10 @@ static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer while (current_tag) { bool tag_unknown = TAG_UNKNOWN_P(current_tag); + if (current_tag->line_number != 0) { + rb_ivar_set(parse_context_obj, id_ivar_line_number, UINT2NUM(current_tag->line_number)); + } + if (tag_unknown) { serialize_parse_context_enter_tag(serialize_context, current_tag); From dd35354ec52194c760223bbefc4ffdf920d96abf Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:34:36 -0500 Subject: [PATCH 17/18] Fix BlockBody#blank? --- ext/liquid_c/block.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 523b9b43..c05c8a0a 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -486,7 +486,10 @@ static VALUE block_body_blank_p(VALUE self) { block_body_t *body; BlockBody_Get_Struct(self, body); - if (body->compiled) { + if (body->from_serialize) { + block_body_header_t *body_header = document_body_get_block_body_header_ptr(&body->as.serialize.document_body_entry); + return BLOCK_BODY_HEADER_BLANK_P(body_header) ? Qtrue : Qfalse; + } else if (body->compiled) { block_body_header_t *body_header = document_body_get_block_body_header_ptr(&body->as.compiled.document_body_entry); return BLOCK_BODY_HEADER_BLANK_P(body_header) ? 
Qtrue : Qfalse; } else { From 5b1f9d86ae3524e647f814641503d31e933ea10f Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:51:06 -0500 Subject: [PATCH 18/18] Fix rebase issues --- ext/liquid_c/block.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index c05c8a0a..e1a99efa 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -401,18 +401,14 @@ static VALUE block_body_parse_from_source(VALUE self, block_body_t *body, VALUE markup = tag_markup_get_markup(unknown_tag); block_body_push_tag_markup(body, parse_context_obj, unknown_tag); - if (RTEST(parse_context.parent_tag)) { + if (RTEST(parse_context.parent_tag) && !body->as.intermediate.bound_to_tag) { + body->as.intermediate.bound_to_tag = true; tag_markup_set_block_body(parse_context.parent_tag, self, body); } } VALUE block_ret = rb_yield_values(2, tag_name, markup); - if (RTEST(parse_context.parent_tag) && !body->as.intermediate.bound_to_tag) { - body->as.intermediate.bound_to_tag = true; - tag_markup_set_block_body(parse_context.parent_tag, self, body); - } - return block_ret; }