From ed940df097a2a8d55c7915c726a3242496267c20 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 30 Nov 2020 11:40:59 -0500 Subject: [PATCH] Implement deserialize --- ext/liquid_c/block.c | 191 +++++++++++++++++++------ ext/liquid_c/block.h | 5 + ext/liquid_c/document.c | 21 +++ ext/liquid_c/document.h | 7 + ext/liquid_c/document_body.c | 94 ++++++++---- ext/liquid_c/document_body.h | 33 ++++- ext/liquid_c/liquid.c | 6 + ext/liquid_c/parse_context.c | 7 +- ext/liquid_c/parse_context.h | 3 + ext/liquid_c/serialize_parse_context.c | 76 ++++++++++ ext/liquid_c/serialize_parse_context.h | 22 +++ ext/liquid_c/tag_markup.c | 30 ++++ ext/liquid_c/tag_markup.h | 11 ++ ext/liquid_c/template.c | 52 +++++++ ext/liquid_c/template.h | 6 + lib/liquid/c.rb | 8 +- test/unit/template_test.rb | 22 +++ 17 files changed, 513 insertions(+), 81 deletions(-) create mode 100644 ext/liquid_c/document.c create mode 100644 ext/liquid_c/document.h create mode 100644 ext/liquid_c/serialize_parse_context.c create mode 100644 ext/liquid_c/serialize_parse_context.h create mode 100644 ext/liquid_c/template.c create mode 100644 ext/liquid_c/template.h create mode 100644 test/unit/template_test.rb diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index e8a07bc1..a0703cf1 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -7,6 +7,7 @@ #include "variable.h" #include "context.h" #include "parse_context.h" +#include "serialize_parse_context.h" #include "vm_assembler.h" #include "tag_markup.h" #include @@ -44,6 +45,9 @@ static void block_body_mark(void *ptr) if (body->compiled) { document_body_entry_mark(&body->as.compiled.document_body_entry); rb_gc_mark(body->as.compiled.nodelist); + } else if (body->from_serialize) { + document_body_entry_mark(&body->as.serialize.document_body_entry); + rb_gc_mark(body->as.serialize.parse_context); } else { rb_gc_mark(body->as.intermediate.parse_context); if (body->as.intermediate.vm_assembler_pool) @@ -56,7 +60,7 @@ static void block_body_mark(void *ptr) static void block_body_free(void *ptr) { block_body_t *body = ptr; - if (!body->compiled) { + if (!body->compiled && !body->from_serialize) { // Free the assembler instead of recycling it because the vm_assembler_pool may have been GC'd vm_assembler_pool_free_assembler(body->as.intermediate.code); } @@ -67,7 +71,7 @@ static size_t block_body_memsize(const void *ptr) { const block_body_t *body = ptr; if (!ptr) return 0; - if (body->compiled) { + if (body->compiled || body->from_serialize) { return sizeof(block_body_t); } else { return sizeof(block_body_t) + vm_assembler_alloc_memsize(body->as.intermediate.code); @@ -88,6 +92,7 @@ static VALUE block_body_allocate(VALUE klass) VALUE obj = TypedData_Make_Struct(klass, block_body_t, &block_body_data_type, body); body->compiled = false; + body->from_serialize = false; body->obj = obj; body->tags = c_buffer_init(); body->as.intermediate.blank = true; @@ -103,18 +108,24 @@ static VALUE block_body_initialize(VALUE self, VALUE parse_context) block_body_t *body; BlockBody_Get_Struct(self, body); - body->as.intermediate.parse_context = parse_context; - - if (parse_context_document_body_initialized_p(parse_context)) { - body->as.intermediate.vm_assembler_pool = parse_context_get_vm_assembler_pool(parse_context); + if (is_serialize_parse_context_p(parse_context)) { + body->from_serialize = true; + body->as.serialize.document_body_entry = document_body_entry_init(); + body->as.serialize.parse_context = parse_context; } else { - parse_context_init_document_body(parse_context); - body->as.intermediate.root = true; - body->as.intermediate.vm_assembler_pool = parse_context_init_vm_assembler_pool(parse_context); - } + body->as.intermediate.parse_context = parse_context; + + if (parse_context_document_body_initialized_p(parse_context)) { + body->as.intermediate.vm_assembler_pool = parse_context_get_vm_assembler_pool(parse_context); + } else { + parse_context_init_document_body(parse_context); + body->as.intermediate.root = true; + body->as.intermediate.vm_assembler_pool = parse_context_init_vm_assembler_pool(parse_context); + } - body->as.intermediate.code = vm_assembler_pool_alloc_assembler(body->as.intermediate.vm_assembler_pool); - vm_assembler_add_leave(body->as.intermediate.code); + body->as.intermediate.code = vm_assembler_pool_alloc_assembler(body->as.intermediate.vm_assembler_pool); + vm_assembler_add_leave(body->as.intermediate.code); + } return Qnil; } @@ -138,6 +149,22 @@ static void block_body_push_tag_markup(block_body_t *body, VALUE parse_context, parse_context_set_parent_tag(parse_context, tag_markup); } +static void ensure_intermediate(block_body_t *body) +{ + if (body->compiled) { + rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is already compiled"); + } +} + +static void ensure_intermediate_not_parsing(block_body_t *body) +{ + ensure_intermediate(body); + + if (body->as.intermediate.code->parsing) { + rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is in a incompletely parsed state"); + } +} + static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *parse_context) { tokenizer_t *tokenizer = parse_context->tokenizer; @@ -257,7 +284,7 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars } VALUE tag_markup = tag_markup_new(tag_name, markup, false); - block_body_push_tag_markup(body, parse_context->ruby_obj, tag_markup); + parse_context_set_parent_tag(parse_context->ruby_obj, tag_markup); VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, tag_name, markup, parse_context->tokenizer_obj, parse_context->ruby_obj); @@ -270,11 +297,12 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars if (tokenizer->raw_tag_body) { if (tokenizer->raw_tag_body_len) { vm_assembler_add_write_raw(body->as.intermediate.code, tokenizer->raw_tag_body, - tokenizer->raw_tag_body_len); + tokenizer->raw_tag_body_len); } tokenizer->raw_tag_body = NULL; tokenizer->raw_tag_body_len = 0; } else { + vm_assembler_write_tag(body->as.intermediate.code, tag_markup); block_body_add_node(body, new_tag); } @@ -290,23 +318,80 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars return unknown_tag; } -static void ensure_intermediate(block_body_t *body) +typedef struct block_body_yield_tag_args { + block_body_t *body; + serialize_parse_context_t *serialize_context; + tag_markup_header_t *current_tag; +} block_body_yield_tag_args_t; + +static VALUE block_body_try_yield_tag(VALUE uncast_args) { - if (body->compiled) { - rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is already compiled"); - } + block_body_yield_tag_args_t *args = (block_body_yield_tag_args_t *)uncast_args; + tag_markup_header_t *current_tag = args->current_tag; + + serialize_parse_context_enter_tag(args->serialize_context, current_tag); + VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); + VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); + return rb_yield_values(2, tag_name, markup); } -static void ensure_intermediate_not_parsing(block_body_t *body) +static VALUE block_body_rescue_yield_tag(VALUE uncast_args, VALUE exception) { + block_body_yield_tag_args_t *args = (block_body_yield_tag_args_t *)uncast_args; + + serialize_parse_context_exit_tag(args->serialize_context, &args->body->as.serialize.document_body_entry, + args->current_tag); + rb_exc_raise(exception); +} + +static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj) +{ + assert(body->from_serialize); + assert(is_serialize_parse_context_p(parse_context_obj)); + ensure_intermediate(body); + if (body->as.serialize.parse_context != parse_context_obj) { + rb_raise(rb_eArgError, "Liquid::C::BlockBody#parse called with different parse context"); + } - if (body->as.intermediate.code->parsing) { - rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is in a incompletely parsed state"); + serialize_parse_context_t *serialize_context; + SerializeParseContext_Get_Struct(parse_context_obj, serialize_context); + + body->as.serialize.document_body_entry = serialize_context->current_entry; + + tag_markup_header_t *current_tag = serialize_context->current_tag; + while (current_tag) { + bool tag_unknown = TAG_UNKNOWN_P(current_tag); + + if (tag_unknown) { + block_body_yield_tag_args_t yield_args = { + .body = body, + .serialize_context = serialize_context, + .current_tag = current_tag + }; + return rb_rescue(block_body_try_yield_tag, (VALUE)&yield_args, block_body_rescue_yield_tag, (VALUE)&yield_args); + } else { + VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len); + VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len); + + VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name); + assert(RTEST(tag_class)); + + serialize_parse_context_enter_tag(serialize_context, current_tag); + VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, + tag_name, markup, tokenizer_obj, parse_context_obj); + serialize_parse_context_exit_tag(serialize_context, &body->as.serialize.document_body_entry, current_tag); + + c_buffer_write_ruby_value(&body->tags, new_tag); + } + + current_tag = tag_markup_get_next_tag(&body->as.serialize.document_body_entry, current_tag); } + + return rb_yield_values(2, Qnil, Qnil); } -static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj) +static VALUE block_body_parse_from_source(VALUE self, block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj) { parse_context_t parse_context = { .parent_tag = parse_context_get_parent_tag(parse_context_obj), @@ -314,8 +399,6 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte .ruby_obj = parse_context_obj, }; Tokenizer_Get_Struct(tokenizer_obj, parse_context.tokenizer); - block_body_t *body; - BlockBody_Get_Struct(self, body); ensure_intermediate_not_parsing(body); if (body->as.intermediate.parse_context != parse_context_obj) { @@ -332,17 +415,29 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte tag_name = tag_markup_get_tag_name(unknown_tag); markup = tag_markup_get_markup(unknown_tag); block_body_push_tag_markup(body, parse_context_obj, unknown_tag); + + if (RTEST(parse_context.parent_tag)) { + tag_markup_set_block_body(parse_context.parent_tag, self, body); + } } VALUE block_ret = rb_yield_values(2, tag_name, markup); - if (RTEST(parse_context.parent_tag)) { - tag_markup_set_block_body(parse_context.parent_tag, self, body); - } - return block_ret; } +static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj) +{ + block_body_t *body; + BlockBody_Get_Struct(self, body); + + if (body->from_serialize) { + return block_body_parse_from_serialize(body, tokenizer_obj, parse_context_obj); + } else { + return block_body_parse_from_source(self, body, tokenizer_obj, parse_context_obj); + } +} + static VALUE block_body_freeze(VALUE self) { @@ -351,26 +446,32 @@ static VALUE block_body_freeze(VALUE self) if (body->compiled) return Qnil; - VALUE parse_context = body->as.intermediate.parse_context; - VALUE document_body = parse_context_get_document_body(parse_context); - - bool root = body->as.intermediate.root; - - vm_assembler_pool_t *assembler_pool = body->as.intermediate.vm_assembler_pool; - vm_assembler_t *assembler = body->as.intermediate.code; - bool blank = body->as.intermediate.blank; - uint32_t render_score = body->as.intermediate.render_score; - vm_assembler_t *code = body->as.intermediate.code; body->compiled = true; - body->as.compiled.nodelist = Qundef; - document_body_write_block_body(document_body, blank, render_score, code, &body->as.compiled.document_body_entry); - vm_assembler_pool_recycle_assembler(assembler_pool, assembler); - if (root) { - parse_context_remove_document_body(parse_context); - parse_context_remove_vm_assembler_pool(parse_context); + if (body->from_serialize) { + body->as.compiled.nodelist = Qundef; + } else { + VALUE parse_context = body->as.intermediate.parse_context; + VALUE document_body = parse_context_get_document_body(parse_context); + + bool root = body->as.intermediate.root; + + vm_assembler_pool_t *assembler_pool = body->as.intermediate.vm_assembler_pool; + vm_assembler_t *assembler = body->as.intermediate.code; + bool blank = body->as.intermediate.blank; + uint32_t render_score = body->as.intermediate.render_score; + vm_assembler_t *code = body->as.intermediate.code; + body->as.compiled.nodelist = Qundef; + document_body_write_block_body(document_body, blank, render_score, code, &body->as.compiled.document_body_entry); + vm_assembler_pool_recycle_assembler(assembler_pool, assembler); + + if (root) { + parse_context_remove_document_body(parse_context); + parse_context_remove_vm_assembler_pool(parse_context); + } } + rb_call_super(0, NULL); return Qnil; @@ -408,6 +509,8 @@ static VALUE block_body_remove_blank_strings(VALUE self) block_body_t *body; BlockBody_Get_Struct(self, body); + if (body->from_serialize) return Qnil; + ensure_intermediate_not_parsing(body); if (!body->as.intermediate.blank) { diff --git a/ext/liquid_c/block.h b/ext/liquid_c/block.h index e6e16070..bc3b72e6 100644 --- a/ext/liquid_c/block.h +++ b/ext/liquid_c/block.h @@ -6,6 +6,7 @@ typedef struct block_body { bool compiled; + bool from_serialize; VALUE obj; c_buffer_t tags; @@ -14,6 +15,10 @@ typedef struct block_body { document_body_entry_t document_body_entry; VALUE nodelist; } compiled; + struct { + document_body_entry_t document_body_entry; + VALUE parse_context; + } serialize; struct { VALUE parse_context; vm_assembler_pool_t *vm_assembler_pool; diff --git a/ext/liquid_c/document.c b/ext/liquid_c/document.c new file mode 100644 index 00000000..ea91f46b --- /dev/null +++ b/ext/liquid_c/document.c @@ -0,0 +1,21 @@ +#include +#include "liquid.h" +#include "document.h" +#include "parse_context.h" +#include "document_body.h" + +static ID id_parse; +static VALUE cLiquidDocument; + +VALUE document_parse(VALUE tokenizer, VALUE parse_context) +{ + return rb_funcall(cLiquidDocument, id_parse, 2, tokenizer, parse_context); +} + +void liquid_define_document() +{ + id_parse = rb_intern("parse"); + + cLiquidDocument = rb_const_get(mLiquid, rb_intern("Document")); + rb_global_variable(&cLiquidDocument); +} diff --git a/ext/liquid_c/document.h b/ext/liquid_c/document.h new file mode 100644 index 00000000..2ed41573 --- /dev/null +++ b/ext/liquid_c/document.h @@ -0,0 +1,7 @@ +#ifndef LIQUID_DOCUMENT_H +#define LIQUID_DOCUMENT_H + +void liquid_define_document(); +VALUE document_parse(VALUE tokenizer, VALUE parse_context); + +#endif diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 5d8abc09..ee66f416 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -11,19 +11,28 @@ static void document_body_mark(void *ptr) { document_body_t *body = ptr; rb_gc_mark(body->constants); + if (!body->mutable) { + rb_gc_mark(body->as.immutable.serialize_str); + } } static void document_body_free(void *ptr) { document_body_t *body = ptr; - c_buffer_free(&body->buffer); + if (body->mutable) { + c_buffer_free(&body->as.mutable.buffer); + } xfree(body); } static size_t document_body_memsize(const void *ptr) { const document_body_t *body = ptr; - return sizeof(document_body_t) + c_buffer_size(&body->buffer); + size_t size = sizeof(document_body_t); + if (body->mutable) { + size += c_buffer_size(&body->as.mutable.buffer); + } + return size; } const rb_data_type_t document_body_data_type = { @@ -32,23 +41,34 @@ const rb_data_type_t document_body_data_type = { NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY }; -static VALUE document_body_allocate(VALUE klass) +#define DocumentBody_Get_Struct(obj, sval) TypedData_Get_Struct(obj, document_body_t, &document_body_data_type, sval) + +VALUE document_body_new_mutable_instance() { document_body_t *body; - - VALUE obj = TypedData_Make_Struct(klass, document_body_t, &document_body_data_type, body); + VALUE obj = TypedData_Make_Struct(cLiquidCDocumentBody, document_body_t, &document_body_data_type, body); body->self = obj; body->constants = rb_ary_new(); - body->buffer = c_buffer_init(); + body->mutable = true; + body->as.mutable.buffer = c_buffer_init(); return obj; } -#define DocumentBody_Get_Struct(obj, sval) TypedData_Get_Struct(obj, document_body_t, &document_body_data_type, sval) - -VALUE document_body_new_instance() +VALUE document_body_new_immutable_instance(VALUE constants, VALUE serialize_str, const char *data) { - return rb_class_new_instance(0, NULL, cLiquidCDocumentBody); + assert(BUILTIN_TYPE(constants) == T_ARRAY); + assert(BUILTIN_TYPE(serialize_str) == T_STRING); + + document_body_t *body; + VALUE obj = TypedData_Make_Struct(cLiquidCDocumentBody, document_body_t, &document_body_data_type, body); + body->self = obj; + body->constants = constants; + body->mutable = false; + body->as.immutable.serialize_str = serialize_str; + body->as.immutable.data = data; + + return obj; } static void document_body_write_tag_markup(document_body_t *body, VALUE tag_markup_obj) @@ -56,21 +76,21 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark tag_markup_t *tag_markup; TagMarkup_Get_Struct(tag_markup_obj, tag_markup); - size_t tag_markup_offset = c_buffer_size(&body->buffer); - c_buffer_extend_for_write(&body->buffer, sizeof(tag_markup_header_t)); + size_t tag_markup_offset = c_buffer_size(&body->as.mutable.buffer); + c_buffer_extend_for_write(&body->as.mutable.buffer, sizeof(tag_markup_header_t)); tag_markup_header_t header; header.flags = tag_markup->flags; uint32_t tag_name_len = (uint32_t)RSTRING_LEN(tag_markup->tag_name); header.tag_name_len = tag_name_len; - header.tag_name_offset = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); - c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->tag_name), tag_name_len); + header.tag_name_offset = (uint32_t)(c_buffer_size(&body->as.mutable.buffer) - tag_markup_offset); + c_buffer_write(&body->as.mutable.buffer, RSTRING_PTR(tag_markup->tag_name), tag_name_len); uint32_t markup_len = (uint32_t)RSTRING_LEN(tag_markup->markup); header.markup_len = markup_len; - header.markup_offset = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); - c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->markup), markup_len); + header.markup_offset = (uint32_t)(c_buffer_size(&body->as.mutable.buffer) - tag_markup_offset); + c_buffer_write(&body->as.mutable.buffer, RSTRING_PTR(tag_markup->markup), markup_len); if (tag_markup->block_body) { assert(tag_markup->block_body->compiled); @@ -79,23 +99,24 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark header.block_body_offset = BUFFER_OFFSET_UNDEF; } - header.total_len = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); + header.total_len = (uint32_t)(c_buffer_size(&body->as.mutable.buffer) - tag_markup_offset); - memcpy(body->buffer.data + tag_markup_offset, &header, sizeof(tag_markup_header_t)); + memcpy(body->as.mutable.buffer.data + tag_markup_offset, &header, sizeof(tag_markup_header_t)); } void document_body_write_block_body(VALUE self, bool blank, uint32_t render_score, vm_assembler_t *code, document_body_entry_t *entry) { document_body_t *body; DocumentBody_Get_Struct(self, body); + assert(body->mutable); - c_buffer_zero_pad_for_alignment(&body->buffer, alignof(block_body_header_t)); + c_buffer_zero_pad_for_alignment(&body->as.mutable.buffer, alignof(block_body_header_t)); entry->body = body; - entry->buffer_offset = c_buffer_size(&body->buffer); + entry->buffer_offset = c_buffer_size(&body->as.mutable.buffer); - size_t buf_block_body_offset = c_buffer_size(&body->buffer); - c_buffer_extend_for_write(&body->buffer, sizeof(block_body_header_t)); + size_t buf_block_body_offset = c_buffer_size(&body->as.mutable.buffer); + c_buffer_extend_for_write(&body->as.mutable.buffer, sizeof(block_body_header_t)); block_body_header_t buf_block_body; @@ -104,26 +125,27 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor buf_block_body.render_score = render_score; buf_block_body.max_stack_size = code->max_stack_size; - buf_block_body.instructions_offset = (uint32_t)(c_buffer_size(&body->buffer) - buf_block_body_offset); + buf_block_body.instructions_offset = (uint32_t)(c_buffer_size(&body->as.mutable.buffer) - buf_block_body_offset); buf_block_body.instructions_bytes = (uint32_t)c_buffer_size(&code->instructions); - c_buffer_concat(&body->buffer, &code->instructions); + c_buffer_concat(&body->as.mutable.buffer, &code->instructions); assert(c_buffer_size(&code->tags) % sizeof(VALUE) == 0); uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tags) / sizeof(VALUE)); - buf_block_body.tags_offset = (uint32_t)(c_buffer_size(&body->buffer) - buf_block_body_offset); - size_t tags_start_offset = c_buffer_size(&body->buffer); + buf_block_body.tags_offset = (uint32_t)(c_buffer_size(&body->as.mutable.buffer) - buf_block_body_offset); + size_t tags_start_offset = c_buffer_size(&body->as.mutable.buffer); for (uint32_t i = 0; i < tags_len; i++) { document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i]); } - buf_block_body.tags_bytes = (uint32_t)(c_buffer_size(&body->buffer) - tags_start_offset); + buf_block_body.tags_bytes = (uint32_t)(c_buffer_size(&body->as.mutable.buffer) - tags_start_offset); assert(c_buffer_size(&code->constants) % sizeof(VALUE) == 0); uint32_t constants_len = (uint32_t)(c_buffer_size(&code->constants) / sizeof(VALUE)); buf_block_body.constants_offset = (uint32_t)RARRAY_LEN(body->constants); buf_block_body.constants_len = constants_len; + rb_ary_cat(body->constants, (VALUE *)code->constants.data, constants_len); - memcpy(body->buffer.data + buf_block_body_offset, &buf_block_body, sizeof(block_body_header_t)); + memcpy(body->as.mutable.buffer.data + buf_block_body_offset, &buf_block_body, sizeof(block_body_header_t)); } @@ -131,7 +153,7 @@ VALUE document_body_dump(document_body_t *body, uint32_t entrypoint_block_index) { assert(BUILTIN_TYPE(body->constants) == T_ARRAY); - uint32_t buffer_len = (uint32_t)c_buffer_size(&body->buffer); + uint32_t buffer_len = (uint32_t)c_buffer_size(&body->as.mutable.buffer); VALUE constants = rb_marshal_dump(body->constants, Qnil); uint32_t constants_len = (uint32_t)RSTRING_LEN(constants); @@ -147,16 +169,26 @@ VALUE document_body_dump(document_body_t *body, uint32_t entrypoint_block_index) }; rb_str_cat(str, (const char *)&header, sizeof(document_body_header_t)); - rb_str_cat(str, (const char *)body->buffer.data, buffer_len); + rb_str_cat(str, (const char *)body->as.mutable.buffer.data, buffer_len); rb_str_append(str, constants); return str; } +void document_body_setup_entry_for_header(VALUE self, uint32_t offset, document_body_entry_t *entry) +{ + document_body_t *body; + DocumentBody_Get_Struct(self, body); + + entry->body = body; + entry->buffer_offset = offset; +} + + void liquid_define_document_body() { cLiquidCDocumentBody = rb_define_class_under(mLiquidC, "DocumentBody", rb_cObject); rb_global_variable(&cLiquidCDocumentBody); - rb_define_alloc_func(cLiquidCDocumentBody, document_body_allocate); + rb_undef_alloc_func(cLiquidCDocumentBody); } diff --git a/ext/liquid_c/document_body.h b/ext/liquid_c/document_body.h index f4007cb1..29b1bf8f 100644 --- a/ext/liquid_c/document_body.h +++ b/ext/liquid_c/document_body.h @@ -25,7 +25,16 @@ typedef struct block_body_header { typedef struct document_body { VALUE self; VALUE constants; - c_buffer_t buffer; + bool mutable; + union { + struct { + c_buffer_t buffer; + } mutable; + struct { + VALUE serialize_str; + const char *data; + } immutable; + } as; } document_body_t; typedef struct document_body_header { @@ -42,19 +51,35 @@ typedef struct document_body_entry { } document_body_entry_t; void liquid_define_document_body(); -VALUE document_body_new_instance(); +VALUE document_body_new_mutable_instance(); +VALUE document_body_new_immutable_instance(VALUE constants, VALUE serialize_str, const char *data); void document_body_write_block_body(VALUE self, bool blank, uint32_t render_score, vm_assembler_t *code, document_body_entry_t *entry); VALUE document_body_dump(document_body_t *body, uint32_t entrypoint_block_index); +void document_body_setup_entry_for_header(VALUE self, uint32_t offset, document_body_entry_t *entry); + +static inline document_body_entry_t document_body_entry_init() +{ + return (document_body_entry_t) { NULL, 0 }; +} static inline void document_body_entry_mark(document_body_entry_t *entry) { + if (!entry->body) return; + rb_gc_mark(entry->body->self); - rb_gc_mark(entry->body->constants); + + if (!entry->body->mutable) { + rb_gc_mark(entry->body->as.immutable.serialize_str); + } } static inline block_body_header_t *document_body_get_block_body_header_ptr(const document_body_entry_t *entry) { - return (block_body_header_t *)(entry->body->buffer.data + entry->buffer_offset); + if (entry->body->mutable) { + return (block_body_header_t *)(entry->body->as.mutable.buffer.data + entry->buffer_offset); + } else { + return (block_body_header_t *)(entry->body->as.immutable.data + entry->buffer_offset); + } } static inline const VALUE *document_body_get_constants_ptr(const document_body_entry_t *entry) diff --git a/ext/liquid_c/liquid.c b/ext/liquid_c/liquid.c index 094cb19d..a1a5c7c0 100644 --- a/ext/liquid_c/liquid.c +++ b/ext/liquid_c/liquid.c @@ -6,11 +6,14 @@ #include "raw.h" #include "resource_limits.h" #include "expression.h" +#include "template.h" +#include "document.h" #include "document_body.h" #include "block.h" #include "tag_markup.h" #include "context.h" #include "parse_context.h" +#include "serialize_parse_context.h" #include "variable_lookup.h" #include "vm_assembler_pool.h" #include "vm.h" @@ -83,11 +86,14 @@ RUBY_FUNC_EXPORTED void Init_liquid_c(void) liquid_define_resource_limits(); liquid_define_expression(); liquid_define_variable(); + liquid_define_template(); + liquid_define_document(); liquid_define_document_body(); liquid_define_block_body(); liquid_define_tag_markup(); liquid_define_context(); liquid_define_parse_context(); + liquid_define_serialize_parse_context(); liquid_define_variable_lookup(); liquid_define_vm_assembler_pool(); liquid_define_vm_assembler(); diff --git a/ext/liquid_c/parse_context.c b/ext/liquid_c/parse_context.c index 5e36fb99..55033bf8 100644 --- a/ext/liquid_c/parse_context.c +++ b/ext/liquid_c/parse_context.c @@ -1,6 +1,7 @@ #include "parse_context.h" #include "document_body.h" +VALUE cLiquidParseContext; static ID id_document_body, id_vm_assembler_pool, id_parent_tag; bool parse_context_document_body_initialized_p(VALUE self) @@ -12,7 +13,7 @@ void parse_context_init_document_body(VALUE self) { assert(!parse_context_document_body_initialized_p(self)); - VALUE document_body = document_body_new_instance(); + VALUE document_body = document_body_new_mutable_instance(); rb_ivar_set(self, id_document_body, document_body); } @@ -72,9 +73,13 @@ void parse_context_set_parent_tag(VALUE self, VALUE tag_header) rb_ivar_set(self, id_parent_tag, tag_header); } + void liquid_define_parse_context() { id_document_body = rb_intern("document_body"); id_vm_assembler_pool = rb_intern("vm_assembler_pool"); id_parent_tag = rb_intern("parent_tag"); + + cLiquidParseContext = rb_const_get(mLiquid, rb_intern("ParseContext")); + rb_global_variable(&cLiquidParseContext); } diff --git a/ext/liquid_c/parse_context.h b/ext/liquid_c/parse_context.h index e980c420..2f5740af 100644 --- a/ext/liquid_c/parse_context.h +++ b/ext/liquid_c/parse_context.h @@ -4,6 +4,9 @@ #include #include #include "vm_assembler_pool.h" +#include "tag_markup.h" + +extern VALUE cLiquidParseContext; void liquid_define_parse_context(); bool parse_context_document_body_initialized_p(VALUE self); diff --git a/ext/liquid_c/serialize_parse_context.c b/ext/liquid_c/serialize_parse_context.c new file mode 100644 index 00000000..cd0e930b --- /dev/null +++ b/ext/liquid_c/serialize_parse_context.c @@ -0,0 +1,76 @@ +#include +#include "serialize_parse_context.h" +#include "liquid.h" +#include "parse_context.h" + +static VALUE cLiquidCSerializeParseContext; +static ID id_initialize; + +static void serialize_parse_context_mark(void *ptr) +{ + serialize_parse_context_t *serialize_context = ptr; + rb_gc_mark(serialize_context->document_body); +} + +static void serialize_parse_context_free(void *ptr) +{ + xfree(ptr); +} + +static size_t serialize_parse_context_memsize(const void *ptr) +{ + return sizeof(serialize_parse_context_t); +} + +const rb_data_type_t serialize_parse_context_data_type = { + "liquid_serialize_parse_context", + { serialize_parse_context_mark, serialize_parse_context_free, serialize_parse_context_memsize, }, + NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY +}; + +VALUE serialize_parse_context_new(VALUE document_body, document_body_header_t *header) +{ + VALUE obj; + serialize_parse_context_t *serialize_context; + + obj = TypedData_Make_Struct(cLiquidCSerializeParseContext, serialize_parse_context_t, + &serialize_parse_context_data_type, serialize_context); + assert(header->entrypoint_block_index < header->buffer_len); + serialize_context->document_body = document_body; + document_body_setup_entry_for_header(document_body, header->entrypoint_block_index, + &serialize_context->current_entry); + serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, NULL); + + // Call initialize method of parent class + rb_funcall(obj, id_initialize, 0); + + return obj; +} + +bool is_serialize_parse_context_p(VALUE self) +{ + return CLASS_OF(self) == cLiquidCSerializeParseContext; +} + +void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag) +{ + serialize_context->current_entry.buffer_offset = tag->block_body_offset; + serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, NULL); +} + +void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, + tag_markup_header_t *tag) +{ + assert(serialize_context->current_entry.body == entry->body); + serialize_context->current_entry = *entry; + serialize_context->current_tag = tag_markup_get_next_tag(&serialize_context->current_entry, tag); +} + +void liquid_define_serialize_parse_context() +{ + id_initialize = rb_intern("initialize"); + + cLiquidCSerializeParseContext = rb_define_class_under(mLiquidC, "SerializeParseContext", cLiquidParseContext); + rb_global_variable(&cLiquidCSerializeParseContext); + rb_undef_alloc_func(cLiquidCSerializeParseContext); +} diff --git a/ext/liquid_c/serialize_parse_context.h b/ext/liquid_c/serialize_parse_context.h new file mode 100644 index 00000000..c7648296 --- /dev/null +++ b/ext/liquid_c/serialize_parse_context.h @@ -0,0 +1,22 @@ +#ifndef LIQUID_SERIALIZE_PARSE_CONTEXT_H +#define LIQUID_SERIALIZE_PARSE_CONTEXT_H + +#include "document_body.h" +#include "tag_markup.h" + +typedef struct serialize_parse_context { + VALUE document_body; + document_body_entry_t current_entry; + tag_markup_header_t *current_tag; +} serialize_parse_context_t; + +extern const rb_data_type_t serialize_parse_context_data_type; +#define SerializeParseContext_Get_Struct(obj, sval) TypedData_Get_Struct(obj, serialize_parse_context_t, &serialize_parse_context_data_type, sval) + +void liquid_define_serialize_parse_context(); +VALUE serialize_parse_context_new(); +bool is_serialize_parse_context_p(VALUE self); +void serialize_parse_context_enter_tag(serialize_parse_context_t *serialize_context, tag_markup_header_t *tag); +void serialize_parse_context_exit_tag(serialize_parse_context_t *serialize_context, document_body_entry_t *entry, tag_markup_header_t *tag); + +#endif diff --git a/ext/liquid_c/tag_markup.c b/ext/liquid_c/tag_markup.c index f4f0e332..5575408c 100644 --- a/ext/liquid_c/tag_markup.c +++ b/ext/liquid_c/tag_markup.c @@ -65,6 +65,36 @@ void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *b tag->block_body = block_body; } +tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_markup_header_t *current_tag) +{ + // Should only be used for (deserialized) immutable document body + assert(!entry->body->mutable); + + if (BUFFER_OFFSET_UNDEF_P(entry->buffer_offset)) { + return NULL; + } + + block_body_header_t *header = document_body_get_block_body_header_ptr(entry); + + tag_markup_header_t *next_tag; + if (current_tag) { + assert(current_tag >= (tag_markup_header_t *)((char *)header + header->tags_offset)); + next_tag = (tag_markup_header_t *)((char *)current_tag + current_tag->total_len); + } else { + next_tag = (tag_markup_header_t *)((char *)header + header->tags_offset); + } + + tag_markup_header_t *tags_end = (tag_markup_header_t *)((char *)header + header->tags_offset + header->tags_bytes); + + if (next_tag < tags_end) { + assert((unsigned long)tags_end - (unsigned long)next_tag > sizeof(tag_markup_header_t)); + return next_tag; + } else { // End of tags have been reached + assert(next_tag == tags_end); + return NULL; + } +} + void liquid_define_tag_markup() { cLiquidCTagMarkup = rb_define_class_under(mLiquidC, "TagMarkup", rb_cObject); diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index 9bc7c4a3..82b31f38 100644 --- a/ext/liquid_c/tag_markup.h +++ b/ext/liquid_c/tag_markup.h @@ -33,5 +33,16 @@ VALUE tag_markup_new(VALUE tag_name, VALUE markup, bool unknown); VALUE tag_markup_get_tag_name(VALUE self); VALUE tag_markup_get_markup(VALUE self); void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *block_body); +tag_markup_header_t *tag_markup_get_next_tag(document_body_entry_t *entry, tag_markup_header_t *current_tag); + +static inline char *tag_markup_header_name(tag_markup_header_t *header) +{ + return ((char *)header) + header->tag_name_offset; +} + +static inline char *tag_markup_header_markup(tag_markup_header_t *header) +{ + return ((char *)header) + header->markup_offset; +} #endif diff --git a/ext/liquid_c/template.c b/ext/liquid_c/template.c new file mode 100644 index 00000000..a7565010 --- /dev/null +++ b/ext/liquid_c/template.c @@ -0,0 +1,52 @@ +#include +#include "liquid.h" +#include "document.h" +#include "document_body.h" +#include "serialize_parse_context.h" +#include "tokenizer.h" + +static ID id_ivar_root, id_configure_options; + +static VALUE marshal_load_constants(const char *str, size_t len) +{ + VALUE str_obj = rb_str_new_static(str, len); + VALUE constants = rb_marshal_load(str_obj); + if (BUILTIN_TYPE(constants) != T_ARRAY) { + rb_raise(rb_eArgError, "expected constants to be an array"); + } + return constants; +} + +static VALUE template_load(VALUE self, VALUE source, VALUE options) +{ + rb_funcall(self, id_configure_options, 1, options); + + Check_Type(source, T_STRING); + source = rb_str_dup_frozen(source); + const char *data = RSTRING_PTR(source); + + document_body_header_t *header = (document_body_header_t *)data; + + assert(RSTRING_LEN(source) >= header->buffer_offset + header->buffer_offset); + const char *body_data = data + header->buffer_offset; + + assert(RSTRING_LEN(source) >= header->constants_offset + header->constants_len); + VALUE constants = marshal_load_constants(data + header->constants_offset, header->constants_len); + + VALUE document_body = document_body_new_immutable_instance(constants, source, body_data); + + VALUE parse_context = serialize_parse_context_new(document_body, header); + + rb_ivar_set(self, id_ivar_root, document_parse(Qnil, parse_context)); + + return self; +} + +void liquid_define_template() +{ + id_ivar_root = rb_intern("@root"); + id_configure_options = rb_intern("configure_options"); + + VALUE cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template")); + rb_define_method(cLiquidTemplate, "load", template_load, 2); +} diff --git a/ext/liquid_c/template.h b/ext/liquid_c/template.h new file mode 100644 index 00000000..0b8e40ad --- /dev/null +++ b/ext/liquid_c/template.h @@ -0,0 +1,6 @@ +#ifndef LIQUID_TEMPLATE_H +#define LIQUID_TEMPLATE_H + +void liquid_define_template(); + +#endif diff --git a/lib/liquid/c.rb b/lib/liquid/c.rb index 0f670732..acd102c4 100644 --- a/lib/liquid/c.rb +++ b/lib/liquid/c.rb @@ -92,7 +92,7 @@ module Liquid module C module DocumentClassPatch def parse(tokenizer, parse_context) - if tokenizer.is_a?(Liquid::C::Tokenizer) + if tokenizer.is_a?(Liquid::C::Tokenizer) || tokenizer.nil? # Temporary to test rollout of the fix for this bug if parse_context[:bug_compatible_whitespace_trimming] tokenizer.bug_compatible_whitespace_trimming! @@ -110,6 +110,12 @@ def parse(tokenizer, parse_context) end Liquid::Template.class_eval do + class << self + def load(source, options = {}) + new.load(source, options) + end + end + def dump @root.dump end diff --git a/test/unit/template_test.rb b/test/unit/template_test.rb new file mode 100644 index 00000000..24afeec6 --- /dev/null +++ b/test/unit/template_test.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require 'test_helper' + +class TemplateTest < MiniTest::Test + def test_serialize + assert_equal('hello world', dump_load_eval('hello world')) + assert_equal('hello world', dump_load_eval('{% assign greeting = "hello" %}{{ greeting }} world')) + assert_equal('hello world', dump_load_eval('{% raw %}hello {% endraw %}world')) + assert_equal('hello world', + dump_load_eval('{% if test %}goodbye {% else %}hello {% endif %}world', 'test' => false)) + assert_equal('hello world', dump_load_eval('{% if true %}hello {% endif %}{% if true %}world{% endif %}')) + assert_equal('123', dump_load_eval('{% for i in (1..10) %}{{i}}{% if i == 3 %}{% break %}{% endif %}{% endfor %}')) + end + + private + + def dump_load_eval(source, assigns = {}) + serialize = Liquid::Template.parse(source).dump + Liquid::Template.load(serialize).render!(assigns) + end +end