Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement deserialize #138

Open
wants to merge 18 commits into
base: pz-serialize
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ namespace :test do
env_vars.each { |key, value| ENV[key] = value }
task.invoke
ensure
env_vars.each { |key, _| ENV[key] = nil }
old_env_values.each { |key, value| ENV[key] = value }
task.reenable
end
Expand All @@ -55,9 +56,11 @@ namespace :test do

task :strict, &integration_test_with_env.call('LIQUID_PARSER_MODE' => 'strict')

task :serialization, &integration_test_with_env.call('LIQUID_C_TEST_SERIALIZE' => '1')

task :without_vm, &integration_test_with_env.call('LIQUID_C_DISABLE_VM' => 'true')

task all: [:lax, :strict, :without_vm]
task all: [:lax, :strict, :serialization, :without_vm]
end
end

Expand Down
186 changes: 138 additions & 48 deletions ext/liquid_c/block.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "variable.h"
#include "context.h"
#include "parse_context.h"
#include "serialize_parse_context.h"
#include "vm_assembler.h"
#include "tag_markup.h"
#include <stdio.h>
Expand Down Expand Up @@ -44,6 +45,9 @@ static void block_body_mark(void *ptr)
if (body->compiled) {
document_body_entry_mark(&body->as.compiled.document_body_entry);
rb_gc_mark(body->as.compiled.nodelist);
} else if (body->from_serialize) {
document_body_entry_mark(&body->as.serialize.document_body_entry);
rb_gc_mark(body->as.serialize.parse_context);
} else {
rb_gc_mark(body->as.intermediate.parse_context);
if (body->as.intermediate.vm_assembler_pool)
Expand All @@ -56,7 +60,7 @@ static void block_body_mark(void *ptr)
static void block_body_free(void *ptr)
{
block_body_t *body = ptr;
if (!body->compiled) {
if (!body->compiled && !body->from_serialize) {
// Free the assembler instead of recycling it because the vm_assembler_pool may have been GC'd
vm_assembler_pool_free_assembler(body->as.intermediate.code);
}
Expand All @@ -67,7 +71,7 @@ static size_t block_body_memsize(const void *ptr)
{
const block_body_t *body = ptr;
if (!ptr) return 0;
if (body->compiled) {
if (body->compiled || body->from_serialize) {
return sizeof(block_body_t);
} else {
return sizeof(block_body_t) + vm_assembler_alloc_memsize(body->as.intermediate.code);
Expand All @@ -88,6 +92,7 @@ static VALUE block_body_allocate(VALUE klass)
VALUE obj = TypedData_Make_Struct(klass, block_body_t, &block_body_data_type, body);

body->compiled = false;
body->from_serialize = false;
body->obj = obj;
body->tags = c_buffer_init();
body->as.intermediate.blank = true;
Expand All @@ -104,18 +109,24 @@ static VALUE block_body_initialize(VALUE self, VALUE parse_context)
block_body_t *body;
BlockBody_Get_Struct(self, body);

body->as.intermediate.parse_context = parse_context;

if (parse_context_document_body_initialized_p(parse_context)) {
body->as.intermediate.vm_assembler_pool = parse_context_get_vm_assembler_pool(parse_context);
if (is_parse_context_for_serialize(parse_context)) {
body->from_serialize = true;
body->as.serialize.document_body_entry = document_body_entry_init();
body->as.serialize.parse_context = parse_context;
} else {
parse_context_init_document_body(parse_context);
body->as.intermediate.root = true;
body->as.intermediate.vm_assembler_pool = parse_context_init_vm_assembler_pool(parse_context);
}
body->as.intermediate.parse_context = parse_context;

if (parse_context_document_body_initialized_p(parse_context)) {
body->as.intermediate.vm_assembler_pool = parse_context_get_vm_assembler_pool(parse_context);
} else {
parse_context_init_document_body(parse_context);
body->as.intermediate.root = true;
body->as.intermediate.vm_assembler_pool = parse_context_init_vm_assembler_pool(parse_context);
}

body->as.intermediate.code = vm_assembler_pool_alloc_assembler(body->as.intermediate.vm_assembler_pool);
vm_assembler_add_leave(body->as.intermediate.code);
body->as.intermediate.code = vm_assembler_pool_alloc_assembler(body->as.intermediate.vm_assembler_pool);
vm_assembler_add_leave(body->as.intermediate.code);
}

return Qnil;
}
Expand All @@ -139,6 +150,22 @@ static void block_body_push_tag_markup(block_body_t *body, VALUE parse_context,
parse_context_set_parent_tag(parse_context, tag_markup);
}

// Guard for operations that need a body which has not been compiled yet.
// Returns normally while body->compiled is false; otherwise raises a Ruby
// RuntimeError (rb_raise does not return to the caller).
static void ensure_intermediate(block_body_t *body)
{
    if (!body->compiled) {
        return;
    }

    rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is already compiled");
}

// Stricter guard: the body must pass ensure_intermediate() and its assembler
// must not have its `parsing` flag set. Raises a Ruby RuntimeError otherwise.
//
// NOTE(review): this reads body->as.intermediate without looking at
// body->from_serialize; the callers visible in this file branch on
// from_serialize before getting here -- confirm that holds for every caller.
static void ensure_intermediate_not_parsing(block_body_t *body)
{
    ensure_intermediate(body);

    if (!body->as.intermediate.code->parsing) {
        return;
    }

    rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is in a incompletely parsed state");
}

static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *parse_context)
{
tokenizer_t *tokenizer = parse_context->tokenizer;
Expand Down Expand Up @@ -258,7 +285,7 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars
}

VALUE tag_markup = tag_markup_new(token_start_line_number, tag_name, markup, false);
block_body_push_tag_markup(body, parse_context->ruby_obj, tag_markup);
parse_context_set_parent_tag(parse_context->ruby_obj, tag_markup);

VALUE new_tag = rb_funcall(tag_class, intern_parse, 4,
tag_name, markup, parse_context->tokenizer_obj, parse_context->ruby_obj);
Expand All @@ -271,11 +298,12 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars
if (tokenizer->raw_tag_body) {
if (tokenizer->raw_tag_body_len) {
vm_assembler_add_write_raw(body->as.intermediate.code, tokenizer->raw_tag_body,
tokenizer->raw_tag_body_len);
tokenizer->raw_tag_body_len);
}
tokenizer->raw_tag_body = NULL;
tokenizer->raw_tag_body_len = 0;
} else {
vm_assembler_write_tag_markup(body->as.intermediate.code, tag_markup);
block_body_add_node(body, new_tag);
}

Expand All @@ -291,32 +319,71 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars
return unknown_tag;
}

static void ensure_intermediate(block_body_t *body)
static VALUE block_body_parse_from_serialize(block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj)
{
if (body->compiled) {
rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is already compiled");
}
}
assert(body->from_serialize);
assert(is_parse_context_for_serialize(parse_context_obj));

static void ensure_intermediate_not_parsing(block_body_t *body)
{
ensure_intermediate(body);
if (body->as.serialize.parse_context != parse_context_obj) {
rb_raise(rb_eArgError, "Liquid::C::BlockBody#parse called with different parse context");
}

if (body->as.intermediate.code->parsing) {
rb_raise(rb_eRuntimeError, "Liquid::C::BlockBody is in a incompletely parsed state");
serialize_parse_context_t *serialize_context;
SerializeParseContext_Get_Struct(parse_context_obj, serialize_context);

body->as.serialize.document_body_entry = serialize_context->current_entry;

tag_markup_header_t *current_tag = serialize_context->current_tag;
while (current_tag) {
bool tag_unknown = TAG_UNKNOWN_P(current_tag);

if (current_tag->line_number != 0) {
rb_ivar_set(parse_context_obj, id_ivar_line_number, UINT2NUM(current_tag->line_number));
}

if (tag_unknown) {
serialize_parse_context_enter_tag(serialize_context, current_tag);

VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len);
VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len);

VALUE ret = rb_yield_values(2, tag_name, markup);
if (BUFFER_OFFSET_UNDEF_P(current_tag->block_body_offset)) {
serialize_parse_context_exit_tag(serialize_context, &body->as.serialize.document_body_entry, current_tag);
}
return ret;
} else {
VALUE tag_name = rb_utf8_str_new(tag_markup_header_name(current_tag), current_tag->tag_name_len);
VALUE markup = rb_utf8_str_new(tag_markup_header_markup(current_tag), current_tag->markup_len);

VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name);
if (!RTEST(tag_class)) {
rb_raise(cLiquidCDeserializationError, "cannot find known tag `%"PRIsVALUE"`", tag_name);
}

serialize_parse_context_enter_tag(serialize_context, current_tag);
VALUE new_tag = rb_funcall(tag_class, intern_parse, 4,
tag_name, markup, tokenizer_obj, parse_context_obj);
serialize_parse_context_exit_tag(serialize_context, &body->as.serialize.document_body_entry, current_tag);

c_buffer_write_ruby_value(&body->tags, new_tag);
}

current_tag = tag_markup_get_next_tag(current_tag);
}

return rb_yield_values(2, Qnil, Qnil);
}

static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj)
static VALUE block_body_parse_from_source(VALUE self, block_body_t *body, VALUE tokenizer_obj, VALUE parse_context_obj)
{
parse_context_t parse_context = {
.parent_tag = parse_context_get_parent_tag(parse_context_obj),
.tokenizer_obj = tokenizer_obj,
.ruby_obj = parse_context_obj,
};
Tokenizer_Get_Struct(tokenizer_obj, parse_context.tokenizer);
block_body_t *body;
BlockBody_Get_Struct(self, body);

ensure_intermediate_not_parsing(body);
if (body->as.intermediate.parse_context != parse_context_obj) {
Expand All @@ -333,18 +400,30 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte
tag_name = tag_markup_get_tag_name(unknown_tag);
markup = tag_markup_get_markup(unknown_tag);
block_body_push_tag_markup(body, parse_context_obj, unknown_tag);

if (RTEST(parse_context.parent_tag) && !body->as.intermediate.bound_to_tag) {
body->as.intermediate.bound_to_tag = true;
tag_markup_set_block_body(parse_context.parent_tag, self, body);
}
}

VALUE block_ret = rb_yield_values(2, tag_name, markup);

if (RTEST(parse_context.parent_tag) && !body->as.intermediate.bound_to_tag) {
body->as.intermediate.bound_to_tag = true;
tag_markup_set_block_body(parse_context.parent_tag, self, body);
}

return block_ret;
}

// Parse entry point for a BlockBody object.
//
// Unwraps the block_body_t behind `self` and dispatches on how the body was
// set up: bodies flagged from_serialize are handled by
// block_body_parse_from_serialize(), every other body by
// block_body_parse_from_source(). The helper's return value is passed through
// unchanged.
static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj)
{
    block_body_t *body;
    BlockBody_Get_Struct(self, body);

    if (!body->from_serialize) {
        return block_body_parse_from_source(self, body, tokenizer_obj, parse_context_obj);
    }

    return block_body_parse_from_serialize(body, tokenizer_obj, parse_context_obj);
}


static VALUE block_body_freeze(VALUE self)
{
Expand All @@ -353,26 +432,32 @@ static VALUE block_body_freeze(VALUE self)

if (body->compiled) return Qnil;

VALUE parse_context = body->as.intermediate.parse_context;
VALUE document_body = parse_context_get_document_body(parse_context);

bool root = body->as.intermediate.root;

vm_assembler_pool_t *assembler_pool = body->as.intermediate.vm_assembler_pool;
vm_assembler_t *assembler = body->as.intermediate.code;
bool blank = body->as.intermediate.blank;
uint32_t render_score = body->as.intermediate.render_score;
vm_assembler_t *code = body->as.intermediate.code;
body->compiled = true;
body->as.compiled.nodelist = Qundef;
document_body_write_block_body(document_body, blank, render_score, code, &body->as.compiled.document_body_entry);
vm_assembler_pool_recycle_assembler(assembler_pool, assembler);

if (root) {
parse_context_remove_document_body(parse_context);
parse_context_remove_vm_assembler_pool(parse_context);
if (body->from_serialize) {
body->as.compiled.nodelist = Qundef;
} else {
VALUE parse_context = body->as.intermediate.parse_context;
VALUE document_body = parse_context_get_document_body(parse_context);

bool root = body->as.intermediate.root;

vm_assembler_pool_t *assembler_pool = body->as.intermediate.vm_assembler_pool;
vm_assembler_t *assembler = body->as.intermediate.code;
bool blank = body->as.intermediate.blank;
uint32_t render_score = body->as.intermediate.render_score;
vm_assembler_t *code = body->as.intermediate.code;
body->as.compiled.nodelist = Qundef;
document_body_write_block_body(document_body, blank, render_score, code, &body->as.compiled.document_body_entry);
vm_assembler_pool_recycle_assembler(assembler_pool, assembler);

if (root) {
parse_context_remove_document_body(parse_context);
parse_context_remove_vm_assembler_pool(parse_context);
}
}


rb_call_super(0, NULL);

return Qnil;
Expand All @@ -397,7 +482,10 @@ static VALUE block_body_blank_p(VALUE self)
{
block_body_t *body;
BlockBody_Get_Struct(self, body);
if (body->compiled) {
if (body->from_serialize) {
block_body_header_t *body_header = document_body_get_block_body_header_ptr(&body->as.serialize.document_body_entry);
return BLOCK_BODY_HEADER_BLANK_P(body_header) ? Qtrue : Qfalse;
} else if (body->compiled) {
block_body_header_t *body_header = document_body_get_block_body_header_ptr(&body->as.compiled.document_body_entry);
return BLOCK_BODY_HEADER_BLANK_P(body_header) ? Qtrue : Qfalse;
} else {
Expand All @@ -410,6 +498,8 @@ static VALUE block_body_remove_blank_strings(VALUE self)
block_body_t *body;
BlockBody_Get_Struct(self, body);

if (body->from_serialize) return Qnil;

ensure_intermediate_not_parsing(body);

if (!body->as.intermediate.blank) {
Expand Down
5 changes: 5 additions & 0 deletions ext/liquid_c/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

typedef struct block_body {
bool compiled;
bool from_serialize;
VALUE obj;
c_buffer_t tags;

Expand All @@ -14,6 +15,10 @@ typedef struct block_body {
document_body_entry_t document_body_entry;
VALUE nodelist;
} compiled;
struct {
document_body_entry_t document_body_entry;
VALUE parse_context;
} serialize;
struct {
VALUE parse_context;
vm_assembler_pool_t *vm_assembler_pool;
Expand Down
21 changes: 21 additions & 0 deletions ext/liquid_c/document.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <ruby.h>
#include "liquid.h"
#include "document.h"
#include "parse_context.h"
#include "document_body.h"

static ID id_parse;
static VALUE cLiquidDocument;

// Calls the `parse` method on the cached Document class object, forwarding
// the tokenizer and parse context, and returns whatever that call returns.
// Relies on id_parse and cLiquidDocument having been set by
// liquid_define_document().
VALUE document_parse(VALUE tokenizer, VALUE parse_context)
{
    VALUE parsed_document = rb_funcall(cLiquidDocument, id_parse, 2, tokenizer, parse_context);

    return parsed_document;
}

// One-time setup for this file's statics: interns the `parse` method ID and
// looks up the `Document` constant under mLiquid, then registers the static
// holding that constant with the GC via rb_global_variable().
//
// Declared with an explicit (void) parameter list: before C23 an empty `()`
// is an obsolescent non-prototype declarator, so the compiler would not
// diagnose a call that passes arguments.
void liquid_define_document(void)
{
    id_parse = rb_intern("parse");

    cLiquidDocument = rb_const_get(mLiquid, rb_intern("Document"));
    rb_global_variable(&cLiquidDocument);
}
7 changes: 7 additions & 0 deletions ext/liquid_c/document.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#ifndef LIQUID_DOCUMENT_H
#define LIQUID_DOCUMENT_H

// Initializes the cached method ID and Document class reference used by
// document_parse(); call it before document_parse().
// Declared with (void) so this is a real prototype: before C23 an empty `()`
// leaves the parameters unspecified and argument mismatches go undiagnosed.
void liquid_define_document(void);

// Invokes `parse` on the cached Document class with the given tokenizer and
// parse context, returning the result of that call.
VALUE document_parse(VALUE tokenizer, VALUE parse_context);

#endif
Loading