From 1c693ba6eb5008d194d4bb6f5e010adc94a9db63 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 15 Dec 2020 16:01:36 -0500 Subject: [PATCH 01/12] Store tag markup for serialization --- ext/liquid_c/block.c | 75 +++++++++++++++++++++++++++--------- ext/liquid_c/block.h | 1 + ext/liquid_c/document_body.c | 70 +++++++++++++++++++++++++++------ ext/liquid_c/document_body.h | 5 +++ ext/liquid_c/expression.c | 2 +- ext/liquid_c/liquid.c | 2 + ext/liquid_c/parse_context.c | 13 ++++++- ext/liquid_c/parse_context.h | 3 ++ ext/liquid_c/tag_markup.c | 73 +++++++++++++++++++++++++++++++++++ ext/liquid_c/tag_markup.h | 37 ++++++++++++++++++ ext/liquid_c/vm.c | 14 +++++-- ext/liquid_c/vm.h | 4 +- ext/liquid_c/vm_assembler.c | 13 ++++--- ext/liquid_c/vm_assembler.h | 12 ++++-- 14 files changed, 278 insertions(+), 46 deletions(-) create mode 100644 ext/liquid_c/tag_markup.c create mode 100644 ext/liquid_c/tag_markup.h diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index bc2b92c3..52564b92 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -8,6 +8,7 @@ #include "context.h" #include "parse_context.h" #include "vm_assembler.h" +#include "tag_markup.h" #include static ID @@ -22,15 +23,11 @@ static ID static VALUE tag_registry; static VALUE variable_placeholder = Qnil; -typedef struct tag_markup { - VALUE name; - VALUE markup; -} tag_markup_t; - typedef struct parse_context { tokenizer_t *tokenizer; VALUE tokenizer_obj; VALUE ruby_obj; + VALUE parent_tag; } parse_context_t; static void ensure_body_compiled(const block_body_t *body) @@ -43,6 +40,7 @@ static void ensure_body_compiled(const block_body_t *body) static void block_body_mark(void *ptr) { block_body_t *body = ptr; + c_buffer_rb_gc_mark(&body->tags); if (body->compiled) { document_body_entry_mark(&body->as.compiled.document_body_entry); rb_gc_mark(body->as.compiled.nodelist); @@ -91,6 +89,7 @@ static VALUE block_body_allocate(VALUE klass) body->compiled = false; body->obj = obj; + body->tags = c_buffer_init(); body->as.intermediate.blank = true; body->as.intermediate.root = false; body->as.intermediate.render_score = 0; @@ -125,11 +124,25 @@ static int is_id(int c) return rb_isalnum(c) || c == '_'; } -static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_t *parse_context) +static void block_body_add_node(block_body_t *body, VALUE node) +{ + assert(!body->compiled); + c_buffer_write_ruby_value(&body->tags, node); + vm_assembler_add_write_node(body->as.intermediate.code); +} + +static void block_body_push_tag_markup(block_body_t *body, VALUE parse_context, VALUE tag_markup) +{ + assert(!body->compiled); + vm_assembler_write_tag(body->as.intermediate.code, tag_markup); + parse_context_set_parent_tag(parse_context, tag_markup); +} + +static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *parse_context) { tokenizer_t *tokenizer = parse_context->tokenizer; token_t token; - tag_markup_t unknown_tag = { Qnil, Qnil }; + VALUE unknown_tag = Qnil; int render_score_increment = 0; while (true) { @@ -209,7 +222,7 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_ if (name_len == 0) { VALUE str = rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding); - unknown_tag = (tag_markup_t) { str, str }; + unknown_tag = tag_markup_new(str, str, true); goto loop_break; } @@ -224,8 +237,9 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_ tokenizer_setup_for_liquid_tag(tokenizer, markup_start, end, line_number); unknown_tag = internal_block_body_parse(body, parse_context); *tokenizer = saved_tokenizer; - if (unknown_tag.name != Qnil) { - rb_funcall(cLiquidBlockBody, intern_unknown_tag_in_liquid_tag, 2, unknown_tag.name, parse_context->ruby_obj); + if (RTEST(unknown_tag)) { + rb_funcall(cLiquidBlockBody, intern_unknown_tag_in_liquid_tag, 2, + tag_markup_get_tag_name(unknown_tag), parse_context->ruby_obj); goto loop_break; } break; @@ -238,13 +252,18 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_ VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding); if (tag_class == Qnil) { - unknown_tag = (tag_markup_t) { tag_name, markup }; + unknown_tag = tag_markup_new(tag_name, markup, true); goto loop_break; } + VALUE tag_markup = tag_markup_new(tag_name, markup, false); + block_body_push_tag_markup(body, parse_context->ruby_obj, tag_markup); + VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, tag_name, markup, parse_context->tokenizer_obj, parse_context->ruby_obj); + parse_context_set_parent_tag(parse_context->ruby_obj, parse_context->parent_tag); + if (body->as.intermediate.blank && !RTEST(rb_funcall(new_tag, intern_is_blank, 0))) body->as.intermediate.blank = false; @@ -256,7 +275,7 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_ tokenizer->raw_tag_body = NULL; tokenizer->raw_tag_body_len = 0; } else { - vm_assembler_add_write_node(body->as.intermediate.code, new_tag); + block_body_add_node(body, new_tag); } render_score_increment += 1; @@ -290,6 +309,7 @@ static void ensure_intermediate_not_parsing(block_body_t *body) static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_context_obj) { parse_context_t parse_context = { + .parent_tag = parse_context_get_parent_tag(parse_context_obj), .tokenizer_obj = tokenizer_obj, .ruby_obj = parse_context_obj, }; @@ -303,10 +323,24 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte } vm_assembler_remove_leave(body->as.intermediate.code); // to extend block - tag_markup_t unknown_tag = internal_block_body_parse(body, &parse_context); + VALUE unknown_tag = internal_block_body_parse(body, &parse_context); vm_assembler_add_leave(body->as.intermediate.code); - return rb_yield_values(2, unknown_tag.name, unknown_tag.markup); + VALUE tag_name = Qnil; + VALUE markup = Qnil; + if (RTEST(unknown_tag)) { + tag_name = tag_markup_get_tag_name(unknown_tag); + markup = tag_markup_get_markup(unknown_tag); + block_body_push_tag_markup(body, parse_context_obj, unknown_tag); + } + + VALUE block_ret = rb_yield_values(2, tag_name, markup); + + if (RTEST(parse_context.parent_tag)) { + tag_markup_set_block_body(parse_context.parent_tag, self, body); + } + + return block_ret; } @@ -352,7 +386,8 @@ static VALUE block_body_render_to_output_buffer(VALUE self, VALUE context, VALUE ensure_body_compiled(body); document_body_entry_t *entry = &body->as.compiled.document_body_entry; - liquid_vm_render(document_body_get_block_body_header_ptr(entry), document_body_get_constants_ptr(entry), context, output); + liquid_vm_render(document_body_get_block_body_header_ptr(entry), document_body_get_constants_ptr(entry), + (const VALUE *)body->tags.data, context, output); return output; } @@ -379,6 +414,7 @@ static VALUE block_body_remove_blank_strings(VALUE self) rb_raise(rb_eRuntimeError, "remove_blank_strings only support being called on a blank block body"); } + VALUE *tags_ptr = (VALUE *)body->tags.data; VALUE *const_ptr = (VALUE *)body->as.intermediate.code->constants.data; uint8_t *ip = body->as.intermediate.code->instructions.data; @@ -394,7 +430,7 @@ static VALUE block_body_remove_blank_strings(VALUE self) body->as.intermediate.render_score--; } } - liquid_vm_next_instruction((const uint8_t **)&ip, (const VALUE **)&const_ptr); + liquid_vm_next_instruction((const uint8_t **)&ip, (const VALUE **)&const_ptr, (const VALUE **)&tags_ptr); } return Qnil; @@ -425,6 +461,7 @@ static VALUE block_body_nodelist(VALUE self) VALUE nodelist = rb_ary_new_capa(body_header->render_score); const VALUE *const_ptr = document_body_get_constants_ptr(entry); + const VALUE *tags_ptr = (VALUE *)body->tags.data; const uint8_t *ip = block_body_instructions_ptr(body_header); while (true) { switch (*ip) { @@ -448,7 +485,7 @@ static VALUE block_body_nodelist(VALUE self) } case OP_WRITE_NODE: { - rb_ary_push(nodelist, const_ptr[0]); + rb_ary_push(nodelist, tags_ptr[0]); break; } @@ -456,7 +493,7 @@ static VALUE block_body_nodelist(VALUE self) rb_ary_push(nodelist, variable_placeholder); break; } - liquid_vm_next_instruction(&ip, &const_ptr); + liquid_vm_next_instruction(&ip, &const_ptr, &tags_ptr); } loop_break: @@ -473,7 +510,7 @@ static VALUE block_body_disassemble(VALUE self) block_body_header_t *header = document_body_get_block_body_header_ptr(entry); const uint8_t *start_ip = block_body_instructions_ptr(header); return vm_assembler_disassemble(start_ip, start_ip + header->instructions_bytes, - document_body_get_constants_ptr(entry)); + document_body_get_constants_ptr(entry), (const VALUE *)body->tags.data); } diff --git a/ext/liquid_c/block.h b/ext/liquid_c/block.h index 40b3f5a0..e6e16070 100644 --- a/ext/liquid_c/block.h +++ b/ext/liquid_c/block.h @@ -7,6 +7,7 @@ typedef struct block_body { bool compiled; VALUE obj; + c_buffer_t tags; union { struct { diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index ae73233d..1d3d711f 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -3,6 +3,7 @@ #include "liquid.h" #include "vm_assembler.h" #include "document_body.h" +#include "tag_markup.h" static VALUE cLiquidCDocumentBody; @@ -50,6 +51,39 @@ VALUE document_body_new_instance() return rb_class_new_instance(0, NULL, cLiquidCDocumentBody); } +static void document_body_write_tag_markup(document_body_t *body, VALUE tag_markup_obj) +{ + tag_markup_t *tag_markup; + TagMarkup_Get_Struct(tag_markup_obj, tag_markup); + + size_t tag_markup_offset = c_buffer_size(&body->buffer); + c_buffer_extend_for_write(&body->buffer, sizeof(tag_markup_header_t)); + + tag_markup_header_t header; + header.flags = tag_markup->flags; + + uint32_t tag_name_len = (uint32_t)RSTRING_LEN(tag_markup->tag_name); + header.tag_name_len = tag_name_len; + header.tag_name_offset = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); + c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->tag_name), tag_name_len); + + uint32_t markup_len = (uint32_t)RSTRING_LEN(tag_markup->markup); + header.markup_len = markup_len; + header.markup_offset = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); + c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->markup), markup_len); + + if (tag_markup->block_body) { + assert(tag_markup->block_body->compiled); + header.block_body_offset = (uint32_t)tag_markup->block_body->as.compiled.document_body_entry.buffer_offset; + } else { + header.block_body_offset = BUFFER_OFFSET_UNDEF; + } + + header.total_len = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); + + memcpy(body->buffer.data + tag_markup_offset, &header, sizeof(tag_markup_header_t)); +} + void document_body_write_block_body(VALUE self, bool blank, uint32_t render_score, vm_assembler_t *code, document_body_entry_t *entry) { document_body_t *body; @@ -60,22 +94,36 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor entry->body = body; entry->buffer_offset = c_buffer_size(&body->buffer); - assert(c_buffer_size(&code->constants) % sizeof(VALUE) == 0); - uint32_t constants_len = (uint32_t)(c_buffer_size(&code->constants) / sizeof(VALUE)); + size_t buf_block_body_offset = c_buffer_size(&body->buffer); + c_buffer_extend_for_write(&body->buffer, sizeof(block_body_header_t)); + + block_body_header_t buf_block_body; - block_body_header_t *buf_block_body = c_buffer_extend_for_write(&body->buffer, sizeof(block_body_header_t)); - buf_block_body->instructions_offset = (uint32_t)sizeof(block_body_header_t); - buf_block_body->instructions_bytes = (uint32_t)c_buffer_size(&code->instructions); - buf_block_body->constants_offset = (uint32_t)RARRAY_LEN(body->constants); - buf_block_body->constants_len = constants_len; - buf_block_body->flags = 0; - if (blank) buf_block_body->flags |= BLOCK_BODY_HEADER_FLAG_BLANK; - buf_block_body->render_score = render_score; - buf_block_body->max_stack_size = code->max_stack_size; + buf_block_body.flags = 0; + if (blank) buf_block_body.flags |= BLOCK_BODY_HEADER_FLAG_BLANK; + buf_block_body.render_score = render_score; + buf_block_body.max_stack_size = code->max_stack_size; + buf_block_body.instructions_offset = (uint32_t)(c_buffer_size(&body->buffer) - buf_block_body_offset); + buf_block_body.instructions_bytes = (uint32_t)c_buffer_size(&code->instructions); c_buffer_concat(&body->buffer, &code->instructions); + assert(c_buffer_size(&code->tags) % sizeof(VALUE) == 0); + uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tags) / sizeof(VALUE)); + buf_block_body.tags_offset = (uint32_t)(c_buffer_size(&body->buffer) - buf_block_body_offset); + size_t tags_start_offset = c_buffer_size(&body->buffer); + for (uint32_t i = 0; i < tags_len; i++) { + document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i]); + } + buf_block_body.tags_bytes = (uint32_t)(c_buffer_size(&body->buffer) - tags_start_offset); + + assert(c_buffer_size(&code->constants) % sizeof(VALUE) == 0); + uint32_t constants_len = (uint32_t)(c_buffer_size(&code->constants) / sizeof(VALUE)); + buf_block_body.constants_offset = (uint32_t)RARRAY_LEN(body->constants); + buf_block_body.constants_len = constants_len; rb_ary_cat(body->constants, (VALUE *)code->constants.data, constants_len); + + memcpy(body->buffer.data + buf_block_body_offset, &buf_block_body, sizeof(block_body_header_t)); } void liquid_define_document_body() diff --git a/ext/liquid_c/document_body.h b/ext/liquid_c/document_body.h index c55ef6af..8305ea3a 100644 --- a/ext/liquid_c/document_body.h +++ b/ext/liquid_c/document_body.h @@ -7,6 +7,8 @@ typedef struct block_body_header { uint32_t instructions_offset; uint32_t instructions_bytes; + uint32_t tags_offset; + uint32_t tags_bytes; uint32_t constants_offset; uint32_t constants_len; uint32_t flags; @@ -17,6 +19,9 @@ typedef struct block_body_header { #define BLOCK_BODY_HEADER_FLAG_BLANK (1 << 0) #define BLOCK_BODY_HEADER_BLANK_P(header) (header->flags & BLOCK_BODY_HEADER_FLAG_BLANK) +#define BUFFER_OFFSET_UNDEF UINT32_MAX +#define BUFFER_OFFSET_UNDEF_P(val) (val == BUFFER_OFFSET_UNDEF) + typedef struct document_body { VALUE self; VALUE constants; diff --git a/ext/liquid_c/expression.c b/ext/liquid_c/expression.c index ae370f34..35e6cbd0 100644 --- a/ext/liquid_c/expression.c +++ b/ext/liquid_c/expression.c @@ -94,7 +94,7 @@ static VALUE expression_disassemble(VALUE self) expression_t *expression; Expression_Get_Struct(self, expression); return vm_assembler_disassemble(expression->code.instructions.data, expression->code.instructions.data_end, - (const VALUE *)expression->code.constants.data); + (const VALUE *)expression->code.constants.data, NULL); } void liquid_define_expression() diff --git a/ext/liquid_c/liquid.c b/ext/liquid_c/liquid.c index b8d505a8..094cb19d 100644 --- a/ext/liquid_c/liquid.c +++ b/ext/liquid_c/liquid.c @@ -8,6 +8,7 @@ #include "expression.h" #include "document_body.h" #include "block.h" +#include "tag_markup.h" #include "context.h" #include "parse_context.h" #include "variable_lookup.h" @@ -84,6 +85,7 @@ RUBY_FUNC_EXPORTED void Init_liquid_c(void) liquid_define_variable(); liquid_define_document_body(); liquid_define_block_body(); + liquid_define_tag_markup(); liquid_define_context(); liquid_define_parse_context(); liquid_define_variable_lookup(); diff --git a/ext/liquid_c/parse_context.c b/ext/liquid_c/parse_context.c index 3ccc0ef5..5e36fb99 100644 --- a/ext/liquid_c/parse_context.c +++ b/ext/liquid_c/parse_context.c @@ -1,7 +1,7 @@ #include "parse_context.h" #include "document_body.h" -static ID id_document_body, id_vm_assembler_pool; +static ID id_document_body, id_vm_assembler_pool, id_parent_tag; bool parse_context_document_body_initialized_p(VALUE self) { @@ -62,8 +62,19 @@ void parse_context_remove_vm_assembler_pool(VALUE self) } +VALUE parse_context_get_parent_tag(VALUE self) +{ + return rb_attr_get(self, id_parent_tag); +} + +void parse_context_set_parent_tag(VALUE self, VALUE tag_header) +{ + rb_ivar_set(self, id_parent_tag, tag_header); +} + void liquid_define_parse_context() { id_document_body = rb_intern("document_body"); id_vm_assembler_pool = rb_intern("vm_assembler_pool"); + id_parent_tag = rb_intern("parent_tag"); } diff --git a/ext/liquid_c/parse_context.h b/ext/liquid_c/parse_context.h index 7a13a215..e980c420 100644 --- a/ext/liquid_c/parse_context.h +++ b/ext/liquid_c/parse_context.h @@ -15,4 +15,7 @@ vm_assembler_pool_t *parse_context_init_vm_assembler_pool(VALUE self); vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self); void parse_context_remove_vm_assembler_pool(VALUE self); +VALUE parse_context_get_parent_tag(VALUE self); +void parse_context_set_parent_tag(VALUE self, VALUE tag_header); + #endif diff --git a/ext/liquid_c/tag_markup.c b/ext/liquid_c/tag_markup.c new file mode 100644 index 00000000..f4f0e332 --- /dev/null +++ b/ext/liquid_c/tag_markup.c @@ -0,0 +1,73 @@ +#include "liquid.h" +#include "tag_markup.h" + +static VALUE cLiquidCTagMarkup; + +static void tag_markup_mark(void *ptr) +{ + tag_markup_t *markup = ptr; + + rb_gc_mark(markup->markup); + rb_gc_mark(markup->tag_name); + rb_gc_mark(markup->block_body_obj); +} + +static void tag_markup_free(void *ptr) +{ + xfree(ptr); +} + +static size_t tag_markup_memsize(const void *ptr) +{ + return sizeof(tag_markup_t); +} + +const rb_data_type_t tag_markup_data_type = { + "liquid_tag_markup", + { tag_markup_mark, tag_markup_free, tag_markup_memsize, }, + NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY +}; + +VALUE tag_markup_new(VALUE tag_name, VALUE markup, bool unknown) +{ + tag_markup_t *tag; + VALUE obj = TypedData_Make_Struct(cLiquidCTagMarkup, tag_markup_t, &tag_markup_data_type, tag); + + tag->flags = 0; + if (unknown) tag->flags |= TAG_FLAG_UNKNOWN; + tag->tag_name = tag_name; + tag->markup = markup; + tag->block_body = NULL; + + return obj; +} + +VALUE tag_markup_get_tag_name(VALUE self) +{ + tag_markup_t *tag; + TagMarkup_Get_Struct(self, tag); + return tag->tag_name; +} + +VALUE tag_markup_get_markup(VALUE self) +{ + tag_markup_t *tag; + TagMarkup_Get_Struct(self, tag); + return tag->markup; +} + +void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *block_body) +{ + tag_markup_t *tag; + TagMarkup_Get_Struct(self, tag); + assert(tag->block_body == NULL); + tag->block_body_obj = block_body_obj; + tag->block_body = block_body; +} + +void liquid_define_tag_markup() +{ + cLiquidCTagMarkup = rb_define_class_under(mLiquidC, "TagMarkup", rb_cObject); + rb_global_variable(&cLiquidCTagMarkup); + rb_undef_alloc_func(cLiquidCTagMarkup); +} diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h new file mode 100644 index 00000000..9bc7c4a3 --- /dev/null +++ b/ext/liquid_c/tag_markup.h @@ -0,0 +1,37 @@ +#ifndef LIQUID_TAG_MARKUP_H +#define LIQUID_TAG_MARKUP_H + +#include "c_buffer.h" +#include "block.h" + +typedef struct tag_markup { + uint32_t flags; + VALUE tag_name; + VALUE markup; + VALUE block_body_obj; + block_body_t *block_body; +} tag_markup_t; + +typedef struct tag_markup_header { + uint32_t flags; + uint32_t total_len; + uint32_t tag_name_offset; + uint32_t tag_name_len; + uint32_t markup_offset; + uint32_t markup_len; + uint32_t block_body_offset; +} tag_markup_header_t; + +#define TAG_FLAG_UNKNOWN (1 << 0) +#define TAG_UNKNOWN_P(tag) (tag->flags & TAG_FLAG_UNKNOWN) + +extern const rb_data_type_t tag_markup_data_type; +#define TagMarkup_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tag_markup_t, &tag_markup_data_type, sval) + +void liquid_define_tag_markup(); +VALUE tag_markup_new(VALUE tag_name, VALUE markup, bool unknown); +VALUE tag_markup_get_tag_name(VALUE self); +VALUE tag_markup_get_markup(VALUE self); +void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *block_body); + +#endif diff --git a/ext/liquid_c/vm.c b/ext/liquid_c/vm.c index 6c06ff62..3c11d798 100644 --- a/ext/liquid_c/vm.c +++ b/ext/liquid_c/vm.c @@ -185,6 +185,7 @@ typedef struct vm_render_until_error_args { vm_t *vm; const uint8_t *ip; // use for initial address and to save an address for rescuing const size_t *const_ptr; + const VALUE *tags_ptr; /* rendering fields */ VALUE output; @@ -232,6 +233,7 @@ static VALUE vm_render_until_error(VALUE uncast_args) { vm_render_until_error_args_t *args = (void *)uncast_args; const VALUE *const_ptr = args->const_ptr; + const VALUE *tags_ptr = args->tags_ptr; const uint8_t *ip = args->ip; vm_t *vm = args->vm; VALUE output = args->output; @@ -361,7 +363,7 @@ static VALUE vm_render_until_error(VALUE uncast_args) } case OP_WRITE_NODE: - rb_funcall(cLiquidBlockBody, id_render_node, 3, vm->context.self, output, (VALUE)*const_ptr++); + rb_funcall(cLiquidBlockBody, id_render_node, 3, vm->context.self, output, *tags_ptr++); if (RARRAY_LEN(vm->context.interrupts)) { return false; } @@ -414,7 +416,7 @@ VALUE liquid_vm_evaluate(VALUE context, vm_assembler_t *code) return ret; } -void liquid_vm_next_instruction(const uint8_t **ip_ptr, const VALUE **const_ptr_ptr) +void liquid_vm_next_instruction(const uint8_t **ip_ptr, const VALUE **const_ptr_ptr, const VALUE **tags_ptr_ptr) { const uint8_t *ip = *ip_ptr; @@ -440,6 +442,9 @@ void liquid_vm_next_instruction(const uint8_t **ip_ptr, const VALUE **const_ptr_ break; case OP_WRITE_NODE: + (*tags_ptr_ptr)++; + break; + case OP_PUSH_CONST: case OP_FIND_STATIC_VAR: case OP_LOOKUP_CONST_KEY: @@ -514,7 +519,7 @@ static VALUE vm_render_rescue(VALUE uncast_args, VALUE exception) enum opcode last_op; do { last_op = *ip; - liquid_vm_next_instruction(&ip, &render_args->const_ptr); + liquid_vm_next_instruction(&ip, &render_args->const_ptr, &render_args->tags_ptr); } while (last_op != OP_POP_WRITE); render_args->ip = ip; // remove temporary stack values from variable evaluation @@ -529,7 +534,7 @@ static VALUE vm_render_rescue(VALUE uncast_args, VALUE exception) return true; } -void liquid_vm_render(block_body_header_t *body, const VALUE *const_ptr, VALUE context, VALUE output) +void liquid_vm_render(block_body_header_t *body, const VALUE *const_ptr, const VALUE *tags_ptr, VALUE context, VALUE output) { vm_t *vm = vm_from_context(context); @@ -539,6 +544,7 @@ void liquid_vm_render(block_body_header_t *body, const VALUE *const_ptr, VALUE c vm_render_until_error_args_t render_args = { .vm = vm, .const_ptr = const_ptr, + .tags_ptr = tags_ptr, .ip = block_body_instructions_ptr(body), .output = output, }; diff --git a/ext/liquid_c/vm.h b/ext/liquid_c/vm.h index 51cc3bd3..5accb59e 100644 --- a/ext/liquid_c/vm.h +++ b/ext/liquid_c/vm.h @@ -14,8 +14,8 @@ typedef struct vm { void liquid_define_vm(); vm_t *vm_from_context(VALUE context); -void liquid_vm_render(block_body_header_t *block, const VALUE *const_ptr, VALUE context, VALUE output); -void liquid_vm_next_instruction(const uint8_t **ip_ptr, const size_t **const_ptr_ptr); +void liquid_vm_render(block_body_header_t *body, const VALUE *const_ptr, const VALUE *tags_ptr, VALUE context, VALUE output); +void liquid_vm_next_instruction(const uint8_t **ip_ptr, const size_t **const_ptr_ptr, const VALUE **tags_ptr_ptr); bool liquid_vm_filtering(VALUE context); VALUE liquid_vm_evaluate(VALUE context, vm_assembler_t *code); diff --git a/ext/liquid_c/vm_assembler.c b/ext/liquid_c/vm_assembler.c index 2cf21fe0..0d921abd 100644 --- a/ext/liquid_c/vm_assembler.c +++ b/ext/liquid_c/vm_assembler.c @@ -74,6 +74,7 @@ void vm_assembler_init(vm_assembler_t *code) { code->instructions = c_buffer_allocate(8); code->constants = c_buffer_allocate(8 * sizeof(VALUE)); + code->tags = c_buffer_init(); vm_assembler_common_init(code); } @@ -81,6 +82,7 @@ void vm_assembler_reset(vm_assembler_t *code) { c_buffer_reset(&code->instructions); c_buffer_reset(&code->constants); + c_buffer_reset(&code->tags); vm_assembler_common_init(code); } @@ -88,14 +90,16 @@ void vm_assembler_free(vm_assembler_t *code) { c_buffer_free(&code->instructions); c_buffer_free(&code->constants); + c_buffer_free(&code->tags); } void vm_assembler_gc_mark(vm_assembler_t *code) { c_buffer_rb_gc_mark(&code->constants); + c_buffer_rb_gc_mark(&code->tags); } -VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, const VALUE *const_ptr) +VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, const VALUE *const_ptr, const VALUE *tags_ptr) { const uint8_t *ip = start_ip; VALUE output = rb_str_buf_new(32); @@ -177,7 +181,7 @@ VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, c } case OP_WRITE_NODE: - rb_str_catf(output, "write_node(%+"PRIsVALUE")\n", const_ptr[0]); + rb_str_catf(output, "write_node(%+"PRIsVALUE")\n", tags_ptr[0]); break; case OP_PUSH_CONST: @@ -208,7 +212,7 @@ VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, c rb_str_catf(output, "\n", ip[0]); break; } - liquid_vm_next_instruction(&ip, &const_ptr); + liquid_vm_next_instruction(&ip, &const_ptr, &tags_ptr); } return output; } @@ -248,10 +252,9 @@ void vm_assembler_add_write_raw(vm_assembler_t *code, const char *string, size_t c_buffer_write(&code->instructions, (char *)string, size); } -void vm_assembler_add_write_node(vm_assembler_t *code, VALUE node) +void vm_assembler_add_write_node(vm_assembler_t *code) { vm_assembler_write_opcode(code, OP_WRITE_NODE); - vm_assembler_write_ruby_constant(code, node); } void vm_assembler_add_push_fixnum(vm_assembler_t *code, VALUE num) diff --git a/ext/liquid_c/vm_assembler.h b/ext/liquid_c/vm_assembler.h index 1a64cec9..4a1ba511 100644 --- a/ext/liquid_c/vm_assembler.h +++ b/ext/liquid_c/vm_assembler.h @@ -42,6 +42,7 @@ extern filter_desc_t builtin_filters[]; typedef struct vm_assembler { c_buffer_t instructions; + c_buffer_t tags; c_buffer_t constants; size_t max_stack_size; size_t stack_size; @@ -54,12 +55,12 @@ void vm_assembler_init(vm_assembler_t *code); void vm_assembler_reset(vm_assembler_t *code); void vm_assembler_free(vm_assembler_t *code); void vm_assembler_gc_mark(vm_assembler_t *code); -VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, const VALUE *const_ptr); +VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, const VALUE *const_ptr, const VALUE *tags_ptr); void vm_assembler_concat(vm_assembler_t *dest, vm_assembler_t *src); void vm_assembler_require_stack_args(vm_assembler_t *code, unsigned int count); void vm_assembler_add_write_raw(vm_assembler_t *code, const char *string, size_t size); -void vm_assembler_add_write_node(vm_assembler_t *code, VALUE node); +void vm_assembler_add_write_node(vm_assembler_t *code); void vm_assembler_add_push_fixnum(vm_assembler_t *code, VALUE num); void vm_assembler_add_push_literal(vm_assembler_t *code, VALUE literal); void vm_assembler_add_filter(vm_assembler_t *code, VALUE filter_name, size_t arg_count); @@ -74,7 +75,7 @@ void vm_assembler_add_filter_from_ruby(vm_assembler_t *code, VALUE filter_name, static inline size_t vm_assembler_alloc_memsize(const vm_assembler_t *code) { - return c_buffer_capacity(&code->instructions) + c_buffer_capacity(&code->constants); + return c_buffer_capacity(&code->instructions) + c_buffer_capacity(&code->constants) + c_buffer_capacity(&code->tags); } static inline void vm_assembler_write_opcode(vm_assembler_t *code, enum opcode op) @@ -220,4 +221,9 @@ static inline void vm_assembler_add_render_variable_rescue(vm_assembler_t *code, uint24_to_bytes((unsigned int)node_line_number, &instructions[1]); } +static inline void vm_assembler_write_tag(vm_assembler_t *code, VALUE tag) +{ + c_buffer_write_ruby_value(&code->tags, tag); +} + #endif From 3f60dd7d87dae719a265dba91fba45e1d5bcf586 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 5 Jan 2021 10:09:09 -0500 Subject: [PATCH 02/12] Raise an error when child block body is not compiled --- ext/liquid_c/document_body.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 1d3d711f..0058ab0b 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -73,7 +73,10 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->markup), markup_len); if (tag_markup->block_body) { - assert(tag_markup->block_body->compiled); + if (!tag_markup->block_body->compiled) { + rb_raise(rb_eRuntimeError, "child %"PRIsVALUE" has not been frozen before the parent", tag_markup->block_body_obj); + } + header.block_body_offset = (uint32_t)tag_markup->block_body->as.compiled.document_body_entry.buffer_offset; } else { header.block_body_offset = BUFFER_OFFSET_UNDEF; From 89684999c177e62e0513e36761017033c0630cc1 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 5 Jan 2021 11:20:35 -0500 Subject: [PATCH 03/12] Address comments --- ext/liquid_c/vm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/liquid_c/vm.c b/ext/liquid_c/vm.c index 3c11d798..c3bc179a 100644 --- a/ext/liquid_c/vm.c +++ b/ext/liquid_c/vm.c @@ -377,6 +377,7 @@ static VALUE vm_render_until_error(VALUE uncast_args) ip += 3; args->ip = ip; args->const_ptr = const_ptr; + args->tags_ptr = tags_ptr; break; case OP_POP_WRITE: { @@ -408,7 +409,8 @@ VALUE liquid_vm_evaluate(VALUE context, vm_assembler_t *code) vm_render_until_error_args_t args = { .vm = vm, .const_ptr = (const size_t *)code->constants.data, - .ip = code->instructions.data + .ip = code->instructions.data, + .tags_ptr = (const VALUE *)code->tags.data }; vm_render_until_error((VALUE)&args); VALUE ret = vm_stack_pop(vm); From d2cf89a38c9845236a695bdd07be4ddeecccf126 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 7 Jan 2021 17:56:57 -0500 Subject: [PATCH 04/12] Remove unnecessary offset fields in tag_markup_header_t Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/document_body.c | 2 -- ext/liquid_c/tag_markup.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 0058ab0b..a0f3a68f 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -64,12 +64,10 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark uint32_t tag_name_len = (uint32_t)RSTRING_LEN(tag_markup->tag_name); header.tag_name_len = tag_name_len; - header.tag_name_offset = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->tag_name), tag_name_len); uint32_t markup_len = (uint32_t)RSTRING_LEN(tag_markup->markup); header.markup_len = markup_len; - header.markup_offset = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->markup), markup_len); if (tag_markup->block_body) { diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index 9bc7c4a3..36d10a3a 100644 --- a/ext/liquid_c/tag_markup.h +++ b/ext/liquid_c/tag_markup.h @@ -15,9 +15,7 @@ typedef struct tag_markup { typedef struct tag_markup_header { uint32_t flags; uint32_t total_len; - uint32_t tag_name_offset; uint32_t tag_name_len; - uint32_t markup_offset; uint32_t markup_len; uint32_t block_body_offset; } tag_markup_header_t; From 4c030a95b1c5ae89659bcfbf94b76864e68693cb Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 7 Jan 2021 17:58:57 -0500 Subject: [PATCH 05/12] Write directly into the buffer in document_body_write_tag_markup Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/document_body.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index a0f3a68f..8104519b 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -56,33 +56,29 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark tag_markup_t *tag_markup; TagMarkup_Get_Struct(tag_markup_obj, tag_markup); - size_t tag_markup_offset = c_buffer_size(&body->buffer); - c_buffer_extend_for_write(&body->buffer, sizeof(tag_markup_header_t)); - - tag_markup_header_t header; - header.flags = tag_markup->flags; - uint32_t tag_name_len = (uint32_t)RSTRING_LEN(tag_markup->tag_name); - header.tag_name_len = tag_name_len; - c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->tag_name), tag_name_len); - uint32_t markup_len = (uint32_t)RSTRING_LEN(tag_markup->markup); - header.markup_len = markup_len; - c_buffer_write(&body->buffer, RSTRING_PTR(tag_markup->markup), markup_len); - + uint32_t total_len = sizeof(tag_markup_header_t) + tag_name_len + markup_len; + tag_markup_header_t *header = c_buffer_extend_for_write(&body->buffer, total_len); + char *name = (char *)&header[1]; + + header->flags = tag_markup->flags; + header->tag_name_len = tag_name_len; + header->markup_len = markup_len; + header->total_len = total_len; if (tag_markup->block_body) { if (!tag_markup->block_body->compiled) { rb_raise(rb_eRuntimeError, "child %"PRIsVALUE" has not been frozen before the parent", tag_markup->block_body_obj); } - header.block_body_offset = (uint32_t)tag_markup->block_body->as.compiled.document_body_entry.buffer_offset; + header->block_body_offset = (uint32_t)tag_markup->block_body->as.compiled.document_body_entry.buffer_offset; } else { - header.block_body_offset = BUFFER_OFFSET_UNDEF; + header->block_body_offset = BUFFER_OFFSET_UNDEF; } - header.total_len = (uint32_t)(c_buffer_size(&body->buffer) - tag_markup_offset); - - memcpy(body->buffer.data + tag_markup_offset, &header, sizeof(tag_markup_header_t)); + memcpy(name, RSTRING_PTR(tag_markup->tag_name), tag_name_len); + char *markup = name + tag_name_len; + memcpy(markup, RSTRING_PTR(tag_markup->markup), markup_len); } void document_body_write_block_body(VALUE self, bool blank, uint32_t render_score, vm_assembler_t *code, document_body_entry_t *entry) From 8adb79241cdd21d0793604109396b1dae75995a2 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 7 Jan 2021 18:01:44 -0500 Subject: [PATCH 06/12] Remove unnecessary offset field in block_body_header_t Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/block.h | 4 ++-- ext/liquid_c/document_body.c | 1 - ext/liquid_c/document_body.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ext/liquid_c/block.h b/ext/liquid_c/block.h index e6e16070..b96915c7 100644 --- a/ext/liquid_c/block.h +++ b/ext/liquid_c/block.h @@ -27,9 +27,9 @@ typedef struct block_body { void liquid_define_block_body(); -static inline uint8_t *block_body_instructions_ptr(block_body_header_t *body) +static inline uint8_t *block_body_instructions_ptr(block_body_header_t *body_header) { - return ((uint8_t *)body) + body->instructions_offset; + return (uint8_t *)&body_header[1]; } #endif diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 8104519b..27a837b8 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -101,7 +101,6 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor buf_block_body.render_score = render_score; buf_block_body.max_stack_size = code->max_stack_size; - buf_block_body.instructions_offset = (uint32_t)(c_buffer_size(&body->buffer) - buf_block_body_offset); buf_block_body.instructions_bytes = (uint32_t)c_buffer_size(&code->instructions); c_buffer_concat(&body->buffer, &code->instructions); diff --git a/ext/liquid_c/document_body.h b/ext/liquid_c/document_body.h index 8305ea3a..4497fa35 100644 --- a/ext/liquid_c/document_body.h +++ b/ext/liquid_c/document_body.h @@ -5,7 +5,6 @@ #include "vm_assembler.h" typedef struct block_body_header { - uint32_t instructions_offset; uint32_t instructions_bytes; uint32_t tags_offset; uint32_t tags_bytes; From 6c3f8332a951da77fb384030dd9d4e34d614636e Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 7 Jan 2021 18:09:44 -0500 Subject: [PATCH 07/12] Write directly into the buffer in document_body_write_block_body Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/document_body.c | 37 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 27a837b8..34320e42 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -92,34 +92,35 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor entry->buffer_offset = c_buffer_size(&body->buffer); size_t buf_block_body_offset = c_buffer_size(&body->buffer); - c_buffer_extend_for_write(&body->buffer, sizeof(block_body_header_t)); + size_t instructions_byte_size = c_buffer_size(&code->instructions); + size_t header_and_instructions_size = sizeof(block_body_header_t) + instructions_byte_size; + block_body_header_t *buf_block_body = c_buffer_extend_for_write(&body->buffer, header_and_instructions_size); + uint8_t *instructions = (uint8_t *)&buf_block_body[1]; + + buf_block_body->flags = 0; + if (blank) buf_block_body->flags |= BLOCK_BODY_HEADER_FLAG_BLANK; + buf_block_body->render_score = render_score; + buf_block_body->max_stack_size = code->max_stack_size; + buf_block_body->instructions_bytes = (uint32_t)instructions_byte_size; + buf_block_body->tags_offset = (uint32_t)header_and_instructions_size; - block_body_header_t buf_block_body; + assert(c_buffer_size(&code->constants) % sizeof(VALUE) == 0); + uint32_t constants_len = (uint32_t)(c_buffer_size(&code->constants) / sizeof(VALUE)); + buf_block_body->constants_offset = (uint32_t)RARRAY_LEN(body->constants); + buf_block_body->constants_len = constants_len; - buf_block_body.flags = 0; - if (blank) buf_block_body.flags |= BLOCK_BODY_HEADER_FLAG_BLANK; - buf_block_body.render_score = render_score; - buf_block_body.max_stack_size = code->max_stack_size; + rb_ary_cat(body->constants, (VALUE *)code->constants.data, constants_len); - buf_block_body.instructions_bytes = (uint32_t)c_buffer_size(&code->instructions); - c_buffer_concat(&body->buffer, &code->instructions); + memcpy(instructions, code->instructions.data, instructions_byte_size); assert(c_buffer_size(&code->tags) % sizeof(VALUE) == 0); uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tags) / sizeof(VALUE)); - buf_block_body.tags_offset = (uint32_t)(c_buffer_size(&body->buffer) - buf_block_body_offset); size_t tags_start_offset = c_buffer_size(&body->buffer); for (uint32_t i = 0; i < tags_len; i++) { document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i]); } - buf_block_body.tags_bytes = (uint32_t)(c_buffer_size(&body->buffer) - tags_start_offset); - - assert(c_buffer_size(&code->constants) % sizeof(VALUE) == 0); - uint32_t constants_len = (uint32_t)(c_buffer_size(&code->constants) / sizeof(VALUE)); - buf_block_body.constants_offset = (uint32_t)RARRAY_LEN(body->constants); - buf_block_body.constants_len = constants_len; - rb_ary_cat(body->constants, (VALUE *)code->constants.data, constants_len); - - memcpy(body->buffer.data + buf_block_body_offset, &buf_block_body, sizeof(block_body_header_t)); + buf_block_body = (block_body_header_t *)(body->buffer.data + buf_block_body_offset); + buf_block_body->tags_bytes = (uint32_t)(c_buffer_size(&body->buffer) - tags_start_offset); } void liquid_define_document_body() From e8835c7018b922bbc1b6a80b147cdab013ca683d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 10:17:26 -0500 Subject: [PATCH 08/12] Store first tag offset and next tag offsets Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/document_body.c | 21 ++++++++++++--------- ext/liquid_c/document_body.h | 3 +-- ext/liquid_c/tag_markup.h | 2 +- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 34320e42..0d56c79f 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -51,7 +51,7 @@ VALUE document_body_new_instance() return rb_class_new_instance(0, NULL, cLiquidCDocumentBody); } -static void document_body_write_tag_markup(document_body_t *body, VALUE tag_markup_obj) +static void document_body_write_tag_markup(document_body_t *body, VALUE tag_markup_obj, bool last) { tag_markup_t *tag_markup; TagMarkup_Get_Struct(tag_markup_obj, tag_markup); @@ -65,7 +65,7 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark header->flags = tag_markup->flags; header->tag_name_len = tag_name_len; header->markup_len = markup_len; - header->total_len = total_len; + header->next_tag_offset = last ? 0 : total_len; if (tag_markup->block_body) { if (!tag_markup->block_body->compiled) { rb_raise(rb_eRuntimeError, "child %"PRIsVALUE" has not been frozen before the parent", tag_markup->block_body_obj); @@ -91,7 +91,6 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor entry->body = body; entry->buffer_offset = c_buffer_size(&body->buffer); - size_t buf_block_body_offset = c_buffer_size(&body->buffer); size_t instructions_byte_size = c_buffer_size(&code->instructions); size_t header_and_instructions_size = sizeof(block_body_header_t) + instructions_byte_size; block_body_header_t *buf_block_body = c_buffer_extend_for_write(&body->buffer, header_and_instructions_size); @@ -102,7 +101,6 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor buf_block_body->render_score = render_score; buf_block_body->max_stack_size = code->max_stack_size; buf_block_body->instructions_bytes = (uint32_t)instructions_byte_size; - buf_block_body->tags_offset = (uint32_t)header_and_instructions_size; assert(c_buffer_size(&code->constants) % sizeof(VALUE) == 0); uint32_t constants_len = (uint32_t)(c_buffer_size(&code->constants) / sizeof(VALUE)); @@ -115,12 +113,17 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor assert(c_buffer_size(&code->tags) % sizeof(VALUE) == 0); uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tags) / sizeof(VALUE)); - size_t tags_start_offset = c_buffer_size(&body->buffer); - for (uint32_t i = 0; i < tags_len; i++) { - document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i]); + if (tags_len > 0) { + buf_block_body->first_tag_offset = (uint32_t)header_and_instructions_size; + + uint32_t i; + for (i = 0; i < tags_len - 1; i++) { + document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i], false); + } + document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i], true); + } else { + buf_block_body->first_tag_offset = 0; } - buf_block_body = (block_body_header_t *)(body->buffer.data + buf_block_body_offset); - buf_block_body->tags_bytes = (uint32_t)(c_buffer_size(&body->buffer) - tags_start_offset); } void liquid_define_document_body() diff --git a/ext/liquid_c/document_body.h b/ext/liquid_c/document_body.h index 4497fa35..c4923cc2 100644 --- a/ext/liquid_c/document_body.h +++ b/ext/liquid_c/document_body.h @@ -6,8 +6,7 @@ typedef struct block_body_header { uint32_t instructions_bytes; - uint32_t tags_offset; - uint32_t tags_bytes; + uint32_t first_tag_offset; uint32_t constants_offset; uint32_t constants_len; uint32_t flags; diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index 36d10a3a..5b9c8167 100644 --- a/ext/liquid_c/tag_markup.h +++ b/ext/liquid_c/tag_markup.h @@ -14,9 +14,9 @@ typedef struct tag_markup { typedef struct tag_markup_header { uint32_t flags; - uint32_t total_len; uint32_t tag_name_len; uint32_t markup_len; + uint32_t next_tag_offset; uint32_t block_body_offset; } tag_markup_header_t; From 8245feb3f76e10da7a66268612710003c42e9b30 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 10:33:38 -0500 Subject: [PATCH 09/12] Align tag markup headers Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/c_buffer.c | 4 +++- ext/liquid_c/c_buffer.h | 2 +- ext/liquid_c/document_body.c | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ext/liquid_c/c_buffer.c b/ext/liquid_c/c_buffer.c index af66840a..e17dc961 100644 --- a/ext/liquid_c/c_buffer.c +++ b/ext/liquid_c/c_buffer.c @@ -16,14 +16,16 @@ static void c_buffer_expand_for_write(c_buffer_t *buffer, size_t write_size) buffer->capacity_end = buffer->data + capacity; } -void c_buffer_zero_pad_for_alignment(c_buffer_t *buffer, size_t alignment) +size_t c_buffer_zero_pad_for_alignment(c_buffer_t *buffer, size_t alignment) { size_t unaligned_bytes = c_buffer_size(buffer) % alignment; if (unaligned_bytes) { size_t pad_size = alignment - unaligned_bytes; uint8_t *padding = c_buffer_extend_for_write(buffer, pad_size); memset(padding, 0, pad_size); + return pad_size; } + return 0; } void c_buffer_reserve_for_write(c_buffer_t *buffer, size_t write_size) diff --git a/ext/liquid_c/c_buffer.h b/ext/liquid_c/c_buffer.h index 91b10dfb..d66aba97 100644 --- a/ext/liquid_c/c_buffer.h +++ b/ext/liquid_c/c_buffer.h @@ -40,7 +40,7 @@ static inline size_t c_buffer_capacity(const c_buffer_t *buffer) return buffer->capacity_end - buffer->data; } -void c_buffer_zero_pad_for_alignment(c_buffer_t *buffer, size_t alignment); +size_t c_buffer_zero_pad_for_alignment(c_buffer_t *buffer, size_t alignment); void c_buffer_reserve_for_write(c_buffer_t *buffer, size_t write_size); void c_buffer_write(c_buffer_t *buffer, void *data, size_t size); diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index 0d56c79f..adba8998 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -59,7 +59,11 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark uint32_t tag_name_len = (uint32_t)RSTRING_LEN(tag_markup->tag_name); uint32_t markup_len = (uint32_t)RSTRING_LEN(tag_markup->markup); uint32_t total_len = sizeof(tag_markup_header_t) + tag_name_len + markup_len; + assert(c_buffer_size(&body->buffer) % alignof(tag_markup_header_t) == 0); tag_markup_header_t *header = c_buffer_extend_for_write(&body->buffer, total_len); + if (!last) { + total_len += (uint32_t)c_buffer_zero_pad_for_alignment(&body->buffer, alignof(tag_markup_header_t)); + } char *name = (char *)&header[1]; header->flags = tag_markup->flags; @@ -115,6 +119,7 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tags) / sizeof(VALUE)); if (tags_len > 0) { buf_block_body->first_tag_offset = (uint32_t)header_and_instructions_size; + buf_block_body->first_tag_offset += (uint32_t)c_buffer_zero_pad_for_alignment(&body->buffer, alignof(tag_markup_header_t)); uint32_t i; for (i = 0; i < tags_len - 1; i++) { From 87d9dfdf2094362d50da29cfb1aceaf0dd0c5153 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 10:40:45 -0500 Subject: [PATCH 10/12] Rename tags to tag_markups on the vm_assembler_t to avoid confusion Co-Authored-By: Dylan Thacker-Smith --- ext/liquid_c/block.c | 2 +- ext/liquid_c/document_body.c | 8 ++++---- ext/liquid_c/vm.c | 2 +- ext/liquid_c/vm_assembler.c | 8 ++++---- ext/liquid_c/vm_assembler.h | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 52564b92..5aa5cf3c 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -134,7 +134,7 @@ static void block_body_add_node(block_body_t *body, VALUE node) static void block_body_push_tag_markup(block_body_t *body, VALUE parse_context, VALUE tag_markup) { assert(!body->compiled); - vm_assembler_write_tag(body->as.intermediate.code, tag_markup); + vm_assembler_write_tag_markup(body->as.intermediate.code, tag_markup); parse_context_set_parent_tag(parse_context, tag_markup); } diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index adba8998..addab3e7 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -115,17 +115,17 @@ void document_body_write_block_body(VALUE self, bool blank, uint32_t render_scor memcpy(instructions, code->instructions.data, instructions_byte_size); - assert(c_buffer_size(&code->tags) % sizeof(VALUE) == 0); - uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tags) / sizeof(VALUE)); + assert(c_buffer_size(&code->tag_markups) % sizeof(VALUE) == 0); + uint32_t tags_len = (uint32_t)(c_buffer_size(&code->tag_markups) / sizeof(VALUE)); if (tags_len > 0) { buf_block_body->first_tag_offset = (uint32_t)header_and_instructions_size; buf_block_body->first_tag_offset += (uint32_t)c_buffer_zero_pad_for_alignment(&body->buffer, alignof(tag_markup_header_t)); uint32_t i; for (i = 0; i < tags_len - 1; i++) { - document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i], false); + document_body_write_tag_markup(body, ((VALUE *)code->tag_markups.data)[i], false); } - document_body_write_tag_markup(body, ((VALUE *)code->tags.data)[i], true); + document_body_write_tag_markup(body, ((VALUE *)code->tag_markups.data)[i], true); } else { buf_block_body->first_tag_offset = 0; } diff --git a/ext/liquid_c/vm.c b/ext/liquid_c/vm.c index c3bc179a..47124678 100644 --- a/ext/liquid_c/vm.c +++ b/ext/liquid_c/vm.c @@ -410,7 +410,7 @@ VALUE liquid_vm_evaluate(VALUE context, vm_assembler_t *code) .vm = vm, .const_ptr = (const size_t *)code->constants.data, .ip = code->instructions.data, - .tags_ptr = (const VALUE *)code->tags.data + .tags_ptr = (const VALUE *)code->tag_markups.data }; vm_render_until_error((VALUE)&args); VALUE ret = vm_stack_pop(vm); diff --git a/ext/liquid_c/vm_assembler.c b/ext/liquid_c/vm_assembler.c index 0d921abd..d0c20376 100644 --- a/ext/liquid_c/vm_assembler.c +++ b/ext/liquid_c/vm_assembler.c @@ -74,7 +74,7 @@ void vm_assembler_init(vm_assembler_t *code) { code->instructions = c_buffer_allocate(8); code->constants = c_buffer_allocate(8 * sizeof(VALUE)); - code->tags = c_buffer_init(); + code->tag_markups = c_buffer_init(); vm_assembler_common_init(code); } @@ -82,7 +82,7 @@ void vm_assembler_reset(vm_assembler_t *code) { c_buffer_reset(&code->instructions); c_buffer_reset(&code->constants); - c_buffer_reset(&code->tags); + c_buffer_reset(&code->tag_markups); vm_assembler_common_init(code); } @@ -90,13 +90,13 @@ void vm_assembler_free(vm_assembler_t *code) { c_buffer_free(&code->instructions); c_buffer_free(&code->constants); - c_buffer_free(&code->tags); + c_buffer_free(&code->tag_markups); } void vm_assembler_gc_mark(vm_assembler_t *code) { c_buffer_rb_gc_mark(&code->constants); - c_buffer_rb_gc_mark(&code->tags); + c_buffer_rb_gc_mark(&code->tag_markups); } VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, const VALUE *const_ptr, const VALUE *tags_ptr) diff --git a/ext/liquid_c/vm_assembler.h b/ext/liquid_c/vm_assembler.h index 4a1ba511..4e63353b 100644 --- a/ext/liquid_c/vm_assembler.h +++ b/ext/liquid_c/vm_assembler.h @@ -42,7 +42,7 @@ extern filter_desc_t builtin_filters[]; typedef struct vm_assembler { c_buffer_t instructions; - c_buffer_t tags; + c_buffer_t tag_markups; c_buffer_t constants; size_t max_stack_size; size_t stack_size; @@ -75,7 +75,7 @@ void vm_assembler_add_filter_from_ruby(vm_assembler_t *code, VALUE filter_name, static inline size_t vm_assembler_alloc_memsize(const vm_assembler_t *code) { - return c_buffer_capacity(&code->instructions) + c_buffer_capacity(&code->constants) + c_buffer_capacity(&code->tags); + return c_buffer_capacity(&code->instructions) + c_buffer_capacity(&code->constants) + c_buffer_capacity(&code->tag_markups); } static inline void vm_assembler_write_opcode(vm_assembler_t *code, enum opcode op) @@ -221,9 +221,9 @@ static inline void vm_assembler_add_render_variable_rescue(vm_assembler_t *code, uint24_to_bytes((unsigned int)node_line_number, &instructions[1]); } -static inline void vm_assembler_write_tag(vm_assembler_t *code, VALUE tag) +static inline void vm_assembler_write_tag_markup(vm_assembler_t *code, VALUE tag_markup) { - c_buffer_write_ruby_value(&code->tags, tag); + c_buffer_write_ruby_value(&code->tag_markups, tag_markup); } #endif From 4f37858272a3bc51a2ec1d64b5c392662a06c854 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 8 Jan 2021 17:01:23 -0500 Subject: [PATCH 11/12] Write line numbers in tag_markup --- ext/liquid_c/block.c | 6 +++--- ext/liquid_c/document_body.c | 1 + ext/liquid_c/tag_markup.c | 3 ++- ext/liquid_c/tag_markup.h | 4 +++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 5aa5cf3c..db0f31bd 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -222,7 +222,7 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars if (name_len == 0) { VALUE str = rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding); - unknown_tag = tag_markup_new(str, str, true); + unknown_tag = tag_markup_new(token_start_line_number, str, str, true); goto loop_break; } @@ -252,11 +252,11 @@ static VALUE internal_block_body_parse(block_body_t *body, parse_context_t *pars VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding); if (tag_class == Qnil) { - unknown_tag = tag_markup_new(tag_name, markup, true); + unknown_tag = tag_markup_new(token_start_line_number, tag_name, markup, true); goto loop_break; } - VALUE tag_markup = tag_markup_new(tag_name, markup, false); + VALUE tag_markup = tag_markup_new(token_start_line_number, tag_name, markup, false); block_body_push_tag_markup(body, parse_context->ruby_obj, tag_markup); VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, diff --git a/ext/liquid_c/document_body.c b/ext/liquid_c/document_body.c index addab3e7..922920bc 100644 --- a/ext/liquid_c/document_body.c +++ b/ext/liquid_c/document_body.c @@ -67,6 +67,7 @@ static void document_body_write_tag_markup(document_body_t *body, VALUE tag_mark char *name = (char *)&header[1]; header->flags = tag_markup->flags; + header->line_number = tag_markup->line_number; header->tag_name_len = tag_name_len; header->markup_len = markup_len; header->next_tag_offset = last ? 0 : total_len; diff --git a/ext/liquid_c/tag_markup.c b/ext/liquid_c/tag_markup.c index f4f0e332..a484f04a 100644 --- a/ext/liquid_c/tag_markup.c +++ b/ext/liquid_c/tag_markup.c @@ -28,13 +28,14 @@ const rb_data_type_t tag_markup_data_type = { NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY }; -VALUE tag_markup_new(VALUE tag_name, VALUE markup, bool unknown) +VALUE tag_markup_new(uint32_t line_number, VALUE tag_name, VALUE markup, bool unknown) { tag_markup_t *tag; VALUE obj = TypedData_Make_Struct(cLiquidCTagMarkup, tag_markup_t, &tag_markup_data_type, tag); tag->flags = 0; if (unknown) tag->flags |= TAG_FLAG_UNKNOWN; + tag->line_number = line_number; tag->tag_name = tag_name; tag->markup = markup; tag->block_body = NULL; diff --git a/ext/liquid_c/tag_markup.h b/ext/liquid_c/tag_markup.h index 5b9c8167..8443509f 100644 --- a/ext/liquid_c/tag_markup.h +++ b/ext/liquid_c/tag_markup.h @@ -6,6 +6,7 @@ typedef struct tag_markup { uint32_t flags; + uint32_t line_number; VALUE tag_name; VALUE markup; VALUE block_body_obj; @@ -14,6 +15,7 @@ typedef struct tag_markup { typedef struct tag_markup_header { uint32_t flags; + uint32_t line_number; uint32_t tag_name_len; uint32_t markup_len; uint32_t next_tag_offset; @@ -27,7 +29,7 @@ extern const rb_data_type_t tag_markup_data_type; #define TagMarkup_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tag_markup_t, &tag_markup_data_type, sval) void liquid_define_tag_markup(); -VALUE tag_markup_new(VALUE tag_name, VALUE markup, bool unknown); +VALUE tag_markup_new(uint32_t line_number, VALUE tag_name, VALUE markup, bool unknown); VALUE tag_markup_get_tag_name(VALUE self); VALUE tag_markup_get_markup(VALUE self); void tag_markup_set_block_body(VALUE self, VALUE block_body_obj, block_body_t *block_body); From bc1c70543e8ad1859416909efefd6ce282fe57b3 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 11 Jan 2021 10:43:00 -0500 Subject: [PATCH 12/12] Bind block to first tag only --- ext/liquid_c/block.c | 4 +++- ext/liquid_c/block.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index db0f31bd..9ce95888 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -92,6 +92,7 @@ static VALUE block_body_allocate(VALUE klass) body->tags = c_buffer_init(); body->as.intermediate.blank = true; body->as.intermediate.root = false; + body->as.intermediate.bound_to_tag = false; body->as.intermediate.render_score = 0; body->as.intermediate.vm_assembler_pool = NULL; body->as.intermediate.code = NULL; @@ -336,7 +337,8 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte VALUE block_ret = rb_yield_values(2, tag_name, markup); - if (RTEST(parse_context.parent_tag)) { + if (RTEST(parse_context.parent_tag) && !body->as.intermediate.bound_to_tag) { + body->as.intermediate.bound_to_tag = true; tag_markup_set_block_body(parse_context.parent_tag, self, body); } diff --git a/ext/liquid_c/block.h b/ext/liquid_c/block.h index b96915c7..6e538bb2 100644 --- a/ext/liquid_c/block.h +++ b/ext/liquid_c/block.h @@ -19,6 +19,7 @@ typedef struct block_body { vm_assembler_pool_t *vm_assembler_pool; bool blank; bool root; + bool bound_to_tag; unsigned int render_score; vm_assembler_t *code; } intermediate;