Skip to content

Commit 7e75178

Browse files
authored
C: Cleanup parser and bindings (#1212)
This pull request hardens the C parser core and its language bindings (Ruby, WASM, Node.js) against memory safety issues. The Ruby C extension functions now use `rb_ensure` to guarantee that C memory is freed even when a Ruby exception unwinds the stack The Ruby node and error conversion functions were also calling `rb_define_module` and `rb_define_class_under` on every conversion to look up the target class. The class references are now cached in static globals, initialized once during `Init_herb`.
1 parent 35d4bd0 commit 7e75178

27 files changed

+283
-79
lines changed

config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ errors:
297297

298298
- name: NestedERBTagError
299299
message:
300-
template: "ERB tag `%s` at (%u:%u) was terminated by nested `<%%` tag at (%u:%u). Nesting `<%%` tags is not supported."
300+
template: "ERB tag `%s` at (%u:%u) was terminated by nested `<%%` tag at (%zu:%zu). Nesting `<%%` tags is not supported."
301301
arguments:
302302
- opening_tag->value
303303
- opening_tag->location.start.line
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<div>
2+
<span>
3+
<p>Text
4+
</span>
5+
</div>

ext/herb/extension.c

Lines changed: 71 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,77 @@ VALUE cResult;
1414
VALUE cLexResult;
1515
VALUE cParseResult;
1616

17-
static VALUE Herb_lex(VALUE self, VALUE source) {
18-
char* string = (char*) check_string(source);
17+
typedef struct {
18+
AST_DOCUMENT_NODE_T* root;
19+
VALUE source;
20+
} parse_args_T;
21+
22+
typedef struct {
23+
hb_array_T* tokens;
24+
VALUE source;
25+
} lex_args_T;
26+
27+
typedef struct {
28+
char* buffer_value;
29+
} buffer_args_T;
30+
31+
static VALUE parse_convert_body(VALUE arg) {
32+
parse_args_T* args = (parse_args_T*) arg;
33+
34+
return create_parse_result(args->root, args->source);
35+
}
36+
37+
static VALUE parse_cleanup(VALUE arg) {
38+
parse_args_T* args = (parse_args_T*) arg;
39+
40+
if (args->root != NULL) { ast_node_free((AST_NODE_T*) args->root); }
41+
42+
return Qnil;
43+
}
44+
45+
static VALUE lex_convert_body(VALUE arg) {
46+
lex_args_T* args = (lex_args_T*) arg;
47+
48+
return create_lex_result(args->tokens, args->source);
49+
}
50+
51+
static VALUE lex_cleanup(VALUE arg) {
52+
lex_args_T* args = (lex_args_T*) arg;
53+
54+
if (args->tokens != NULL) { herb_free_tokens(&args->tokens); }
1955

20-
hb_array_T* tokens = herb_lex(string);
56+
return Qnil;
57+
}
58+
59+
static VALUE buffer_to_string_body(VALUE arg) {
60+
buffer_args_T* args = (buffer_args_T*) arg;
61+
62+
return rb_utf8_str_new_cstr(args->buffer_value);
63+
}
64+
65+
static VALUE buffer_cleanup(VALUE arg) {
66+
buffer_args_T* args = (buffer_args_T*) arg;
67+
68+
if (args->buffer_value != NULL) { free(args->buffer_value); }
69+
70+
return Qnil;
71+
}
2172

22-
VALUE result = create_lex_result(tokens, source);
73+
static VALUE Herb_lex(VALUE self, VALUE source) {
74+
char* string = (char*) check_string(source);
2375

24-
herb_free_tokens(&tokens);
76+
lex_args_T args = { .tokens = herb_lex(string), .source = source };
2577

26-
return result;
78+
return rb_ensure(lex_convert_body, (VALUE) &args, lex_cleanup, (VALUE) &args);
2779
}
2880

2981
static VALUE Herb_lex_file(VALUE self, VALUE path) {
3082
char* file_path = (char*) check_string(path);
31-
hb_array_T* tokens = herb_lex_file(file_path);
3283

3384
VALUE source_value = read_file_to_ruby_string(file_path);
34-
VALUE result = create_lex_result(tokens, source_value);
85+
lex_args_T args = { .tokens = herb_lex_file(file_path), .source = source_value };
3586

36-
herb_free_tokens(&tokens);
37-
38-
return result;
87+
return rb_ensure(lex_convert_body, (VALUE) &args, lex_cleanup, (VALUE) &args);
3988
}
4089

4190
static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) {
@@ -60,13 +109,9 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) {
60109
if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); }
61110
}
62111

63-
AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options);
64-
65-
VALUE result = create_parse_result(root, source);
112+
parse_args_T args = { .root = herb_parse(string, &parser_options), .source = source };
66113

67-
ast_node_free((AST_NODE_T*) root);
68-
69-
return result;
114+
return rb_ensure(parse_convert_body, (VALUE) &args, parse_cleanup, (VALUE) &args);
70115
}
71116

72117
static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) {
@@ -94,13 +139,9 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) {
94139
if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); }
95140
}
96141

97-
AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options);
98-
99-
VALUE result = create_parse_result(root, source_value);
142+
parse_args_T args = { .root = herb_parse(string, &parser_options), .source = source_value };
100143

101-
ast_node_free((AST_NODE_T*) root);
102-
103-
return result;
144+
return rb_ensure(parse_convert_body, (VALUE) &args, parse_cleanup, (VALUE) &args);
104145
}
105146

106147
static VALUE Herb_extract_ruby(int argc, VALUE* argv, VALUE self) {
@@ -132,10 +173,9 @@ static VALUE Herb_extract_ruby(int argc, VALUE* argv, VALUE self) {
132173

133174
herb_extract_ruby_to_buffer_with_options(string, &output, &extract_options);
134175

135-
VALUE result = rb_utf8_str_new_cstr(output.value);
136-
free(output.value);
176+
buffer_args_T args = { .buffer_value = output.value };
137177

138-
return result;
178+
return rb_ensure(buffer_to_string_body, (VALUE) &args, buffer_cleanup, (VALUE) &args);
139179
}
140180

141181
static VALUE Herb_extract_html(VALUE self, VALUE source) {
@@ -146,10 +186,9 @@ static VALUE Herb_extract_html(VALUE self, VALUE source) {
146186

147187
herb_extract_html_to_buffer(string, &output);
148188

149-
VALUE result = rb_utf8_str_new_cstr(output.value);
150-
free(output.value);
189+
buffer_args_T args = { .buffer_value = output.value };
151190

152-
return result;
191+
return rb_ensure(buffer_to_string_body, (VALUE) &args, buffer_cleanup, (VALUE) &args);
153192
}
154193

155194
static VALUE Herb_version(VALUE self) {
@@ -171,6 +210,9 @@ __attribute__((__visibility__("default"))) void Init_herb(void) {
171210
cLexResult = rb_define_class_under(mHerb, "LexResult", cResult);
172211
cParseResult = rb_define_class_under(mHerb, "ParseResult", cResult);
173212

213+
rb_init_node_classes();
214+
rb_init_error_classes();
215+
174216
rb_define_singleton_method(mHerb, "parse", Herb_parse, -1);
175217
rb_define_singleton_method(mHerb, "lex", Herb_lex, 1);
176218
rb_define_singleton_method(mHerb, "parse_file", Herb_parse_file, -1);

src/analyze.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,6 +1537,9 @@ void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source,
15371537
herb_visit_node((AST_NODE_T*) document, analyze_erb_content, NULL);
15381538

15391539
analyze_ruby_context_T* context = malloc(sizeof(analyze_ruby_context_T));
1540+
1541+
if (!context) { return; }
1542+
15401543
context->document = document;
15411544
context->parent = NULL;
15421545
context->ruby_context_stack = hb_array_init(8);
@@ -1546,6 +1549,13 @@ void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source,
15461549
herb_transform_conditional_open_tags(document);
15471550

15481551
invalid_erb_context_T* invalid_context = malloc(sizeof(invalid_erb_context_T));
1552+
1553+
if (!invalid_context) {
1554+
hb_array_free(&context->ruby_context_stack);
1555+
free(context);
1556+
return;
1557+
}
1558+
15491559
invalid_context->loop_depth = 0;
15501560
invalid_context->rescue_depth = 0;
15511561

src/analyze_conditional_elements.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,9 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document
285285

286286
if (contains_single_open_tag(statements, &open_tag)) {
287287
conditional_open_tag_T* entry = malloc(sizeof(conditional_open_tag_T));
288+
289+
if (!entry) { continue; }
290+
288291
entry->open_index = node_index;
289292
entry->open_conditional = node;
290293
entry->open_tag = open_tag;
@@ -416,17 +419,23 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document
416419

417420
for (size_t body_index = matched_open->open_index + 1; body_index < node_index; body_index++) {
418421
size_t* consumed_index = malloc(sizeof(size_t));
419-
*consumed_index = body_index;
420422

421-
hb_array_append(consumed_indices, consumed_index);
423+
if (consumed_index) {
424+
*consumed_index = body_index;
425+
hb_array_append(consumed_indices, consumed_index);
426+
}
427+
422428
hb_array_set(nodes, body_index, NULL);
423429
}
424430

425431
hb_array_set(nodes, matched_open->open_index, conditional_element);
426432

427433
size_t* close_index = malloc(sizeof(size_t));
428-
*close_index = node_index;
429-
hb_array_append(consumed_indices, close_index);
434+
435+
if (close_index) {
436+
*close_index = node_index;
437+
hb_array_append(consumed_indices, close_index);
438+
}
430439
hb_array_set(nodes, node_index, NULL);
431440

432441
if (matched_open->condition) { free((void*) matched_open->condition); }

src/analyze_conditional_open_tags.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,11 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen
424424

425425
for (size_t j = i + 1; j <= close_index; j++) {
426426
size_t* index = malloc(sizeof(size_t));
427-
*index = j;
428-
hb_array_append(consumed_indices, index);
427+
428+
if (index) {
429+
*index = j;
430+
hb_array_append(consumed_indices, index);
431+
}
429432
}
430433
}
431434

src/analyzed_ruby.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
analyzed_ruby_T* init_analyzed_ruby(hb_string_T source) {
88
analyzed_ruby_T* analyzed = malloc(sizeof(analyzed_ruby_T));
99

10+
if (!analyzed) { return NULL; }
11+
1012
pm_parser_init(&analyzed->parser, (const uint8_t*) source.data, source.length, NULL);
1113

1214
analyzed->root = pm_parse(&analyzed->parser);

src/ast_node.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ void ast_node_init(AST_NODE_T* node, const ast_node_type_T type, position_T star
3030
AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) {
3131
AST_LITERAL_NODE_T* literal = malloc(sizeof(AST_LITERAL_NODE_T));
3232

33+
if (!literal) { return NULL; }
34+
3335
ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL);
3436

3537
literal->content = herb_strdup(token->value);

src/include/util/hb_array.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef HERB_ARRAY_H
22
#define HERB_ARRAY_H
33

4+
#include <stdbool.h>
45
#include <stdlib.h>
56

67
typedef struct HB_ARRAY_STRUCT {
@@ -15,15 +16,15 @@ void* hb_array_get(const hb_array_T* array, size_t index);
1516
void* hb_array_first(hb_array_T* array);
1617
void* hb_array_last(hb_array_T* array);
1718

18-
void hb_array_append(hb_array_T* array, void* item);
19+
bool hb_array_append(hb_array_T* array, void* item);
1920
void hb_array_set(const hb_array_T* array, size_t index, void* item);
2021
void hb_array_free(hb_array_T** array);
2122
void hb_array_remove(hb_array_T* array, size_t index);
2223

2324
size_t hb_array_index_of(hb_array_T* array, void* item);
2425
void hb_array_remove_item(hb_array_T* array, void* item);
2526

26-
void hb_array_push(hb_array_T* array, void* item);
27+
bool hb_array_push(hb_array_T* array, void* item);
2728
void* hb_array_pop(hb_array_T* array);
2829

2930
size_t hb_array_capacity(const hb_array_T* array);

src/include/util/hb_narray.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ typedef struct HB_NARRAY_STRUCT {
1212
size_t capacity;
1313
} hb_narray_T;
1414

15-
void hb_narray_init(hb_narray_T* array, size_t item_size, size_t initial_capacity);
15+
bool hb_narray_init(hb_narray_T* array, size_t item_size, size_t initial_capacity);
1616
#define hb_narray_pointer_init(array, initial_capacity) (hb_narray_init(array, sizeof(void*), initial_capacity))
1717

1818
void* hb_narray_get(const hb_narray_T* array, size_t index);
1919
void* hb_narray_first(hb_narray_T* array);
2020
void* hb_narray_last(hb_narray_T* array);
2121
size_t hb_narray_size(const hb_narray_T* array);
2222

23-
void hb_narray_append(hb_narray_T* array, void* item);
23+
bool hb_narray_append(hb_narray_T* array, void* item);
2424
void hb_narray_remove(hb_narray_T* array, size_t index);
2525
void hb_narray_deinit(hb_narray_T* array);
2626

0 commit comments

Comments
 (0)