Skip to content

Commit c2d3ee6

Browse files
authored
Parser: Improve error recovery for ERB tags (#1170)
1 parent ccb76fe commit c2d3ee6

File tree

26 files changed

+759
-102
lines changed

26 files changed

+759
-102
lines changed

config.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,38 @@ errors:
253253
- name: opening_quote
254254
type: token
255255
256+
- name: UnclosedERBTagError
257+
message:
258+
template: "ERB tag `%s` at (%u:%u) is missing closing `%%>`."
259+
arguments:
260+
- opening_tag->value
261+
- opening_tag->location.start.line
262+
- opening_tag->location.start.column
263+
264+
fields:
265+
- name: opening_tag
266+
type: token
267+
268+
- name: NestedERBTagError
269+
message:
270+
# nested_tag_line / nested_tag_column are declared as size_t below, so they are formatted with %zu rather than %u.
template: "ERB tag `%s` at (%u:%u) was terminated by nested `<%%` tag at (%zu:%zu). Nesting `<%%` tags is not supported."
271+
arguments:
272+
- opening_tag->value
273+
- opening_tag->location.start.line
274+
- opening_tag->location.start.column
275+
- nested_tag_line
276+
- nested_tag_column
277+
278+
fields:
279+
- name: opening_tag
280+
type: token
281+
282+
- name: nested_tag_line
283+
type: size_t
284+
285+
- name: nested_tag_column
286+
type: size_t
287+
256288
warnings:
257289
fields: []
258290
types: []

sig/herb/errors.rbs

Lines changed: 40 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/analyze.c

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@
2626
#include <stdlib.h>
2727
#include <string.h>
2828

29+
// Picks the end position for an ERB content node from the last part that is
// present: the closing tag's end if tag_closing is set, otherwise the content's
// end if content is set, otherwise the opening tag's end.
// erb_node and, in the final fallback, erb_node->tag_opening are dereferenced
// without a NULL check, so callers must supply both.
static position_T erb_content_end_position(const AST_ERB_CONTENT_NODE_T* erb_node) {
30+
if (erb_node->tag_closing != NULL) {
31+
return erb_node->tag_closing->location.end;
32+
} else if (erb_node->content != NULL) {
33+
return erb_node->content->location.end;
34+
} else {
35+
return erb_node->tag_opening->location.end;
36+
}
37+
}
38+
2939
static analyzed_ruby_T* herb_analyze_ruby(hb_string_T source) {
3040
analyzed_ruby_T* analyzed = init_analyzed_ruby(source);
3141

@@ -279,11 +289,11 @@ static control_type_t find_earliest_control_keyword(pm_node_t* root, const uint8
279289

280290
static control_type_t detect_control_type(AST_ERB_CONTENT_NODE_T* erb_node) {
281291
if (!erb_node || erb_node->base.type != AST_ERB_CONTENT_NODE) { return CONTROL_TYPE_UNKNOWN; }
292+
if (erb_node->tag_closing == NULL) { return CONTROL_TYPE_UNKNOWN; }
282293

283294
analyzed_ruby_T* ruby = erb_node->analyzed_ruby;
284295

285296
if (!ruby) { return CONTROL_TYPE_UNKNOWN; }
286-
287297
if (ruby->valid) { return CONTROL_TYPE_UNKNOWN; }
288298

289299
pm_node_t* root = ruby->root;
@@ -340,7 +350,7 @@ static AST_NODE_T* create_control_node(
340350
erb_node->base.errors = NULL;
341351

342352
position_T start_position = erb_node->tag_opening->location.start;
343-
position_T end_position = erb_node->tag_closing->location.end;
353+
position_T end_position = erb_content_end_position(erb_node);
344354

345355
if (end_node) {
346356
end_position = end_node->base.location.end;
@@ -695,14 +705,16 @@ static size_t process_control_structure(
695705
}
696706
}
697707

708+
position_T when_end_position = erb_content_end_position(erb_content);
709+
698710
AST_ERB_WHEN_NODE_T* when_node = ast_erb_when_node_init(
699711
erb_content->tag_opening,
700712
erb_content->content,
701713
erb_content->tag_closing,
702714
then_keyword,
703715
when_statements,
704716
erb_content->tag_opening->location.start,
705-
erb_content->tag_closing->location.end,
717+
when_end_position,
706718
when_errors
707719
);
708720

@@ -736,14 +748,16 @@ static size_t process_control_structure(
736748
}
737749
}
738750

751+
position_T in_end_position = erb_content_end_position(erb_content);
752+
739753
AST_ERB_IN_NODE_T* in_node = ast_erb_in_node_init(
740754
erb_content->tag_opening,
741755
erb_content->content,
742756
erb_content->tag_closing,
743757
in_then_keyword,
744758
in_statements,
745759
erb_content->tag_opening->location.start,
746-
erb_content->tag_closing->location.end,
760+
in_end_position,
747761
in_errors
748762
);
749763

@@ -785,7 +799,7 @@ static size_t process_control_structure(
785799
next_erb->tag_closing,
786800
else_children,
787801
next_erb->tag_opening->location.start,
788-
next_erb->tag_closing->location.end,
802+
erb_content_end_position(next_erb),
789803
else_errors
790804
);
791805

@@ -811,7 +825,7 @@ static size_t process_control_structure(
811825
end_erb->content,
812826
end_erb->tag_closing,
813827
end_erb->tag_opening->location.start,
814-
end_erb->tag_closing->location.end,
828+
erb_content_end_position(end_erb),
815829
end_errors
816830
);
817831

@@ -823,7 +837,7 @@ static size_t process_control_structure(
823837
}
824838

825839
position_T start_position = erb_node->tag_opening->location.start;
826-
position_T end_position = erb_node->tag_closing->location.end;
840+
position_T end_position = erb_content_end_position(erb_node);
827841

828842
if (end_node) {
829843
end_position = end_node->base.location.end;
@@ -933,7 +947,7 @@ static size_t process_control_structure(
933947
next_erb->tag_closing,
934948
else_children,
935949
next_erb->tag_opening->location.start,
936-
next_erb->tag_closing->location.end,
950+
erb_content_end_position(next_erb),
937951
else_errors
938952
);
939953

@@ -979,7 +993,7 @@ static size_t process_control_structure(
979993
next_erb->tag_closing,
980994
ensure_children,
981995
next_erb->tag_opening->location.start,
982-
next_erb->tag_closing->location.end,
996+
erb_content_end_position(next_erb),
983997
ensure_errors
984998
);
985999

@@ -1005,7 +1019,7 @@ static size_t process_control_structure(
10051019
end_erb->content,
10061020
end_erb->tag_closing,
10071021
end_erb->tag_opening->location.start,
1008-
end_erb->tag_closing->location.end,
1022+
erb_content_end_position(end_erb),
10091023
end_errors
10101024
);
10111025

@@ -1017,7 +1031,7 @@ static size_t process_control_structure(
10171031
}
10181032

10191033
position_T start_position = erb_node->tag_opening->location.start;
1020-
position_T end_position = erb_node->tag_closing->location.end;
1034+
position_T end_position = erb_content_end_position(erb_node);
10211035

10221036
if (end_node) {
10231037
end_position = end_node->base.location.end;
@@ -1068,12 +1082,14 @@ static size_t process_control_structure(
10681082
hb_array_T* end_errors = close_erb->base.errors;
10691083
close_erb->base.errors = NULL;
10701084

1085+
position_T close_end_pos = erb_content_end_position(close_erb);
1086+
10711087
end_node = ast_erb_end_node_init(
10721088
close_erb->tag_opening,
10731089
close_erb->content,
10741090
close_erb->tag_closing,
10751091
close_erb->tag_opening->location.start,
1076-
close_erb->tag_closing->location.end,
1092+
close_end_pos,
10771093
end_errors
10781094
);
10791095

@@ -1085,7 +1101,7 @@ static size_t process_control_structure(
10851101
}
10861102

10871103
position_T start_position = erb_node->tag_opening->location.start;
1088-
position_T end_position = erb_node->tag_closing->location.end;
1104+
position_T end_position = erb_content_end_position(erb_node);
10891105

10901106
if (end_node) {
10911107
end_position = end_node->base.location.end;
@@ -1142,12 +1158,14 @@ static size_t process_control_structure(
11421158
hb_array_T* end_errors = end_erb->base.errors;
11431159
end_erb->base.errors = NULL;
11441160

1161+
position_T end_erb_final_pos = erb_content_end_position(end_erb);
1162+
11451163
end_node = ast_erb_end_node_init(
11461164
end_erb->tag_opening,
11471165
end_erb->content,
11481166
end_erb->tag_closing,
11491167
end_erb->tag_opening->location.start,
1150-
end_erb->tag_closing->location.end,
1168+
end_erb_final_pos,
11511169
end_errors
11521170
);
11531171

src/include/lexer_peek_helpers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset);
3030
bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset);
3131
bool lexer_peek_for_html_comment_invalid_end(const lexer_T* lexer, uint32_t offset);
3232

33+
bool lexer_peek_erb_start(const lexer_T* lexer, uint32_t offset);
3334
bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset);
3435
bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset);
3536
bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset);

src/lexer.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,17 @@ static token_T* lexer_parse_erb_content(lexer_T* lexer) {
204204

205205
while (!lexer_peek_erb_end(lexer, 0)) {
206206
if (lexer_eof(lexer)) {
207-
token_T* token = token_init(
208-
hb_string_range(lexer->source, start_position, lexer->current_position),
209-
TOKEN_ERROR,
210-
lexer
211-
); // Handle unexpected EOF
207+
token_T* token =
208+
token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
209+
210+
return token;
211+
}
212+
213+
if (lexer_peek_erb_start(lexer, 0)) {
214+
lexer->state = STATE_DATA;
215+
216+
token_T* token =
217+
token_init(hb_string_range(lexer->source, start_position, lexer->current_position), TOKEN_ERB_CONTENT, lexer);
212218

213219
return token;
214220
}

src/lexer_peek_helpers.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) {
7878
);
7979
}
8080

81+
// Thin wrapper around lexer_peek_for(): forwards `lexer` and `offset` together
// with the two-character pattern "<%" and returns its result unchanged.
// NOTE(review): the trailing `false` is presumably a case-insensitivity flag --
// confirm against the lexer_peek_for() declaration.
bool lexer_peek_erb_start(const lexer_T* lexer, uint32_t offset) {
82+
return lexer_peek_for(lexer, offset, hb_string("<%"), false);
83+
}
84+
8185
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type) {
8286
uint32_t saved_position = lexer->current_position;
8387
uint32_t saved_line = lexer->current_line;

src/parser.c

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
914914
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
915915

916916
while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
917-
if (token_is(parser, TOKEN_HTML_TAG_START)) {
917+
if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) {
918918
append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors);
919919

920920
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
@@ -1190,7 +1190,32 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
11901190

11911191
token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
11921192
token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
1193-
token_T* closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
1193+
1194+
token_T* closing_tag = NULL;
1195+
position_T end_position;
1196+
1197+
if (token_is(parser, TOKEN_ERB_END)) {
1198+
closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
1199+
end_position = closing_tag->location.end;
1200+
} else if (token_is(parser, TOKEN_ERB_START)) {
1201+
append_nestederb_tag_error(
1202+
opening_tag,
1203+
parser->current_token->location.start.line,
1204+
parser->current_token->location.start.column,
1205+
parser->current_token->location.start,
1206+
parser->current_token->location.end,
1207+
errors
1208+
);
1209+
end_position = parser->current_token->location.start;
1210+
} else {
1211+
append_unclosederb_tag_error(
1212+
opening_tag,
1213+
opening_tag->location.start,
1214+
parser->current_token->location.start,
1215+
errors
1216+
);
1217+
end_position = parser->current_token->location.start;
1218+
}
11941219

11951220
AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
11961221
opening_tag,
@@ -1200,13 +1225,13 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
12001225
false,
12011226
false,
12021227
opening_tag->location.start,
1203-
closing_tag->location.end,
1228+
end_position,
12041229
errors
12051230
);
12061231

12071232
token_free(opening_tag);
12081233
token_free(content);
1209-
token_free(closing_tag);
1234+
if (closing_tag != NULL) { token_free(closing_tag); }
12101235

12111236
return erb_node;
12121237
}

test/engine/evaluation_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ class EvaluationTest < Minitest::Spec
251251
<div>
252252
<%# This is a comment %>
253253
<p>Visible content</p>
254-
<%# Another comment with <%= "erb" %> inside %>
254+
<%# Another comment %>
255255
<p>More content</p>
256256
</div>
257257
ERB

0 commit comments

Comments
 (0)