Skip to content

Commit 7dfee03

Browse files
authored
Parser: Friendly token names in Unexpected Token error messages (#194)
This pull request changes the parser's error messages from technical token names to a user-friendly format.

**Before:**
```
Unexpected Token. Expected: `TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE`, found: `TOKEN_COLON`.
```

**After:**
```
Unexpected Token. Expected: an identifier, `@`, `<%`, whitespace, or a newline, found: `:`.
```

### Format conventions

- **Literal tokens** (punctuation/delimiters) are backtick-quoted: `` `<` ``, `` `<%` ``, `` `@` ``, `` `:` ``
- **Abstract tokens** use natural English with articles where appropriate: `an identifier`, `a quote`, `whitespace`, `end of file`
- **Lists** use the Oxford comma: `` an identifier, `@`, or `<%` ``
1 parent 539cc6e commit 7dfee03

File tree

26 files changed

+248
-75
lines changed

26 files changed

+248
-75
lines changed

config.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ errors:
1212
types:
1313
- name: UnexpectedError
1414
message:
15-
template: "%s. Expected: `%s`, found: `%s`."
15+
template: "%s. Expected: %s, found: %s."
1616
arguments:
1717
- description
1818
- expected
@@ -30,10 +30,10 @@ errors:
3030

3131
- name: UnexpectedTokenError
3232
message:
33-
template: "Found `%s` when expecting `%s` at (%u:%u)."
33+
template: "Found %s when expecting %s at (%u:%u)."
3434
arguments:
35-
- token_type_to_string(found->type)
36-
- token_type_to_string(expected_type)
35+
- token_type_to_friendly_string(found->type)
36+
- token_type_to_friendly_string(expected_type)
3737
- found->location.start.line
3838
- found->location.start.column
3939

javascript/packages/linter/test/__snapshots__/cli.test.ts.snap

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/include/parser_helpers.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,21 @@ void parser_push_open_tag(const parser_T* parser, token_T* tag_name);
1313
bool parser_check_matching_tag(const parser_T* parser, hb_string_T tag_name);
1414
token_T* parser_pop_open_tag(const parser_T* parser);
1515

16-
void parser_append_unexpected_error(
16+
void parser_append_unexpected_error_impl(
1717
parser_T* parser,
18+
hb_array_T* errors,
1819
const char* description,
19-
const char* expected,
20-
hb_array_T* errors
20+
token_type_T first_token,
21+
...
22+
);
23+
#define parser_append_unexpected_error(parser, errors, description, ...) \
24+
parser_append_unexpected_error_impl(parser, errors, description, __VA_ARGS__, TOKEN_SENTINEL)
25+
26+
void parser_append_unexpected_error_string(
27+
parser_T* parser,
28+
hb_array_T* errors,
29+
const char* description,
30+
const char* expected
2131
);
2232
void parser_append_unexpected_token_error(parser_T* parser, token_type_T expected_type, hb_array_T* errors);
2333

src/include/token.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,16 @@
66
#include "token_struct.h"
77
#include "util/hb_string.h"
88

9+
#include <stdarg.h>
10+
911
token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer);
1012
hb_string_T token_to_string(const token_T* token);
1113
const char* token_type_to_string(token_type_T type);
14+
const char* token_type_to_friendly_string(token_type_T type);
15+
char* token_types_to_friendly_string_va(token_type_T first_token, ...);
16+
char* token_types_to_friendly_string_valist(token_type_T first_token, va_list args);
17+
18+
#define token_types_to_friendly_string(...) token_types_to_friendly_string_va(__VA_ARGS__, TOKEN_SENTINEL)
1219

1320
token_T* token_copy(token_T* token);
1421

src/include/token_struct.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ typedef enum {
4949
TOKEN_EOF,
5050
} token_type_T;
5151

52+
// Sentinel value for variadic functions
53+
#define TOKEN_SENTINEL 99999999
54+
5255
typedef struct TOKEN_STRUCT {
5356
char* value;
5457
range_T range;

src/parser.c

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -260,17 +260,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
260260
if (token_is(parser, TOKEN_ERROR)) {
261261
free(content.value);
262262

263-
token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
264-
append_unexpected_error(
265-
"Token Error",
266-
"not TOKEN_ERROR",
267-
token->value,
268-
token->location.start,
269-
token->location.end,
270-
document_errors
271-
);
272-
273-
token_free(token);
263+
parser_append_unexpected_error_string(parser, document_errors, "Token Error", "not an error token");
274264

275265
return NULL;
276266
}
@@ -641,7 +631,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
641631
append_unexpected_error(
642632
"Invalid quote character for HTML attribute",
643633
"single quote (') or double quote (\")",
644-
"backtick (`)",
634+
"a backtick",
645635
start,
646636
end,
647637
errors
@@ -655,15 +645,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
655645
return value;
656646
}
657647

648+
char* expected = token_types_to_friendly_string(TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
649+
658650
append_unexpected_error(
659651
"Unexpected Token",
660-
"TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START",
661-
token_type_to_string(parser->current_token->type),
652+
expected,
653+
token_type_to_friendly_string(parser->current_token->type),
662654
parser->current_token->location.start,
663655
parser->current_token->location.end,
664656
errors
665657
);
666658

659+
free(expected);
660+
667661
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
668662
NULL,
669663
children,
@@ -1057,9 +1051,13 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
10571051

10581052
parser_append_unexpected_error(
10591053
parser,
1054+
errors,
10601055
"Unexpected Token",
1061-
"TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
1062-
errors
1056+
TOKEN_IDENTIFIER,
1057+
TOKEN_AT,
1058+
TOKEN_ERB_START,
1059+
TOKEN_WHITESPACE,
1060+
TOKEN_NEWLINE
10631061
);
10641062
}
10651063

@@ -1474,10 +1472,17 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
14741472

14751473
parser_append_unexpected_error(
14761474
parser,
1475+
errors,
14771476
"Unexpected token",
1478-
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
1479-
"TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
1480-
errors
1477+
TOKEN_ERB_START,
1478+
TOKEN_HTML_DOCTYPE,
1479+
TOKEN_HTML_COMMENT_START,
1480+
TOKEN_IDENTIFIER,
1481+
TOKEN_WHITESPACE,
1482+
TOKEN_NBSP,
1483+
TOKEN_AT,
1484+
TOKEN_BACKSLASH,
1485+
TOKEN_NEWLINE
14811486
);
14821487

14831488
parser_synchronize(parser, errors);

src/parser_helpers.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "include/util/hb_buffer.h"
99
#include "include/util/hb_string.h"
1010

11+
#include <stdarg.h>
1112
#include <stdio.h>
1213

1314
void parser_push_open_tag(const parser_T* parser, token_T* tag_name) {
@@ -90,18 +91,45 @@ void parser_exit_foreign_content(parser_T* parser) {
9091
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
9192
}
9293

93-
void parser_append_unexpected_error(
94+
void parser_append_unexpected_error_impl(
9495
parser_T* parser,
96+
hb_array_T* errors,
9597
const char* description,
96-
const char* expected,
97-
hb_array_T* errors
98+
token_type_T first_token,
99+
...
100+
) {
101+
token_T* token = parser_advance(parser);
102+
103+
va_list args;
104+
va_start(args, first_token);
105+
char* expected = token_types_to_friendly_string_valist(first_token, args);
106+
va_end(args);
107+
108+
append_unexpected_error(
109+
description,
110+
expected,
111+
token_type_to_friendly_string(token->type),
112+
token->location.start,
113+
token->location.end,
114+
errors
115+
);
116+
117+
free(expected);
118+
token_free(token);
119+
}
120+
121+
void parser_append_unexpected_error_string(
122+
parser_T* parser,
123+
hb_array_T* errors,
124+
const char* description,
125+
const char* expected
98126
) {
99127
token_T* token = parser_advance(parser);
100128

101129
append_unexpected_error(
102130
description,
103131
expected,
104-
token_type_to_string(token->type),
132+
token_type_to_friendly_string(token->type),
105133
token->location.start,
106134
token->location.end,
107135
errors

src/token.c

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
#include "include/range.h"
44
#include "include/token_struct.h"
55
#include "include/util.h"
6+
#include "include/util/hb_buffer.h"
67

8+
#include <stdarg.h>
79
#include <stdbool.h>
810
#include <stdio.h>
911
#include <stdlib.h>
@@ -78,8 +80,82 @@ const char* token_type_to_string(const token_type_T type) {
7880
case TOKEN_ERROR: return "TOKEN_ERROR";
7981
case TOKEN_EOF: return "TOKEN_EOF";
8082
}
83+
}
84+
85+
const char* token_type_to_friendly_string(const token_type_T type) {
86+
switch (type) {
87+
case TOKEN_WHITESPACE: return "whitespace";
88+
case TOKEN_NBSP: return "non-breaking space";
89+
case TOKEN_NEWLINE: return "a newline";
90+
case TOKEN_IDENTIFIER: return "an identifier";
91+
case TOKEN_HTML_DOCTYPE: return "`<!DOCTYPE`";
92+
case TOKEN_XML_DECLARATION: return "`<?xml`";
93+
case TOKEN_XML_DECLARATION_END: return "`?>`";
94+
case TOKEN_CDATA_START: return "`<![CDATA[`";
95+
case TOKEN_CDATA_END: return "`]]>`";
96+
case TOKEN_HTML_TAG_START: return "`<`";
97+
case TOKEN_HTML_TAG_END: return "`>`";
98+
case TOKEN_HTML_TAG_START_CLOSE: return "`</`";
99+
case TOKEN_HTML_TAG_SELF_CLOSE: return "`/>`";
100+
case TOKEN_HTML_COMMENT_START: return "`<!--`";
101+
case TOKEN_HTML_COMMENT_END: return "`-->`";
102+
case TOKEN_HTML_COMMENT_INVALID_END: return "`--!>`";
103+
case TOKEN_EQUALS: return "`=`";
104+
case TOKEN_QUOTE: return "a quote";
105+
case TOKEN_BACKTICK: return "a backtick";
106+
case TOKEN_BACKSLASH: return "`\\`";
107+
case TOKEN_DASH: return "`-`";
108+
case TOKEN_UNDERSCORE: return "`_`";
109+
case TOKEN_EXCLAMATION: return "`!`";
110+
case TOKEN_SLASH: return "`/`";
111+
case TOKEN_SEMICOLON: return "`;`";
112+
case TOKEN_COLON: return "`:`";
113+
case TOKEN_AT: return "`@`";
114+
case TOKEN_LT: return "`<`";
115+
case TOKEN_PERCENT: return "`%`";
116+
case TOKEN_AMPERSAND: return "`&`";
117+
case TOKEN_ERB_START: return "`<%`";
118+
case TOKEN_ERB_CONTENT: return "ERB content";
119+
case TOKEN_ERB_END: return "`%>`";
120+
case TOKEN_CHARACTER: return "a character";
121+
case TOKEN_ERROR: return "an error token";
122+
case TOKEN_EOF: return "end of file";
123+
}
124+
}
125+
126+
char* token_types_to_friendly_string_valist(token_type_T first_token, va_list args) {
127+
if ((int) first_token == TOKEN_SENTINEL) { return herb_strdup(""); }
128+
129+
size_t count = 0;
130+
const char* names[32];
131+
token_type_T current = first_token;
132+
133+
while ((int) current != TOKEN_SENTINEL && count < 32) {
134+
names[count++] = token_type_to_friendly_string(current);
135+
current = va_arg(args, token_type_T);
136+
}
137+
138+
hb_buffer_T buffer;
139+
hb_buffer_init(&buffer, 128);
140+
141+
for (size_t i = 0; i < count; i++) {
142+
hb_buffer_append(&buffer, names[i]);
143+
144+
if (i < count - 1) {
145+
if (count > 2) { hb_buffer_append(&buffer, ", "); }
146+
if (i == count - 2) { hb_buffer_append(&buffer, count == 2 ? " or " : "or "); }
147+
}
148+
}
149+
150+
return hb_buffer_value(&buffer);
151+
}
81152

82-
return "Unknown token_type_T";
153+
char* token_types_to_friendly_string_va(token_type_T first_token, ...) {
154+
va_list args;
155+
va_start(args, first_token);
156+
char* result = token_types_to_friendly_string_valist(first_token, args);
157+
va_end(args);
158+
return result;
83159
}
84160

85161
hb_string_T token_to_string(const token_T* token) {

test/c/test_token.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,48 @@ TEST(test_token)
77
ck_assert_str_eq(token_type_to_string(TOKEN_IDENTIFIER), "TOKEN_IDENTIFIER");
88
END
99

10+
TEST(test_token_type_to_friendly_string)
11+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_IDENTIFIER), "an identifier");
12+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_WHITESPACE), "whitespace");
13+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_NEWLINE), "a newline");
14+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_QUOTE), "a quote");
15+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_CHARACTER), "a character");
16+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_EOF), "end of file");
17+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_HTML_TAG_START), "`<`");
18+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_HTML_TAG_END), "`>`");
19+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_HTML_TAG_SELF_CLOSE), "`/>`");
20+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_HTML_TAG_START_CLOSE), "`</`");
21+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_HTML_COMMENT_START), "`<!--`");
22+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_HTML_COMMENT_END), "`-->`");
23+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_EQUALS), "`=`");
24+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_SLASH), "`/`");
25+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_COLON), "`:`");
26+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_ERB_START), "`<%`");
27+
ck_assert_str_eq(token_type_to_friendly_string(TOKEN_ERB_END), "`%>`");
28+
END
29+
30+
TEST(test_token_types_to_friendly_string)
31+
char* result1 = token_types_to_friendly_string(TOKEN_IDENTIFIER);
32+
ck_assert_str_eq(result1, "an identifier");
33+
free(result1);
34+
35+
char* result2 = token_types_to_friendly_string(TOKEN_IDENTIFIER, TOKEN_QUOTE);
36+
ck_assert_str_eq(result2, "an identifier or a quote");
37+
free(result2);
38+
39+
char* result3 = token_types_to_friendly_string(TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
40+
ck_assert_str_eq(result3, "an identifier, a quote, or `<%`");
41+
free(result3);
42+
43+
char* result4 = token_types_to_friendly_string(TOKEN_IDENTIFIER, TOKEN_ERB_START, TOKEN_WHITESPACE, TOKEN_NEWLINE);
44+
ck_assert_str_eq(result4, "an identifier, `<%`, whitespace, or a newline");
45+
free(result4);
46+
47+
char* result5 = token_types_to_friendly_string(TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_END, TOKEN_EQUALS);
48+
ck_assert_str_eq(result5, "`<`, `>`, or `=`");
49+
free(result5);
50+
END
51+
1052
TEST(test_token_to_string)
1153
hb_buffer_T output;
1254
hb_buffer_init(&output, 1024);
@@ -25,6 +67,8 @@ TCase *token_tests(void) {
2567
TCase *token = tcase_create("Token");
2668

2769
tcase_add_test(token, test_token);
70+
tcase_add_test(token, test_token_type_to_friendly_string);
71+
tcase_add_test(token, test_token_types_to_friendly_string);
2872
tcase_add_test(token, test_token_to_string);
2973

3074
return token;

test/snapshots/parser/attributes_test/test_0018_apostrophe_inside_single_quotes_e431474b58446f910c9425491add27a0.txt

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)