Skip to content

Commit 4048888

Browse files
committed
Also support \u{...} for universal character names
Add an error message to the failure path. Add `//G` grammar comments. Run regression tests.
1 parent 10115ca commit 4048888

File tree

1 file changed

+53
-10
lines changed

1 file changed

+53
-10
lines changed

source/lex.h

Lines changed: 53 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -930,21 +930,27 @@ auto lex_line(
930930
return 0;
931931
};
932932

933+
//G simple-hexadecimal-digit-sequence:
934+
//G hexadecimal-digit
935+
//G simple-hexadecimal-digit-sequence hexadecimal-digit
936+
//G
933937
//G hexadecimal-escape-sequence:
934938
//G '\x' hexadecimal-digit
935939
//G hexadecimal-escape-sequence hexadecimal-digit
940+
//G '\x{' simple-hexadecimal-digit-sequence '}'
936941
//G
937942
auto peek_is_hexadecimal_escape_sequence = [&](int offset)
938943
{
939944
if (
940-
peek( offset) == '\\'
945+
peek(offset) == '\\'
941946
&& peek(1+offset) == 'x'
942-
&& (is_hexadecimal_digit(peek(2+offset))
943-
|| (peek(2+offset) == '{' && is_hexadecimal_digit(peek(3+offset)))
947+
&& (
948+
is_hexadecimal_digit(peek(2+offset))
949+
|| (peek(2+offset) == '{' && is_hexadecimal_digit(peek(3+offset)))
950+
)
944951
)
945-
)
946952
{
947-
bool has_bracket = peek(2+offset) == '{';
953+
auto has_bracket = peek(2+offset) == '{';
948954
auto j = 3;
949955

950956
if (has_bracket) { ++j; }
@@ -961,6 +967,11 @@ auto lex_line(
961967
if (peek(j+offset) == '}') {
962968
++j;
963969
} else {
970+
errors.emplace_back(
971+
source_position(lineno, i + offset),
972+
"invalid hexadecimal escape sequence - \\x{ must"
973+
" be followed by hexadecimal digits and a closing }"
974+
);
964975
return 0;
965976
}
966977
}
@@ -972,6 +983,7 @@ auto lex_line(
972983
//G universal-character-name:
973984
//G '\u' hex-quad
974985
//G '\U' hex-quad hex-quad
986+
//G '\u{' simple-hexadecimal-digit-sequence '}'
975987
//G
976988
//G hex-quad:
977989
//G hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
@@ -981,6 +993,7 @@ auto lex_line(
981993
if (
982994
peek(offset) == '\\'
983995
&& peek(1 + offset) == 'u'
996+
&& peek(2 + offset) != '{'
984997
)
985998
{
986999
auto j = 2;
@@ -994,11 +1007,41 @@ auto lex_line(
9941007
if (j == 6) { return j; }
9951008
errors.emplace_back(
9961009
source_position( lineno, i + offset ),
997-
"invalid universal character name (\\u must"
998-
" be followed by 4 hexadecimal digits)"
1010+
"invalid universal character name - \\u without { must"
1011+
" be followed by 4 hexadecimal digits"
9991012
);
10001013
}
1001-
if (
1014+
1015+
else if (
1016+
peek(offset) == '\\'
1017+
&& peek(1 + offset) == 'u'
1018+
&& peek(2 + offset) == '{'
1019+
)
1020+
{
1021+
auto j = 4;
1022+
1023+
while (
1024+
peek(j + offset)
1025+
&& is_hexadecimal_digit(peek(j + offset))
1026+
)
1027+
{
1028+
++j;
1029+
}
1030+
1031+
if (peek(j + offset) == '}') {
1032+
++j;
1033+
}
1034+
else {
1035+
errors.emplace_back(
1036+
source_position(lineno, i + offset),
1037+
"invalid universal character name - \\u{ must"
1038+
" be followed by hexadecimal digits and a closing }"
1039+
);
1040+
}
1041+
return j;
1042+
}
1043+
1044+
else if (
10021045
peek(offset) == '\\'
10031046
&& peek(1+offset) == 'U'
10041047
)
@@ -1014,8 +1057,8 @@ auto lex_line(
10141057
if (j == 10) { return j; }
10151058
errors.emplace_back(
10161059
source_position(lineno, i+offset),
1017-
"invalid universal character name (\\U must"
1018-
" be followed by 8 hexadecimal digits)"
1060+
"invalid universal character name - \\U must"
1061+
" be followed by 8 hexadecimal digits"
10191062
);
10201063
}
10211064
return 0;

0 commit comments

Comments
 (0)