@@ -930,21 +930,27 @@ auto lex_line(
930
930
return 0 ;
931
931
};
932
932
933
+ // G simple-hexadecimal-digit-sequence:
934
+ // G hexadecimal-digit
935
+ // G simple-hexadecimal-digit-sequence hexadecimal-digit
936
+ // G
933
937
// G hexadecimal-escape-sequence:
934
938
// G '\x' hexadecimal-digit
935
939
// G hexadecimal-escape-sequence hexadecimal-digit
940
+ // G '\x{' simple-hexadecimal-digit-sequence '}'
936
941
// G
937
942
auto peek_is_hexadecimal_escape_sequence = [&](int offset)
938
943
{
939
944
if (
940
- peek ( offset) == ' \\ '
945
+ peek (offset) == ' \\ '
941
946
&& peek (1 +offset) == ' x'
942
- && (is_hexadecimal_digit (peek (2 +offset))
943
- || (peek (2 +offset) == ' {' && is_hexadecimal_digit (peek (3 +offset)))
947
+ && (
948
+ is_hexadecimal_digit (peek (2 +offset))
949
+ || (peek (2 +offset) == ' {' && is_hexadecimal_digit (peek (3 +offset)))
950
+ )
944
951
)
945
- )
946
952
{
947
- bool has_bracket = peek (2 +offset) == ' {' ;
953
+ auto has_bracket = peek (2 +offset) == ' {' ;
948
954
auto j = 3 ;
949
955
950
956
if (has_bracket) { ++j; }
@@ -961,6 +967,11 @@ auto lex_line(
961
967
if (peek (j+offset) == ' }' ) {
962
968
++j;
963
969
} else {
970
+ errors.emplace_back (
971
+ source_position (lineno, i + offset),
972
+ " invalid hexadecimal escape sequence - \\ x{ must"
973
+ " be followed by hexadecimal digits and a closing }"
974
+ );
964
975
return 0 ;
965
976
}
966
977
}
@@ -972,6 +983,7 @@ auto lex_line(
972
983
// G universal-character-name:
973
984
// G '\u' hex-quad
974
985
// G '\U' hex-quad hex-quad
986
+ // G '\u{' simple-hexadecimal-digit-sequence '}'
975
987
// G
976
988
// G hex-quad:
977
989
// G hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
@@ -981,6 +993,7 @@ auto lex_line(
981
993
if (
982
994
peek (offset) == ' \\ '
983
995
&& peek (1 + offset) == ' u'
996
+ && peek (2 + offset) != ' {'
984
997
)
985
998
{
986
999
auto j = 2 ;
@@ -994,11 +1007,41 @@ auto lex_line(
994
1007
if (j == 6 ) { return j; }
995
1008
errors.emplace_back (
996
1009
source_position ( lineno, i + offset ),
997
- " invalid universal character name ( \\ u must"
998
- " be followed by 4 hexadecimal digits) "
1010
+ " invalid universal character name - \\ u without { must"
1011
+ " be followed by 4 hexadecimal digits"
999
1012
);
1000
1013
}
1001
- if (
1014
+
1015
+ else if (
1016
+ peek (offset) == ' \\ '
1017
+ && peek (1 + offset) == ' u'
1018
+ && peek (2 + offset) == ' {'
1019
+ )
1020
+ {
1021
+ auto j = 4 ;
1022
+
1023
+ while (
1024
+ peek (j + offset)
1025
+ && is_hexadecimal_digit (peek (j + offset))
1026
+ )
1027
+ {
1028
+ ++j;
1029
+ }
1030
+
1031
+ if (peek (j + offset) == ' }' ) {
1032
+ ++j;
1033
+ }
1034
+ else {
1035
+ errors.emplace_back (
1036
+ source_position (lineno, i + offset),
1037
+ " invalid universal character name - \\ u{ must"
1038
+ " be followed by hexadecimal digits and a closing }"
1039
+ );
1040
+ }
1041
+ return j;
1042
+ }
1043
+
1044
+ else if (
1002
1045
peek (offset) == ' \\ '
1003
1046
&& peek (1 +offset) == ' U'
1004
1047
)
@@ -1014,8 +1057,8 @@ auto lex_line(
1014
1057
if (j == 10 ) { return j; }
1015
1058
errors.emplace_back (
1016
1059
source_position (lineno, i+offset),
1017
- " invalid universal character name ( \\ U must"
1018
- " be followed by 8 hexadecimal digits) "
1060
+ " invalid universal character name - \\ U must"
1061
+ " be followed by 8 hexadecimal digits"
1019
1062
);
1020
1063
}
1021
1064
return 0 ;
0 commit comments