@@ -1155,6 +1155,40 @@ unichar readUtf8CodePoint4c(const char *s) {
11551155 '\\E' => { /* matched a backslash followed by E (presumably the end of a \Q...\E quoted section - confirm) */
11561156 fgoto main; /* Ragel jump: continue scanning in the machine named main */
11571157 };
1158+
1159+ # Unicode characters (multi-byte UTF-8 sequences)
1160+ utf8_2c when is_utf8 => { /* rule fires for a utf8_2c token only while the is_utf8 condition holds */
1161+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1162+ /* leverage ComponentClass to generate the vertices: a class containing only this code point is built, finalized and appended to the current sequence */
1163+ auto cc = getComponentClass(mode);
1164+ cc->add(readUtf8CodePoint2c(ts)); /* ts is the Ragel token-start pointer; the helper decodes the code point from there (presumably a 2-byte sequence, per its name) */
1165+ cc->finalize();
1166+ currentSeq->addComponent(move(cc)); /* cc is moved into the sequence and must not be used afterwards */
1167+ };
1168+
1169+ utf8_3c when is_utf8 => { /* rule fires for a utf8_3c token only while the is_utf8 condition holds */
1170+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1171+ /* leverage ComponentClass to generate the vertices: a class containing only this code point is built, finalized and appended to the current sequence */
1172+ auto cc = getComponentClass(mode);
1173+ cc->add(readUtf8CodePoint3c(ts)); /* ts is the Ragel token-start pointer; the helper decodes the code point from there (presumably a 3-byte sequence, per its name) */
1174+ cc->finalize();
1175+ currentSeq->addComponent(move(cc)); /* cc is moved into the sequence and must not be used afterwards */
1176+ };
1177+
1178+ utf8_4c when is_utf8 => { /* rule fires for a utf8_4c token only while the is_utf8 condition holds */
1179+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1180+ /* leverage ComponentClass to generate the vertices: a class containing only this code point is built, finalized and appended to the current sequence */
1181+ auto cc = getComponentClass(mode);
1182+ cc->add(readUtf8CodePoint4c(ts)); /* ts is the Ragel token-start pointer; the helper decodes the code point from there (presumably a 4-byte sequence, per its name) */
1183+ cc->finalize();
1184+ currentSeq->addComponent(move(cc)); /* cc is moved into the sequence and must not be used afterwards */
1185+ };
1186+
1187+ hi_byte when is_utf8 => { /* NOTE(review): presumably reached only for a high byte that none of the utf8_Nc rules above matched - confirm against the hi_byte definition */
1188+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1189+ throwInvalidUtf8(); /* reject the input; the helper name indicates it raises an invalid-UTF-8 error */
1190+ };
1191+
11581192 # Literal character
11591193 any => {
11601194 addLiteral(currentSeq, *ts, mode);
@@ -1169,6 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) {
11691203 '\\E' => { /* matched a backslash followed by E (presumably the end of a \Q...\E quoted section - confirm) */
11701204 fret; /* Ragel return: resume the machine that entered this one via fcall */
11711205 };
1206+
1207+ # Unicode characters (multi-byte UTF-8 sequences)
1208+ utf8_2c when is_utf8 => { /* rule fires for a utf8_2c token only while the is_utf8 condition holds */
1209+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1210+ currentCls->add(readUtf8CodePoint2c(ts)); /* decode the code point starting at the Ragel token-start pointer ts and add it to the class being built */
1211+ inCharClassEarly = false; /* NOTE(review): presumably records that the class has received a member - confirm where inCharClassEarly is read */
1212+ };
1213+
1214+ utf8_3c when is_utf8 => { /* rule fires for a utf8_3c token only while the is_utf8 condition holds */
1215+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1216+ currentCls->add(readUtf8CodePoint3c(ts)); /* decode the code point starting at the Ragel token-start pointer ts and add it to the class being built */
1217+ inCharClassEarly = false; /* NOTE(review): presumably records that the class has received a member - confirm where inCharClassEarly is read */
1218+ };
1219+
1220+ utf8_4c when is_utf8 => { /* rule fires for a utf8_4c token only while the is_utf8 condition holds */
1221+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1222+ currentCls->add(readUtf8CodePoint4c(ts)); /* decode the code point starting at the Ragel token-start pointer ts and add it to the class being built */
1223+ inCharClassEarly = false; /* NOTE(review): presumably records that the class has received a member - confirm where inCharClassEarly is read */
1224+ };
1225+
1226+ hi_byte when is_utf8 => { /* NOTE(review): presumably reached only for a high byte that none of the utf8_Nc rules above matched - confirm against the hi_byte definition */
1227+ assert(mode.utf8); /* sanity check: the mode flag is expected to agree with the rule guard */
1228+ throwInvalidUtf8(); /* reject the input; the helper name indicates it raises an invalid-UTF-8 error */
1229+ };
1230+
11721231 # Literal character
11731232 any => {
11741233 currentCls->add(*ts);
0 commit comments