27
27
InvalidCodePointException
28
28
};
29
29
30
- final class UnicodeString implements Countable, ArrayAccess, JsonSerializable
30
+ class UnicodeString implements Countable, ArrayAccess, JsonSerializable
31
31
{
32
32
const KEEP_CASE = 0 ;
33
33
@@ -832,7 +832,7 @@ public function __invoke(int $offset): int
832
832
/**
833
833
* @inheritDoc
834
834
*/
835
- public function offsetExists ($ offset )
835
+ public function offsetExists ($ offset ): bool
836
836
{
837
837
// Allow negative index
838
838
if ($ offset < 0 ) {
@@ -845,7 +845,7 @@ public function offsetExists($offset)
845
845
/**
846
846
* @inheritDoc
847
847
*/
848
- public function offsetGet ($ offset )
848
+ public function offsetGet ($ offset ): string
849
849
{
850
850
if ($ offset < 0 ) {
851
851
if ($ offset + $ this ->length < 0 ) {
@@ -905,7 +905,7 @@ public function offsetUnset($offset)
905
905
/**
 * Countable implementation: reports the stored length of this string.
 *
 * @inheritDoc
 * @return int The current value of the length property
 */
public function count(): int
{
    return $this->length;
}
@@ -925,7 +925,7 @@ public function __toString(): string
925
925
/**
 * JsonSerializable implementation: the JSON value of this object is
 * whatever __toString() returns.
 *
 * @inheritDoc
 * @return string
 */
public function jsonSerialize(): string
{
    // Call __toString() explicitly instead of casting, so the call is a
    // plain method invocation.
    return $this->__toString();
}
@@ -1156,6 +1156,122 @@ public static function getCodePointsFromString(string $str, int $mode = self::KE
1156
1156
return $ codes ;
1157
1157
}
1158
1158
1159
/**
 * Lazily decodes a UTF-8 encoded string, one code point per iteration.
 *
 * The key is the byte offset in $str at which the current char starts
 * (it is an offset into the raw bytes, not a running char count).
 * Value is a two element array
 *  - first element is an integer representing the code point
 *  - second element is an array of integers (length 1 to 4) representing bytes
 *
 * Overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and values above
 * U+10FFFF are rejected. Because this is a generator, nothing is validated
 * until the result is iterated, and an exception is raised only when the
 * iteration reaches the offending sequence.
 *
 * @param string $str
 * @return iterable
 * @throws InvalidStringException On a malformed or truncated sequence; it is
 *         given the string and the byte offset at which decoding failed
 */
public static function walkString(string $str): iterable
{
    $i = 0;
    $length = strlen($str);

    while ($i < $length) {
        $index = $i;
        $ord0 = ord($str[$i++]);

        // One byte: plain ASCII
        if ($ord0 < 0x80) {
            yield $index => [$ord0, [$ord0]];
            continue;
        }

        // 0x80..0xC1 cannot start a sequence (continuation bytes and overlong
        // two byte leads), 0xF5..0xFF would encode values above U+10FFFF,
        // and any remaining lead byte needs at least one more byte.
        if ($i === $length || $ord0 < 0xC2 || $ord0 > 0xF4) {
            throw new InvalidStringException($str, $i - 1);
        }

        $ord1 = ord($str[$i++]);

        // Two bytes: lead 0xC2..0xDF
        if ($ord0 < 0xE0) {
            if ($ord1 < 0x80 || $ord1 >= 0xC0) {
                throw new InvalidStringException($str, $i - 1);
            }

            yield $index => [
                ($ord0 - 0xC0) * 64 + $ord1 - 0x80,
                [$ord0, $ord1]
            ];
            continue;
        }

        if ($i === $length) {
            throw new InvalidStringException($str, $i - 1);
        }

        $ord2 = ord($str[$i++]);

        // Three bytes: lead 0xE0..0xEF
        if ($ord0 < 0xF0) {
            // 0xE0 needs a second byte >= 0xA0 (anything lower is overlong),
            // 0xED needs a second byte < 0xA0 (anything higher is a surrogate)
            $min = $ord0 === 0xE0 ? 0xA0 : 0x80;
            $max = $ord0 === 0xED ? 0xA0 : 0xC0;

            if ($ord1 < $min || $ord1 >= $max) {
                throw new InvalidStringException($str, $i - 2);
            }

            if ($ord2 < 0x80 || $ord2 >= 0xC0) {
                throw new InvalidStringException($str, $i - 1);
            }

            yield $index => [
                ($ord0 - 0xE0) * 0x1000 + ($ord1 - 0x80) * 64 + $ord2 - 0x80,
                [$ord0, $ord1, $ord2]
            ];
            continue;
        }

        if ($i === $length) {
            throw new InvalidStringException($str, $i - 1);
        }

        $ord3 = ord($str[$i++]);

        // Four bytes: the lead is necessarily 0xF0..0xF4 here, larger values
        // were rejected right after the lead byte was read.
        // 0xF0 needs a second byte >= 0x90 (anything lower is overlong),
        // 0xF4 needs a second byte < 0x90 (anything higher is above U+10FFFF)
        $min = $ord0 === 0xF0 ? 0x90 : 0x80;
        $max = $ord0 === 0xF4 ? 0x90 : 0xC0;

        if ($ord1 < $min || $ord1 >= $max) {
            throw new InvalidStringException($str, $i - 3);
        }

        if ($ord2 < 0x80 || $ord2 >= 0xC0) {
            throw new InvalidStringException($str, $i - 2);
        }

        if ($ord3 < 0x80 || $ord3 >= 0xC0) {
            throw new InvalidStringException($str, $i - 1);
        }

        yield $index => [
            ($ord0 - 0xF0) * 0x40000 + ($ord1 - 0x80) * 0x1000 + ($ord2 - 0x80) * 64 + $ord3 - 0x80,
            [$ord0, $ord1, $ord2, $ord3]
        ];
    }
}
1274
+
1159
1275
/**
1160
1276
* Converts each code point to a char
1161
1277
* @param array $codes
@@ -1179,6 +1295,16 @@ public static function getCharsFromCodePoints(array $codes, int $mode = self::KE
1179
1295
return $ codes ;
1180
1296
}
1181
1297
1298
/**
 * Splits a string into its chars.
 *
 * The string is first turned into code points with
 * getCodePointsFromString(), then those code points are handed to
 * getCharsFromCodePoints() together with $mode.
 *
 * @param string $str
 * @param int $mode Forwarded to getCharsFromCodePoints()
 * @return string[]
 */
public static function getCharsFromString(string $str, int $mode = self::KEEP_CASE): array
{
    $codePoints = self::getCodePointsFromString($str);

    return self::getCharsFromCodePoints($codePoints, $mode);
}
1307
+
1182
1308
/**
1183
1309
* Converts all code points to chars and returns the string
1184
1310
* Invalid code points are ignored
@@ -1463,6 +1589,7 @@ private static function getMapByMode(int $mode): array
1463
1589
return [];
1464
1590
}
1465
1591
1592
+ /** @noinspection PhpIncludeInspection */
1466
1593
return self ::$ maps [$ mode ] = include (__DIR__ . "/../res/ {$ file }.php " );
1467
1594
}
1468
1595
}
0 commit comments