Skip to content

Commit 7fc021f

Browse files
committed
Updated
1 parent d096d33 commit 7fc021f

File tree

2 files changed

+134
-7
lines changed

2 files changed

+134
-7
lines changed

src/Exception/InvalidStringException.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ class InvalidStringException extends UnicodeException
2424
/**
2525
* @var string
2626
*/
27-
protected $string;
27+
protected string $string;
2828

2929
/**
3030
* @var int
3131
*/
32-
protected $offset;
32+
protected int $offset;
3333

3434
/**
3535
* @param string $string

src/UnicodeString.php

Lines changed: 132 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
InvalidCodePointException
2828
};
2929

30-
final class UnicodeString implements Countable, ArrayAccess, JsonSerializable
30+
class UnicodeString implements Countable, ArrayAccess, JsonSerializable
3131
{
3232
const KEEP_CASE = 0;
3333

@@ -832,7 +832,7 @@ public function __invoke(int $offset): int
832832
/**
833833
* @inheritDoc
834834
*/
835-
public function offsetExists($offset)
835+
public function offsetExists($offset): bool
836836
{
837837
// Allow negative index
838838
if ($offset < 0) {
@@ -845,7 +845,7 @@ public function offsetExists($offset)
845845
/**
846846
* @inheritDoc
847847
*/
848-
public function offsetGet($offset)
848+
public function offsetGet($offset): string
849849
{
850850
if ($offset < 0) {
851851
if ($offset + $this->length < 0) {
@@ -905,7 +905,7 @@ public function offsetUnset($offset)
905905
/**
906906
* @inheritDoc
907907
*/
908-
public function count()
908+
public function count(): int
909909
{
910910
return $this->length;
911911
}
@@ -925,7 +925,7 @@ public function __toString(): string
925925
/**
926926
* @inheritDoc
927927
*/
928-
public function jsonSerialize()
928+
public function jsonSerialize(): string
929929
{
930930
return $this->__toString();
931931
}
@@ -1156,6 +1156,122 @@ public static function getCodePointsFromString(string $str, int $mode = self::KE
11561156
return $codes;
11571157
}
11581158

1159+
/**
 * Lazily decodes a UTF-8 encoded string, yielding one entry per character.
 *
 * The key is the byte offset inside $str at which the character starts
 * (it is NOT the ordinal position of the character: a multi-byte character
 * advances the key by 2, 3 or 4).
 * The value is a two element array:
 * - first element is an integer representing the code point
 * - second element is an array of integers (length 1 to 4) representing bytes
 *
 * This is a generator, so decoding and validation happen while iterating:
 * the exception is raised only when the iteration reaches the invalid byte,
 * after all preceding characters have already been yielded.
 *
 * @param string $str
 * @return iterable<int, array{0: int, 1: int[]}>
 * @throws InvalidStringException When a malformed or truncated sequence is found;
 *                                it is constructed with $str and the byte offset
 *                                at which decoding failed
 */
public static function walkString(string $str): iterable
{
    $i = 0;
    $length = strlen($str);

    while ($i < $length) {
        // Byte offset of the lead byte, used as the yielded key
        $index = $i;

        $ord0 = ord($str[$i++]);

        // Single byte (ASCII)
        if ($ord0 < 0x80) {
            yield $index => [
                $ord0,
                [$ord0]
            ];
            continue;
        }

        // Reject a lead byte with nothing after it, stray continuation bytes
        // and overlong two-byte leads (< 0xC2), and leads that can only
        // encode values above U+10FFFF (> 0xF4)
        if ($i === $length || $ord0 < 0xC2 || $ord0 > 0xF4) {
            throw new InvalidStringException($str, $i - 1);
        }

        $ord1 = ord($str[$i++]);

        // Two byte sequence
        if ($ord0 < 0xE0) {
            if ($ord1 < 0x80 || $ord1 >= 0xC0) {
                throw new InvalidStringException($str, $i - 1);
            }

            yield $index => [
                ($ord0 - 0xC0) * 64 + $ord1 - 0x80,
                [$ord0, $ord1]
            ];

            continue;
        }

        if ($i === $length) {
            throw new InvalidStringException($str, $i - 1);
        }

        $ord2 = ord($str[$i++]);

        // Three byte sequence
        if ($ord0 < 0xF0) {
            if ($ord0 === 0xE0) {
                // Second byte below 0xA0 would be an overlong encoding
                if ($ord1 < 0xA0 || $ord1 >= 0xC0) {
                    throw new InvalidStringException($str, $i - 2);
                }
            } elseif ($ord0 === 0xED) {
                // Second byte from 0xA0 up would encode a surrogate (U+D800..U+DFFF)
                if ($ord1 < 0x80 || $ord1 >= 0xA0) {
                    throw new InvalidStringException($str, $i - 2);
                }
            } elseif ($ord1 < 0x80 || $ord1 >= 0xC0) {
                throw new InvalidStringException($str, $i - 2);
            }

            if ($ord2 < 0x80 || $ord2 >= 0xC0) {
                throw new InvalidStringException($str, $i - 1);
            }

            yield $index => [
                ($ord0 - 0xE0) * 0x1000 + ($ord1 - 0x80) * 64 + $ord2 - 0x80,
                [$ord0, $ord1, $ord2]
            ];

            continue;
        }

        if ($i === $length) {
            throw new InvalidStringException($str, $i - 1);
        }

        $ord3 = ord($str[$i++]);

        // Four byte sequence: the guard on the lead byte above already
        // restricts $ord0 to 0xF0..0xF4 here, so no further range test
        // on the lead byte is needed
        if ($ord0 === 0xF0) {
            // Second byte below 0x90 would be an overlong encoding
            if ($ord1 < 0x90 || $ord1 >= 0xC0) {
                throw new InvalidStringException($str, $i - 3);
            }
        } elseif ($ord0 === 0xF4) {
            // Second byte from 0x90 up would encode a value above U+10FFFF
            if ($ord1 < 0x80 || $ord1 >= 0x90) {
                throw new InvalidStringException($str, $i - 3);
            }
        } elseif ($ord1 < 0x80 || $ord1 >= 0xC0) {
            throw new InvalidStringException($str, $i - 3);
        }

        if ($ord2 < 0x80 || $ord2 >= 0xC0) {
            throw new InvalidStringException($str, $i - 2);
        }

        if ($ord3 < 0x80 || $ord3 >= 0xC0) {
            throw new InvalidStringException($str, $i - 1);
        }

        yield $index => [
            ($ord0 - 0xF0) * 0x40000 + ($ord1 - 0x80) * 0x1000 + ($ord2 - 0x80) * 64 + $ord3 - 0x80,
            [$ord0, $ord1, $ord2, $ord3]
        ];
    }
}
1274+
11591275
/**
11601276
* Converts each code point to a char
11611277
* @param array $codes
@@ -1179,6 +1295,16 @@ public static function getCharsFromCodePoints(array $codes, int $mode = self::KE
11791295
return $codes;
11801296
}
11811297

1298+
/**
 * Splits a string into an array of chars
 *
 * The string is first decoded into code points (using the default mode of
 * getCodePointsFromString), then the code points are converted to chars
 * by getCharsFromCodePoints, which receives the given $mode.
 *
 * @param string $str
 * @param int $mode Forwarded as-is to getCharsFromCodePoints
 * @return string[]
 */
public static function getCharsFromString(string $str, int $mode = self::KEEP_CASE): array
{
    $codePoints = self::getCodePointsFromString($str);

    return self::getCharsFromCodePoints($codePoints, $mode);
}
1307+
11821308
/**
11831309
* Converts all code points to chars and returns the string
11841310
* Invalid code points are ignored
@@ -1463,6 +1589,7 @@ private static function getMapByMode(int $mode): array
14631589
return [];
14641590
}
14651591

1592+
/** @noinspection PhpIncludeInspection */
14661593
return self::$maps[$mode] = include(__DIR__ . "/../res/{$file}.php");
14671594
}
14681595
}

0 commit comments

Comments
 (0)