|
<?php

declare(strict_types=1);

namespace JonPurvis\Profanify\Support;

/**
 * Normalises obfuscated Russian text and scans it for dictionary profanities.
 *
 * Text is lower-cased, look-alike Latin letters / digits / symbols are mapped
 * to Cyrillic, and everything outside "а".."я" is dropped before matching.
 */
final class RussianNormalizer
{
    /**
     * Single-character substitutions applied after lower-casing.
     *
     * Look-alike characters map to a Cyrillic letter; separator characters
     * map to the empty string so they cannot be used to split a word.
     */
    private const SUBSTITUTIONS = [
        '@' => 'а', '0' => 'о', '1' => 'и', '3' => 'з', '4' => 'ч', '6' => 'б',
        'a' => 'а', 'c' => 'с', 'e' => 'е', 'o' => 'о', 'p' => 'р', 'x' => 'х', 'y' => 'й', 'k' => 'к',
        'b' => 'б', 'd' => 'д', 'g' => 'г', 'h' => 'н', 'm' => 'м', 't' => 'т', 'v' => 'в', 'i' => 'и',
        '|' => 'л', '!' => 'и', '_' => '', '-' => '', '*' => '', '.' => '', ',' => '',
    ];

    /**
     * Reduce a string to lower-case Cyrillic letters "а".."я" only.
     *
     * @param string $text Raw input text.
     *
     * @return string Normalised text; empty when nothing survives or the regex fails.
     */
    public static function normalizeRussianText(string $text): string
    {
        // Lower-case first: replacing "ё" before lower-casing leaves an
        // upper-case "Ё" untouched, and the resulting "ё" (U+0451) lies
        // outside the а-я range, so the final filter would delete it.
        $text = str_replace('ё', 'е', mb_strtolower($text, 'UTF-8'));

        $text = strtr($text, self::SUBSTITUTIONS);

        // preg_replace() returns null on failure (e.g. invalid UTF-8 with /u).
        return preg_replace('/[^а-я]+/u', '', $text) ?? '';
    }

    /**
     * Find dictionary words contained in the normalised form of $text.
     *
     * @param string $text Raw input text.
     *
     * @return array<int,string>|null Distinct matched words, or null when none match.
     *
     * @throws \RuntimeException When the dictionary file does not return an array.
     */
    public static function filterRussianProfanity(string $text): ?array
    {
        $normalized = self::normalizeRussianText($text);
        $found = [];

        foreach (self::loadProfanities() as $bad) {
            if (str_contains($normalized, $bad)) {
                $found[] = $bad;
            }
        }

        // A word listed twice in the dictionary is reported once.
        return $found !== [] ? array_values(array_unique($found)) : null;
    }

    /**
     * Assert that no line of the given file contains a dictionary word.
     *
     * @param string $filePath Path of the file to scan.
     *
     * @throws \RuntimeException When the file cannot be read or the dictionary is invalid.
     * @throws \Exception        When at least one line contains profanity; the
     *                           message lists the 1-based line numbers.
     */
    public static function assertNoRussianProfanity(string $filePath): void
    {
        $lines = is_file($filePath) && is_readable($filePath)
            ? file($filePath, FILE_IGNORE_NEW_LINES)
            : false;

        // An unreadable file must not be reported as clean.
        if ($lines === false) {
            throw new \RuntimeException(
                sprintf('Unable to read %s for the profanity check.', $filePath)
            );
        }

        $badWords = self::loadProfanities();
        $offended = [];

        foreach ($lines as $num => $line) {
            $norm = self::normalizeRussianText($line);
            foreach ($badWords as $bad) {
                if (str_contains($norm, $bad)) {
                    $offended[] = $num + 1;
                    break; // one hit is enough to flag the line
                }
            }
        }

        if ($offended !== []) {
            throw new \Exception(
                sprintf('Profanity in %s, lines: %s', $filePath, implode(', ', $offended))
            );
        }
    }

    /**
     * Load the dictionary, cast every entry to string and drop empty entries.
     *
     * @return array<int,string>
     *
     * @throws \RuntimeException When the dictionary file does not return an array.
     */
    private static function loadProfanities(): array
    {
        $words = include __DIR__ . '/../Config/profanities/ru.php';

        // A failed include yields false; scanning against it would find nothing.
        if (!is_array($words)) {
            throw new \RuntimeException('Russian profanity dictionary could not be loaded.');
        }

        $clean = [];
        foreach ($words as $word) {
            $word = (string) $word;
            if ($word !== '') {
                $clean[] = $word;
            }
        }

        return $clean;
    }
}
0 commit comments