|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | + |
| 5 | +namespace JonPurvis\Profanify\Support; |
| 6 | + |
/**
 * Normalises Russian text (folding common Latin/digit/symbol substitutions
 * into Cyrillic letters) and checks it against the Russian profanity list.
 */
final class RussianNormalizer
{
    /** Path of the PHP file that returns the list of Russian profanities. */
    private const PROFANITIES_FILE = __DIR__.'/../Config/profanities/ru.php';

    /**
     * Substitutions applied to already lower-cased text.
     *
     * Latin letters, digits and symbols are mapped to Cyrillic letters, and a
     * few separator characters are dropped outright.
     *
     * NOTE(review): 'y' maps to 'й' rather than the look-alike 'у' — confirm
     * this is intended.
     */
    private const SUBSTITUTIONS = [
        // 'ё' (U+0451) lies outside the а-я range, so fold it into 'е' here
        // or the final filter would delete it.
        'ё' => 'е',
        '@' => 'а', '0' => 'о', '1' => 'и', '3' => 'з', '4' => 'ч', '6' => 'б',
        'a' => 'а', 'c' => 'с', 'e' => 'е', 'o' => 'о', 'p' => 'р', 'x' => 'х', 'y' => 'й', 'k' => 'к',
        'b' => 'б', 'd' => 'д', 'g' => 'г', 'h' => 'н', 'm' => 'м', 't' => 'т', 'v' => 'в', 'i' => 'и',
        '|' => 'л', '!' => 'и', '_' => '', '-' => '', '*' => '', '.' => '', ',' => '',
    ];

    /**
     * Reduce text to a lower-case string made only of the letters а-я.
     *
     * The text is lower-cased first so that an upper-case 'Ё' is folded to
     * 'е' as well; the substitution table is then applied and every remaining
     * character outside а-я (spaces, punctuation, unmapped Latin letters) is
     * removed.
     *
     * @return string the normalised text, or '' if the regex replacement fails
     */
    public static function normalizeRussianText(string $text): string
    {
        $text = strtr(mb_strtolower($text, 'UTF-8'), self::SUBSTITUTIONS);

        // preg_replace() yields null on error; fall back to an empty string.
        return preg_replace('/[^а-я]+/u', '', $text) ?? '';
    }

    /**
     * List the profanities that occur as substrings of the normalised text.
     *
     * @return array<int, string>|null matching list entries, or null when none match
     */
    public static function filterRussianProfanity(string $text): ?array
    {
        $normalized = self::normalizeRussianText($text);

        $found = array_values(array_filter(
            self::loadProfanities(),
            static fn ($bad): bool => self::containsWord($normalized, $bad),
        ));

        return $found !== [] ? $found : null;
    }

    /**
     * Check a file line by line and fail if any line contains a profanity.
     *
     * A path that file() cannot read is treated like an empty file, so it
     * passes the check.
     *
     * @throws \Exception listing the 1-based numbers of the offending lines
     */
    public static function assertNoRussianProfanity(string $filePath): void
    {
        $lines = file($filePath, FILE_IGNORE_NEW_LINES) ?: [];
        $badWords = self::loadProfanities();

        $offended = [];

        foreach ($lines as $num => $line) {
            $norm = self::normalizeRussianText($line);

            foreach ($badWords as $bad) {
                if (self::containsWord($norm, $bad)) {
                    // file() keys are 0-based; report human-friendly line numbers.
                    $offended[] = $num + 1;

                    // One hit is enough to flag the line.
                    break;
                }
            }
        }

        if ($offended !== []) {
            throw new \Exception(
                sprintf('Profanity in %s, lines: %s', $filePath, implode(', ', $offended)),
            );
        }
    }

    /**
     * Load the profanity list from the config file.
     *
     * @return array<int, string> the list, or [] if the file did not return an array
     */
    private static function loadProfanities(): array
    {
        $profanities = include self::PROFANITIES_FILE;

        return is_array($profanities) ? $profanities : [];
    }

    /**
     * Tell whether a non-empty list entry occurs in the normalised text.
     *
     * Empty entries are skipped because str_contains() matches '' everywhere.
     *
     * @param mixed $bad a list entry; cast to string before matching
     */
    private static function containsWord(string $normalized, mixed $bad): bool
    {
        return $bad !== '' && str_contains($normalized, (string) $bad);
    }
}
0 commit comments