diff --git a/src/Expectations/Profanity.php b/src/Expectations/Profanity.php index 2d4efb4..64950fb 100644 --- a/src/Expectations/Profanity.php +++ b/src/Expectations/Profanity.php @@ -3,6 +3,7 @@ declare(strict_types=1); use JonPurvis\Profanify\Expectations\TargetedProfanity; +use JonPurvis\Profanify\Support\Russian; use Pest\Arch\Contracts\ArchExpectation; use Pest\Arch\Support\FileLineFinder; use PHPUnit\Architecture\Elements\ObjectDescription; @@ -46,12 +47,19 @@ function (ObjectDescription $object) use (&$foundWords, $excluding, $including, $fileContents = (string) file_get_contents($object->path); - $foundWords = array_filter($words, function (string $word) use ($fileContents): bool { + $russian = new Russian; + + $foundWords = array_filter($words, function (string $word) use ($fileContents, $russian): bool { if (preg_match('/\b'.preg_quote($word, '/').'\b/i', $fileContents)) { return true; } - preg_match_all('/[a-zA-Z]\w*/', $fileContents, $matches); + if ($russian->is($word)) { + $fileContents = Russian::normalize($fileContents); + preg_match_all(Russian::pattern(), $fileContents, $matches); + } else { + preg_match_all('/[a-zA-Z]\w*/', $fileContents, $matches); + } foreach ($matches[0] as $token) { $snakeParts = explode('_', $token); @@ -78,6 +86,10 @@ function (ObjectDescription $object) use (&$foundWords, $excluding, $including, return false; }); + if ($russian->isDetected()) { + $foundWords = Russian::backToOrigin($foundWords); + } + return $foundWords === []; }, function ($path) use (&$foundWords): string { diff --git a/src/Support/Russian.php b/src/Support/Russian.php new file mode 100644 index 0000000..e3ec65b --- /dev/null +++ b/src/Support/Russian.php @@ -0,0 +1,65 @@ + */ + private static array $normalized = []; + + /** @var array */ + private static array $toNormalize = [ + '3' => 'з', '4' => 'ч', '6' => 'б', + 'a' => 'а', 'c' => 'с', 'e' => 'е', 'o' => 'о', 'p' => 'р', 'x' => 'х', 'k' => 'к', + 'A' => 'д', 'r' => 'г', 'H' => 'н', 'M' => 'м', 'T' => 'т', 'B' => 'в', + ]; + + public function is(string $text): bool + { + if ((bool) preg_match(self::$pattern, $text)) { + $this->detected = true; + } + + return $this->detected; + } + + public function isDetected(): bool + { + return $this->detected; + } + + public static function pattern(): string + { + return self::$pattern; + } + + public static function normalize(string $text): string + { + preg_match_all('/\w+/u', $text, $words); + $toNormalizeKeysString = implode('', array_keys(self::$toNormalize)); + + foreach ($words[0] as $word) { + if (strpbrk($word, $toNormalizeKeysString)) { + $normalized = strtr($word, self::$toNormalize); + self::$normalized[$word] = $normalized; + } + } + + return str_replace(array_keys(self::$normalized), array_values(self::$normalized), $text); + } + + /** + * @param array $profanities + * @return array + */ + public static function backToOrigin(array $profanities): array + { + return array_map(fn ($profanity): string => array_search($profanity, self::$normalized) ?: $profanity, $profanities); + } +} diff --git a/src/Support/RussianNormalizer.php b/src/Support/RussianNormalizer.php deleted file mode 100644 index be66188..0000000 --- a/src/Support/RussianNormalizer.php +++ /dev/null @@ -1,68 +0,0 @@ - 'а', '0' => 'о', '1' => 'и', '3' => 'з', '4' => 'ч', '6' => 'б', - 'a' => 'а', 'c' => 'с', 'e' => 'е', 'o' => 'о', 'p' => 'р', 'x' => 'х', 'y' => 'й', 'k' => 'к', - 'b' => 'б', 'd' => 'д', 'g' => 'г', 'h' => 'н', 'm' => 'м', 't' => 'т', 'v' => 'в', 'i' => 'и', - '|' => 'л', '!' => 'и', '_' => '', '-' => '', '*' => '', '.' => '', ',' => '', - ]); - - return preg_replace('/[^а-я]+/u', '', $text) ?: ''; - } - - /** - * @return array|null - */ - public static function filterRussianProfanity(string $text): ?array - { - /** @var array $profanities */ - $profanities = include __DIR__.'/../Config/profanities/ru.php'; - - $normalized = self::normalizeRussianText($text); - $found = []; - - foreach ($profanities as $bad) { - if ($bad !== '' && str_contains($normalized, (string) $bad)) { - $found[] = $bad; - } - } - - return $found !== [] ? $found : null; - } - - public static function assertNoRussianProfanity(string $filePath): void - { - $lines = file($filePath, FILE_IGNORE_NEW_LINES) ?: []; - /** @var array $badWords */ - $badWords = include __DIR__.'/../Config/profanities/ru.php'; - - $offended = []; - - foreach ($lines as $num => $line) { - $norm = self::normalizeRussianText($line); - - foreach ($badWords as $bad) { - if ($bad !== '' && str_contains($norm, (string) $bad)) { - $offended[] = $num + 1; - break; - } - } - } - - if ($offended !== []) { - throw new \Exception( - sprintf('Profanity in %s, lines: %s', $filePath, implode(', ', $offended)), - ); - } - } -} diff --git a/tests/Fixtures/HasExplicitRussianProfanity.php b/tests/Fixtures/HasExplicitRussianProfanity.php index 99f0171..529bde7 100644 --- a/tests/Fixtures/HasExplicitRussianProfanity.php +++ b/tests/Fixtures/HasExplicitRussianProfanity.php @@ -7,4 +7,9 @@ class HasExplicitRussianProfanity { public string $bad = 'Это полная хуйня!'; + + public function е6ёт() + { + // Comment... + } } diff --git a/tests/Fixtures/HasMaskedRussianProfanity.php b/tests/Fixtures/HasMaskedRussianProfanity.php deleted file mode 100644 index 9da6ffe..0000000 --- a/tests/Fixtures/HasMaskedRussianProfanity.php +++ /dev/null @@ -1,10 +0,0 @@ -toHaveNoProfanity(); })->throws(ArchExpectationFailedException::class); +it('fails if a file contains russian profanity', function () { + expect('Tests\Fixtures\HasExplicitRussianProfanity')->toHaveNoProfanity(language: 'ru'); +})->throws(ArchExpectationFailedException::class); + it('fails if file contains profanity manually included', function () { expect('Tests\Fixtures\HasUncoveredProfanity') ->toHaveNoProfanity(including: ['dagnabbit']);