Skip to content

Commit c296b75

Browse files
UnMelowUnMelow
authored and committed
feat: edit README and add simple regex
1 parent c1d3d12 commit c296b75

File tree

2 files changed

+125
-0
lines changed

2 files changed

+125
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Profanify currently supports the following languages:
138138
- Italian
139139
- Portuguese
140140
- Spanish
141+
- Russian
141142

142143
Each language has its own configuration file. If you'd like to add a language, please create a new configuration file.
143144

src/Support/RussianNormalizer.php

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
<?php
2+
/**
 * Reduces text to a canonical lower-case form so that obfuscated spellings
 * can be compared against a Cyrillic word list.
 *
 * Steps, in order:
 *  1. lower-case the text (UTF-8 aware);
 *  2. fold "ё" into "е";
 *  3. replace Latin letters with the Cyrillic letters listed below;
 *  4. replace a few digits and symbols with the letters listed below;
 *  5. drop every character that is not a Unicode letter or number
 *     (whitespace and punctuation included, so words are joined together).
 *
 * @param string $text Raw input text.
 *
 * @return string Normalized text containing only letters and numbers.
 */
function normalizeRussianText(string $text): string
{
    $text = mb_strtolower($text, 'UTF-8');
    $text = str_replace('ё', 'е', $text);

    // Only lower-case keys are listed: the text has already been lower-cased,
    // so upper-case Latin keys could never match.
    $latinToCyrillic = [
        'a' => 'а',
        'c' => 'с',
        'e' => 'е',
        'o' => 'о',
        'p' => 'р',
        'x' => 'х',
        'y' => 'у',
        'b' => 'б',
        'm' => 'м',
        'h' => 'н',
        'k' => 'к',
        't' => 'т',
        'd' => 'д',
        'f' => 'ф',
        'g' => 'г',
        'i' => 'и',
        'j' => 'ј',
        'l' => 'л',
        'n' => 'п',
        'q' => 'қ',
        'v' => 'в',
        'w' => 'ш',
    ];
    $text = strtr($text, $latinToCyrillic);

    // Digits and symbols commonly used in place of letters.
    // NOTE(review): the original table also had two entries keyed by an empty
    // string (mapping to 'е' and 'р'). strtr() ignores empty keys on PHP 8 and
    // returns false on PHP 7, so they are omitted here; they look like
    // look-alike symbols lost to an encoding problem - restore if known.
    // NOTE(review): '$' maps to a Latin 's', which the Latin table above does
    // not convert - confirm whether Cyrillic 'с' was intended.
    $charSubstitutions = [
        '@' => 'а',
        '£' => 'л',
        '0' => 'о',
        '3' => 'з',
        '4' => 'ч',
        '6' => 'б',
        '1' => 'л',
        '$' => 's',
        '|' => 'л',
        '!' => 'і',
    ];
    $text = strtr($text, $charSubstitutions);

    // Remaining punctuation, symbols and whitespace are removed here, which is
    // why no explicit "punctuation => ''" entries are needed above.
    $stripped = preg_replace('/[^\\p{L}\\p{N}]+/u', '', $text);

    // preg_replace() returns null on a PCRE failure; fall back to the
    // unstripped text so the declared string return type always holds.
    return $stripped ?? $text;
}
74+
75+
/**
 * Finds the dictionary words that occur in the given text after it has been
 * normalized with normalizeRussianText().
 *
 * The word list is loaded from `ru.php` in this file's directory. Dictionary
 * entries are compared as-is (they are not normalized), and matching is by
 * substring. When at least one word matches, a summary is written with
 * error_log().
 *
 * @param string $text Text to scan.
 * @param string $path Optional file path; only used in the logged message.
 *
 * @return array|null Distinct matched words, indexed from 0, or null when
 *                    nothing matched.
 *
 * @throws \RuntimeException When `ru.php` does not yield an array.
 */
function filterRussianProfanity(string $text, string $path = ''): ?array
{
    $dictionaryFile = __DIR__ . '/ru.php';
    $profanities = include $dictionaryFile;

    // include returns false (after a warning) when the file is missing; without
    // this check the scan would silently report "no profanity".
    if (!is_array($profanities)) {
        throw new \RuntimeException("Profanity dictionary '{$dictionaryFile}' did not return an array.");
    }

    $normalizedText = normalizeRussianText($text);

    $found = [];
    foreach ($profanities as $badWord) {
        // An empty needle would match every text.
        if ($badWord === '') {
            continue;
        }
        if (mb_strpos($normalizedText, $badWord) !== false) {
            $found[] = $badWord;
        }
    }

    if ($found === []) {
        return null;
    }

    // array_unique() keeps the original keys, so re-index to return a clean list.
    $uniqueWords = array_values(array_unique($found));

    $message = "Profanity detected";
    if ($path !== '') {
        $message .= " in file '{$path}'";
    }
    $message .= ": [" . implode(', ', $uniqueWords) . "]";

    error_log($message);

    return $uniqueWords;
}
102+
103+
/**
 * Asserts that no line of the given file contains a word from the Russian
 * profanity dictionary.
 *
 * Each line is normalized with normalizeRussianText() and checked, by
 * substring, against the word list loaded from `ru.php` in this file's
 * directory. Line numbers in the error message are 1-based.
 *
 * @param string $filePath Path of the file to check.
 *
 * @throws \RuntimeException When the file cannot be read or `ru.php` does not
 *                           yield an array.
 * @throws \Exception        When at least one line contains a dictionary word.
 */
function assertNoRussianProfanity(string $filePath): void
{
    $lines = file($filePath);

    // file() returns false on failure; iterating that would make the assertion
    // pass without having checked anything.
    if ($lines === false) {
        throw new \RuntimeException("Unable to read '{$filePath}'.");
    }

    $dictionaryFile = __DIR__ . '/ru.php';
    $badWords = include $dictionaryFile;
    if (!is_array($badWords)) {
        throw new \RuntimeException("Profanity dictionary '{$dictionaryFile}' did not return an array.");
    }

    $offenses = [];

    foreach ($lines as $num => $line) {
        $normalizedLine = normalizeRussianText($line);
        foreach ($badWords as $bad) {
            if ($bad !== '' && mb_strpos($normalizedLine, $bad) !== false) {
                // file() keys are 0-based; report 1-based line numbers.
                $offenses[] = $num + 1;
                // One hit is enough to flag the line.
                break;
            }
        }
    }

    if ($offenses === []) {
        return;
    }

    $lineList = implode(', ', $offenses);
    $message = "Expecting '{$filePath}' to not use profanity.\nat {$filePath}:{$lineList}";

    throw new \Exception($message);
}

0 commit comments

Comments
 (0)