Skip to content

Commit 7f641ec

Browse files
fix: add Dutch language support and fix Turkish dictionary (#76)
- Fix Turkish dictionary that incorrectly contained Dutch words
- Create new Dutch dictionary with proper Dutch profanity entries
- Add Dutch language support to JS and Python packages
- Add missing ignoreWords property to ProfanityCheckerConfig
- Remove malformed metadata entries from Turkish dictionary
- Fix alphabetical ordering of Dutch in language types

Co-authored-by: Kaan <kaan-playabit@users.noreply.github.com>
1 parent b538264 commit 7f641ec

File tree

7 files changed

+1238
-190
lines changed

7 files changed

+1238
-190
lines changed

packages/js/src/core/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export interface ProfanityCheckerConfig {
99
customWords?: string[];
1010
replaceWith?: string;
1111
severityLevels?: boolean;
12+
ignoreWords?: string[];
1213
allowObfuscatedMatch?: boolean;
1314
fuzzyToleranceLevel?: number;
1415
minSeverity?: SeverityLevel;

packages/js/src/data/dictionary.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import Arabic from '@shared/dictionaries/arabic.json';
22
import Chinese from '@shared/dictionaries/chinese.json';
33
import Czech from '@shared/dictionaries/czech.json';
44
import Danish from '@shared/dictionaries/danish.json';
5+
import Dutch from '@shared/dictionaries/dutch.json';
56
import Esperanto from '@shared/dictionaries/esperanto.json';
67
import English from '@shared/dictionaries/english.json';
78
import Finnish from '@shared/dictionaries/finnish.json';
@@ -26,6 +27,7 @@ export default {
2627
chinese: Chinese.words,
2728
czech: Czech.words,
2829
danish: Danish.words,
30+
dutch: Dutch.words,
2931
english: English.words,
3032
esperanto: Esperanto.words,
3133
finnish: Finnish.words,

packages/js/src/types/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ export type Language =
1515
| 'chinese'
1616
| 'czech'
1717
| 'danish'
18+
| 'dutch'
1819
| 'english'
1920
| 'esperanto'
2021
| 'finnish'

packages/py/glin_profanity/data/dictionary.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def _load_dictionaries(self) -> None:
3131
"chinese": "chinese.json",
3232
"czech": "czech.json",
3333
"danish": "danish.json",
34+
"dutch": "dutch.json",
3435
"english": "english.json",
3536
"esperanto": "esperanto.json",
3637
"finnish": "finnish.json",

packages/py/glin_profanity/types/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"chinese",
1313
"czech",
1414
"danish",
15+
"dutch",
1516
"english",
1617
"esperanto",
1718
"finnish",

shared/dictionaries/dutch.json

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
{
2+
"words": [
3+
"aardappels afgieten",
4+
"achter het raam zitten",
5+
"afberen",
6+
"aflebberen",
7+
"afrossen",
8+
"afrukken",
9+
"aftrekken",
10+
"afwerkplaats",
11+
"afzeiken",
12+
"afzuigen",
13+
"anderhalve man en een paardekop",
14+
"anita",
15+
"asbak",
16+
"aso",
17+
"bagger schijten",
18+
"balen",
19+
"bedonderen",
20+
"befborstel",
21+
"beffen",
22+
"bekken",
23+
"belazeren",
24+
"besodemieterd zijn",
25+
"besodemieteren",
26+
"beurt",
27+
"boemelen",
28+
"boerelul",
29+
"boerenpummel",
30+
"bokkelul",
31+
"botergeil",
32+
"broekhoesten",
33+
"brugpieper",
34+
"buffelen",
35+
"buiten de pot piesen",
36+
"da's kloten van de bok",
37+
"de ballen",
38+
"de hoer spelen",
39+
"de hond uitlaten",
40+
"de koffer induiken",
41+
"del",
42+
"de pijp aan maarten geven",
43+
"de pijp uitgaan",
44+
"dombo",
45+
"draaikont",
46+
"driehoog achter wonen",
47+
"drol",
48+
"drooggeiler",
49+
"droogkloot",
50+
"een beurt geven",
51+
"een nummertje maken",
52+
"een wip maken",
53+
"eikel",
54+
"engerd",
55+
"flamoes",
56+
"flikken",
57+
"flikker",
58+
"gadverdamme",
59+
"galbak",
60+
"gat",
61+
"gedoogzone",
62+
"geilneef",
63+
"gesodemieter",
64+
"godverdomme",
65+
"graftak",
66+
"gras maaien",
67+
"gratenkut",
68+
"greppeldel",
69+
"griet",
70+
"hoempert",
71+
"hoer",
72+
"hoerenbuurt",
73+
"hoerenloper",
74+
"hoerig",
75+
"hol",
76+
"hufter",
77+
"huisdealer",
78+
"johny",
79+
"kanen",
80+
"kettingzeug",
81+
"klaarkomen",
82+
"klerebeer",
83+
"klojo",
84+
"klooien",
85+
"klootjesvolk",
86+
"klootoog",
87+
"klootzak",
88+
"kloten",
89+
"knor",
90+
"kont",
91+
"kontneuken",
92+
"krentekakker",
93+
"kut",
94+
"kuttelikkertje",
95+
"kwakkie",
96+
"liefdesgrot",
97+
"lul",
98+
"lul-de-behanger",
99+
"lulhannes",
100+
"lummel",
101+
"mafketel",
102+
"matennaaier",
103+
"matje",
104+
"mof",
105+
"muts",
106+
"naaien",
107+
"naakt",
108+
"neuken",
109+
"neukstier",
110+
"nicht",
111+
"oetlul",
112+
"opgeilen",
113+
"opkankeren",
114+
"oprotten",
115+
"opsodemieteren",
116+
"op z'n hondjes",
117+
"op z'n sodemieter geven",
118+
"opzouten",
119+
"ouwehoer",
120+
"ouwehoeren",
121+
"ouwe rukker",
122+
"paal",
123+
"paardelul",
124+
"palen",
125+
"penoze",
126+
"piesen",
127+
"pijpbekkie",
128+
"pijpen",
129+
"pik",
130+
"pleurislaaier",
131+
"poep",
132+
"poepen",
133+
"poot",
134+
"portiekslet",
135+
"pot",
136+
"potverdorie",
137+
"publiciteitsgeil",
138+
"raaskallen",
139+
"reet",
140+
"reetridder",
141+
"voor zijn reet trappen",
142+
"remsporen",
143+
"reutelen",
144+
"rothoer",
145+
"rotzak",
146+
"rukhond",
147+
"rukken",
148+
"schatje",
149+
"schijt",
150+
"schijten",
151+
"schoft",
152+
"schuinsmarcheerder",
153+
"shit",
154+
"slempen",
155+
"slet",
156+
"sletterig",
157+
"slik mijn zaad",
158+
"snol",
159+
"spuiten",
160+
"standje",
161+
"standje-69",
162+
"stoephoer",
163+
"stootje",
164+
"stront",
165+
"sufferd",
166+
"tapijtnek",
167+
"teef",
168+
"temeier",
169+
"teringlijer",
170+
"toeter",
171+
"tongzoen",
172+
"triootje",
173+
"trottoir prostituée",
174+
"trottoirteef",
175+
"vergallen",
176+
"verkloten",
177+
"verneuken",
178+
"viespeuk",
179+
"vingeren",
180+
"vleesroos",
181+
"voor jan lul",
182+
"voor jan-met-de-korte-achternaam",
183+
"watje",
184+
"welzijnsmafia",
185+
"wijf",
186+
"wippen",
187+
"wuftje",
188+
"zaadje",
189+
"zakkenwasser",
190+
"zeiken",
191+
"zeiker",
192+
"zuigen",
193+
"zuiplap"
194+
]
195+
}

0 commit comments

Comments
 (0)