From 8aca905f59a8acb143139ff3ae3d31fdab906e44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muzaffer=20C=C4=B1kay?= Date: Wed, 1 Apr 2026 20:19:52 +0300 Subject: [PATCH 1/3] Add new stop words and sort alphabetically --- spacy/lang/kmr/stop_words.py | 91 ++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/spacy/lang/kmr/stop_words.py b/spacy/lang/kmr/stop_words.py index aee33c2b748..f9e55ff3db8 100644 --- a/spacy/lang/kmr/stop_words.py +++ b/spacy/lang/kmr/stop_words.py @@ -1,44 +1,75 @@ STOP_WORDS = set( """ -û -li +a +an +ber +belê bi -di +bo +çawa +çend +çi +çima +çiqas da de -ji -ku +di +divê +e +ê +em +ên +ev ew +ewqas ez -tu -em +gelek +hemû +her +hê +hin +hîn hûn -ew -ev -min -te -wî -wê -me -we -wan -vê -vî -va -çi -kî +hwd +in +jê +ji +jî kê -çawa -çima +kêm kengî -li ku -çend -çiqas -her -hin -gelek -hemû kes +kî +ku +lê +lêbelê +li +me +min +mîna +ne +pir +ra +re +ser +te +tenê tişt +tu +û +va +ve +vê +vî +wan +we +wê +weke +wekî +wî +ya +yan +ye +yên """.split() ) From 9acdadcc457e165374390d68f72f856feb489d24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muzaffer=20C=C4=B1kay?= Date: Wed, 1 Apr 2026 20:22:17 +0300 Subject: [PATCH 2/3] Correct spelling --- spacy/lang/kmr/lex_attrs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spacy/lang/kmr/lex_attrs.py b/spacy/lang/kmr/lex_attrs.py index 6b80204104d..14c7af4b82d 100644 --- a/spacy/lang/kmr/lex_attrs.py +++ b/spacy/lang/kmr/lex_attrs.py @@ -12,12 +12,12 @@ "heşt", "neh", "deh", - "yazde", - "dazde", + "yanzde", + "danzde", "sêzde", "çarde", - "pazde", - "şazde", + "panzde", + "şanzde", "hevde", "hejde", "nozde", From 5d3cb6bd9d7c8b91054d680610f1f1ccedd242ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muzaffer=20C=C4=B1kay?= Date: Wed, 1 Apr 2026 20:24:35 +0300 Subject: [PATCH 3/3] Add another variation as Kurmanji is not fully standardized --- spacy/lang/kmr/lex_attrs.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/spacy/lang/kmr/lex_attrs.py b/spacy/lang/kmr/lex_attrs.py index 14c7af4b82d..3be9b52442e 100644 --- a/spacy/lang/kmr/lex_attrs.py +++ b/spacy/lang/kmr/lex_attrs.py @@ -21,6 +21,15 @@ "hevde", "hejde", "nozde", + "yanzdeh", + "danzdeh", + "sêzdeh", + "çardeh", + "panzdeh", + "şanzdeh", + "hevdeh", + "hejdeh", + "nozdeh", "bîst", "sî", "çil",