Skip to content

Commit

Permalink
Expand character class for language names
Browse files Browse the repository at this point in the history
  • Loading branch information
nathan-williams committed Jul 22, 2024
1 parent 5dc2b3b commit 640c050
Show file tree
Hide file tree
Showing 14 changed files with 23 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/bdh_Latn.textproto
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: "bdh_Latn"
language: "bdh"
script: "Latn"
name: "Baka (DRC/South Sudan)"
name: "Baka, DRC/South Sudan"
autonym: "Tara Baká"
population: 60000
region: "CD"
Expand Down
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/bkc_Latn.textproto
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: "bkc_Latn"
language: "bkc"
script: "Latn"
name: "Baka (Cameroon/Gabon)"
name: "Baka, Cameroon/Gabon"
population: 71000
region: "CM"
region: "GA"
Expand Down
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/bm_Nkoo.textproto
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
id: "bm_Nkoo"
language: "bm"
script: "Nkoo"
name: "Bambara (Nko)"
name: "Bambara (N’Ko)"
population: 16000000
region: "ML"
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/bsq_Bass.textproto
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: "bsq_Bass"
language: "bsq"
script: "Bass"
name: "Bassa (Vah)"
name: "Bassa (Bassa Vah)"
population: 410000
region: "LR"
region: "SL"
Expand Down
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/cbk_Latn.textproto
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: "cbk_Latn"
language: "cbk"
script: "Latn"
name: "Chavacano, Latin, Philippines"
name: "Chavacano, Philippines (Latin)"
region: "PH"
sample_text {
masthead_full: "TtOo"
Expand Down
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/chn_Dupl.textproto
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
id: "chn_Dupl"
language: "chn"
script: "Dupl"
name: "Chinook Jargon (Duployan shorthand)"
name: "Chinook Jargon (Duployan)"
region: "US"
region: "CA"
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/de_Dupl.textproto
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: "de_Dupl"
language: "de"
script: "Dupl"
name: "German (Duployan shorthand)"
name: "German (Duployan)"
region: "DE"
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/dyu_Nkoo.textproto
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: "dyu_Nkoo"
language: "dyu"
script: "Nkoo"
name: "Dyula (Nko)"
name: "Dyula (N’Ko)"
region: "CI"
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/eto_Latn.textproto
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: "eto_Latn"
language: "eto"
script: "Latn"
name: "Eton (Cameroon)"
name: "Eton, Cameroon"
population: 400000
region: "CM"
exemplar_chars {
Expand Down
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/fr_Dupl.textproto
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: "fr_Dupl"
language: "fr"
script: "Dupl"
name: "French (Duployan shorthand)"
name: "French (Duployan)"
historical: true
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/gcf_Latn.textproto
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: "gcf_Latn"
language: "gcf"
script: "Latn"
name: "Guadeloupean Creole French, Latin, Martinique"
name: "Guadeloupean Creole French, Martinique (Latin)"
region: "GP"
region: "MQ"
sample_text {
Expand Down
2 changes: 1 addition & 1 deletion Lib/gflanguages/data/languages/man_Nkoo.textproto
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: "man_Nkoo"
language: "man"
script: "Nkoo"
name: "Mandingo (Nko)"
name: "Mandingo (N’Ko)"
region: "GN"
3 changes: 1 addition & 2 deletions Lib/gflanguages/data/scripts/Beng.textproto
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
id: "Beng"
name: "Bangla"

name: "Bengali"
12 changes: 10 additions & 2 deletions tests/test_data_languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,15 @@
}

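# "ʼ" (U+02BC) allowed as last character in language name for Metaʼ.
# "’" (U+2019) is allowed in any part of a name (e.g. "Bambara (N’Ko)") and
# "/" only in the ", <qualifier>" part (e.g. "Baka, DRC/South Sudan").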
LANGUAGE_NAME_REGEX = "^[-A-Za-zÀ-ÿ ]+(ʼ)?(, [-A-Za-zÀ-ÿ ]+)?( [(][-A-Za-zÀ-ÿ ]+[)])?$"
LANGUAGE_NAME_REGEX = "^[-’A-Za-zÀ-ÿ ]+(ʼ)?(, [-’A-Za-zÀ-ÿ/ ]+)?( [(][-’A-Za-zÀ-ÿ ]+[)])?$"
# Some scripts have abbreviated names for reference in language names that are
# sufficient in context. If an alternate is listed here, it should be used
# universally and consistently across all language names.
ALTERNATE_SCRIPT_NAMES = {
"Dupl": "Duployan",
"Hans": "Simplified",
"Hant": "Traditional",
}


@pytest.mark.parametrize("lang_code", LANGUAGES)
Expand Down Expand Up @@ -291,7 +299,7 @@ def test_language_uniqueness():
def test_language_name_structure():
languages_with_bad_name_structure = {}
for lang in LANGUAGES.values():
script_name = SCRIPTS[lang.script].name
script_name = SCRIPTS[lang.script].name if lang.script not in ALTERNATE_SCRIPT_NAMES else ALTERNATE_SCRIPT_NAMES[lang.script]
names = [["name", lang.name]]
if lang.preferred_name:
names += [["preferred_name", lang.preferred_name]]
Expand Down

0 comments on commit 640c050

Please sign in to comment.