diff --git a/Lib/gflanguages/data/languages/bdh_Latn.textproto b/Lib/gflanguages/data/languages/bdh_Latn.textproto index 7e7ccf6d..ecbbb850 100644 --- a/Lib/gflanguages/data/languages/bdh_Latn.textproto +++ b/Lib/gflanguages/data/languages/bdh_Latn.textproto @@ -1,7 +1,7 @@ id: "bdh_Latn" language: "bdh" script: "Latn" -name: "Baka (DRC/South Sudan)" +name: "Baka, DRC/South Sudan" autonym: "Tara Baká" population: 60000 region: "CD" diff --git a/Lib/gflanguages/data/languages/bkc_Latn.textproto b/Lib/gflanguages/data/languages/bkc_Latn.textproto index 02b8bca3..08da4d26 100644 --- a/Lib/gflanguages/data/languages/bkc_Latn.textproto +++ b/Lib/gflanguages/data/languages/bkc_Latn.textproto @@ -1,7 +1,7 @@ id: "bkc_Latn" language: "bkc" script: "Latn" -name: "Baka (Cameroon/Gabon)" +name: "Baka, Cameroon/Gabon" population: 71000 region: "CM" region: "GA" diff --git a/Lib/gflanguages/data/languages/bm_Nkoo.textproto b/Lib/gflanguages/data/languages/bm_Nkoo.textproto index 2734cdca..1136be51 100644 --- a/Lib/gflanguages/data/languages/bm_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/bm_Nkoo.textproto @@ -1,6 +1,6 @@ id: "bm_Nkoo" language: "bm" script: "Nkoo" -name: "Bambara (Nko)" +name: "Bambara (N’Ko)" population: 16000000 region: "ML" diff --git a/Lib/gflanguages/data/languages/bsq_Bass.textproto b/Lib/gflanguages/data/languages/bsq_Bass.textproto index 3d76b53b..e2984fef 100644 --- a/Lib/gflanguages/data/languages/bsq_Bass.textproto +++ b/Lib/gflanguages/data/languages/bsq_Bass.textproto @@ -1,7 +1,7 @@ id: "bsq_Bass" language: "bsq" script: "Bass" -name: "Bassa (Vah)" +name: "Bassa (Bassa Vah)" population: 410000 region: "LR" region: "SL" diff --git a/Lib/gflanguages/data/languages/cbk_Latn.textproto b/Lib/gflanguages/data/languages/cbk_Latn.textproto index 7053e530..748d555e 100644 --- a/Lib/gflanguages/data/languages/cbk_Latn.textproto +++ b/Lib/gflanguages/data/languages/cbk_Latn.textproto @@ -1,7 +1,7 @@ id: "cbk_Latn" language: "cbk" script: "Latn" -name: "Chavacano, Latin, Philippines" +name: "Chavacano, Philippines (Latin)" region: "PH" sample_text { masthead_full: "TtOo" diff --git a/Lib/gflanguages/data/languages/chn_Dupl.textproto b/Lib/gflanguages/data/languages/chn_Dupl.textproto index d402556f..2e8e8213 100644 --- a/Lib/gflanguages/data/languages/chn_Dupl.textproto +++ b/Lib/gflanguages/data/languages/chn_Dupl.textproto @@ -1,6 +1,6 @@ id: "chn_Dupl" language: "chn" script: "Dupl" -name: "Chinook Jargon (Duployan shorthand)" +name: "Chinook Jargon (Duployan)" region: "US" region: "CA" diff --git a/Lib/gflanguages/data/languages/de_Dupl.textproto b/Lib/gflanguages/data/languages/de_Dupl.textproto index 7910e1de..718b6fd7 100644 --- a/Lib/gflanguages/data/languages/de_Dupl.textproto +++ b/Lib/gflanguages/data/languages/de_Dupl.textproto @@ -1,5 +1,5 @@ id: "de_Dupl" language: "de" script: "Dupl" -name: "German (Duployan shorthand)" +name: "German (Duployan)" region: "DE" diff --git a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto index 7edc157e..eb891abd 100644 --- a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto @@ -1,5 +1,5 @@ id: "dyu_Nkoo" language: "dyu" script: "Nkoo" -name: "Dyula (Nko)" +name: "Dyula (N’Ko)" region: "CI" diff --git a/Lib/gflanguages/data/languages/eto_Latn.textproto b/Lib/gflanguages/data/languages/eto_Latn.textproto index d016fd39..2da83f6b 100644 --- a/Lib/gflanguages/data/languages/eto_Latn.textproto +++ b/Lib/gflanguages/data/languages/eto_Latn.textproto @@ -1,7 +1,7 @@ id: "eto_Latn" language: "eto" script: "Latn" -name: "Eton (Cameroon)" +name: "Eton, Cameroon" population: 400000 region: "CM" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/fr_Dupl.textproto b/Lib/gflanguages/data/languages/fr_Dupl.textproto index 584d4cbd..0093d282 100644 --- a/Lib/gflanguages/data/languages/fr_Dupl.textproto +++ b/Lib/gflanguages/data/languages/fr_Dupl.textproto @@ -1,5 +1,5 @@ id: "fr_Dupl" language: "fr" script: "Dupl" -name: "French (Duployan shorthand)" +name: "French (Duployan)" historical: true diff --git a/Lib/gflanguages/data/languages/gcf_Latn.textproto b/Lib/gflanguages/data/languages/gcf_Latn.textproto index 56401d24..b36c6e60 100644 --- a/Lib/gflanguages/data/languages/gcf_Latn.textproto +++ b/Lib/gflanguages/data/languages/gcf_Latn.textproto @@ -1,7 +1,7 @@ id: "gcf_Latn" language: "gcf" script: "Latn" -name: "Guadeloupean Creole French, Latin, Martinique" +name: "Guadeloupean Creole French, Martinique (Latin)" region: "GP" region: "MQ" sample_text { diff --git a/Lib/gflanguages/data/languages/man_Nkoo.textproto b/Lib/gflanguages/data/languages/man_Nkoo.textproto index 2ba2349b..4e9166d0 100644 --- a/Lib/gflanguages/data/languages/man_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/man_Nkoo.textproto @@ -1,5 +1,5 @@ id: "man_Nkoo" language: "man" script: "Nkoo" -name: "Mandingo (Nko)" +name: "Mandingo (N’Ko)" region: "GN" diff --git a/Lib/gflanguages/data/scripts/Beng.textproto b/Lib/gflanguages/data/scripts/Beng.textproto index d583c7ea..d68be9e4 100644 --- a/Lib/gflanguages/data/scripts/Beng.textproto +++ b/Lib/gflanguages/data/scripts/Beng.textproto @@ -1,3 +1,2 @@ id: "Beng" -name: "Bangla" - +name: "Bengali" diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index c05b7d8a..a859cf78 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -84,7 +84,15 @@ } # "ʼ" allowed as last character in language name for Metaʼ -LANGUAGE_NAME_REGEX = "^[-A-Za-zÀ-ÿ ]+(ʼ)?(, [-A-Za-zÀ-ÿ ]+)?( [(][-A-Za-zÀ-ÿ ]+[)])?$" +LANGUAGE_NAME_REGEX = "^[-’A-Za-zÀ-ÿ ]+(ʼ)?(, [-’A-Za-zÀ-ÿ/ ]+)?( [(][-’A-Za-zÀ-ÿ ]+[)])?$" +# Some scripts have abbreviated names for reference in language names that are +# sufficient in context. If an alternate is listed here, it should be used +# universally and consistently across all language names. +ALTERNATE_SCRIPT_NAMES = { + "Dupl": "Duployan", + "Hans": "Simplified", + "Hant": "Traditional", +} @pytest.mark.parametrize("lang_code", LANGUAGES) @@ -291,7 +299,7 @@ def test_language_uniqueness(): def test_language_name_structure(): languages_with_bad_name_structure = {} for lang in LANGUAGES.values(): - script_name = SCRIPTS[lang.script].name + script_name = SCRIPTS[lang.script].name if lang.script not in ALTERNATE_SCRIPT_NAMES else ALTERNATE_SCRIPT_NAMES[lang.script] names = [["name", lang.name]] if lang.preferred_name: names += [["preferred_name", lang.preferred_name]]