Skip to content

Commit 38b59f7

Browse files
committed
chore: deburred and refreshed data
chore: integrated xcarpentier#475 from kanimetov chore: updated country code list
1 parent 66a9696 commit 38b59f7

8 files changed

+20566
-2297
lines changed

data/countries-emoji.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

data/countries-more-translations.json

Lines changed: 7305 additions & 0 deletions
Large diffs are not rendered by default.

data/countries.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

scripts/deburr.js

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// From
2+
// https://github.com/lodash/lodash/blob/master/deburr.js
3+
4+
/** Used to match Latin Unicode letters (excluding mathematical operators). */
5+
const reLatin = /[\xc0-\xd6\xd8-\xf6\xf8-\xff\u0100-\u017f]/g
6+
7+
/** Used to compose unicode character classes. */
8+
const rsComboMarksRange = '\\u0300-\\u036f'
9+
const reComboHalfMarksRange = '\\ufe20-\\ufe2f'
10+
const rsComboSymbolsRange = '\\u20d0-\\u20ff'
11+
const rsComboMarksExtendedRange = '\\u1ab0-\\u1aff'
12+
const rsComboMarksSupplementRange = '\\u1dc0-\\u1dff'
13+
const rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange + rsComboMarksExtendedRange + rsComboMarksSupplementRange
14+
15+
/** Used to compose unicode capture groups. */
16+
const rsCombo = `[${rsComboRange}]`
17+
18+
/**
19+
* Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks) and
20+
* [combining diacritical marks for symbols](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks_for_Symbols).
21+
*/
22+
const reComboMark = RegExp(rsCombo, 'g')
23+
24+
/** Used to map Latin Unicode letters to basic Latin letters. */
25+
const deburredLetters = {
26+
// Latin-1 Supplement block.
27+
'\xc0': 'A', '\xc1': 'A', '\xc2': 'A', '\xc3': 'A', '\xc4': 'A', '\xc5': 'A',
28+
'\xe0': 'a', '\xe1': 'a', '\xe2': 'a', '\xe3': 'a', '\xe4': 'a', '\xe5': 'a',
29+
'\xc7': 'C', '\xe7': 'c',
30+
'\xd0': 'D', '\xf0': 'd',
31+
'\xc8': 'E', '\xc9': 'E', '\xca': 'E', '\xcb': 'E',
32+
'\xe8': 'e', '\xe9': 'e', '\xea': 'e', '\xeb': 'e',
33+
'\xcc': 'I', '\xcd': 'I', '\xce': 'I', '\xcf': 'I',
34+
'\xec': 'i', '\xed': 'i', '\xee': 'i', '\xef': 'i',
35+
'\xd1': 'N', '\xf1': 'n',
36+
'\xd2': 'O', '\xd3': 'O', '\xd4': 'O', '\xd5': 'O', '\xd6': 'O', '\xd8': 'O',
37+
'\xf2': 'o', '\xf3': 'o', '\xf4': 'o', '\xf5': 'o', '\xf6': 'o', '\xf8': 'o',
38+
'\xd9': 'U', '\xda': 'U', '\xdb': 'U', '\xdc': 'U',
39+
'\xf9': 'u', '\xfa': 'u', '\xfb': 'u', '\xfc': 'u',
40+
'\xdd': 'Y', '\xfd': 'y', '\xff': 'y',
41+
'\xc6': 'Ae', '\xe6': 'ae',
42+
'\xde': 'Th', '\xfe': 'th',
43+
'\xdf': 'ss',
44+
// Latin Extended-A block.
45+
'\u0100': 'A', '\u0102': 'A', '\u0104': 'A',
46+
'\u0101': 'a', '\u0103': 'a', '\u0105': 'a',
47+
'\u0106': 'C', '\u0108': 'C', '\u010a': 'C', '\u010c': 'C',
48+
'\u0107': 'c', '\u0109': 'c', '\u010b': 'c', '\u010d': 'c',
49+
'\u010e': 'D', '\u0110': 'D', '\u010f': 'd', '\u0111': 'd',
50+
'\u0112': 'E', '\u0114': 'E', '\u0116': 'E', '\u0118': 'E', '\u011a': 'E',
51+
'\u0113': 'e', '\u0115': 'e', '\u0117': 'e', '\u0119': 'e', '\u011b': 'e',
52+
'\u011c': 'G', '\u011e': 'G', '\u0120': 'G', '\u0122': 'G',
53+
'\u011d': 'g', '\u011f': 'g', '\u0121': 'g', '\u0123': 'g',
54+
'\u0124': 'H', '\u0126': 'H', '\u0125': 'h', '\u0127': 'h',
55+
'\u0128': 'I', '\u012a': 'I', '\u012c': 'I', '\u012e': 'I', '\u0130': 'I',
56+
'\u0129': 'i', '\u012b': 'i', '\u012d': 'i', '\u012f': 'i', '\u0131': 'i',
57+
'\u0134': 'J', '\u0135': 'j',
58+
'\u0136': 'K', '\u0137': 'k', '\u0138': 'k',
59+
'\u0139': 'L', '\u013b': 'L', '\u013d': 'L', '\u013f': 'L', '\u0141': 'L',
60+
'\u013a': 'l', '\u013c': 'l', '\u013e': 'l', '\u0140': 'l', '\u0142': 'l',
61+
'\u0143': 'N', '\u0145': 'N', '\u0147': 'N', '\u014a': 'N',
62+
'\u0144': 'n', '\u0146': 'n', '\u0148': 'n', '\u014b': 'n',
63+
'\u014c': 'O', '\u014e': 'O', '\u0150': 'O',
64+
'\u014d': 'o', '\u014f': 'o', '\u0151': 'o',
65+
'\u0154': 'R', '\u0156': 'R', '\u0158': 'R',
66+
'\u0155': 'r', '\u0157': 'r', '\u0159': 'r',
67+
'\u015a': 'S', '\u015c': 'S', '\u015e': 'S', '\u0160': 'S',
68+
'\u015b': 's', '\u015d': 's', '\u015f': 's', '\u0161': 's',
69+
'\u0162': 'T', '\u0164': 'T', '\u0166': 'T',
70+
'\u0163': 't', '\u0165': 't', '\u0167': 't',
71+
'\u0168': 'U', '\u016a': 'U', '\u016c': 'U', '\u016e': 'U', '\u0170': 'U', '\u0172': 'U',
72+
'\u0169': 'u', '\u016b': 'u', '\u016d': 'u', '\u016f': 'u', '\u0171': 'u', '\u0173': 'u',
73+
'\u0174': 'W', '\u0175': 'w',
74+
'\u0176': 'Y', '\u0177': 'y', '\u0178': 'Y',
75+
'\u0179': 'Z', '\u017b': 'Z', '\u017d': 'Z',
76+
'\u017a': 'z', '\u017c': 'z', '\u017e': 'z',
77+
'\u0132': 'IJ', '\u0133': 'ij',
78+
'\u0152': 'Oe', '\u0153': 'oe',
79+
'\u0149': "'n", '\u017f': 's'
80+
}
81+
82+
/**
 * Replacement callback for a single matched Latin Unicode letter.
 *
 * Looks the character up in `deburredLetters`; when the lookup yields a
 * falsy value (no entry), the character is returned untouched.
 *
 * @param {string} str The matched character.
 * @returns {string} The basic Latin replacement, or `str` itself.
 */
const deburrLetter = (str) => {
  const replacement = deburredLetters[str]
  return replacement ? replacement : str
}
85+
86+
/**
 * Deburrs a string: letters matched by `reLatin` are passed through
 * `deburrLetter`, then every character matched by `reComboMark` is removed.
 *
 * Falsy input (`''`, `undefined`, `null`, ...) is returned as-is.
 *
 * @param {string} str The string to deburr.
 * @returns {string} The deburred string, or the falsy input unchanged.
 */
function deburr(str) {
  if (!str) {
    return str
  }
  const latinized = str.replace(reLatin, deburrLetter)
  return latinized.replace(reComboMark, '')
}
89+
90+
module.exports = deburr

scripts/transform-world-countries.js

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
const countries = require('world-countries')
44
const flags = require('./countryFlags')
5+
const deburr = require('./deburr')
6+
const moreTranslations = require('../data/countries-more-translations.json')
57

68
const isEmoji = process.argv.includes('--emoji')
79
const isCca2 = process.argv.includes('--cca2')
@@ -10,7 +12,7 @@ const isSubRegion = process.argv.includes('--subregions')
1012

1113
const getCountryNameAsyncs = (common, translations) =>
1214
Object.keys(translations)
13-
.filter(k => k !== 'common')
15+
.filter(k => (k !== 'common'))
1416
.map(key => ({ [key]: translations[key].common }))
1517
.reduce(
1618
(prev, cur) => ({
@@ -20,26 +22,65 @@ const getCountryNameAsyncs = (common, translations) =>
2022
{}
2123
)
2224

25+
/**
 * Returns a new array holding the distinct elements of `arr`, ordered by
 * the default `Array.prototype.sort` comparison. The input is not modified.
 *
 * @param {Iterable<*>} arr Values to de-duplicate.
 * @returns {Array<*>} Sorted array of unique values.
 */
function uniq(arr) {
  const distinct = Array.from(new Set(arr))
  distinct.sort()
  return distinct
}
26+
27+
const TranslationLanguageCodes = {}
28+
29+
/**
 * Builds a copy of `bag` whose entries are ordered by key, comparing keys
 * with `String.prototype.localeCompare`. The input object is not modified.
 *
 * @param {Object} bag Plain object to reorder.
 * @returns {Object} New object with the same entries, sorted by key.
 */
function bagsort(bag) {
  const compareByKey = (left, right) => left[0].localeCompare(right[0])
  const entries = Object.entries(bag)
  entries.sort(compareByKey)
  return Object.fromEntries(entries)
}
33+
2334
const newcountries = countries
2435
.map(
25-
({
26-
cca2,
27-
currency,
28-
callingCode,
29-
region,
30-
subregion,
31-
name: { common },
32-
translations
33-
}) => ({
34-
[cca2]: {
35-
currency,
36-
callingCode,
36+
(info) => {
37+
const {
38+
cca2,
39+
idd,
3740
region,
3841
subregion,
39-
flag: isEmoji ? `flag-${cca2.toLowerCase()}` : flags[cca2],
40-
name: { common, ...getCountryNameAsyncs(common, translations) }
42+
name: { common, official, native },
43+
translations,
44+
currencies,
45+
altSpellings,
46+
latlng,
47+
flag,
48+
..._rest
49+
} = info
50+
const callingCodeRoot = idd.root.replace(/^\+/, '')
51+
const callingCodes = (cca2 === 'US') ? ['1'] : idd.suffixes.map((suff) => (callingCodeRoot + suff)).sort()
52+
const natives = {}
53+
Object.keys(native).forEach((lang) => { natives[lang] = native[lang].common })
54+
const deburred = deburr(common)
55+
const translatedNames = {
56+
...moreTranslations[cca2].name, ...natives, ...getCountryNameAsyncs(common, translations),
4157
}
42-
})
58+
const names = { common, deburred, ...bagsort(translatedNames) }
59+
if (names.per && (! names.fas)) { names.fas = names.per }
60+
if (! names.eng) { names.eng = names.common }
61+
const altsearch = uniq([...altSpellings, ...altSpellings.map(deburr), ...Object.values(names).map(deburr)])
62+
.filter((str) => /\w\w\w/.test(str))
63+
Object.keys(names).forEach((lang) => { TranslationLanguageCodes[lang] = 1 + (TranslationLanguageCodes[lang] || 0) })
64+
// if (/^T/.test(cca2)) { console.warn(info, natives, native, names) }
65+
return {
66+
[cca2]: {
67+
code: cca2,
68+
common: names.common,
69+
official,
70+
native,
71+
latlng,
72+
flagchar: flag,
73+
currency: Object.keys(currencies || []).sort(),
74+
callingCode: callingCodes,
75+
region,
76+
subregion,
77+
flag: isEmoji ? `flag-${cca2.toLowerCase()}` : flags[cca2],
78+
altSpellings,
79+
name: names,
80+
altsearch,
81+
}
82+
}
83+
}
4384
)
4485
.sort((a, b) => {
4586
if (a[Object.keys(a)[0]].name.common === b[Object.keys(b)[0]].name.common) {
@@ -59,6 +100,10 @@ const newcountries = countries
59100
{}
60101
)
61102

103+
// console.warn(TranslationLanguageCodes)
104+
const WellTranslated = Object.fromEntries(Object.entries(TranslationLanguageCodes).filter(([lang, ct]) => (ct > 50)))
105+
console.warn('export const TranslationLanguageCodeList = ', Object.keys(WellTranslated), 'as const')
106+
62107
if (isCca2) {
63108
console.log(JSON.stringify(Object.keys(newcountries)))
64109
} else if (isRegion) {

0 commit comments

Comments
 (0)