diff --git a/scripts/gen-tag-table.py b/scripts/gen-tag-table.py
index f8fabc24..7d1e1533 100755
--- a/scripts/gen-tag-table.py
+++ b/scripts/gen-tag-table.py
@@ -330,11 +330,15 @@ def __init__(self):
self.from_bcp_47 = collections.defaultdict(set)
# Whether the parser is in a <td> element
self._td = False
+ # Whether the parser is after a <br> element within the current <tr> element
+ self._br = False
# The text of the <td> elements of the current <tr> element.
self._current_tr = []
def handle_starttag(self, tag, attrs):
- if tag == 'meta':
+ if tag == 'br':
+ self._br = True
+ elif tag == 'meta':
for attr, value in attrs:
if attr == 'name' and value == 'updated_at':
self.header = self.get_starttag_text()
@@ -343,6 +347,7 @@ def handle_starttag(self, tag, attrs):
self._td = True
self._current_tr.append('')
elif tag == 'tr':
+ self._br = False
self._current_tr = []
def handle_endtag(self, tag):
@@ -367,7 +372,7 @@ def handle_endtag(self, tag):
self.ranks[tag] = rank
def handle_data(self, data):
- if self._td:
+ if self._td and not self._br:
self._current_tr[-1] += data
def handle_charref(self, name):
@@ -699,6 +704,8 @@ def get_name(self, lt):
ot.remove_language_ot('MONT')
ot.add_language('mnw', 'MONT')
+ot.add_language ('mnw-TH', 'MONT')
+
ot.add_language('no', 'NOR')
ot.add_language('oc-provenc', 'PRO')
diff --git a/src/tag_table.rs b/src/tag_table.rs
index a448e643..cce46717 100644
--- a/src/tag_table.rs
+++ b/src/tag_table.rs
@@ -176,7 +176,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "bmb", tag: Tag(0) }, // Bembe != Bambara (Bamanankan)
LangTag { language: "bml", tag: Tag(0) }, // Bomboli != Bamileke
LangTag { language: "bmm", tag: Tag::from_bytes(b"MLG ") }, // Northern Betsimisaraka Malagasy -> Malagasy
- LangTag { language: "bn", tag: Tag::from_bytes(b"BEN ") }, // Bengali
+ LangTag { language: "bn", tag: Tag::from_bytes(b"BEN ") }, // Bangla
LangTag { language: "bo", tag: Tag::from_bytes(b"TIB ") }, // Tibetan
LangTag { language: "bpd", tag: Tag::from_bytes(b"BAD0") }, // Banda-Banda -> Banda
LangTag { language: "bpl", tag: Tag::from_bytes(b"CPP ") }, // Broome Pearling Lugger Pidgin -> Creoles
@@ -473,7 +473,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "gaa", tag: Tag::from_bytes(b"GAD ") }, // Ga
LangTag { language: "gac", tag: Tag::from_bytes(b"CPP ") }, // Mixed Great Andamanese -> Creoles
LangTag { language: "gad", tag: Tag(0) }, // Gaddang != Ga
- LangTag { language: "gae", tag: Tag(0) }, // Guarequena != Scottish Gaelic (Gaelic)
+ LangTag { language: "gae", tag: Tag(0) }, // Guarequena != Scottish Gaelic
// LangTag { language: "gag", tag: Tag::from_bytes(b"GAG ") }, // Gagauz
LangTag { language: "gal", tag: Tag(0) }, // Galolen != Galician
LangTag { language: "gan", tag: Tag::from_bytes(b"ZHS ") }, // Gan Chinese -> Chinese, Simplified
@@ -486,7 +486,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "gcf", tag: Tag::from_bytes(b"CPP ") }, // Guadeloupean Creole French -> Creoles
LangTag { language: "gcl", tag: Tag::from_bytes(b"CPP ") }, // Grenadian Creole English -> Creoles
LangTag { language: "gcr", tag: Tag::from_bytes(b"CPP ") }, // Guianese Creole French -> Creoles
- LangTag { language: "gd", tag: Tag::from_bytes(b"GAE ") }, // Scottish Gaelic (Gaelic)
+ LangTag { language: "gd", tag: Tag::from_bytes(b"GAE ") }, // Scottish Gaelic
LangTag { language: "gda", tag: Tag::from_bytes(b"RAJ ") }, // Gade Lohar -> Rajasthani
// LangTag { language: "gez", tag: Tag::from_bytes(b"GEZ ") }, // Geez
LangTag { language: "ggo", tag: Tag::from_bytes(b"GON ") }, // Southern Gondi(retired code) -> Gondi
@@ -934,7 +934,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "mnw", tag: Tag::from_bytes(b"MON ") }, // Mon
LangTag { language: "mnw", tag: Tag::from_bytes(b"MONT") }, // Mon -> Thailand Mon
LangTag { language: "mnx", tag: Tag(0) }, // Manikion != Manx
- LangTag { language: "mo", tag: Tag::from_bytes(b"MOL ") }, // Moldavian(retired code)
+ LangTag { language: "mo", tag: Tag::from_bytes(b"MOL ") }, // Moldavian(retired code) -> Romanian (Moldova)
LangTag { language: "mod", tag: Tag::from_bytes(b"CPP ") }, // Mobilian -> Creoles
// LangTag { language: "moh", tag: Tag::from_bytes(b"MOH ") }, // Mohawk
LangTag { language: "mok", tag: Tag(0) }, // Morori != Moksha
@@ -1081,13 +1081,13 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "om", tag: Tag::from_bytes(b"ORO ") }, // Oromo [macrolanguage]
LangTag { language: "onx", tag: Tag::from_bytes(b"CPP ") }, // Onin Based Pidgin -> Creoles
LangTag { language: "oor", tag: Tag::from_bytes(b"CPP ") }, // Oorlams -> Creoles
- LangTag { language: "or", tag: Tag::from_bytes(b"ORI ") }, // Odia (formerly Oriya) [macrolanguage]
+ LangTag { language: "or", tag: Tag::from_bytes(b"ORI ") }, // Odia [macrolanguage]
LangTag { language: "orc", tag: Tag::from_bytes(b"ORO ") }, // Orma -> Oromo
LangTag { language: "orn", tag: Tag::from_bytes(b"MLY ") }, // Orang Kanaq -> Malay
LangTag { language: "oro", tag: Tag(0) }, // Orokolo != Oromo
LangTag { language: "orr", tag: Tag::from_bytes(b"IJO ") }, // Oruma -> Ijo
LangTag { language: "ors", tag: Tag::from_bytes(b"MLY ") }, // Orang Seletar -> Malay
- LangTag { language: "ory", tag: Tag::from_bytes(b"ORI ") }, // Odia (formerly Oriya)
+ LangTag { language: "ory", tag: Tag::from_bytes(b"ORI ") }, // Odia
LangTag { language: "os", tag: Tag::from_bytes(b"OSS ") }, // Ossetian
LangTag { language: "otw", tag: Tag::from_bytes(b"OJB ") }, // Ottawa -> Ojibway
LangTag { language: "oua", tag: Tag::from_bytes(b"BBR ") }, // Tagargrent -> Berber
@@ -1345,7 +1345,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "so", tag: Tag::from_bytes(b"SML ") }, // Somali
LangTag { language: "sog", tag: Tag(0) }, // Sogdian != Sodo Gurage
// LangTag { language: "sop", tag: Tag::from_bytes(b"SOP ") }, // Songe
- LangTag { language: "spv", tag: Tag::from_bytes(b"ORI ") }, // Sambalpuri -> Odia (formerly Oriya)
+ LangTag { language: "spv", tag: Tag::from_bytes(b"ORI ") }, // Sambalpuri -> Odia
LangTag { language: "spy", tag: Tag::from_bytes(b"KAL ") }, // Sabaot -> Kalenjin
LangTag { language: "sq", tag: Tag::from_bytes(b"SQI ") }, // Albanian [macrolanguage]
LangTag { language: "sr", tag: Tag::from_bytes(b"SRB ") }, // Serbian
@@ -2252,6 +2252,11 @@ pub fn tags_from_complex_language(language: &str, tags: &mut smallvec::SmallVec<
tags.push(Tag::from_bytes(b"ZHT ")); // Chinese, Traditional
return true;
}
+ if strncmp(&language[1..], "nw-", 3) && subtag_matches(language, "-th") {
+ // Mon; Thailand
+ tags.push(Tag::from_bytes(b"MONT")); // Thailand Mon
+ return true;
+ }
}
b'n' => {
if lang_matches(&language[1..], "an-hant-hk") {
@@ -2315,7 +2320,7 @@ pub fn tags_from_complex_language(language: &str, tags: &mut smallvec::SmallVec<
b'r' => {
if strncmp(&language[1..], "o-", 2) && subtag_matches(language, "-md") {
// Romanian; Moldova
- tags.push(Tag::from_bytes(b"MOL ")); // Moldavian
+ tags.push(Tag::from_bytes(b"MOL ")); // Romanian (Moldova)
return true;
}
}