Skip to content

Commit 860e431

Browse files
LaurenzValerque
authored and committed
[ot-tags] Update IANA and OT language registries
1 parent 6ad754b commit 860e431

File tree

2 files changed

+30
-25
lines changed

2 files changed

+30
-25
lines changed

scripts/gen-tag-table.py

+28 -23
Original file line numberDiff line numberDiff line change
@@ -335,49 +335,54 @@ def __init__(self):
335335
self.from_bcp_47_uninherited = None
336336
# Whether the parser is in a <td> element
337337
self._td = False
338-
# Whether the parser is after a <br> element within the current <tr> element
339-
self._br = False
338+
# Whether the parser ignores the rest of the current <td> element
339+
self._disengaged = False
340340
# The text of the <td> elements of the current <tr> element.
341341
self._current_tr = []
342342

343-
def handle_starttag(self, tag, attrs):
344-
if tag == 'br':
345-
self._br = True
343+
def handle_starttag (self, tag, attrs):
344+
if tag == 'a':
345+
if self._current_tr and not self._disengaged:
346+
self._current_tr[-1] = ''
347+
self._disengaged = True
348+
elif tag == 'br':
349+
self._disengaged = True
346350
elif tag == 'meta':
347351
for attr, value in attrs:
348352
if attr == 'name' and value == 'updated_at':
349-
self.header = self.get_starttag_text()
353+
self.header = self.get_starttag_text ()
350354
break
351355
elif tag == 'td':
352356
self._td = True
353-
self._current_tr.append('')
357+
self._current_tr.append ('')
354358
elif tag == 'tr':
355-
self._br = False
359+
self._disengaged = False
356360
self._current_tr = []
357361

358-
def handle_endtag(self, tag):
362+
def handle_endtag (self, tag):
359363
if tag == 'td':
360364
self._td = False
365+
self._disengaged = False
361366
elif tag == 'tr' and self._current_tr:
362-
expect(2 <= len(self._current_tr) <= 3)
363-
name = self._current_tr[0].strip()
364-
tag = self._current_tr[1].strip("\t\n\v\f\r '")
367+
expect (2 <= len (self._current_tr) <= 3)
368+
name = self._current_tr[0].strip ()
369+
tag = self._current_tr[1].strip ("\t\n\v\f\r '")
365370
rank = 0
366-
if len(tag) > 4:
367-
expect(tag.endswith('(deprecated)'), 'ill-formed OpenType tag: %s' % tag)
368-
name += '(deprecated)'
369-
tag = tag.split(' ')[0]
371+
if len (tag) > 4:
372+
expect (tag.endswith (' (deprecated)'), 'ill-formed OpenType tag: %s' % tag)
373+
name += ' (deprecated)'
374+
tag = tag.split (' ')[0]
370375
rank = 1
371-
self.names[tag] = re.sub(' languages$', '', name)
376+
self.names[tag] = re.sub (' languages$', '', name)
372377
if not self._current_tr[2]:
373378
return
374-
iso_codes = self._current_tr[2].strip()
375-
self.to_bcp_47[tag].update(ISO_639_3_TO_1.get(code, code) for code in iso_codes.replace(' ', '').split(','))
376-
rank += 2 * len(self.to_bcp_47[tag])
379+
iso_codes = self._current_tr[2].strip ()
380+
self.to_bcp_47[tag].update (ISO_639_3_TO_1.get (code, code) for code in iso_codes.replace (' ', '').split (','))
381+
rank += 2 * len (self.to_bcp_47[tag])
377382
self.ranks[tag] = rank
378383

379-
def handle_data(self, data):
380-
if self._td and not self._br:
384+
def handle_data (self, data):
385+
if self._td and not self._disengaged:
381386
self._current_tr[-1] += data
382387

383388
def handle_charref(self, name):
@@ -980,7 +985,7 @@ def same_tag(bcp_47_tag, ot_tags):
980985
print('fn strncmp(s1: &str, s2: &str, n: usize) -> bool {')
981986
print(' let n1 = core::cmp::min(n, s1.len());')
982987
print(' let n2 = core::cmp::min(n, s2.len());')
983-
print(' &s1[..n1] == &s2[..n2]')
988+
print(' s1[..n1] == s2[..n2]')
984989
print('}')
985990
print()
986991
print('/// Converts a multi-subtag BCP 47 language tag to language tags.')

src/hb/tag_table.rs

+2 -2
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
384384
LangTag { language: "dgr", tag: Tag::from_bytes(b"ATH ") }, // Tlicho -> Athapaskan
385385
LangTag { language: "dhd", tag: Tag::from_bytes(b"MAW ") }, // Dhundari -> Marwari
386386
// LangTag { language: "dhg", tag: Tag::from_bytes(b"DHG ") }, // Dhangu
387-
LangTag { language: "dhv", tag: Tag(0) }, // Dehu != Divehi (Dhivehi, Maldivian)(deprecated)
387+
LangTag { language: "dhv", tag: Tag(0) }, // Dehu != Divehi (Dhivehi, Maldivian) (deprecated)
388388
LangTag { language: "dib", tag: Tag::from_bytes(b"DNK ") }, // South Central Dinka -> Dinka
389389
LangTag { language: "dik", tag: Tag::from_bytes(b"DNK ") }, // Southwestern Dinka -> Dinka
390390
LangTag { language: "din", tag: Tag::from_bytes(b"DNK ") }, // Dinka [macrolanguage]
@@ -410,7 +410,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
410410
LangTag { language: "dun", tag: Tag(0) }, // Dusun Deyah != Dungan
411411
LangTag { language: "dup", tag: Tag::from_bytes(b"MLY ") }, // Duano -> Malay
412412
LangTag { language: "dv", tag: Tag::from_bytes(b"DIV ") }, // Divehi (Dhivehi, Maldivian)
413-
LangTag { language: "dv", tag: Tag::from_bytes(b"DHV ") }, // Divehi (Dhivehi, Maldivian)(deprecated)
413+
LangTag { language: "dv", tag: Tag::from_bytes(b"DHV ") }, // Divehi (Dhivehi, Maldivian) (deprecated)
414414
LangTag { language: "dwk", tag: Tag::from_bytes(b"KUI ") }, // Dawik Kui -> Kui
415415
LangTag { language: "dwu", tag: Tag::from_bytes(b"DUJ ") }, // Dhuwal
416416
LangTag { language: "dwy", tag: Tag::from_bytes(b"DUJ ") }, // Dhuwaya -> Dhuwal

0 commit comments

Comments
 (0)