@@ -319,6 +319,10 @@ class OpenTypeRegistryParser(HTMLParser):
319
319
from_bcp_47 (DefaultDict[str, AbstractSet[str]]): ``to_bcp_47``
320
320
inverted. Its values start as unsorted sets;
321
321
``sort_languages`` converts them to sorted lists.
322
+ from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]):
323
+ A copy of ``from_bcp_47``. It starts as ``None`` and is
324
+ populated at the beginning of the first call to
325
+ ``inherit_from_macrolanguages``.
322
326
323
327
"""
324
328
def __init__ (self ):
@@ -328,6 +332,7 @@ def __init__(self):
328
332
self .ranks = collections .defaultdict (int )
329
333
self .to_bcp_47 = collections .defaultdict (set )
330
334
self .from_bcp_47 = collections .defaultdict (set )
335
+ self .from_bcp_47_uninherited = None
331
336
# Whether the parser is in a <td> element
332
337
self ._td = False
333
338
# Whether the parser is after a <br> element within the current <tr> element
@@ -448,34 +453,56 @@ def inherit_from_macrolanguages(self):
448
453
449
454
If a BCP 47 tag for an individual language has no OpenType
450
455
mapping but its macrolanguage does, the mapping is copied to
451
- the individual language. For example, als(Tosk Albanian) has no
452
- explicit mapping, so it inherits from sq(Albanian) the mapping
456
+ the individual language. For example, als (Tosk Albanian) has no
457
+ explicit mapping, so it inherits from sq (Albanian) the mapping
453
458
to SQI.
454
459
460
+ However, if an OpenType tag maps to a BCP 47 macrolanguage and
461
+ some but not all of its individual languages, the mapping is not
462
+ inherited from the macrolanguage to the missing individual
463
+ languages. For example, INUK (Nunavik Inuktitut) is mapped to
464
+ ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to
465
+ ikt (Inuinnaqtun, which is an individual language of iu), so
466
+ this method does not add a mapping from ikt to INUK.
467
+
468
+
455
469
If a BCP 47 tag for a macrolanguage has no OpenType mapping but
456
- all of its individual languages do and they all map to the same
457
- tags, the mapping is copied to the macrolanguage.
470
+ some of its individual languages do, their mappings are copied
471
+ to the macrolanguage.
458
472
"""
459
473
global bcp_47
460
- original_ot_from_bcp_47 = dict (self .from_bcp_47 )
474
+ first_time = self .from_bcp_47_uninherited is None
475
+ if first_time :
476
+ self .from_bcp_47_uninherited = dict (self .from_bcp_47 )
461
477
for macrolanguage , languages in dict (bcp_47 .macrolanguages ).items ():
462
- ot_macrolanguages = set (original_ot_from_bcp_47 .get (macrolanguage , set ()))
478
+ ot_macrolanguages = {
479
+ ot_macrolanguage for ot_macrolanguage in self .from_bcp_47_uninherited .get (macrolanguage , set ())
480
+ }
481
+ blocked_ot_macrolanguages = set ()
482
+ if 'retired code' not in bcp_47 .scopes .get (macrolanguage , '' ):
483
+ for ot_macrolanguage in ot_macrolanguages :
484
+ round_trip_macrolanguages = {
485
+ l for l in self .to_bcp_47 [ot_macrolanguage ]
486
+ if 'retired code' not in bcp_47 .scopes .get (l , '' )
487
+ }
488
+ round_trip_languages = {
489
+ l for l in languages
490
+ if 'retired code' not in bcp_47 .scopes .get (l , '' )
491
+ }
492
+ intersection = round_trip_macrolanguages & round_trip_languages
493
+ if intersection and intersection != round_trip_languages :
494
+ blocked_ot_macrolanguages .add (ot_macrolanguage )
463
495
if ot_macrolanguages :
464
496
for ot_macrolanguage in ot_macrolanguages :
465
- for language in languages :
466
- self .add_language (language , ot_macrolanguage )
467
- self .ranks [ot_macrolanguage ] += 1
468
- else :
497
+ if ot_macrolanguage not in blocked_ot_macrolanguages :
498
+ for language in languages :
499
+ self .add_language (language , ot_macrolanguage )
500
+ if not blocked_ot_macrolanguages :
501
+ self .ranks [ot_macrolanguage ] += 1
502
+ elif first_time :
469
503
for language in languages :
470
- if language in original_ot_from_bcp_47 :
471
- if ot_macrolanguages :
472
- ml = original_ot_from_bcp_47 [language ]
473
- if ml :
474
- ot_macrolanguages &= ml
475
- else :
476
- pass
477
- else :
478
- ot_macrolanguages |= original_ot_from_bcp_47 [language ]
504
+ if language in self .from_bcp_47_uninherited :
505
+ ot_macrolanguages |= self .from_bcp_47_uninherited [language ]
479
506
else :
480
507
ot_macrolanguages .clear ()
481
508
if not ot_macrolanguages :
@@ -561,7 +588,7 @@ def parse(self, filename):
561
588
if scope == 'macrolanguage' :
562
589
scope = ' [macrolanguage]'
563
590
elif scope == 'collection' :
564
- scope = ' [family ]'
591
+ scope = ' [collection ]'
565
592
else :
566
593
continue
567
594
self .scopes [subtag ] = scope
@@ -710,6 +737,7 @@ def get_name(self, lt):
710
737
711
738
ot .add_language ('oc-provenc' , 'PRO' )
712
739
740
+ ot .remove_language_ot ('QUZ' )
713
741
ot .add_language ('qu' , 'QUZ' )
714
742
ot .add_language ('qub' , 'QWH' )
715
743
ot .add_language ('qud' , 'QVI' )
@@ -742,7 +770,6 @@ def get_name(self, lt):
742
770
ot .add_language ('qxt' , 'QWH' )
743
771
ot .add_language ('qxw' , 'QWH' )
744
772
745
- bcp_47 .macrolanguages ['ro' ].remove ('mo' )
746
773
bcp_47 .macrolanguages ['ro-MD' ].add ('mo' )
747
774
748
775
ot .remove_language_ot ('SYRE' )
@@ -993,6 +1020,8 @@ def print_subtag_matches(subtag, new_line):
993
1020
continue
994
1021
995
1022
for lt , tags in items :
1023
+ if not tags :
1024
+ continue
996
1025
if lt .variant in bcp_47 .prefixes :
997
1026
expect (next (iter (bcp_47 .prefixes [lt .variant ])) == lt .language ,
998
1027
'%s is not a valid prefix of %s' % (lt .language , lt .variant ))
@@ -1022,6 +1051,8 @@ def print_subtag_matches(subtag, new_line):
1022
1051
1023
1052
print (" b'%s' => {" % initial )
1024
1053
for lt , tags in items :
1054
+ if not tags :
1055
+ continue
1025
1056
print (' if ' , end = '' )
1026
1057
script = lt .script
1027
1058
region = lt .region
0 commit comments