Skip to content

Commit 660d0d0

Browse files
authored
Merge pull request #95 from LaurenzV/3.3.2
3.3.2
2 parents 7b5c622 + 7453381 commit 660d0d0

34 files changed: +1068 -372 lines changed

scripts/gen-shaping-tests.py

-9
Original file line number | Diff line number | Diff line change
@@ -38,15 +38,6 @@
3838
'morx_36_001',
3939
# ttf-parser uses different rounding, not a bug
4040
'fallback_positioning_001',
41-
42-
# text-rendering-tests tests
43-
# Unknown issue. Investigate.
44-
'cmap_1_004',
45-
'shknda_3_031',
46-
'shlana_10_028',
47-
'shlana_10_041',
48-
'shlana_5_010',
49-
'shlana_5_012',
5041
]
5142

5243

scripts/gen-tag-table.py

+52 -21
Original file line number | Diff line number | Diff line change
@@ -319,6 +319,10 @@ class OpenTypeRegistryParser(HTMLParser):
319319
from_bcp_47(DefaultDict[str, AbstractSet[str]]): ``to_bcp_47``
320320
inverted. Its values start as unsorted sets;
321321
``sort_languages`` converts them to sorted lists.
322+
from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]):
323+
A copy of ``from_bcp_47``. It starts as ``None`` and is
324+
populated at the beginning of the first call to
325+
``inherit_from_macrolanguages``.
322326
323327
"""
324328
def __init__(self):
@@ -328,6 +332,7 @@ def __init__(self):
328332
self.ranks = collections.defaultdict(int)
329333
self.to_bcp_47 = collections.defaultdict(set)
330334
self.from_bcp_47 = collections.defaultdict(set)
335+
self.from_bcp_47_uninherited = None
331336
# Whether the parser is in a <td> element
332337
self._td = False
333338
# Whether the parser is after a <br> element within the current <tr> element
@@ -448,34 +453,56 @@ def inherit_from_macrolanguages(self):
448453
449454
If a BCP 47 tag for an individual mapping has no OpenType
450455
mapping but its macrolanguage does, the mapping is copied to
451-
the individual language. For example, als(Tosk Albanian) has no
452-
explicit mapping, so it inherits from sq(Albanian) the mapping
456+
the individual language. For example, als (Tosk Albanian) has no
457+
explicit mapping, so it inherits from sq (Albanian) the mapping
453458
to SQI.
454459
460+
However, if an OpenType tag maps to a BCP 47 macrolanguage and
461+
some but not all of its individual languages, the mapping is not
462+
inherited from the macrolanguage to the missing individual
463+
languages. For example, INUK (Nunavik Inuktitut) is mapped to
464+
ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to
465+
ikt (Inuinnaqtun, which is an individual language of iu), so
466+
this method does not add a mapping from ikt to INUK.
467+
468+
455469
If a BCP 47 tag for a macrolanguage has no OpenType mapping but
456-
all of its individual languages do and they all map to the same
457-
tags, the mapping is copied to the macrolanguage.
470+
some of its individual languages do, their mappings are copied
471+
to the macrolanguage.
458472
"""
459473
global bcp_47
460-
original_ot_from_bcp_47 = dict(self.from_bcp_47)
474+
first_time = self.from_bcp_47_uninherited is None
475+
if first_time:
476+
self.from_bcp_47_uninherited = dict(self.from_bcp_47)
461477
for macrolanguage, languages in dict(bcp_47.macrolanguages).items():
462-
ot_macrolanguages = set(original_ot_from_bcp_47.get(macrolanguage, set()))
478+
ot_macrolanguages = {
479+
ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get(macrolanguage, set ())
480+
}
481+
blocked_ot_macrolanguages = set()
482+
if 'retired code' not in bcp_47.scopes.get(macrolanguage, ''):
483+
for ot_macrolanguage in ot_macrolanguages:
484+
round_trip_macrolanguages = {
485+
l for l in self.to_bcp_47[ot_macrolanguage]
486+
if 'retired code' not in bcp_47.scopes.get(l, '')
487+
}
488+
round_trip_languages = {
489+
l for l in languages
490+
if 'retired code' not in bcp_47.scopes.get(l, '')
491+
}
492+
intersection = round_trip_macrolanguages & round_trip_languages
493+
if intersection and intersection != round_trip_languages:
494+
blocked_ot_macrolanguages.add(ot_macrolanguage)
463495
if ot_macrolanguages:
464496
for ot_macrolanguage in ot_macrolanguages:
465-
for language in languages:
466-
self.add_language(language, ot_macrolanguage)
467-
self.ranks[ot_macrolanguage] += 1
468-
else:
497+
if ot_macrolanguage not in blocked_ot_macrolanguages:
498+
for language in languages:
499+
self.add_language(language, ot_macrolanguage)
500+
if not blocked_ot_macrolanguages:
501+
self.ranks[ot_macrolanguage] += 1
502+
elif first_time:
469503
for language in languages:
470-
if language in original_ot_from_bcp_47:
471-
if ot_macrolanguages:
472-
ml = original_ot_from_bcp_47[language]
473-
if ml:
474-
ot_macrolanguages &= ml
475-
else:
476-
pass
477-
else:
478-
ot_macrolanguages |= original_ot_from_bcp_47[language]
504+
if language in self.from_bcp_47_uninherited:
505+
ot_macrolanguages |= self.from_bcp_47_uninherited[language]
479506
else:
480507
ot_macrolanguages.clear()
481508
if not ot_macrolanguages:
@@ -561,7 +588,7 @@ def parse(self, filename):
561588
if scope == 'macrolanguage':
562589
scope = ' [macrolanguage]'
563590
elif scope == 'collection':
564-
scope = ' [family]'
591+
scope = ' [collection]'
565592
else:
566593
continue
567594
self.scopes[subtag] = scope
@@ -710,6 +737,7 @@ def get_name(self, lt):
710737

711738
ot.add_language('oc-provenc', 'PRO')
712739

740+
ot.remove_language_ot('QUZ')
713741
ot.add_language('qu', 'QUZ')
714742
ot.add_language('qub', 'QWH')
715743
ot.add_language('qud', 'QVI')
@@ -742,7 +770,6 @@ def get_name(self, lt):
742770
ot.add_language('qxt', 'QWH')
743771
ot.add_language('qxw', 'QWH')
744772

745-
bcp_47.macrolanguages['ro'].remove('mo')
746773
bcp_47.macrolanguages['ro-MD'].add('mo')
747774

748775
ot.remove_language_ot('SYRE')
@@ -993,6 +1020,8 @@ def print_subtag_matches(subtag, new_line):
9931020
continue
9941021

9951022
for lt, tags in items:
1023+
if not tags:
1024+
continue
9961025
if lt.variant in bcp_47.prefixes:
9971026
expect(next(iter(bcp_47.prefixes[lt.variant])) == lt.language,
9981027
'%s is not a valid prefix of %s' %(lt.language, lt.variant))
@@ -1022,6 +1051,8 @@ def print_subtag_matches(subtag, new_line):
10221051

10231052
print(" b'%s' => {" % initial)
10241053
for lt, tags in items:
1054+
if not tags:
1055+
continue
10251056
print(' if ', end='')
10261057
script = lt.script
10271058
region = lt.region

scripts/gen-vowel-constraints.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -185,6 +185,6 @@ def __str__(self, index=0, depth=4):
185185

186186
print(' _ => {}')
187187
print(' }')
188-
print(' buffer.swap_buffers();')
188+
print(' buffer.sync();')
189189
print('}')
190190
print()

scripts/ms-use/IndicPositionalCategory-Additional.txt

+6 -6
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
# Not derivable
33
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
44
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
5-
# Ammended for Unicode 10.0 by Andrew Glass 2018-09-21
5+
# Amended for Unicode 10.0 by Andrew Glass 2018-09-21
66
# Updated for L2/19-083 by Andrew Glass 2019-05-06
77
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
88
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
@@ -58,16 +58,16 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
5858
# Indic_Positional_Category=Bottom
5959
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
6060
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
61-
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order
61+
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overridden, ccc controls order
6262
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
6363
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
64-
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order
64+
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overridden, ccc controls order
6565
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
66-
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order
66+
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overridden, ccc controls order
6767
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
68-
10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order
68+
10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overridden, ccc controls order
6969
10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW
70-
10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order
70+
10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overridden, ccc controls order
7171
10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW
7272
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
7373
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI

src/aat/extended_kerning.rs

+12 -4
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,8 @@ pub(crate) fn apply(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) -> Optio
9292
continue;
9393
}
9494

95+
buffer.unsafe_to_concat(None, None);
96+
9597
apply_simple_kerning(&subtable, plan, face, buffer);
9698
}
9799
kerx::Format::Format4(ref sub) => {
@@ -140,7 +142,10 @@ fn apply_simple_kerning(
140142
}
141143

142144
let mut iter = SkippyIter::new(&ctx, i, 1, false);
143-
if !iter.next() {
145+
146+
let mut unsafe_to = 0;
147+
if !iter.next(Some(&mut unsafe_to)) {
148+
ctx.buffer.unsafe_to_concat(Some(i), Some(unsafe_to));
144149
i += 1;
145150
continue;
146151
}
@@ -179,7 +184,7 @@ fn apply_simple_kerning(
179184
}
180185
}
181186

182-
ctx.buffer.unsafe_to_break(i, j + 1)
187+
ctx.buffer.unsafe_to_break(Some(i), Some(j + 1))
183188
}
184189

185190
i = j;
@@ -235,7 +240,10 @@ fn apply_state_machine_kerning<T, E>(
235240
// If there's no value and we're just epsilon-transitioning to state 0, safe to break.
236241
if entry.is_actionable() || !(entry.new_state == START_OF_TEXT && !entry.has_advance())
237242
{
238-
buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1);
243+
buffer.unsafe_to_break_from_outbuffer(
244+
Some(buffer.backtrack_len() - 1),
245+
Some(buffer.idx + 1),
246+
);
239247
}
240248
}
241249

@@ -249,7 +257,7 @@ fn apply_state_machine_kerning<T, E>(
249257
};
250258

251259
if end_entry.is_actionable() {
252-
buffer.unsafe_to_break(buffer.idx, buffer.idx + 2);
260+
buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
253261
}
254262
}
255263

src/aat/metamorphosis.rs

+8 -5
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,10 @@ fn drive<T: FromData>(
210210
};
211211

212212
if !is_safe_to_break() && buffer.backtrack_len() > 0 && buffer.idx < buffer.len {
213-
buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1);
213+
buffer.unsafe_to_break_from_outbuffer(
214+
Some(buffer.backtrack_len() - 1),
215+
Some(buffer.idx + 1),
216+
);
214217
}
215218

216219
c.transition(&entry, buffer);
@@ -232,7 +235,7 @@ fn drive<T: FromData>(
232235
}
233236

234237
if !c.in_place() {
235-
buffer.swap_buffers();
238+
buffer.sync();
236239
}
237240
}
238241

@@ -459,7 +462,7 @@ impl Driver<morx::ContextualEntryData> for ContextualCtx<'_> {
459462
}
460463

461464
if let Some(replacement) = replacement {
462-
buffer.unsafe_to_break(self.mark, (buffer.idx + 1).min(buffer.len));
465+
buffer.unsafe_to_break(Some(self.mark), Some((buffer.idx + 1).min(buffer.len)));
463466
buffer.info[self.mark].glyph_id = u32::from(replacement);
464467

465468
if let Some(face) = self.face_if_has_glyph_classes {
@@ -565,8 +568,8 @@ impl Driver<morx::InsertionEntryData> for InsertionCtx<'_> {
565568
buffer.move_to(end + usize::from(count));
566569

567570
buffer.unsafe_to_break_from_outbuffer(
568-
self.mark as usize,
569-
(buffer.idx + 1).min(buffer.len),
571+
Some(self.mark as usize),
572+
Some((buffer.idx + 1).min(buffer.len)),
570573
);
571574
}
572575

0 commit comments

Comments
 (0)