diff --git a/scripts/gen-shaping-tests.py b/scripts/gen-shaping-tests.py
index cae73dc6..d1dcf0b8 100755
--- a/scripts/gen-shaping-tests.py
+++ b/scripts/gen-shaping-tests.py
@@ -38,15 +38,6 @@
'morx_36_001',
# ttf-parser uses different rounding, not a bug
'fallback_positioning_001',
-
- # text-rendering-tests tests
- # Unknown issue. Investigate.
- 'cmap_1_004',
- 'shknda_3_031',
- 'shlana_10_028',
- 'shlana_10_041',
- 'shlana_5_010',
- 'shlana_5_012',
]
diff --git a/scripts/gen-tag-table.py b/scripts/gen-tag-table.py
index 7d1e1533..f7ea2210 100755
--- a/scripts/gen-tag-table.py
+++ b/scripts/gen-tag-table.py
@@ -319,6 +319,10 @@ class OpenTypeRegistryParser(HTMLParser):
from_bcp_47(DefaultDict[str, AbstractSet[str]]): ``to_bcp_47``
inverted. Its values start as unsorted sets;
``sort_languages`` converts them to sorted lists.
+ from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]):
+ A copy of ``from_bcp_47``. It starts as ``None`` and is
+ populated at the beginning of the first call to
+ ``inherit_from_macrolanguages``.
"""
def __init__(self):
@@ -328,6 +332,7 @@ def __init__(self):
self.ranks = collections.defaultdict(int)
self.to_bcp_47 = collections.defaultdict(set)
self.from_bcp_47 = collections.defaultdict(set)
+ self.from_bcp_47_uninherited = None
# Whether the parser is in a <td> element
self._td = False
# Whether the parser is after a <br> element within the current <td> element
@@ -448,34 +453,56 @@ def inherit_from_macrolanguages(self):
If a BCP 47 tag for an individual mapping has no OpenType
mapping but its macrolanguage does, the mapping is copied to
- the individual language. For example, als(Tosk Albanian) has no
- explicit mapping, so it inherits from sq(Albanian) the mapping
+ the individual language. For example, als (Tosk Albanian) has no
+ explicit mapping, so it inherits from sq (Albanian) the mapping
to SQI.
+ However, if an OpenType tag maps to a BCP 47 macrolanguage and
+ some but not all of its individual languages, the mapping is not
+ inherited from the macrolanguage to the missing individual
+ languages. For example, INUK (Nunavik Inuktitut) is mapped to
+ ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to
+ ikt (Inuinnaqtun, which is an individual language of iu), so
+ this method does not add a mapping from ikt to INUK.
+
If a BCP 47 tag for a macrolanguage has no OpenType mapping but
- all of its individual languages do and they all map to the same
- tags, the mapping is copied to the macrolanguage.
+ some of its individual languages do, their mappings are copied
+ to the macrolanguage.
"""
global bcp_47
- original_ot_from_bcp_47 = dict(self.from_bcp_47)
+ first_time = self.from_bcp_47_uninherited is None
+ if first_time:
+ self.from_bcp_47_uninherited = dict(self.from_bcp_47)
for macrolanguage, languages in dict(bcp_47.macrolanguages).items():
- ot_macrolanguages = set(original_ot_from_bcp_47.get(macrolanguage, set()))
+ ot_macrolanguages = {
+ ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get(macrolanguage, set())
+ }
+ blocked_ot_macrolanguages = set()
+ if 'retired code' not in bcp_47.scopes.get(macrolanguage, ''):
+ for ot_macrolanguage in ot_macrolanguages:
+ round_trip_macrolanguages = {
+ l for l in self.to_bcp_47[ot_macrolanguage]
+ if 'retired code' not in bcp_47.scopes.get(l, '')
+ }
+ round_trip_languages = {
+ l for l in languages
+ if 'retired code' not in bcp_47.scopes.get(l, '')
+ }
+ intersection = round_trip_macrolanguages & round_trip_languages
+ if intersection and intersection != round_trip_languages:
+ blocked_ot_macrolanguages.add(ot_macrolanguage)
if ot_macrolanguages:
for ot_macrolanguage in ot_macrolanguages:
- for language in languages:
- self.add_language(language, ot_macrolanguage)
- self.ranks[ot_macrolanguage] += 1
- else:
+ if ot_macrolanguage not in blocked_ot_macrolanguages:
+ for language in languages:
+ self.add_language(language, ot_macrolanguage)
+ if not blocked_ot_macrolanguages:
+ self.ranks[ot_macrolanguage] += 1
+ elif first_time:
for language in languages:
- if language in original_ot_from_bcp_47:
- if ot_macrolanguages:
- ml = original_ot_from_bcp_47[language]
- if ml:
- ot_macrolanguages &= ml
- else:
- pass
- else:
- ot_macrolanguages |= original_ot_from_bcp_47[language]
+ if language in self.from_bcp_47_uninherited:
+ ot_macrolanguages |= self.from_bcp_47_uninherited[language]
else:
ot_macrolanguages.clear()
if not ot_macrolanguages:
@@ -561,7 +588,7 @@ def parse(self, filename):
if scope == 'macrolanguage':
scope = ' [macrolanguage]'
elif scope == 'collection':
- scope = ' [family]'
+ scope = ' [collection]'
else:
continue
self.scopes[subtag] = scope
@@ -710,6 +737,7 @@ def get_name(self, lt):
ot.add_language('oc-provenc', 'PRO')
+ot.remove_language_ot('QUZ')
ot.add_language('qu', 'QUZ')
ot.add_language('qub', 'QWH')
ot.add_language('qud', 'QVI')
@@ -742,7 +770,6 @@ def get_name(self, lt):
ot.add_language('qxt', 'QWH')
ot.add_language('qxw', 'QWH')
-bcp_47.macrolanguages['ro'].remove('mo')
bcp_47.macrolanguages['ro-MD'].add('mo')
ot.remove_language_ot('SYRE')
@@ -993,6 +1020,8 @@ def print_subtag_matches(subtag, new_line):
continue
for lt, tags in items:
+ if not tags:
+ continue
if lt.variant in bcp_47.prefixes:
expect(next(iter(bcp_47.prefixes[lt.variant])) == lt.language,
'%s is not a valid prefix of %s' %(lt.language, lt.variant))
@@ -1022,6 +1051,8 @@ def print_subtag_matches(subtag, new_line):
print(" b'%s' => {" % initial)
for lt, tags in items:
+ if not tags:
+ continue
print(' if ', end='')
script = lt.script
region = lt.region
diff --git a/scripts/gen-vowel-constraints.py b/scripts/gen-vowel-constraints.py
index 046119f7..b6e78d30 100755
--- a/scripts/gen-vowel-constraints.py
+++ b/scripts/gen-vowel-constraints.py
@@ -185,6 +185,6 @@ def __str__(self, index=0, depth=4):
print(' _ => {}')
print(' }')
-print(' buffer.swap_buffers();')
+print(' buffer.sync();')
print('}')
print()
diff --git a/scripts/ms-use/IndicPositionalCategory-Additional.txt b/scripts/ms-use/IndicPositionalCategory-Additional.txt
index 83a164e4..5a338378 100644
--- a/scripts/ms-use/IndicPositionalCategory-Additional.txt
+++ b/scripts/ms-use/IndicPositionalCategory-Additional.txt
@@ -2,7 +2,7 @@
# Not derivable
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
-# Ammended for Unicode 10.0 by Andrew Glass 2018-09-21
+# Amended for Unicode 10.0 by Andrew Glass 2018-09-21
# Updated for L2/19-083 by Andrew Glass 2019-05-06
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
@@ -58,16 +58,16 @@ AA35 ; Top # Mn CHAM CONSONANT SIGN
# Indic_Positional_Category=Bottom
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
-10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order
+10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overridden, ccc controls order
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
-10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order
+10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overridden, ccc controls order
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
-10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order
+10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overridden, ccc controls order
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
-10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order
+10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overridden, ccc controls order
10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW
-10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order
+10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overridden, ccc controls order
10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
diff --git a/src/aat/extended_kerning.rs b/src/aat/extended_kerning.rs
index 6d14ab50..60d6bd32 100644
--- a/src/aat/extended_kerning.rs
+++ b/src/aat/extended_kerning.rs
@@ -92,6 +92,8 @@ pub(crate) fn apply(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) -> Optio
continue;
}
+ buffer.unsafe_to_concat(None, None);
+
apply_simple_kerning(&subtable, plan, face, buffer);
}
kerx::Format::Format4(ref sub) => {
@@ -140,7 +142,10 @@ fn apply_simple_kerning(
}
let mut iter = SkippyIter::new(&ctx, i, 1, false);
- if !iter.next() {
+
+ let mut unsafe_to = 0;
+ if !iter.next(Some(&mut unsafe_to)) {
+ ctx.buffer.unsafe_to_concat(Some(i), Some(unsafe_to));
i += 1;
continue;
}
@@ -179,7 +184,7 @@ fn apply_simple_kerning(
}
}
- ctx.buffer.unsafe_to_break(i, j + 1)
+ ctx.buffer.unsafe_to_break(Some(i), Some(j + 1))
}
i = j;
@@ -235,7 +240,10 @@ fn apply_state_machine_kerning(
// If there's no value and we're just epsilon-transitioning to state 0, safe to break.
if entry.is_actionable() || !(entry.new_state == START_OF_TEXT && !entry.has_advance())
{
- buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1);
+ buffer.unsafe_to_break_from_outbuffer(
+ Some(buffer.backtrack_len() - 1),
+ Some(buffer.idx + 1),
+ );
}
}
@@ -249,7 +257,7 @@ fn apply_state_machine_kerning(
};
if end_entry.is_actionable() {
- buffer.unsafe_to_break(buffer.idx, buffer.idx + 2);
+ buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
}
}
diff --git a/src/aat/metamorphosis.rs b/src/aat/metamorphosis.rs
index f393231d..eeb19fc0 100644
--- a/src/aat/metamorphosis.rs
+++ b/src/aat/metamorphosis.rs
@@ -210,7 +210,10 @@ fn drive(
};
if !is_safe_to_break() && buffer.backtrack_len() > 0 && buffer.idx < buffer.len {
- buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1);
+ buffer.unsafe_to_break_from_outbuffer(
+ Some(buffer.backtrack_len() - 1),
+ Some(buffer.idx + 1),
+ );
}
c.transition(&entry, buffer);
@@ -232,7 +235,7 @@ fn drive(
}
if !c.in_place() {
- buffer.swap_buffers();
+ buffer.sync();
}
}
@@ -459,7 +462,7 @@ impl Driver for ContextualCtx<'_> {
}
if let Some(replacement) = replacement {
- buffer.unsafe_to_break(self.mark, (buffer.idx + 1).min(buffer.len));
+ buffer.unsafe_to_break(Some(self.mark), Some((buffer.idx + 1).min(buffer.len)));
buffer.info[self.mark].glyph_id = u32::from(replacement);
if let Some(face) = self.face_if_has_glyph_classes {
@@ -565,8 +568,8 @@ impl Driver for InsertionCtx<'_> {
buffer.move_to(end + usize::from(count));
buffer.unsafe_to_break_from_outbuffer(
- self.mark as usize,
- (buffer.idx + 1).min(buffer.len),
+ Some(self.mark as usize),
+ Some((buffer.idx + 1).min(buffer.len)),
);
}
diff --git a/src/buffer.rs b/src/buffer.rs
index 52ac68b1..d219fe4b 100644
--- a/src/buffer.rs
+++ b/src/buffer.rs
@@ -1,8 +1,10 @@
use alloc::{string::String, vec::Vec};
use core::convert::TryFrom;
+use std::cmp::min;
use ttf_parser::GlyphId;
+use crate::buffer::glyph_flag::{UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT};
use crate::unicode::{CharExt, GeneralCategory, GeneralCategoryExt, Space};
use crate::{script, Direction, Face, Language, Mask, Script};
@@ -12,22 +14,80 @@ pub mod glyph_flag {
/// Indicates that if input text is broken at the
/// beginning of the cluster this glyph is part of,
/// then both sides need to be re-shaped, as the
- /// result might be different. On the flip side,
- /// it means that when this flag is not present,
- /// then it's safe to break the glyph-run at the
- /// beginning of this cluster, and the two sides
- /// represent the exact same result one would get
- /// if breaking input text at the beginning of
- /// this cluster and shaping the two sides
- /// separately. This can be used to optimize
- /// paragraph layout, by avoiding re-shaping
- /// of each line after line-breaking, or limiting
- /// the reshaping to a small piece around the
- /// breaking point only.
+ /// result might be different.
+ ///
+ /// On the flip side, it means that when
+ /// this flag is not present, then it is safe
+ /// to break the glyph-run at the beginning of
+ /// this cluster, and the two sides will represent
+ /// the exact same result one would get if breaking
+ /// input text at the beginning of this cluster and
+ /// shaping the two sides separately.
+ ///
+ /// This can be used to optimize paragraph layout,
+ /// by avoiding re-shaping of each line after line-breaking.
pub const UNSAFE_TO_BREAK: u32 = 0x00000001;
+ /// Indicates that if input text is changed on one side
+ /// of the beginning of the cluster this glyph is part
+ /// of, then the shaping results for the other side
+ /// might change.
+ ///
+ /// Note that the absence of this flag will NOT by
+ /// itself mean that it IS safe to concat text. Only
+ /// two pieces of text, both of which are clear of this
+ /// flag can be concatenated safely.
+ ///
+ /// This can be used to optimize paragraph layout,
+ /// by avoiding re-shaping of each line after
+ /// line-breaking, by limiting the reshaping to a
+ /// small piece around the breaking position only,
+ /// even if the breaking position carries the
+ /// UNSAFE_TO_BREAK flag, or when hyphenation or
+ /// another text transformation happens at the
+ /// line-break position, in the following way:
+ ///
+ /// 1. Iterate back from the line-break position until
+ ///    the first cluster start position that is NOT
+ ///    unsafe-to-concat,
+ /// 2. shape the segment from there till the end of line,
+ /// 3. check whether the resulting glyph-run is also clear
+ ///    of unsafe-to-concat at its start-of-text position;
+ ///    if it is, just splice it into place and the line is
+ ///    shaped; if not, move on to a position further back
+ ///    that is clear of unsafe-to-concat and retry from
+ ///    there, and repeat.
+ ///
+ /// At the start of the next line a similar algorithm can
+ /// be implemented. That is:
+ /// 1. Iterate forward from the line-break position until
+ ///    the first cluster start position that is NOT
+ ///    unsafe-to-concat,
+ /// 2. shape the segment from the beginning of the line to
+ ///    that position,
+ /// 3. check whether the resulting glyph-run is also clear
+ ///    of unsafe-to-concat at its end-of-text position;
+ ///    if it is, just splice it into place and the beginning
+ ///    is shaped; if not, move on to a position further
+ ///    forward that is clear of unsafe-to-concat and retry
+ ///    up to there, and repeat.
+ ///
+ /// A slight complication arises when implementing the
+ /// algorithm above: while our buffer API can return flags
+ /// for the position corresponding to start-of-text, there
+ /// is currently no position corresponding to end-of-text.
+ /// This limitation can be alleviated by shaping more text
+ /// than needed and looking for the unsafe-to-concat flag
+ /// within text clusters.
+ ///
+ /// The UNSAFE_TO_BREAK flag will always imply this flag.
+ pub const UNSAFE_TO_CONCAT: u32 = 0x00000002;
/// All the currently defined flags.
- pub const DEFINED: u32 = 0x00000001; // OR of all defined flags
+ pub const DEFINED: u32 = 0x00000003; // OR of all defined flags
}
/// Holds the positions of the glyph in both horizontal and vertical directions.
@@ -514,11 +574,6 @@ pub struct Buffer {
pub flags: BufferFlags,
pub cluster_level: BufferClusterLevel,
pub invisible: Option<GlyphId>,
- pub scratch_flags: BufferScratchFlags,
- // Maximum allowed len.
- pub max_len: usize,
- /// Maximum allowed operations.
- pub max_ops: i32,
// Buffer contents.
pub direction: Direction,
@@ -540,13 +595,19 @@ pub struct Buffer {
pub info: Vec<GlyphInfo>,
pub pos: Vec<GlyphPosition>,
- serial: u32,
-
// Text before / after the main buffer contents.
// Always in Unicode, and ordered outward.
// Index 0 is for "pre-context", 1 for "post-context".
pub context: [[char; CONTEXT_LENGTH]; 2],
pub context_len: [usize; 2],
+
+ // Managed by enter / leave
+ pub serial: u8,
+ pub scratch_flags: BufferScratchFlags,
+ /// Maximum allowed len.
+ pub max_len: usize,
+ /// Maximum allowed operations.
+ pub max_ops: i32,
}
impl Buffer {
@@ -656,7 +717,6 @@ impl Buffer {
self.direction = Direction::Invalid;
self.script = None;
self.language = None;
- self.scratch_flags = BufferScratchFlags::default();
self.successful = true;
self.have_output = false;
@@ -669,13 +729,15 @@ impl Buffer {
self.out_len = 0;
self.have_separate_output = false;
- self.serial = 0;
-
self.context = [
['\0', '\0', '\0', '\0', '\0'],
['\0', '\0', '\0', '\0', '\0'],
];
self.context_len = [0, 0];
+
+ self.serial = 0;
+ self.scratch_flags = BufferScratchFlags::default();
+ self.cluster_level = BufferClusterLevel::default();
}
#[inline]
@@ -693,8 +755,13 @@ impl Buffer {
}
#[inline]
- fn next_serial(&mut self) -> u32 {
+ fn next_serial(&mut self) -> u8 {
self.serial += 1;
+
+ if self.serial == 0 {
+ self.serial += 1;
+ }
+
self.serial
}
@@ -783,10 +850,6 @@ impl Buffer {
start
}
- pub fn reverse_clusters(&mut self) {
- self.reverse_groups(_cluster_group_func, false);
- }
-
#[inline]
fn reset_clusters(&mut self) {
for (i, info) in self.info.iter_mut().enumerate() {
@@ -820,7 +883,7 @@ impl Buffer {
// TODO: language must be set
}
- pub fn swap_buffers(&mut self) {
+ pub fn sync(&mut self) {
assert!(self.have_output);
assert!(self.idx <= self.len);
@@ -1044,7 +1107,7 @@ impl Buffer {
fn merge_clusters_impl(&mut self, mut start: usize, mut end: usize) {
if self.cluster_level == BufferClusterLevel::Characters {
- self.unsafe_to_break(start, end);
+ self.unsafe_to_break(Some(start), Some(end));
return;
}
@@ -1203,44 +1266,104 @@ impl Buffer {
self.len = j;
}
- pub fn unsafe_to_break(&mut self, start: usize, end: usize) {
- if end - start < 2 {
+ pub fn unsafe_to_break(&mut self, start: Option<usize>, end: Option<usize>) {
+ self._set_glyph_flags(
+ UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT,
+ start,
+ end,
+ Some(true),
+ None,
+ );
+ }
+
+ /// Adds glyph flags in mask to infos with clusters between start and end.
+ /// The start index will be from out-buffer if from_out_buffer is true.
+ /// If interior is true, then the cluster having the minimum value is skipped.
+ fn _set_glyph_flags(
+ &mut self,
+ mask: Mask,
+ start: Option<usize>,
+ end: Option<usize>,
+ interior: Option<bool>,
+ from_out_buffer: Option<bool>,
+ ) {
+ let start = start.unwrap_or(0);
+ let end = min(end.unwrap_or(self.len), self.len);
+ let interior = interior.unwrap_or(false);
+ let from_out_buffer = from_out_buffer.unwrap_or(false);
+
+ if interior && !from_out_buffer && end - start < 2 {
return;
}
- self.unsafe_to_break_impl(start, end);
- }
+ self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS;
- fn unsafe_to_break_impl(&mut self, start: usize, end: usize) {
- let mut cluster = core::u32::MAX;
- cluster = Self::_infos_find_min_cluster(&self.info, start, end, cluster);
- let unsafe_to_break = Self::_unsafe_to_break_set_mask(&mut self.info, start, end, cluster);
- if unsafe_to_break {
- self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK;
- }
- }
+ if !from_out_buffer || !self.have_output {
+ if !interior {
+ for i in start..end {
+ self.info[i].mask |= mask;
+ }
+ } else {
+ let cluster = Self::_infos_find_min_cluster(&self.info, start, end, None);
+ if Self::_infos_set_glyph_flags(&mut self.info, start, end, cluster, mask) {
+ self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS;
+ }
+ }
+ } else {
+ assert!(start <= self.out_len);
+ assert!(self.idx <= end);
- pub fn unsafe_to_break_from_outbuffer(&mut self, start: usize, end: usize) {
- if !self.have_output {
- self.unsafe_to_break_impl(start, end);
- return;
+ if !interior {
+ for i in start..self.out_len {
+ self.out_info_mut()[i].mask |= mask;
+ }
+
+ for i in self.idx..end {
+ self.info[i].mask |= mask;
+ }
+ } else {
+ let mut cluster = Self::_infos_find_min_cluster(&self.info, self.idx, end, None);
+ cluster = Self::_infos_find_min_cluster(
+ &self.out_info(),
+ start,
+ self.out_len,
+ Some(cluster),
+ );
+
+ let out_len = self.out_len;
+ let first = Self::_infos_set_glyph_flags(
+ &mut self.out_info_mut(),
+ start,
+ out_len,
+ cluster,
+ mask,
+ );
+ let second =
+ Self::_infos_set_glyph_flags(&mut self.info, self.idx, end, cluster, mask);
+
+ if first || second {
+ self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS;
+ }
+ }
}
+ }
- assert!(start <= self.out_len);
- assert!(self.idx <= end);
+ pub fn unsafe_to_concat(&mut self, start: Option<usize>, end: Option<usize>) {
+ self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(true), None);
+ }
- let mut cluster = core::u32::MAX;
- cluster = Self::_infos_find_min_cluster(self.out_info(), start, self.out_len, cluster);
- cluster = Self::_infos_find_min_cluster(&self.info, self.idx, end, cluster);
- let idx = self.idx;
- let out_len = self.out_len;
- let unsafe_to_break1 =
- Self::_unsafe_to_break_set_mask(self.out_info_mut(), start, out_len, cluster);
- let unsafe_to_break2 = Self::_unsafe_to_break_set_mask(&mut self.info, idx, end, cluster);
+ pub fn unsafe_to_break_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) {
+ self._set_glyph_flags(
+ UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT,
+ start,
+ end,
+ Some(true),
+ Some(true),
+ );
+ }
- if unsafe_to_break1 || unsafe_to_break2 {
- self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK;
- }
+ pub fn unsafe_to_concat_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) {
+ self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(false), Some(true));
}
pub fn move_to(&mut self, i: usize) -> bool {
@@ -1384,22 +1507,43 @@ impl Buffer {
pub fn set_cluster(info: &mut GlyphInfo, cluster: u32, mask: Mask) {
if info.cluster != cluster {
- if mask & glyph_flag::UNSAFE_TO_BREAK != 0 {
- info.mask |= glyph_flag::UNSAFE_TO_BREAK;
- } else {
- info.mask &= !glyph_flag::UNSAFE_TO_BREAK;
- }
+ info.mask = (info.mask & !glyph_flag::DEFINED) | (mask & glyph_flag::DEFINED);
}
info.cluster = cluster;
}
+ // Called around shape()
+ pub(crate) fn enter(&mut self) {
+ self.serial = 0;
+ self.scratch_flags = BufferScratchFlags::empty();
+
+ if let Some(len) = self.len.checked_mul(Buffer::MAX_LEN_FACTOR) {
+ self.max_len = len.max(Buffer::MAX_LEN_MIN);
+ }
+
+ if let Ok(len) = i32::try_from(self.len) {
+ if let Some(ops) = len.checked_mul(Buffer::MAX_OPS_FACTOR) {
+ self.max_ops = ops.max(Buffer::MAX_OPS_MIN);
+ }
+ }
+ }
+
+ // Called around shape()
+ pub(crate) fn leave(&mut self) {
+ self.max_len = Buffer::MAX_LEN_DEFAULT;
+ self.max_ops = Buffer::MAX_OPS_DEFAULT;
+ self.serial = 0;
+ }
+
fn _infos_find_min_cluster(
info: &[GlyphInfo],
start: usize,
end: usize,
- mut cluster: u32,
+ cluster: Option<u32>,
) -> u32 {
+ let mut cluster = cluster.unwrap_or(core::u32::MAX);
+
for glyph_info in &info[start..end] {
cluster = core::cmp::min(cluster, glyph_info.cluster);
}
@@ -1407,17 +1551,23 @@ impl Buffer {
cluster
}
- fn _unsafe_to_break_set_mask(
+ #[must_use]
+ fn _infos_set_glyph_flags(
info: &mut [GlyphInfo],
start: usize,
end: usize,
cluster: u32,
+ mask: Mask,
) -> bool {
+ // NOTE: Because of problems with ownership, we don't pass the scratch flags to this
+ // function, unlike in harfbuzz. Because of this, each time this function is called,
+ // the caller needs to set the "BufferScratchFlags::HAS_GLYPH_FLAGS" scratch flag
+ // themselves if the function returns true.
let mut unsafe_to_break = false;
for glyph_info in &mut info[start..end] {
if glyph_info.cluster != cluster {
+ glyph_info.mask |= mask;
unsafe_to_break = true;
- glyph_info.mask |= glyph_flag::UNSAFE_TO_BREAK;
}
}
@@ -1453,20 +1603,6 @@ impl Buffer {
}
}
- pub fn next_cluster(&self, mut start: usize) -> usize {
- if start >= self.len {
- return start;
- }
-
- let cluster = self.info[start].cluster;
- start += 1;
- while start < self.len && cluster == self.info[start].cluster {
- start += 1;
- }
-
- start
- }
-
pub fn next_syllable(&self, mut start: usize) -> usize {
if start >= self.len {
return start;
@@ -1481,27 +1617,9 @@ impl Buffer {
start
}
- pub fn next_grapheme(&self, mut start: usize) -> usize {
- if start >= self.len {
- return start;
- }
-
- start += 1;
- while start < self.len && self.info[start].is_continuation() {
- start += 1;
- }
-
- start
- }
-
#[inline]
pub fn allocate_lig_id(&mut self) -> u8 {
- let mut lig_id = self.next_serial() & 0x07;
- if lig_id == 0 {
- // In case of overflow.
- lig_id = self.next_serial() & 0x07;
- }
- lig_id as u8
+ self.next_serial() & 0x07
}
}
@@ -1600,6 +1718,8 @@ bitflags::bitflags! {
const REMOVE_DEFAULT_IGNORABLES = 1 << 4;
/// Indicates that a dotted circle should not be inserted in the rendering of incorrect character sequences (such as `<0905 093E>`).
const DO_NOT_INSERT_DOTTED_CIRCLE = 1 << 5;
+ /// Indicates that the shape() call and its variants should perform various verification processes on the results of the shaping operation on the buffer. If the verification fails, then either a buffer message is sent, if a message handler is installed on the buffer, or a message is written to standard error. In either case, the shaping result might be modified to show the failed output.
+ const VERIFY = 1 << 6;
}
}
@@ -1610,8 +1730,8 @@ bitflags::bitflags! {
const HAS_DEFAULT_IGNORABLES = 0x00000002;
const HAS_SPACE_FALLBACK = 0x00000004;
const HAS_GPOS_ATTACHMENT = 0x00000008;
- const HAS_UNSAFE_TO_BREAK = 0x00000010;
- const HAS_CGJ = 0x00000020;
+ const HAS_CGJ = 0x00000010;
+ const HAS_GLYPH_FLAGS = 0x00000020;
// Reserved for complex shapers' internal use.
const COMPLEX0 = 0x01000000;
diff --git a/src/complex/arabic.rs b/src/complex/arabic.rs
index da9e44c6..ac57a25e 100644
--- a/src/complex/arabic.rs
+++ b/src/complex/arabic.rs
@@ -130,7 +130,7 @@ const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[
],
];
-#[derive(Clone, Copy, PartialEq, Debug)]
+#[derive(Clone, Copy, PartialEq, PartialOrd, Debug)]
pub enum JoiningType {
U = 0,
L = 1,
@@ -406,7 +406,7 @@ fn apply_stch(face: &Face, buffer: &mut Buffer) {
if step == MEASURE {
extra_glyphs_needed += (n_copies * n_repeating) as usize;
} else {
- buffer.unsafe_to_break(context, end);
+ buffer.unsafe_to_break(Some(context), Some(end));
let mut x_offset = 0;
for k in (start + 1..=end).rev() {
let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph()) as i32;
@@ -513,7 +513,19 @@ fn arabic_joining(buffer: &mut Buffer) {
if entry.0 != action::NONE && prev.is_some() {
if let Some(prev) = prev {
buffer.info[prev].set_arabic_shaping_action(entry.0);
- buffer.unsafe_to_break(prev, i + 1);
+ buffer.unsafe_to_break(Some(prev), Some(i + 1));
+ }
+ }
+ // States that have a possible prev_action.
+ else {
+ if let Some(prev) = prev {
+ if this_type >= JoiningType::R || (2 <= state && state <= 5) {
+ buffer.unsafe_to_concat(Some(prev), Some(i + 1));
+ }
+ } else {
+ if this_type >= JoiningType::R {
+ buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1));
+ }
}
}
@@ -534,6 +546,13 @@ fn arabic_joining(buffer: &mut Buffer) {
if entry.0 != action::NONE && prev.is_some() {
if let Some(prev) = prev {
buffer.info[prev].set_arabic_shaping_action(entry.0);
+ buffer.unsafe_to_break(Some(prev), Some(buffer.len));
+ }
+ }
+ // States that have a possible prev_action.
+ else if 2 <= state && state <= 5 {
+ if let Some(prev) = prev {
+ buffer.unsafe_to_concat(Some(prev), Some(buffer.len));
}
}
diff --git a/src/complex/hangul.rs b/src/complex/hangul.rs
index e4f78f59..bfda01bc 100644
--- a/src/complex/hangul.rs
+++ b/src/complex/hangul.rs
@@ -89,7 +89,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
//
// - LV can be precomposed, or decomposed. Lets call those
// <LV> and <L,V>,
- // - LVT can be fully precomposed, partically precomposed, or
+ // - LVT can be fully precomposed, partially precomposed, or
// fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>.
//
// The composition / decomposition is mechanical. However, not
@@ -147,7 +147,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
// I didn't bother for now.
if start < end && end == buffer.out_len {
// Tone mark follows a valid syllable; move it in front, unless it's zero width.
- buffer.unsafe_to_break_from_outbuffer(start, buffer.idx);
+ buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx));
buffer.next_glyph();
if !is_zero_width_char(face, c) {
buffer.merge_out_clusters(start, end + 1);
@@ -209,7 +209,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
}
let offset = if t != 0 { 3 } else { 2 };
- buffer.unsafe_to_break(buffer.idx, buffer.idx + offset);
+ buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset));
// We've got a syllable ; see if it can potentially be composed.
if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) {
@@ -267,7 +267,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
continue;
} else {
// Mark unsafe between LV and T.
- buffer.unsafe_to_break(buffer.idx, buffer.idx + 2);
+ buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
}
}
@@ -310,7 +310,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
} else if tindex == 0 && buffer.idx + 1 > buffer.len && is_t(buffer.cur(1).glyph_id)
{
// Mark unsafe between LV and T.
- buffer.unsafe_to_break(buffer.idx, buffer.idx + 2);
+ buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
}
}
@@ -327,7 +327,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
buffer.next_glyph();
}
- buffer.swap_buffers();
+ buffer.sync();
}
fn is_hangul_tone(u: u32) -> bool {
diff --git a/src/complex/indic.rs b/src/complex/indic.rs
index 08cde528..e04e7eca 100644
--- a/src/complex/indic.rs
+++ b/src/complex/indic.rs
@@ -770,7 +770,7 @@ fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
let mut start = 0;
let mut end = buffer.next_syllable(0);
while start < buffer.len {
- buffer.unsafe_to_break(start, end);
+ buffer.unsafe_to_break(Some(start), Some(end));
start = end;
end = buffer.next_syllable(start);
}
@@ -1951,7 +1951,7 @@ fn final_reordering_impl(
{
buffer.info[start].mask |= plan.mask_array[indic_feature::INIT];
} else {
- buffer.unsafe_to_break(start - 1, start + 1);
+ buffer.unsafe_to_break(Some(start - 1), Some(start + 1));
}
}
}
diff --git a/src/complex/khmer.rs b/src/complex/khmer.rs
index fa788a22..e95a4dc8 100644
--- a/src/complex/khmer.rs
+++ b/src/complex/khmer.rs
@@ -160,7 +160,7 @@ fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
let mut start = 0;
let mut end = buffer.next_syllable(0);
while start < buffer.len {
- buffer.unsafe_to_break(start, end);
+ buffer.unsafe_to_break(Some(start), Some(end));
start = end;
end = buffer.next_syllable(start);
}
diff --git a/src/complex/myanmar.rs b/src/complex/myanmar.rs
index c2f9b8bb..59720b23 100644
--- a/src/complex/myanmar.rs
+++ b/src/complex/myanmar.rs
@@ -176,7 +176,7 @@ fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
let mut start = 0;
let mut end = buffer.next_syllable(0);
while start < buffer.len {
- buffer.unsafe_to_break(start, end);
+ buffer.unsafe_to_break(Some(start), Some(end));
start = end;
end = buffer.next_syllable(start);
}
diff --git a/src/complex/syllabic.rs b/src/complex/syllabic.rs
index a47f2fa2..96850794 100644
--- a/src/complex/syllabic.rs
+++ b/src/complex/syllabic.rs
@@ -72,5 +72,5 @@ pub fn insert_dotted_circles(
}
}
- buffer.swap_buffers();
+ buffer.sync();
}
diff --git a/src/complex/thai.rs b/src/complex/thai.rs
index cbda8f8c..9d73d25e 100644
--- a/src/complex/thai.rs
+++ b/src/complex/thai.rs
@@ -297,7 +297,7 @@ fn do_pua_shaping(face: &Face, buffer: &mut Buffer) {
below_edge.action
};
- buffer.unsafe_to_break(base, i);
+ buffer.unsafe_to_break(Some(base), Some(i));
if action == Action::RD {
buffer.info[base].glyph_id = pua_shape(buffer.info[base].glyph_id, action, face);
} else {
@@ -415,7 +415,7 @@ fn preprocess_text(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
}
}
- buffer.swap_buffers();
+ buffer.sync();
// If font has Thai GSUB, we are done.
if plan.script == Some(script::THAI) && !plan.ot_map.found_script(TableIndex::GSUB) {
diff --git a/src/complex/universal.rs b/src/complex/universal.rs
index e12d137c..75b1afac 100644
--- a/src/complex/universal.rs
+++ b/src/complex/universal.rs
@@ -23,6 +23,7 @@ pub const UNIVERSAL_SHAPER: ComplexShaper = ComplexShaper {
};
pub type Category = u8;
+#[allow(dead_code)]
pub mod category {
pub const O: u8 = 0; // OTHER
@@ -232,7 +233,7 @@ fn setup_syllables(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
super::universal_machine::find_syllables(buffer);
foreach_syllable!(buffer, start, end, {
- buffer.unsafe_to_break(start, end);
+ buffer.unsafe_to_break(Some(start), Some(end));
});
setup_rphf_mask(plan, buffer);
diff --git a/src/complex/vowel_constraints.rs b/src/complex/vowel_constraints.rs
index 243273e8..4e372bba 100644
--- a/src/complex/vowel_constraints.rs
+++ b/src/complex/vowel_constraints.rs
@@ -420,5 +420,5 @@ pub fn preprocess_text_vowel_constraints(buffer: &mut Buffer) {
_ => {}
}
- buffer.swap_buffers();
+ buffer.sync();
}
diff --git a/src/fallback.rs b/src/fallback.rs
index f69f63b9..087ccb0f 100644
--- a/src/fallback.rs
+++ b/src/fallback.rs
@@ -154,7 +154,7 @@ fn position_around_base(
adjust_offsets_when_zeroing: bool,
) {
let mut horizontal_dir = Direction::Invalid;
- buffer.unsafe_to_break(base, end);
+ buffer.unsafe_to_break(Some(base), Some(end));
let base_info = &buffer.info[base];
let base_pos = &buffer.pos[base];
diff --git a/src/normalize.rs b/src/normalize.rs
index d356dc25..54146ad9 100644
--- a/src/normalize.rs
+++ b/src/normalize.rs
@@ -171,7 +171,7 @@ pub fn normalize(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
}
}
- buffer.swap_buffers();
+ buffer.sync();
}
// Second round, reorder (inplace)
@@ -285,7 +285,7 @@ pub fn normalize(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
}
}
- buffer.swap_buffers();
+ buffer.sync();
}
}
@@ -366,7 +366,12 @@ fn decompose_current_character(ctx: &mut ShapeNormalizeContext, shortest: bool)
// Handle space characters.
if ctx.buffer.cur(0).general_category() == GeneralCategory::SpaceSeparator {
if let Some(space_type) = u.space_fallback() {
- if let Some(space_glyph) = ctx.face.glyph_index(u32::from(' ')) {
+ let space_glyph = ctx
+ .face
+ .glyph_index(u32::from(' '))
+ .or(ctx.buffer.invisible);
+
+ if let Some(space_glyph) = space_glyph {
ctx.buffer.cur_mut(0).set_space_fallback(space_type);
ctx.buffer.next_char(u32::from(space_glyph.0));
ctx.buffer.scratch_flags |= BufferScratchFlags::HAS_SPACE_FALLBACK;
diff --git a/src/ot/apply.rs b/src/ot/apply.rs
index 2dbec6e8..d638fe28 100644
--- a/src/ot/apply.rs
+++ b/src/ot/apply.rs
@@ -152,13 +152,13 @@ impl<'a, 'b> ApplyContext<'a, 'b> {
props |= GlyphPropsFlags::SUBSTITUTED.bits();
if ligature {
+ props |= GlyphPropsFlags::LIGATED.bits();
// In the only place that the MULTIPLIED bit is used, Uniscribe
// seems to only care about the "last" transformation between
// Ligature and Multiple substitutions. Ie. if you ligate, expand,
// and ligate again, it forgives the multiplication and acts as
// if only ligation happened. As such, clear MULTIPLIED bit.
props &= !GlyphPropsFlags::MULTIPLIED.bits();
- props |= GlyphPropsFlags::LIGATED.bits();
}
if component {
@@ -172,9 +172,13 @@ impl<'a, 'b> ApplyContext<'a, 'b> {
.map_or(false, |table| table.has_glyph_classes());
if has_glyph_classes {
+ props &= GlyphPropsFlags::PRESERVE.bits();
props = (props & !GlyphPropsFlags::CLASS_MASK.bits()) | self.face.glyph_props(glyph_id);
} else if !class_guess.is_empty() {
+ props &= GlyphPropsFlags::PRESERVE.bits();
props = (props & !GlyphPropsFlags::CLASS_MASK.bits()) | class_guess.bits();
+ } else {
+ props = props & !GlyphPropsFlags::CLASS_MASK.bits();
}
cur.set_glyph_props(props);
diff --git a/src/ot/contextual.rs b/src/ot/contextual.rs
index 8bfe902b..d1ee13b6 100644
--- a/src/ot/contextual.rs
+++ b/src/ot/contextual.rs
@@ -2,9 +2,7 @@ use ttf_parser::opentype_layout::*;
use ttf_parser::{GlyphId, LazyArray16};
use super::apply::{Apply, ApplyContext, WouldApply, WouldApplyContext};
-use super::matching::{
- match_backtrack, match_glyph, match_input, match_lookahead, MatchFunc, Matched,
-};
+use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead, MatchFunc};
use super::MAX_CONTEXT_LENGTH;
impl WouldApply for ContextLookup<'_> {
@@ -64,11 +62,32 @@ impl Apply for ContextLookup<'_> {
coverage.get(glyph).is_some()
};
- match_input(ctx, coverages_len as u16, &match_func).map(|matched| {
+ let mut match_end = 0;
+ let mut match_positions = [0; MAX_CONTEXT_LENGTH];
+
+ if match_input(
+ ctx,
+ coverages_len,
+ &match_func,
+ &mut match_end,
+ &mut match_positions,
+ None,
+ ) {
ctx.buffer
- .unsafe_to_break(ctx.buffer.idx, ctx.buffer.idx + matched.len);
- apply_lookup(ctx, usize::from(coverages_len), matched, lookups);
- })
+ .unsafe_to_break(Some(ctx.buffer.idx), Some(match_end));
+ apply_lookup(
+ ctx,
+ usize::from(coverages_len),
+ &mut match_positions,
+ match_end,
+ lookups,
+ );
+ return Some(());
+ } else {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(match_end));
+ return None;
+ }
}
}
}
@@ -208,21 +227,56 @@ impl Apply for ChainedContextLookup<'_> {
coverage.contains(glyph)
};
- if let Some(matched) = match_input(ctx, input_coverages.len(), &input) {
- if let Some(start_idx) = match_backtrack(ctx, backtrack_coverages.len(), &back)
- {
- if let Some(end_idx) =
- match_lookahead(ctx, lookahead_coverages.len(), &ahead, matched.len)
- {
- ctx.buffer
- .unsafe_to_break_from_outbuffer(start_idx, end_idx);
- apply_lookup(ctx, usize::from(input_coverages.len()), matched, lookups);
- return Some(());
- }
- }
+ let mut end_index = ctx.buffer.idx;
+ let mut match_end = 0;
+ let mut match_positions = [0; MAX_CONTEXT_LENGTH];
+
+ let input_matches = match_input(
+ ctx,
+ input_coverages.len(),
+ &input,
+ &mut match_end,
+ &mut match_positions,
+ None,
+ );
+
+ if input_matches {
+ end_index = match_end;
+ }
+
+ if !(input_matches
+ && match_lookahead(
+ ctx,
+ lookahead_coverages.len(),
+ &ahead,
+ match_end,
+ &mut end_index,
+ ))
+ {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(end_index));
+ return None;
+ }
+
+ let mut start_index = ctx.buffer.out_len;
+
+ if !match_backtrack(ctx, backtrack_coverages.len(), &back, &mut start_index) {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index));
+ return None;
}
- None
+ ctx.buffer
+ .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index));
+ apply_lookup(
+ ctx,
+ usize::from(input_coverages.len()),
+ &mut match_positions,
+ match_end,
+ lookups,
+ );
+
+ Some(())
}
}
}
@@ -291,11 +345,30 @@ fn apply_context(
match_func(glyph, value)
};
- match_input(ctx, input.len(), &match_func).map(|matched| {
+ let mut match_end = 0;
+ let mut match_positions = [0; MAX_CONTEXT_LENGTH];
+
+ if match_input(
+ ctx,
+ input.len(),
+ &match_func,
+ &mut match_end,
+ &mut match_positions,
+ None,
+ ) {
ctx.buffer
- .unsafe_to_break(ctx.buffer.idx, ctx.buffer.idx + matched.len);
- apply_lookup(ctx, usize::from(input.len()), matched, lookups);
- })
+ .unsafe_to_break(Some(ctx.buffer.idx), Some(match_end));
+ apply_lookup(
+ ctx,
+ usize::from(input.len()),
+ &mut match_positions,
+ match_end,
+ lookups,
+ );
+ return Some(());
+ }
+
+ None
}
fn apply_chain_context(
@@ -306,6 +379,8 @@ fn apply_chain_context(
match_funcs: [&MatchFunc; 3],
lookups: LazyArray16<SequenceLookupRecord>,
) -> Option<()> {
+ // NOTE: Whenever something in this method changes, we also need to
+ // change it in the `apply` implementation for ChainedContextLookup.
let f1 = |glyph, num_items| {
let index = backtrack.len() - num_items;
let value = backtrack.get(index).unwrap();
@@ -324,24 +399,55 @@ fn apply_chain_context(
match_funcs[1](glyph, value)
};
- if let Some(matched) = match_input(ctx, input.len(), &f3) {
- if let Some(start_idx) = match_backtrack(ctx, backtrack.len(), &f1) {
- if let Some(end_idx) = match_lookahead(ctx, lookahead.len(), &f2, matched.len) {
- ctx.buffer
- .unsafe_to_break_from_outbuffer(start_idx, end_idx);
- apply_lookup(ctx, usize::from(input.len()), matched, lookups);
- return Some(());
- }
- }
+ let mut end_index = ctx.buffer.idx;
+ let mut match_end = 0;
+ let mut match_positions = [0; MAX_CONTEXT_LENGTH];
+
+ let input_matches = match_input(
+ ctx,
+ input.len(),
+ &f3,
+ &mut match_end,
+ &mut match_positions,
+ None,
+ );
+
+ if input_matches {
+ end_index = match_end;
}
- None
+ if !(input_matches && match_lookahead(ctx, lookahead.len(), &f2, match_end, &mut end_index)) {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(end_index));
+ return None;
+ }
+
+ let mut start_index = ctx.buffer.out_len;
+
+ if !match_backtrack(ctx, backtrack.len(), &f1, &mut start_index) {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index));
+ return None;
+ }
+
+ ctx.buffer
+ .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index));
+ apply_lookup(
+ ctx,
+ usize::from(input.len()),
+ &mut match_positions,
+ match_end,
+ lookups,
+ );
+
+ Some(())
}
fn apply_lookup(
ctx: &mut ApplyContext,
input_len: usize,
- mut matched: Matched,
+ match_positions: &mut [usize; MAX_CONTEXT_LENGTH],
+ match_end: usize,
lookups: LazyArray16<SequenceLookupRecord>,
) {
let mut count = input_len + 1;
@@ -354,10 +460,10 @@ fn apply_lookup(
// Convert positions to new indexing.
for j in 0..count {
- matched.positions[j] = (matched.positions[j] as isize + delta) as _;
+ match_positions[j] = (match_positions[j] as isize + delta) as _;
}
- backtrack_len + matched.len
+ backtrack_len + match_end - ctx.buffer.idx
};
for record in lookups {
@@ -376,7 +482,7 @@ fn apply_lookup(
continue;
}
- if !ctx.buffer.move_to(matched.positions[idx]) {
+ if !ctx.buffer.move_to(match_positions[idx]) {
break;
}
@@ -419,12 +525,12 @@ fn apply_lookup(
// It should be possible to construct tests for both of these cases.
end = (end as isize + delta) as _;
- if end <= matched.positions[idx] {
+ if end <= match_positions[idx] {
// End might end up being smaller than match_positions[idx] if the recursed
// lookup ended up removing many items, more than we have had matched.
// Just never rewind end back and get out of here.
// https://bugs.chromium.org/p/chromium/issues/detail?id=659496
- end = matched.positions[idx];
+ end = match_positions[idx];
// There can't be any further changes.
break;
@@ -444,20 +550,18 @@ fn apply_lookup(
}
// Shift!
- matched
- .positions
- .copy_within(next..count, (next as isize + delta) as _);
+ match_positions.copy_within(next..count, (next as isize + delta) as _);
next = (next as isize + delta) as _;
count = (count as isize + delta) as _;
// Fill in new entries.
for j in idx + 1..next {
- matched.positions[j] = matched.positions[j - 1] + 1;
+ match_positions[j] = match_positions[j - 1] + 1;
}
// And fixup the rest.
while next < count {
- matched.positions[next] = (matched.positions[next] as isize + delta) as _;
+ match_positions[next] = (match_positions[next] as isize + delta) as _;
next += 1;
}
}
diff --git a/src/ot/kerning.rs b/src/ot/kerning.rs
index 4d07d1fb..566ac9cc 100644
--- a/src/ot/kerning.rs
+++ b/src/ot/kerning.rs
@@ -85,6 +85,7 @@ fn machine_kern(
cross_stream: bool,
get_kerning: impl Fn(u32, u32) -> i32,
) {
+ buffer.unsafe_to_concat(None, None);
let mut ctx = ApplyContext::new(TableIndex::GPOS, face, buffer);
ctx.lookup_mask = kern_mask;
ctx.lookup_props = u32::from(lookup_flags::IGNORE_MARKS);
@@ -99,7 +100,9 @@ fn machine_kern(
}
let mut iter = SkippyIter::new(&ctx, i, 1, false);
- if !iter.next() {
+
+ let mut unsafe_to = 0;
+ if !iter.next(Some(&mut unsafe_to)) {
i += 1;
continue;
}
@@ -135,7 +138,7 @@ fn machine_kern(
}
}
- ctx.buffer.unsafe_to_break(i, j + 1)
+ ctx.buffer.unsafe_to_break(Some(i), Some(j + 1))
}
i = j;
@@ -204,7 +207,10 @@ fn apply_state_machine_kerning(subtable: &kern::Subtable, kern_mask: Mask, buffe
if entry.has_offset()
|| !(entry.new_state == apple_layout::state::START_OF_TEXT && !entry.has_advance())
{
- buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1);
+ buffer.unsafe_to_break_from_outbuffer(
+ Some(buffer.backtrack_len() - 1),
+ Some(buffer.idx + 1),
+ );
}
}
@@ -216,7 +222,7 @@ fn apply_state_machine_kerning(subtable: &kern::Subtable, kern_mask: Mask, buffe
};
if end_entry.has_offset() {
- buffer.unsafe_to_break(buffer.idx, buffer.idx + 2);
+ buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
}
}
diff --git a/src/ot/layout.rs b/src/ot/layout.rs
index cf57a829..36be1e14 100644
--- a/src/ot/layout.rs
+++ b/src/ot/layout.rs
@@ -227,7 +227,7 @@ fn apply_string(ctx: &mut ApplyContext, lookup: &T::Lookup) {
apply_forward(ctx, lookup);
if !T::IN_PLACE {
- ctx.buffer.swap_buffers();
+ ctx.buffer.sync();
}
} else {
// in-place backward substitution/positioning
diff --git a/src/ot/matching.rs b/src/ot/matching.rs
index 6235ccf3..44d7d030 100644
--- a/src/ot/matching.rs
+++ b/src/ot/matching.rs
@@ -1,5 +1,6 @@
//! Matching of glyph patterns.
+use std::cmp::max;
use ttf_parser::GlyphId;
use super::apply::ApplyContext;
@@ -14,18 +15,14 @@ pub fn match_glyph(glyph: GlyphId, value: u16) -> bool {
glyph == GlyphId(value)
}
-// TODO: Find out whether returning this by value is slow.
-pub struct Matched {
- pub len: usize,
- pub positions: [usize; MAX_CONTEXT_LENGTH],
- pub total_component_count: u8,
-}
-
pub fn match_input(
- ctx: &ApplyContext,
+ ctx: &mut ApplyContext,
input_len: u16,
match_func: &MatchingFunc,
-) -> Option<Matched> {
+ end_position: &mut usize,
+ match_positions: &mut [usize; MAX_CONTEXT_LENGTH],
+ p_total_component_count: Option<&mut u8>,
+) -> bool {
// This is perhaps the trickiest part of OpenType... Remarks:
//
// - If all components of the ligature were marks, we call this a mark ligature.
@@ -57,7 +54,7 @@ pub fn match_input(
let count = usize::from(input_len) + 1;
if count > MAX_CONTEXT_LENGTH {
- return None;
+ return false;
}
let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, input_len, false);
@@ -66,15 +63,16 @@ pub fn match_input(
let first = ctx.buffer.cur(0);
let first_lig_id = first.lig_id();
let first_lig_comp = first.lig_comp();
- let mut positions = [0; MAX_CONTEXT_LENGTH];
let mut total_component_count = first.lig_num_comps();
let mut ligbase = Ligbase::NotChecked;
- positions[0] = ctx.buffer.idx;
+ match_positions[0] = ctx.buffer.idx;
- for position in &mut positions[1..count] {
- if !iter.next() {
- return None;
+ for position in &mut match_positions[1..count] {
+ let mut unsafe_to = 0;
+ if !iter.next(Some(&mut unsafe_to)) {
+ *end_position = unsafe_to;
+ return false;
}
*position = iter.index();
@@ -111,7 +109,7 @@ pub fn match_input(
}
if ligbase == Ligbase::MayNotSkip {
- return None;
+ return false;
}
}
} else {
@@ -119,53 +117,63 @@ pub fn match_input(
// all subsequent components should also NOT be attached to any ligature
// component, unless they are attached to the first component itself!
if this_lig_id != 0 && this_lig_comp != 0 && (this_lig_id != first_lig_id) {
- return None;
+ return false;
}
}
total_component_count += this.lig_num_comps();
}
- Some(Matched {
- len: iter.index() - ctx.buffer.idx + 1,
- positions,
- total_component_count,
- })
+ *end_position = iter.index() + 1;
+
+ if let Some(p_total_component_count) = p_total_component_count {
+ *p_total_component_count = total_component_count;
+ }
+
+ true
}
pub fn match_backtrack(
- ctx: &ApplyContext,
+ ctx: &mut ApplyContext,
backtrack_len: u16,
match_func: &MatchingFunc,
-) -> Option<usize> {
+ match_start: &mut usize,
+) -> bool {
let mut iter = SkippyIter::new(ctx, ctx.buffer.backtrack_len(), backtrack_len, true);
iter.enable_matching(match_func);
for _ in 0..backtrack_len {
- if !iter.prev() {
- return None;
+ let mut unsafe_from = 0;
+ if !iter.prev(Some(&mut unsafe_from)) {
+ *match_start = unsafe_from;
+ return false;
}
}
- Some(iter.index())
+ *match_start = iter.index();
+ true
}
pub fn match_lookahead(
- ctx: &ApplyContext,
+ ctx: &mut ApplyContext,
lookahead_len: u16,
match_func: &MatchingFunc,
- offset: usize,
-) -> Option<usize> {
- let mut iter = SkippyIter::new(ctx, ctx.buffer.idx + offset - 1, lookahead_len, true);
+ start_index: usize,
+ end_index: &mut usize,
+) -> bool {
+ let mut iter = SkippyIter::new(ctx, start_index - 1, lookahead_len, true);
iter.enable_matching(match_func);
for _ in 0..lookahead_len {
- if !iter.next() {
- return None;
+ let mut unsafe_to = 0;
+ if !iter.next(Some(&mut unsafe_to)) {
+ *end_index = unsafe_to;
+ return false;
}
}
- Some(iter.index() + 1)
+ *end_index = iter.index() + 1;
+ true
}
pub type MatchingFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a;
@@ -226,7 +234,7 @@ impl<'a, 'b> SkippyIter<'a, 'b> {
self.buf_idx
}
- pub fn next(&mut self) -> bool {
+ pub fn next(&mut self, unsafe_to: Option<&mut usize>) -> bool {
assert!(self.num_items > 0);
while self.buf_idx + usize::from(self.num_items) < self.buf_len {
self.buf_idx += 1;
@@ -244,14 +252,22 @@ impl<'a, 'b> SkippyIter<'a, 'b> {
}
if skip == Some(false) {
+ if let Some(unsafe_to) = unsafe_to {
+ *unsafe_to = self.buf_idx + 1;
+ }
+
return false;
}
}
+ if let Some(unsafe_to) = unsafe_to {
+ *unsafe_to = self.buf_idx + 1;
+ }
+
false
}
- pub fn prev(&mut self) -> bool {
+ pub fn prev(&mut self, unsafe_from: Option<&mut usize>) -> bool {
assert!(self.num_items > 0);
while self.buf_idx >= usize::from(self.num_items) {
self.buf_idx -= 1;
@@ -269,10 +285,18 @@ impl<'a, 'b> SkippyIter<'a, 'b> {
}
if skip == Some(false) {
+ if let Some(unsafe_from) = unsafe_from {
+ *unsafe_from = max(1, self.buf_idx) - 1;
+ }
+
return false;
}
}
+ if let Some(unsafe_from) = unsafe_from {
+ *unsafe_from = 0;
+ }
+
false
}
diff --git a/src/ot/position.rs b/src/ot/position.rs
index da141414..584b2c96 100644
--- a/src/ot/position.rs
+++ b/src/ot/position.rs
@@ -168,36 +168,78 @@ impl Apply for SingleAdjustment<'_> {
impl Apply for PairAdjustment<'_> {
fn apply(&self, ctx: &mut ApplyContext) -> Option<()> {
- let first = ctx.buffer.cur(0).as_glyph();
- let index = self.coverage().get(first)?;
+ let first_glyph = ctx.buffer.cur(0).as_glyph();
+ let first_glyph_coverage_index = self.coverage().get(first_glyph)?;
let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, 1, false);
- if !iter.next() {
+
+ let mut unsafe_to = 0;
+ if !iter.next(Some(&mut unsafe_to)) {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(unsafe_to));
return None;
}
- let pos = iter.index();
- let second = ctx.buffer.info[pos].as_glyph();
+ let second_glyph_index = iter.index();
+ let second_glyph = ctx.buffer.info[second_glyph_index].as_glyph();
+
+ let finish = |ctx: &mut ApplyContext, has_record2| {
+ ctx.buffer.idx = second_glyph_index;
+
+ if has_record2 {
+ ctx.buffer.idx += 1;
+ }
+
+ Some(())
+ };
+
+ let boring = |ctx: &mut ApplyContext, has_record2| {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(second_glyph_index + 1));
+ finish(ctx, has_record2)
+ };
+
+ let success = |ctx: &mut ApplyContext, flag1, flag2, has_record2| {
+ if flag1 || flag2 {
+ ctx.buffer
+ .unsafe_to_break(Some(ctx.buffer.idx), Some(second_glyph_index + 1));
+ finish(ctx, has_record2)
+ } else {
+ boring(ctx, has_record2)
+ }
+ };
+
+ let bail = |ctx: &mut ApplyContext, records: (ValueRecord, ValueRecord)| {
+ let flag1 = records.0.apply(ctx, ctx.buffer.idx);
+ let flag2 = records.1.apply(ctx, second_glyph_index);
+
+ let has_record2 = !records.1.is_empty();
+ success(ctx, flag1, flag2, has_record2)
+ };
let records = match self {
- Self::Format1 { sets, .. } => sets.get(index)?.get(second),
+ Self::Format1 { sets, .. } => {
+ sets.get(first_glyph_coverage_index)?.get(second_glyph)?
+ }
Self::Format2 {
classes, matrix, ..
} => {
- let classes = (classes.0.get(first), classes.1.get(second));
- matrix.get(classes)
+ let classes = (classes.0.get(first_glyph), classes.1.get(second_glyph));
+
+ let records = match matrix.get(classes) {
+ Some(v) => v,
+ None => {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(iter.index() + 1));
+ return None;
+ }
+ };
+
+ return bail(ctx, records);
}
- }?;
-
- let flag1 = records.0.apply(ctx, ctx.buffer.idx);
- let flag2 = records.1.apply(ctx, pos);
- // Note the intentional use of "|" instead of short-circuit "||".
- if flag1 | flag2 {
- ctx.buffer.unsafe_to_break(ctx.buffer.idx, pos + 1);
- }
+ };
- ctx.buffer.idx = pos + usize::from(flag2);
- Some(())
+ bail(ctx, records)
}
}
@@ -209,21 +251,29 @@ impl Apply for CursiveAdjustment<'_> {
let entry_this = self.sets.entry(index_this)?;
let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, 1, false);
- if !iter.prev() {
+
+ let mut unsafe_from = 0;
+ if !iter.prev(Some(&mut unsafe_from)) {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1));
return None;
}
let i = iter.index();
let prev = ctx.buffer.info[i].as_glyph();
let index_prev = self.coverage.get(prev)?;
- let exit_prev = self.sets.exit(index_prev)?;
+ let Some(exit_prev) = self.sets.exit(index_prev) else {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(iter.index()), Some(ctx.buffer.idx + 1));
+ return None;
+ };
let (exit_x, exit_y) = exit_prev.get(ctx.face);
let (entry_x, entry_y) = entry_this.get(ctx.face);
let direction = ctx.buffer.direction;
let j = ctx.buffer.idx;
- ctx.buffer.unsafe_to_break(i, j);
+ ctx.buffer.unsafe_to_break(Some(i), Some(j));
let pos = &mut ctx.buffer.pos;
match direction {
@@ -345,7 +395,10 @@ impl Apply for MarkToBaseAdjustment<'_> {
let info = &buffer.info;
loop {
- if !iter.prev() {
+ let mut unsafe_from = 0;
+ if !iter.prev(Some(&mut unsafe_from)) {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1));
return None;
}
@@ -369,12 +422,16 @@ impl Apply for MarkToBaseAdjustment<'_> {
// Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled
- let idx = iter.index();
- let base_glyph = info[idx].as_glyph();
- let base_index = self.base_coverage.get(base_glyph)?;
+ let iter_idx = iter.index();
+ let base_glyph = info[iter_idx].as_glyph();
+ let Some(base_index) = self.base_coverage.get(base_glyph) else {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1));
+ return None;
+ };
self.marks
- .apply(ctx, self.anchors, mark_index, base_index, idx)
+ .apply(ctx, self.anchors, mark_index, base_index, iter_idx)
}
}
@@ -387,20 +444,30 @@ impl Apply for MarkToLigatureAdjustment<'_> {
// Now we search backwards for a non-mark glyph
let mut iter = SkippyIter::new(ctx, buffer.idx, 1, false);
iter.set_lookup_props(u32::from(lookup_flags::IGNORE_MARKS));
- if !iter.prev() {
+
+ let mut unsafe_from = 0;
+ if !iter.prev(Some(&mut unsafe_from)) {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1));
return None;
}
// Checking that matched glyph is actually a ligature by GDEF is too strong; disabled
- let idx = iter.index();
- let lig_glyph = buffer.info[idx].as_glyph();
- let lig_index = self.ligature_coverage.get(lig_glyph)?;
+ let iter_idx = iter.index();
+ let lig_glyph = buffer.info[iter_idx].as_glyph();
+ let Some(lig_index) = self.ligature_coverage.get(lig_glyph) else {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1));
+ return None;
+ };
let lig_attach = self.ligature_array.get(lig_index)?;
// Find component to attach to
let comp_count = lig_attach.rows;
if comp_count == 0 {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1));
return None;
}
@@ -408,7 +475,7 @@ impl Apply for MarkToLigatureAdjustment<'_> {
// is identical to the ligature ID of the found ligature. If yes, we
// can directly use the component index. If not, we attach the mark
// glyph to the last component of the ligature.
- let lig_id = buffer.info[idx].lig_id();
+ let lig_id = buffer.info[iter_idx].lig_id();
let mark_id = buffer.cur(0).lig_id();
let mark_comp = u16::from(buffer.cur(0).lig_comp());
let matches = lig_id != 0 && lig_id == mark_id && mark_comp > 0;
@@ -419,7 +486,7 @@ impl Apply for MarkToLigatureAdjustment<'_> {
} - 1;
self.marks
- .apply(ctx, lig_attach, mark_index, comp_index, idx)
+ .apply(ctx, lig_attach, mark_index, comp_index, iter_idx)
}
}
@@ -432,19 +499,25 @@ impl Apply for MarkToMarkAdjustment<'_> {
// Now we search backwards for a suitable mark glyph until a non-mark glyph
let mut iter = SkippyIter::new(ctx, buffer.idx, 1, false);
iter.set_lookup_props(ctx.lookup_props & !u32::from(lookup_flags::IGNORE_FLAGS));
- if !iter.prev() {
+
+ let mut unsafe_from = 0;
+ if !iter.prev(Some(&mut unsafe_from)) {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1));
return None;
}
- let idx = iter.index();
- if !buffer.info[idx].is_mark() {
+ let iter_idx = iter.index();
+ if !buffer.info[iter_idx].is_mark() {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1));
return None;
}
let id1 = buffer.cur(0).lig_id();
- let id2 = buffer.info[idx].lig_id();
+ let id2 = buffer.info[iter_idx].lig_id();
let comp1 = buffer.cur(0).lig_comp();
- let comp2 = buffer.info[idx].lig_comp();
+ let comp2 = buffer.info[iter_idx].lig_comp();
let matches = if id1 == id2 {
// Marks belonging to the same base
@@ -457,25 +530,46 @@ impl Apply for MarkToMarkAdjustment<'_> {
};
if !matches {
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1));
return None;
}
- let mark2_glyph = buffer.info[idx].as_glyph();
+ let mark2_glyph = buffer.info[iter_idx].as_glyph();
let mark2_index = self.mark2_coverage.get(mark2_glyph)?;
self.marks
- .apply(ctx, self.mark2_matrix, mark1_index, mark2_index, idx)
+ .apply(ctx, self.mark2_matrix, mark1_index, mark2_index, iter_idx)
}
}
trait ValueRecordExt {
+ fn is_empty(&self) -> bool;
fn apply(&self, ctx: &mut ApplyContext, idx: usize) -> bool;
+ fn apply_to_pos(&self, ctx: &mut ApplyContext, pos: &mut GlyphPosition) -> bool;
}
impl ValueRecordExt for ValueRecord<'_> {
+ fn is_empty(&self) -> bool {
+ self.x_placement == 0
+ && self.y_placement == 0
+ && self.x_advance == 0
+ && self.y_advance == 0
+ && self.x_placement_device.is_none()
+ && self.y_placement_device.is_none()
+ && self.x_advance_device.is_none()
+ && self.y_advance_device.is_none()
+ }
+
fn apply(&self, ctx: &mut ApplyContext, idx: usize) -> bool {
- let horizontal = ctx.buffer.direction.is_horizontal();
let mut pos = ctx.buffer.pos[idx];
+ let worked = self.apply_to_pos(ctx, &mut pos);
+ ctx.buffer.pos[idx] = pos;
+ worked
+ }
+
+ fn apply_to_pos(&self, ctx: &mut ApplyContext, pos: &mut GlyphPosition) -> bool {
+ let horizontal = ctx.buffer.direction.is_horizontal();
let mut worked = false;
if self.x_placement != 0 {
@@ -535,7 +629,6 @@ impl ValueRecordExt for ValueRecord<'_> {
}
}
- ctx.buffer.pos[idx] = pos;
worked
}
}
@@ -568,7 +661,8 @@ impl MarkArrayExt for MarkArray<'_> {
let (mark_x, mark_y) = mark_anchor.get(ctx.face);
let (base_x, base_y) = base_anchor.get(ctx.face);
- ctx.buffer.unsafe_to_break(glyph_pos, ctx.buffer.idx);
+ ctx.buffer
+ .unsafe_to_break(Some(glyph_pos), Some(ctx.buffer.idx + 1));
let idx = ctx.buffer.idx;
let pos = ctx.buffer.cur_pos_mut();
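
All of the mark-attachment hunks above follow one pattern: wherever the old code gave up silently (via ? or a bare return None), the new code first records the span it scanned as unsafe to concatenate, so that a caller shaping text piecewise knows this range must be re-shaped together with its context. The fragment below is a deliberately simplified, self-contained sketch of that idea; the constant, struct, and function names are stand-ins, not rustybuzz's real buffer API, and the real buffer method additionally rounds the range out to cluster boundaries.

    // Hypothetical flag bit; the real crate defines its own glyph_flag constants.
    const UNSAFE_TO_CONCAT: u32 = 0x0002;

    struct GlyphInfo {
        mask: u32,
    }

    // Simplified stand-in for unsafe_to_concat(from, to): OR the flag into the
    // mask of every glyph in the half-open range [from, to). The real method
    // also widens the range to full clusters and has an "_from_outbuffer"
    // variant for use while substitution is writing to the out-buffer.
    fn unsafe_to_concat(info: &mut [GlyphInfo], from: usize, to: usize) {
        let to = to.min(info.len());
        for glyph in &mut info[from..to] {
            glyph.mask |= UNSAFE_TO_CONCAT;
        }
    }

    fn main() {
        let mut info = [
            GlyphInfo { mask: 0 },
            GlyphInfo { mask: 0 },
            GlyphInfo { mask: 0 },
        ];
        // E.g. a MarkToBase match failed while scanning back from index 2 to 1.
        unsafe_to_concat(&mut info, 1, 3);
        assert_eq!(info[0].mask, 0);
        assert_eq!(info[1].mask, UNSAFE_TO_CONCAT);
        assert_eq!(info[2].mask, UNSAFE_TO_CONCAT);
    }
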
diff --git a/src/ot/substitute.rs b/src/ot/substitute.rs
index 41f310f1..5b11926d 100644
--- a/src/ot/substitute.rs
+++ b/src/ot/substitute.rs
@@ -9,9 +9,10 @@ use crate::unicode::GeneralCategory;
use crate::Face;
use super::apply::{Apply, ApplyContext, WouldApply, WouldApplyContext};
-use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead, Matched};
+use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead};
use super::{
- LayoutLookup, LayoutTable, Map, SubstLookup, SubstitutionTable, TableIndex, MAX_NESTING_LEVEL,
+ LayoutLookup, LayoutTable, Map, SubstLookup, SubstitutionTable, TableIndex, MAX_CONTEXT_LENGTH,
+ MAX_NESTING_LEVEL,
};
use ttf_parser::opentype_layout::LookupIndex;
@@ -224,7 +225,7 @@ impl Apply for AlternateSet<'_> {
if alt_index == Map::MAX_VALUE && ctx.random {
// Maybe we can do better than unsafe-to-break all; but since we are
// changing random state, it would be hard to track that. Good 'nough.
- ctx.buffer.unsafe_to_break(0, ctx.buffer.len);
+ ctx.buffer.unsafe_to_break(Some(0), Some(ctx.buffer.len));
alt_index = ctx.random_number() % u32::from(len) + 1;
}
@@ -296,15 +297,47 @@ impl Apply for Ligature<'_> {
match_glyph(glyph, value.0)
};
- match_input(ctx, self.components.len(), &f).map(|matched| {
- let count = usize::from(self.components.len()) + 1;
- ligate(ctx, count, matched, self.glyph);
- })
+ let mut match_end = 0;
+ let mut match_positions = [0; MAX_CONTEXT_LENGTH];
+ let mut total_component_count = 0;
+
+ if !match_input(
+ ctx,
+ self.components.len(),
+ &f,
+ &mut match_end,
+ &mut match_positions,
+ Some(&mut total_component_count),
+ ) {
+ ctx.buffer
+ .unsafe_to_concat(Some(ctx.buffer.idx), Some(match_end));
+ return None;
+ }
+
+ let count = usize::from(self.components.len()) + 1;
+ ligate(
+ ctx,
+ count,
+ &match_positions,
+ match_end,
+ total_component_count,
+ self.glyph,
+ );
+ return Some(());
}
}
}
-fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: GlyphId) {
+fn ligate(
+ ctx: &mut ApplyContext,
+ // Including the first glyph
+ count: usize,
+ // Including the first glyph
+ match_positions: &[usize; MAX_CONTEXT_LENGTH],
+ match_end: usize,
+ total_component_count: u8,
+ lig_glyph: GlyphId,
+) {
// - If a base and one or more marks ligate, consider that as a base, NOT
// ligature, such that all following marks can still attach to it.
// https://github.com/harfbuzz/harfbuzz/issues/1109
@@ -338,12 +371,12 @@ fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: Gly
//
let mut buffer = &mut ctx.buffer;
- buffer.merge_clusters(buffer.idx, buffer.idx + matched.len);
+ buffer.merge_clusters(buffer.idx, match_end);
- let mut is_base_ligature = buffer.info[matched.positions[0]].is_base_glyph();
- let mut is_mark_ligature = buffer.info[matched.positions[0]].is_mark();
+ let mut is_base_ligature = buffer.info[match_positions[0]].is_base_glyph();
+ let mut is_mark_ligature = buffer.info[match_positions[0]].is_mark();
for i in 1..count {
- if !buffer.info[matched.positions[i]].is_mark() {
+ if !buffer.info[match_positions[i]].is_mark() {
is_base_ligature = false;
is_mark_ligature = false;
}
@@ -366,7 +399,7 @@ fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: Gly
let mut comps_so_far = last_num_comps;
if is_ligature {
- first.set_lig_props_for_ligature(lig_id, matched.total_component_count);
+ first.set_lig_props_for_ligature(lig_id, total_component_count);
if first.general_category() == GeneralCategory::NonspacingMark {
first.set_general_category(GeneralCategory::OtherLetter);
}
@@ -376,7 +409,7 @@ fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: Gly
buffer = &mut ctx.buffer;
for i in 1..count {
- while buffer.idx < matched.positions[i] && buffer.successful {
+ while buffer.idx < match_positions[i] && buffer.successful {
if is_ligature {
let cur = buffer.cur_mut(0);
let mut this_comp = cur.lig_comp();
@@ -450,10 +483,19 @@ impl Apply for ReverseChainSingleSubstitution<'_> {
value.contains(glyph)
};
- if let Some(start_idx) = match_backtrack(ctx, self.backtrack_coverages.len(), &f1) {
- if let Some(end_idx) = match_lookahead(ctx, self.lookahead_coverages.len(), &f2, 1) {
+ let mut start_index = 0;
+ let mut end_index = 0;
+
+ if match_backtrack(ctx, self.backtrack_coverages.len(), &f1, &mut start_index) {
+ if match_lookahead(
+ ctx,
+ self.lookahead_coverages.len(),
+ &f2,
+ ctx.buffer.idx + 1,
+ &mut end_index,
+ ) {
ctx.buffer
- .unsafe_to_break_from_outbuffer(start_idx, end_idx);
+ .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index));
ctx.replace_glyph_inplace(subst);
// Note: We DON'T decrease buffer.idx. The main loop does it
@@ -463,6 +505,8 @@ impl Apply for ReverseChainSingleSubstitution<'_> {
}
}
- None
+ ctx.buffer
+ .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index));
+ return None;
}
}
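
In substitute.rs the same principle changes the shape of the low-level matchers: match_input, match_backtrack, and match_lookahead no longer return an Option value but fill out-parameters and return a bool, so even a failed match reports how far it scanned and the caller can flag that span before returning None. A minimal, self-contained sketch of that calling convention (assumed shapes, not the crate's real signatures) follows.

    const MAX_CONTEXT_LENGTH: usize = 64;

    // Match `component_count` glyphs after `start` against a predicate,
    // recording the matched positions and, crucially, the end of the scanned
    // region even when the match fails, so the caller can mark
    // [start, *match_end) as unsafe to concatenate.
    fn match_input(
        glyphs: &[u32],
        start: usize,
        component_count: usize,
        matches: impl Fn(u32) -> bool,
        match_end: &mut usize,
        match_positions: &mut [usize; MAX_CONTEXT_LENGTH],
    ) -> bool {
        if component_count >= MAX_CONTEXT_LENGTH {
            *match_end = start + 1;
            return false;
        }
        match_positions[0] = start;
        *match_end = start + 1;
        let mut pos = start;
        for i in 1..=component_count {
            pos += 1;
            if pos >= glyphs.len() {
                *match_end = glyphs.len();
                return false;
            }
            *match_end = pos + 1; // reported to the caller even on failure below
            if !matches(glyphs[pos]) {
                return false;
            }
            match_positions[i] = pos;
        }
        true
    }

    fn main() {
        let glyphs = [10, 20, 99, 30];
        let mut match_end = 0;
        let mut match_positions = [0; MAX_CONTEXT_LENGTH];
        let ok = match_input(&glyphs, 0, 2, |g| g == 20, &mut match_end, &mut match_positions);
        assert!(!ok); // glyph 99 broke the match...
        assert_eq!(match_end, 3); // ...but we still know how far the scan reached
    }
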
diff --git a/src/shape.rs b/src/shape.rs
index 7c3311e0..b8d39d0a 100644
--- a/src/shape.rs
+++ b/src/shape.rs
@@ -1,5 +1,3 @@
-use core::convert::TryFrom;
-
use crate::buffer::{
glyph_flag, Buffer, BufferClusterLevel, BufferFlags, BufferScratchFlags, GlyphInfo,
GlyphPropsFlags,
@@ -77,17 +75,7 @@ struct ShapeContext<'a> {
// Pull it all together!
fn shape_internal(ctx: &mut ShapeContext) {
- ctx.buffer.scratch_flags = BufferScratchFlags::empty();
-
- if let Some(len) = ctx.buffer.len.checked_mul(Buffer::MAX_LEN_FACTOR) {
- ctx.buffer.max_len = len.max(Buffer::MAX_LEN_MIN);
- }
-
- if let Ok(len) = i32::try_from(ctx.buffer.len) {
- if let Some(ops) = len.checked_mul(Buffer::MAX_OPS_FACTOR) {
- ctx.buffer.max_ops = ops.max(Buffer::MAX_OPS_MIN);
- }
- }
+ ctx.buffer.enter();
initialize_masks(ctx);
set_unicode_props(ctx.buffer);
@@ -108,8 +96,7 @@ fn shape_internal(ctx: &mut ShapeContext) {
propagate_flags(ctx.buffer);
ctx.buffer.direction = ctx.target_direction;
- ctx.buffer.max_len = Buffer::MAX_LEN_DEFAULT;
- ctx.buffer.max_ops = Buffer::MAX_OPS_DEFAULT;
+ ctx.buffer.leave();
}
fn substitute_pre(ctx: &mut ShapeContext) {
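
The removed blocks above are what Buffer::enter() now encapsulates, with leave() restoring the defaults at the end of shape_internal. Below is a self-contained sketch of just the limit bookkeeping, reconstructed from the removed lines; the struct and the constant values are stand-ins chosen to make it runnable, and the real enter()/leave() may reset additional state.

    struct Limits {
        len: usize,
        max_len: usize,
        max_ops: i32,
    }

    impl Limits {
        // Assumed values, for illustration only.
        const MAX_LEN_FACTOR: usize = 64;
        const MAX_LEN_MIN: usize = 16384;
        const MAX_LEN_DEFAULT: usize = 0x3FFF_FFFF;
        const MAX_OPS_FACTOR: i32 = 1024;
        const MAX_OPS_MIN: i32 = 16384;
        const MAX_OPS_DEFAULT: i32 = 0x1FFF_FFFF;

        // Derive per-run work limits from the current buffer length,
        // mirroring the code removed from shape_internal() above.
        fn enter(&mut self) {
            if let Some(max_len) = self.len.checked_mul(Self::MAX_LEN_FACTOR) {
                self.max_len = max_len.max(Self::MAX_LEN_MIN);
            }
            if self.len <= i32::MAX as usize {
                if let Some(ops) = (self.len as i32).checked_mul(Self::MAX_OPS_FACTOR) {
                    self.max_ops = ops.max(Self::MAX_OPS_MIN);
                }
            }
        }

        // Restore the defaults, mirroring the code removed at the end of
        // shape_internal().
        fn leave(&mut self) {
            self.max_len = Self::MAX_LEN_DEFAULT;
            self.max_ops = Self::MAX_OPS_DEFAULT;
        }
    }

    fn main() {
        let mut limits = Limits { len: 100, max_len: 0, max_ops: 0 };
        limits.enter();
        assert_eq!(limits.max_len, Limits::MAX_LEN_MIN); // 100 * 64 is below the floor
        limits.leave();
        assert_eq!(limits.max_len, Limits::MAX_LEN_DEFAULT);
    }
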
@@ -213,7 +200,7 @@ fn position_complex(ctx: &mut ShapeContext) {
// hanging over the next glyph after the final reordering.
//
// Note: If fallback positioning happens, we don't care about
- // this as it will be overriden.
+ // this as it will be overridden.
let adjust_offsets_when_zeroing =
ctx.plan.adjust_mark_positioning_when_zeroing && ctx.buffer.direction.is_forward();
@@ -328,7 +315,7 @@ fn setup_masks_fraction(ctx: &mut ShapeContext) {
end += 1;
}
- buffer.unsafe_to_break(start, end);
+ buffer.unsafe_to_break(Some(start), Some(end));
for info in &mut buffer.info[start..i] {
info.mask |= pre_mask;
@@ -427,7 +414,7 @@ fn insert_dotted_circle(buffer: &mut Buffer, face: &Face) {
info.init_unicode_props(&mut buffer.scratch_flags);
buffer.clear_output();
buffer.output_info(info);
- buffer.swap_buffers();
+ buffer.sync();
}
}
@@ -440,7 +427,7 @@ fn form_clusters(buffer: &mut Buffer) {
foreach_grapheme!(buffer, start, end, { buffer.merge_clusters(start, end) });
} else {
foreach_grapheme!(buffer, start, end, {
- buffer.unsafe_to_break(start, end);
+ buffer.unsafe_to_break(Some(start), Some(end));
});
}
}
@@ -624,15 +611,17 @@ fn propagate_flags(buffer: &mut Buffer) {
// Simplifies using them.
if buffer
.scratch_flags
- .contains(BufferScratchFlags::HAS_UNSAFE_TO_BREAK)
+ .contains(BufferScratchFlags::HAS_GLYPH_FLAGS)
{
foreach_cluster!(buffer, start, end, {
+ let mut mask = 0;
for info in &buffer.info[start..end] {
- if info.mask & glyph_flag::UNSAFE_TO_BREAK != 0 {
- for info in &mut buffer.info[start..end] {
- info.mask |= glyph_flag::UNSAFE_TO_BREAK;
- }
- break;
+ mask |= info.mask & glyph_flag::DEFINED;
+ }
+
+ if mask != 0 {
+ for info in &mut buffer.info[start..end] {
+ info.mask |= mask;
}
}
});
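
The propagate_flags change generalizes the old loop, which only spread UNSAFE_TO_BREAK, into one that ORs every defined glyph flag within a cluster and applies the combined mask to all glyphs of that cluster. A standalone sketch of the per-cluster step, with made-up flag constants, looks like this:

    // Assumed flag constants for illustration only.
    const UNSAFE_TO_BREAK: u32 = 0x0001;
    const UNSAFE_TO_CONCAT: u32 = 0x0002;
    const DEFINED: u32 = UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT;

    // Spread every defined glyph flag found inside a cluster onto all glyphs
    // of that cluster, mirroring the new body of the foreach_cluster! loop.
    fn propagate_cluster_flags(masks: &mut [u32]) {
        let mut mask = 0;
        for m in masks.iter() {
            mask |= *m & DEFINED;
        }
        if mask != 0 {
            for m in masks.iter_mut() {
                *m |= mask;
            }
        }
    }

    fn main() {
        let mut cluster = [UNSAFE_TO_BREAK, 0, UNSAFE_TO_CONCAT];
        propagate_cluster_flags(&mut cluster);
        assert!(cluster.iter().all(|m| (*m & DEFINED) == DEFINED));
    }
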
diff --git a/src/tag_table.rs b/src/tag_table.rs
index cce46717..75e68820 100644
--- a/src/tag_table.rs
+++ b/src/tag_table.rs
@@ -62,7 +62,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "an", tag: Tag::from_bytes(b"ARG ") }, // Aragonese
// LangTag { language: "ang", tag: Tag::from_bytes(b"ANG ") }, // Old English (ca. 450-1100) -> Anglo-Saxon
LangTag { language: "aoa", tag: Tag::from_bytes(b"CPP ") }, // Angolar -> Creoles
- LangTag { language: "apa", tag: Tag::from_bytes(b"ATH ") }, // Apache [family] -> Athapaskan
+ LangTag { language: "apa", tag: Tag::from_bytes(b"ATH ") }, // Apache [collection] -> Athapaskan
LangTag { language: "apc", tag: Tag::from_bytes(b"ARA ") }, // Levantine Arabic -> Arabic
LangTag { language: "apd", tag: Tag::from_bytes(b"ARA ") }, // Sudanese Arabic -> Arabic
LangTag { language: "apj", tag: Tag::from_bytes(b"ATH ") }, // Jicarilla Apache -> Athapaskan
@@ -82,7 +82,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "arz", tag: Tag::from_bytes(b"ARA ") }, // Egyptian Arabic -> Arabic
LangTag { language: "as", tag: Tag::from_bytes(b"ASM ") }, // Assamese
// LangTag { language: "ast", tag: Tag::from_bytes(b"AST ") }, // Asturian
-// LangTag { language: "ath", tag: Tag::from_bytes(b"ATH ") }, // Athapascan [family] -> Athapaskan
+// LangTag { language: "ath", tag: Tag::from_bytes(b"ATH ") }, // Athapascan [collection] -> Athapaskan
LangTag { language: "atj", tag: Tag::from_bytes(b"RCR ") }, // Atikamekw -> R-Cree
LangTag { language: "atv", tag: Tag::from_bytes(b"ALT ") }, // Northern Altai -> Altai
LangTag { language: "auj", tag: Tag::from_bytes(b"BBR ") }, // Awjilah -> Berber
@@ -106,10 +106,10 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "azn", tag: Tag::from_bytes(b"NAH ") }, // Western Durango Nahuatl -> Nahuatl
LangTag { language: "azz", tag: Tag::from_bytes(b"NAH ") }, // Highland Puebla Nahuatl -> Nahuatl
LangTag { language: "ba", tag: Tag::from_bytes(b"BSH ") }, // Bashkir
- LangTag { language: "bad", tag: Tag::from_bytes(b"BAD0") }, // Banda [family]
+ LangTag { language: "bad", tag: Tag::from_bytes(b"BAD0") }, // Banda [collection]
LangTag { language: "bag", tag: Tag(0) }, // Tuki != Baghelkhandi
LangTag { language: "bah", tag: Tag::from_bytes(b"CPP ") }, // Bahamas Creole English -> Creoles
- LangTag { language: "bai", tag: Tag::from_bytes(b"BML ") }, // Bamileke [family]
+ LangTag { language: "bai", tag: Tag::from_bytes(b"BML ") }, // Bamileke [collection]
LangTag { language: "bal", tag: Tag::from_bytes(b"BLI ") }, // Baluchi [macrolanguage]
// LangTag { language: "ban", tag: Tag::from_bytes(b"BAN ") }, // Balinese
// LangTag { language: "bar", tag: Tag::from_bytes(b"BAR ") }, // Bavarian
@@ -131,7 +131,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "bea", tag: Tag::from_bytes(b"ATH ") }, // Beaver -> Athapaskan
LangTag { language: "beb", tag: Tag::from_bytes(b"BTI ") }, // Bebele -> Beti
// LangTag { language: "bem", tag: Tag::from_bytes(b"BEM ") }, // Bemba (Zambia)
- LangTag { language: "ber", tag: Tag::from_bytes(b"BBR ") }, // Berber [family]
+ LangTag { language: "ber", tag: Tag::from_bytes(b"BBR ") }, // Berber [collection]
LangTag { language: "bew", tag: Tag::from_bytes(b"CPP ") }, // Betawi -> Creoles
LangTag { language: "bfl", tag: Tag::from_bytes(b"BAD0") }, // Banda-Ndélé -> Banda
LangTag { language: "bfq", tag: Tag::from_bytes(b"BAD ") }, // Badaga
@@ -199,7 +199,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "btd", tag: Tag::from_bytes(b"BTK ") }, // Batak Dairi -> Batak
LangTag { language: "bti", tag: Tag(0) }, // Burate != Beti
LangTag { language: "btj", tag: Tag::from_bytes(b"MLY ") }, // Bacanese Malay -> Malay
-// LangTag { language: "btk", tag: Tag::from_bytes(b"BTK ") }, // Batak [family]
+// LangTag { language: "btk", tag: Tag::from_bytes(b"BTK ") }, // Batak [collection]
LangTag { language: "btm", tag: Tag::from_bytes(b"BTM ") }, // Batak Mandailing
LangTag { language: "btm", tag: Tag::from_bytes(b"BTK ") }, // Batak Mandailing -> Batak
LangTag { language: "bto", tag: Tag::from_bytes(b"BIK ") }, // Rinconada Bikol -> Bikol
@@ -252,6 +252,8 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "chh", tag: Tag(0) }, // Chinook != Chattisgarhi
LangTag { language: "chj", tag: Tag::from_bytes(b"CCHN") }, // Ojitlán Chinantec -> Chinantec
LangTag { language: "chk", tag: Tag::from_bytes(b"CHK0") }, // Chuukese
+ LangTag { language: "chm", tag: Tag::from_bytes(b"HMA ") }, // Mari (Russia) [macrolanguage] -> High Mari
+ LangTag { language: "chm", tag: Tag::from_bytes(b"LMA ") }, // Mari (Russia) [macrolanguage] -> Low Mari
LangTag { language: "chn", tag: Tag::from_bytes(b"CPP ") }, // Chinook jargon -> Creoles
// LangTag { language: "cho", tag: Tag::from_bytes(b"CHO ") }, // Choctaw
LangTag { language: "chp", tag: Tag::from_bytes(b"CHP ") }, // Chipewyan
@@ -293,10 +295,10 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
// LangTag { language: "cop", tag: Tag::from_bytes(b"COP ") }, // Coptic
LangTag { language: "coq", tag: Tag::from_bytes(b"ATH ") }, // Coquille -> Athapaskan
LangTag { language: "cpa", tag: Tag::from_bytes(b"CCHN") }, // Palantla Chinantec -> Chinantec
- LangTag { language: "cpe", tag: Tag::from_bytes(b"CPP ") }, // English-based creoles and pidgins [family] -> Creoles
- LangTag { language: "cpf", tag: Tag::from_bytes(b"CPP ") }, // French-based creoles and pidgins [family] -> Creoles
+ LangTag { language: "cpe", tag: Tag::from_bytes(b"CPP ") }, // English-based creoles and pidgins [collection] -> Creoles
+ LangTag { language: "cpf", tag: Tag::from_bytes(b"CPP ") }, // French-based creoles and pidgins [collection] -> Creoles
LangTag { language: "cpi", tag: Tag::from_bytes(b"CPP ") }, // Chinese Pidgin English -> Creoles
-// LangTag { language: "cpp", tag: Tag::from_bytes(b"CPP ") }, // Portuguese-based creoles and pidgins [family] -> Creoles
+// LangTag { language: "cpp", tag: Tag::from_bytes(b"CPP ") }, // Portuguese-based creoles and pidgins [collection] -> Creoles
LangTag { language: "cpx", tag: Tag::from_bytes(b"ZHS ") }, // Pu-Xian Chinese -> Chinese, Simplified
LangTag { language: "cqd", tag: Tag::from_bytes(b"HMN ") }, // Chuanqiandian Cluster Miao -> Hmong
LangTag { language: "cqu", tag: Tag::from_bytes(b"QUH ") }, // Chilean Quechua(retired code) -> Quechua (Bolivia)
@@ -316,7 +318,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "crm", tag: Tag::from_bytes(b"MCR ") }, // Moose Cree
LangTag { language: "crm", tag: Tag::from_bytes(b"LCR ") }, // Moose Cree -> L-Cree
LangTag { language: "crm", tag: Tag::from_bytes(b"CRE ") }, // Moose Cree -> Cree
- LangTag { language: "crp", tag: Tag::from_bytes(b"CPP ") }, // Creoles and pidgins [family] -> Creoles
+ LangTag { language: "crp", tag: Tag::from_bytes(b"CPP ") }, // Creoles and pidgins [collection] -> Creoles
LangTag { language: "crr", tag: Tag(0) }, // Carolina Algonquian != Carrier
LangTag { language: "crs", tag: Tag::from_bytes(b"CPP ") }, // Seselwa Creole French -> Creoles
LangTag { language: "crt", tag: Tag(0) }, // Iyojwa'ja Chorote != Crimean Tatar
@@ -427,7 +429,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "et", tag: Tag::from_bytes(b"ETI ") }, // Estonian [macrolanguage]
LangTag { language: "eto", tag: Tag::from_bytes(b"BTI ") }, // Eton (Cameroon) -> Beti
LangTag { language: "eu", tag: Tag::from_bytes(b"EUQ ") }, // Basque
- LangTag { language: "euq", tag: Tag(0) }, // Basque [family] != Basque
+ LangTag { language: "euq", tag: Tag(0) }, // Basque [collection] != Basque
LangTag { language: "eve", tag: Tag::from_bytes(b"EVN ") }, // Even
LangTag { language: "evn", tag: Tag::from_bytes(b"EVK ") }, // Evenki
LangTag { language: "ewo", tag: Tag::from_bytes(b"BTI ") }, // Ewondo -> Beti
@@ -616,13 +618,12 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "ijc", tag: Tag::from_bytes(b"IJO ") }, // Izon -> Ijo
LangTag { language: "ije", tag: Tag::from_bytes(b"IJO ") }, // Biseni -> Ijo
LangTag { language: "ijn", tag: Tag::from_bytes(b"IJO ") }, // Kalabari -> Ijo
-// LangTag { language: "ijo", tag: Tag::from_bytes(b"IJO ") }, // Ijo [family]
+// LangTag { language: "ijo", tag: Tag::from_bytes(b"IJO ") }, // Ijo [collection]
LangTag { language: "ijs", tag: Tag::from_bytes(b"IJO ") }, // Southeast Ijo -> Ijo
LangTag { language: "ik", tag: Tag::from_bytes(b"IPK ") }, // Inupiaq [macrolanguage] -> Inupiat
LangTag { language: "ike", tag: Tag::from_bytes(b"INU ") }, // Eastern Canadian Inuktitut -> Inuktitut
LangTag { language: "ike", tag: Tag::from_bytes(b"INUK") }, // Eastern Canadian Inuktitut -> Nunavik Inuktitut
LangTag { language: "ikt", tag: Tag::from_bytes(b"INU ") }, // Inuinnaqtun -> Inuktitut
- LangTag { language: "ikt", tag: Tag::from_bytes(b"INUK") }, // Inuinnaqtun -> Nunavik Inuktitut
// LangTag { language: "ilo", tag: Tag::from_bytes(b"ILO ") }, // Iloko -> Ilokano
LangTag { language: "in", tag: Tag::from_bytes(b"IND ") }, // Indonesian(retired code)
LangTag { language: "in", tag: Tag::from_bytes(b"MLY ") }, // Indonesian(retired code) -> Malay
@@ -666,7 +667,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "kab", tag: Tag::from_bytes(b"BBR ") }, // Kabyle -> Berber
LangTag { language: "kac", tag: Tag(0) }, // Kachin != Kachchi
LangTag { language: "kam", tag: Tag::from_bytes(b"KMB ") }, // Kamba (Kenya)
- LangTag { language: "kar", tag: Tag::from_bytes(b"KRN ") }, // Karen [family]
+ LangTag { language: "kar", tag: Tag::from_bytes(b"KRN ") }, // Karen [collection]
// LangTag { language: "kaw", tag: Tag::from_bytes(b"KAW ") }, // Kawi (Old Javanese)
LangTag { language: "kbd", tag: Tag::from_bytes(b"KAB ") }, // Kabardian
LangTag { language: "kby", tag: Tag::from_bytes(b"KNR ") }, // Manga Kanuri -> Kanuri
@@ -875,7 +876,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "mam", tag: Tag::from_bytes(b"MAM ") }, // Mam
LangTag { language: "mam", tag: Tag::from_bytes(b"MYN ") }, // Mam -> Mayan
LangTag { language: "man", tag: Tag::from_bytes(b"MNK ") }, // Mandingo [macrolanguage] -> Maninka
- LangTag { language: "map", tag: Tag(0) }, // Austronesian [family] != Mapudungun
+ LangTag { language: "map", tag: Tag(0) }, // Austronesian [collection] != Mapudungun
LangTag { language: "maw", tag: Tag(0) }, // Mampruli != Marwari
LangTag { language: "max", tag: Tag::from_bytes(b"MLY ") }, // North Moluccan Malay -> Malay
LangTag { language: "max", tag: Tag::from_bytes(b"CPP ") }, // North Moluccan Malay -> Creoles
@@ -935,6 +936,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "mnw", tag: Tag::from_bytes(b"MONT") }, // Mon -> Thailand Mon
LangTag { language: "mnx", tag: Tag(0) }, // Manikion != Manx
LangTag { language: "mo", tag: Tag::from_bytes(b"MOL ") }, // Moldavian(retired code) -> Romanian (Moldova)
+ LangTag { language: "mo", tag: Tag::from_bytes(b"ROM ") }, // Moldavian(retired code) -> Romanian
LangTag { language: "mod", tag: Tag::from_bytes(b"CPP ") }, // Mobilian -> Creoles
// LangTag { language: "moh", tag: Tag::from_bytes(b"MOH ") }, // Mohawk
LangTag { language: "mok", tag: Tag(0) }, // Morori != Moksha
@@ -957,7 +959,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "mts", tag: Tag(0) }, // Yora != Maltese
LangTag { language: "mud", tag: Tag::from_bytes(b"CPP ") }, // Mednyj Aleut -> Creoles
LangTag { language: "mui", tag: Tag::from_bytes(b"MLY ") }, // Musi -> Malay
- LangTag { language: "mun", tag: Tag(0) }, // Munda [family] != Mundari
+ LangTag { language: "mun", tag: Tag(0) }, // Munda [collection] != Mundari
LangTag { language: "mup", tag: Tag::from_bytes(b"RAJ ") }, // Malvi -> Rajasthani
LangTag { language: "muq", tag: Tag::from_bytes(b"HMN ") }, // Eastern Xiangxi Miao -> Hmong
// LangTag { language: "mus", tag: Tag::from_bytes(b"MUS ") }, // Creek -> Muscogee
@@ -972,7 +974,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "mww", tag: Tag::from_bytes(b"HMN ") }, // Hmong Daw -> Hmong
LangTag { language: "my", tag: Tag::from_bytes(b"BRM ") }, // Burmese
LangTag { language: "mym", tag: Tag::from_bytes(b"MEN ") }, // Me’en
-// LangTag { language: "myn", tag: Tag::from_bytes(b"MYN ") }, // Mayan [family]
+// LangTag { language: "myn", tag: Tag::from_bytes(b"MYN ") }, // Mayan [collection]
LangTag { language: "myq", tag: Tag::from_bytes(b"MNK ") }, // Forest Maninka(retired code) -> Maninka
LangTag { language: "myv", tag: Tag::from_bytes(b"ERZ ") }, // Erzya
LangTag { language: "mzb", tag: Tag::from_bytes(b"BBR ") }, // Tumzabt -> Berber
@@ -981,7 +983,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "na", tag: Tag::from_bytes(b"NAU ") }, // Nauru -> Nauruan
LangTag { language: "nag", tag: Tag::from_bytes(b"NAG ") }, // Naga Pidgin -> Naga-Assamese
LangTag { language: "nag", tag: Tag::from_bytes(b"CPP ") }, // Naga Pidgin -> Creoles
-// LangTag { language: "nah", tag: Tag::from_bytes(b"NAH ") }, // Nahuatl [family]
+// LangTag { language: "nah", tag: Tag::from_bytes(b"NAH ") }, // Nahuatl [collection]
LangTag { language: "nan", tag: Tag::from_bytes(b"ZHS ") }, // Min Nan Chinese -> Chinese, Simplified
// LangTag { language: "nap", tag: Tag::from_bytes(b"NAP ") }, // Neapolitan
LangTag { language: "nas", tag: Tag(0) }, // Naasioi != Naskapi
@@ -1038,7 +1040,6 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "nln", tag: Tag::from_bytes(b"NAH ") }, // Durango Nahuatl(retired code) -> Nahuatl
LangTag { language: "nlv", tag: Tag::from_bytes(b"NAH ") }, // Orizaba Nahuatl -> Nahuatl
LangTag { language: "nn", tag: Tag::from_bytes(b"NYN ") }, // Norwegian Nynorsk (Nynorsk, Norwegian)
- LangTag { language: "nn", tag: Tag::from_bytes(b"NOR ") }, // Norwegian Nynorsk -> Norwegian
LangTag { language: "nnh", tag: Tag::from_bytes(b"BML ") }, // Ngiemboon -> Bamileke
LangTag { language: "nnz", tag: Tag::from_bytes(b"BML ") }, // Nda'nda' -> Bamileke
LangTag { language: "no", tag: Tag::from_bytes(b"NOR ") }, // Norwegian [macrolanguage]
@@ -1092,7 +1093,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "otw", tag: Tag::from_bytes(b"OJB ") }, // Ottawa -> Ojibway
LangTag { language: "oua", tag: Tag::from_bytes(b"BBR ") }, // Tagargrent -> Berber
LangTag { language: "pa", tag: Tag::from_bytes(b"PAN ") }, // Punjabi
- LangTag { language: "paa", tag: Tag(0) }, // Papuan [family] != Palestinian Aramaic
+ LangTag { language: "paa", tag: Tag(0) }, // Papuan [collection] != Palestinian Aramaic
// LangTag { language: "pag", tag: Tag::from_bytes(b"PAG ") }, // Pangasinan
LangTag { language: "pal", tag: Tag(0) }, // Pahlavi != Pali
// LangTag { language: "pam", tag: Tag::from_bytes(b"PAM ") }, // Pampanga -> Pampangan
@@ -1308,6 +1309,9 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "sgo", tag: Tag(0) }, // Songa(retired code) != Sango
// LangTag { language: "sgs", tag: Tag::from_bytes(b"SGS ") }, // Samogitian
LangTag { language: "sgw", tag: Tag::from_bytes(b"CHG ") }, // Sebat Bet Gurage -> Chaha Gurage
+ LangTag { language: "sh", tag: Tag::from_bytes(b"BOS ") }, // Serbo-Croatian [macrolanguage] -> Bosnian
+ LangTag { language: "sh", tag: Tag::from_bytes(b"HRV ") }, // Serbo-Croatian [macrolanguage] -> Croatian
+ LangTag { language: "sh", tag: Tag::from_bytes(b"SRB ") }, // Serbo-Croatian [macrolanguage] -> Serbian
LangTag { language: "shi", tag: Tag::from_bytes(b"SHI ") }, // Tachelhit
LangTag { language: "shi", tag: Tag::from_bytes(b"BBR ") }, // Tachelhit -> Berber
LangTag { language: "shl", tag: Tag::from_bytes(b"QIN ") }, // Shendu -> Chin
@@ -1329,7 +1333,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "skw", tag: Tag::from_bytes(b"CPP ") }, // Skepi Creole Dutch -> Creoles
LangTag { language: "sky", tag: Tag(0) }, // Sikaiana != Slovak
LangTag { language: "sl", tag: Tag::from_bytes(b"SLV ") }, // Slovenian
- LangTag { language: "sla", tag: Tag(0) }, // Slavic [family] != Slavey
+ LangTag { language: "sla", tag: Tag(0) }, // Slavic [collection] != Slavey
LangTag { language: "sm", tag: Tag::from_bytes(b"SMO ") }, // Samoan
LangTag { language: "sma", tag: Tag::from_bytes(b"SSM ") }, // Southern Sami
LangTag { language: "smd", tag: Tag::from_bytes(b"MBN ") }, // Sama(retired code) -> Mbundu
@@ -1453,7 +1457,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "tpi", tag: Tag::from_bytes(b"CPP ") }, // Tok Pisin -> Creoles
LangTag { language: "tr", tag: Tag::from_bytes(b"TRK ") }, // Turkish
LangTag { language: "trf", tag: Tag::from_bytes(b"CPP ") }, // Trinidadian Creole English -> Creoles
- LangTag { language: "trk", tag: Tag(0) }, // Turkic [family] != Turkish
+ LangTag { language: "trk", tag: Tag(0) }, // Turkic [collection] != Turkish
LangTag { language: "tru", tag: Tag::from_bytes(b"TUA ") }, // Turoyo -> Turoyo Aramaic
LangTag { language: "tru", tag: Tag::from_bytes(b"SYR ") }, // Turoyo -> Syriac
LangTag { language: "ts", tag: Tag::from_bytes(b"TSG ") }, // Tsonga
@@ -1597,7 +1601,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "zlq", tag: Tag::from_bytes(b"ZHA ") }, // Liuqian Zhuang -> Zhuang
LangTag { language: "zmi", tag: Tag::from_bytes(b"MLY ") }, // Negeri Sembilan Malay -> Malay
LangTag { language: "zmz", tag: Tag::from_bytes(b"BAD0") }, // Mbandja -> Banda
- LangTag { language: "znd", tag: Tag(0) }, // Zande [family] != Zande
+ LangTag { language: "znd", tag: Tag(0) }, // Zande [collection] != Zande
LangTag { language: "zne", tag: Tag::from_bytes(b"ZND ") }, // Zande
LangTag { language: "zom", tag: Tag::from_bytes(b"QIN ") }, // Zou -> Chin
LangTag { language: "zqe", tag: Tag::from_bytes(b"ZHA ") }, // Qiubei Zhuang -> Zhuang
@@ -2309,18 +2313,18 @@ pub fn tags_from_complex_language(language: &str, tags: &mut smallvec::SmallVec<
}
if &language[1..] == "o-nyn" {
// Norwegian Nynorsk(retired code)
- let possible_tags = &[
- Tag::from_bytes(b"NYN "), // Norwegian Nynorsk (Nynorsk, Norwegian)
- Tag::from_bytes(b"NOR "), // Norwegian
- ];
- tags.extend_from_slice(possible_tags);
+ tags.push(Tag::from_bytes(b"NYN ")); // Norwegian Nynorsk (Nynorsk, Norwegian)
return true;
}
}
b'r' => {
if strncmp(&language[1..], "o-", 2) && subtag_matches(language, "-md") {
// Romanian; Moldova
- tags.push(Tag::from_bytes(b"MOL ")); // Romanian (Moldova)
+ let possible_tags = &[
+ Tag::from_bytes(b"MOL "), // Romanian (Moldova)
+ Tag::from_bytes(b"ROM "), // Romanian
+ ];
+ tags.extend_from_slice(possible_tags);
return true;
}
}
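
The regenerated table above stores one row per (BCP 47 tag, OpenType tag) pair in preference order, with Tag(0) marking deliberate non-mappings, so a language such as mo or sh resolves to several candidate tags simply by collecting its rows. The sketch below illustrates that lookup with stand-in types; it is not the crate's real Tag type or lookup code.

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct Tag(u32);

    const fn tag(bytes: &[u8; 4]) -> Tag {
        Tag(u32::from_be_bytes(*bytes))
    }

    struct LangTag {
        language: &'static str,
        tag: Tag,
    }

    // Tiny excerpt in the spirit of OPEN_TYPE_LANGUAGES; Tag(0) marks an
    // intentional non-mapping.
    const TABLE: &[LangTag] = &[
        LangTag { language: "mo", tag: tag(b"MOL ") },  // Moldavian (retired) -> Romanian (Moldova)
        LangTag { language: "mo", tag: tag(b"ROM ") },  // Moldavian (retired) -> Romanian
        LangTag { language: "mun", tag: Tag(0) },       // Munda [collection] != Mundari
    ];

    // Collect every OpenType tag mapped to a language, in table order.
    fn tags_for_language(lang: &str) -> Vec<Tag> {
        TABLE
            .iter()
            .filter(|row| row.language == lang && row.tag != Tag(0))
            .map(|row| row.tag)
            .collect()
    }

    fn main() {
        assert_eq!(tags_for_language("mo"), vec![tag(b"MOL "), tag(b"ROM ")]);
        assert!(tags_for_language("mun").is_empty());
    }
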
diff --git a/tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf b/tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf
new file mode 100644
index 00000000..8508fbee
Binary files /dev/null and b/tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf differ
diff --git a/tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf b/tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf
new file mode 100644
index 00000000..45896c10
Binary files /dev/null and b/tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf differ
diff --git a/tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf b/tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf
new file mode 100644
index 00000000..cd01de0d
Binary files /dev/null and b/tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf differ
diff --git a/tests/shaping/aots.rs b/tests/shaping/aots.rs
index 8c44a219..4585d366 100644
--- a/tests/shaping/aots.rs
+++ b/tests/shaping/aots.rs
@@ -832,6 +832,21 @@ fn gpos4_lookupflag_002() {
);
}
+#[test]
+fn gpos4_simple_001() {
+ assert_eq!(
+ shape(
+ "tests/fonts/aots/gpos4_simple_1.otf",
+ "\u{0011}\u{0012}\u{0013}\u{0011}",
+ "--features=\"test\" --single-par --no-clusters --no-glyph-names --ned",
+ ),
+ "17|\
+ 18@1500,0|\
+ 19@1400,-80|\
+ 17@3000,0"
+ );
+}
+
#[test]
fn gpos4_simple_002() {
assert_eq!(
@@ -877,6 +892,21 @@ fn gpos5_001() {
);
}
+#[test]
+fn gpos5_002() {
+ assert_eq!(
+ shape(
+ "tests/fonts/aots/gpos5_font1.otf",
+ "\u{0011}\u{001E}\u{001F}\u{0013}\u{0011}",
+ "--features=\"test\" --single-par --no-clusters --no-glyph-names --ned",
+ ),
+ "17|\
+ 18@1500,0|\
+ 19@1401,-79|\
+ 17@3000,0"
+ );
+}
+
#[test]
fn gpos6_002() {
assert_eq!(
diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs
index 8ad60296..843e84c1 100644
--- a/tests/shaping/in_house.rs
+++ b/tests/shaping/in_house.rs
@@ -1824,6 +1824,19 @@ fn fallback_positioning_002() {
);
}
+#[test]
+fn glyph_props_no_gdef_001() {
+ assert_eq!(
+ shape(
+ "tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf",
+ "\u{A995}\u{A9BF}",
+ "",
+ ),
+ "glyph01=0+600|\
+ uniA995=0+600"
+ );
+}
+
#[test]
fn hangul_jamo_001() {
assert_eq!(
@@ -7323,6 +7336,104 @@ fn myanmar_zawgyi_001() {
);
}
+#[test]
+fn nested_mark_filtering_sets_001() {
+ assert_eq!(
+ shape(
+ "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf",
+ "\u{0628}\u{0628}\u{6D2}",
+ "",
+ ),
+ "OneDotBelowYB=2@764,-183+0|\
+ YBc1=2@764,-282+0|\
+ YehBarreeFin_3=2+355|\
+ OneDotBelowNS=1@20,-120+0|\
+ BehxMed.inT2outD2YB=1@0,349+182|\
+ NullMk=0+0|\
+ sp10=0+0|\
+ BehxIni.outT2=0@0,406+766"
+ );
+}
+
+#[test]
+fn nested_mark_filtering_sets_002() {
+ assert_eq!(
+ shape(
+ "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf",
+ "\u{0628}\u{0628}\u{0628}\u{6D2}",
+ "",
+ ),
+ "OneDotBelowYB=3@764,-183+0|\
+ YBc1=3@764,-282+0|\
+ OneDotBelowYB=3@1098,-60+0|\
+ YBc2=3@1098,-159+0|\
+ YehBarreeFin_4=3+355|\
+ OneDotBelowNS=2@20,-120+0|\
+ BehxMed.inT2outD2YB=2@0,349+182|\
+ NullMk=1+0|\
+ BehxMed.inT1outT2=1@0,406+184|\
+ NullMk=0+0|\
+ sp5=0+0|\
+ BehxIni=0@0,471+541"
+ );
+}
+
+#[test]
+fn nested_mark_filtering_sets_003() {
+ assert_eq!(
+ shape(
+ "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf",
+ "\u{0628}\u{0628}\u{0628}\u{0628}\u{6D2}",
+ "",
+ ),
+ "OneDotBelowYB=4@659,-192+0|\
+ YBc1=4@659,-291+0|\
+ OneDotBelowYB=4@966,-55+0|\
+ YBc2=4@966,-154+0|\
+ OneDotBelowYB=4@1274,-148+0|\
+ YBc3=4@1274,-247+0|\
+ YehBarreeFin_5=4+355|\
+ OneDotBelowNS=3@20,-120+0|\
+ BehxMed.inT2outD2YB=3@0,349+182|\
+ NullMk=2+0|\
+ BehxMed.inT1outT2=2@0,406+184|\
+ NullMk=1+0|\
+ BehxMed.inT2outT1=1@0,471+267|\
+ NullMk=0+0|\
+ sp0=0+0|\
+ BehxIni.outT2=0@0,616+156"
+ );
+}
+
+#[test]
+fn nested_mark_filtering_sets_004() {
+ assert_eq!(
+ shape(
+ "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf",
+ "\u{0628}\u{0628}\u{0628}\u{0628}\u{0628}\u{6D2}",
+ "",
+ ),
+ "OneDotBelowYB=5@659,-192+0|\
+ YBc1=5@659,-291+0|\
+ OneDotBelowYB=5@966,-55+0|\
+ YBc2=5@966,-154+0|\
+ OneDotBelowYB=5@1274,-148+0|\
+ YBc3=5@1274,-247+0|\
+ YehBarreeFin_5=5+355|\
+ OneDotBelowNS=4@20,-120+0|\
+ BehxMed.inT2outD2YB=4@0,349+182|\
+ NullMk=3+0|\
+ BehxMed.inT1outT2=3@0,406+184|\
+ NullMk=2+0|\
+ BehxMed.inT2outT1=2@0,471+267|\
+ NullMk=1+0|\
+ BehxMed.inT1outT2=1@0,616+184|\
+ OneDotBelowNS=0@73,516+0|\
+ sp0=0+0|\
+ BehxIni=0@0,681+236"
+ );
+}
+
#[test]
fn none_directional_001() {
assert_eq!(
@@ -9881,6 +9992,19 @@ fn tt_kern_gpos_001() {
);
}
+#[test]
+fn unsafe_to_concat_001() {
+ assert_eq!(
+ shape(
+ "tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf",
+ "\u{0628}\u{200C}\u{0628}",
+ "--show-flags",
+ ),
+ "uni0628=1+993#2|\
+ uni0628=0+993#2"
+ );
+}
+
#[test]
fn use_indic3_001() {
assert_eq!(
diff --git a/tests/shaping/main.rs b/tests/shaping/main.rs
index 77fddbfd..a86338b1 100644
--- a/tests/shaping/main.rs
+++ b/tests/shaping/main.rs
@@ -15,7 +15,7 @@ struct Args {
language: Option<Language>,
script: Option<Script>,
#[allow(dead_code)]
- remove_default_ignorables: bool, // we don't use it, but have to parse it anyway
+ remove_default_ignorables: bool,
cluster_level: rustybuzz::BufferClusterLevel,
features: Vec<Feature>,
pre_context: Option<String>,
@@ -139,6 +139,10 @@ pub fn shape(font_path: &str, text: &str, options: &str) -> String {
let mut buffer_flags = BufferFlags::default();
buffer_flags.set(BufferFlags::BEGINNING_OF_TEXT, args.bot);
buffer_flags.set(BufferFlags::END_OF_TEXT, args.eot);
+ buffer_flags.set(
+ BufferFlags::REMOVE_DEFAULT_IGNORABLES,
+ args.remove_default_ignorables,
+ );
buffer.set_flags(buffer_flags);
buffer.set_cluster_level(args.cluster_level);
diff --git a/tests/shaping/text_rendering_tests.rs b/tests/shaping/text_rendering_tests.rs
index f8436289..58a22115 100644
--- a/tests/shaping/text_rendering_tests.rs
+++ b/tests/shaping/text_rendering_tests.rs
@@ -662,6 +662,18 @@ fn cmap_1_003() {
);
}
+#[test]
+fn cmap_1_004() {
+ assert_eq!(
+ shape(
+ "tests/fonts/text-rendering-tests/TestCMAP14.otf",
+ "\u{82A6}\u{E0102}",
+ "--ned --remove-default-ignorables",
+ ),
+ "uni82A6_uE0100"
+ );
+}
+
#[test]
fn cmap_2_001() {
assert_eq!(
@@ -9897,6 +9909,21 @@ fn shknda_3_030() {
);
}
+#[test]
+fn shknda_3_031() {
+ assert_eq!(
+ shape(
+ "tests/fonts/text-rendering-tests/NotoSansKannada-Regular.ttf",
+ "\u{0C86}\u{0CCD}\u{0CAF}\u{0C95}\u{0CCD}\u{0CB7}\u{0CBF}\u{0CB8}\u{0CCD}\u{200C}",
+ "--ned --remove-default-ignorables",
+ ),
+ "gid7|\
+ gid122@1717,0|\
+ gid285@2249,0|\
+ gid200@3425,0"
+ );
+}
+
#[test]
fn shlana_1_001() {
assert_eq!(
@@ -11081,6 +11108,24 @@ fn shlana_10_027() {
);
}
+#[test]
+fn shlana_10_028() {
+ assert_eq!(
+ shape(
+ "tests/fonts/text-rendering-tests/TestShapeLana.ttf",
+ "\u{1A32}\u{1A6C}\u{1A74}\u{1A75}\u{034F}\u{1A6F}\u{1A60}\u{1A36}",
+ "--ned --remove-default-ignorables",
+ ),
+ "uni1A32|\
+ uni1A6C.wide@1910,0|\
+ uni1A74@1560,0|\
+ uni1A75@1560,732|\
+ uni1A6F@1910,0|\
+ uni25CC@4154,0|\
+ uni1A601A36@5366,0"
+ );
+}
+
#[test]
fn shlana_10_029() {
assert_eq!(
@@ -11277,6 +11322,21 @@ fn shlana_10_040() {
);
}
+#[test]
+fn shlana_10_041() {
+ assert_eq!(
+ shape(
+ "tests/fonts/text-rendering-tests/TestShapeLana.ttf",
+ "\u{1A32}\u{1A66}\u{034F}\u{1A63}\u{1A60}\u{1A3F}",
+ "--ned --remove-default-ignorables",
+ ),
+ "uni1A32|\
+ uni1A66@1560,0|\
+ uni1A63@1910,0|\
+ uni1A601A3F@3122,0"
+ );
+}
+
#[test]
fn shlana_10_042() {
assert_eq!(
@@ -12341,6 +12401,24 @@ fn shlana_5_009() {
);
}
+#[test]
+fn shlana_5_010() {
+ assert_eq!(
+ shape(
+ "tests/fonts/text-rendering-tests/TestShapeLana.ttf",
+ "\u{1A34}\u{1A64}\u{1A74}\u{1A36}\u{1A60}\u{1A45}\u{200C}\u{1A63}\u{1A60}\u{1A3F}",
+ "--ned --remove-default-ignorables",
+ ),
+ "uni1A34|\
+ uni1A74@1212,0|\
+ uni1A64@1212,0|\
+ uni1A36@1676,0|\
+ uni1A601A45@2888,0|\
+ uni1A63@2888,0|\
+ uni1A601A3F@4100,0"
+ );
+}
+
#[test]
fn shlana_5_011() {
assert_eq!(
@@ -12359,6 +12437,19 @@ fn shlana_5_011() {
);
}
+#[test]
+fn shlana_5_012() {
+ assert_eq!(
+ shape(
+ "tests/fonts/text-rendering-tests/TestShapeLana.ttf",
+ "\u{1A36}\u{200C}\u{1A63}",
+ "--ned --remove-default-ignorables",
+ ),
+ "uni1A36|\
+ uni1A63@1212,0"
+ );
+}
+
#[test]
fn shlana_5_013() {
assert_eq!(