diff --git a/scripts/gen-shaping-tests.py b/scripts/gen-shaping-tests.py index cae73dc6..d1dcf0b8 100755 --- a/scripts/gen-shaping-tests.py +++ b/scripts/gen-shaping-tests.py @@ -38,15 +38,6 @@ 'morx_36_001', # ttf-parser uses different rounding, not a bug 'fallback_positioning_001', - - # text-rendering-tests tests - # Unknown issue. Investigate. - 'cmap_1_004', - 'shknda_3_031', - 'shlana_10_028', - 'shlana_10_041', - 'shlana_5_010', - 'shlana_5_012', ] diff --git a/scripts/gen-tag-table.py b/scripts/gen-tag-table.py index 7d1e1533..f7ea2210 100755 --- a/scripts/gen-tag-table.py +++ b/scripts/gen-tag-table.py @@ -319,6 +319,10 @@ class OpenTypeRegistryParser(HTMLParser): from_bcp_47(DefaultDict[str, AbstractSet[str]]): ``to_bcp_47`` inverted. Its values start as unsorted sets; ``sort_languages`` converts them to sorted lists. + from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]): + A copy of ``from_bcp_47``. It starts as ``None`` and is + populated at the beginning of the first call to + ``inherit_from_macrolanguages``. """ def __init__(self): @@ -328,6 +332,7 @@ def __init__(self): self.ranks = collections.defaultdict(int) self.to_bcp_47 = collections.defaultdict(set) self.from_bcp_47 = collections.defaultdict(set) + self.from_bcp_47_uninherited = None # Whether the parser is in a element self._td = False # Whether the parser is after a
element within the current element @@ -448,34 +453,56 @@ def inherit_from_macrolanguages(self): If a BCP 47 tag for an individual mapping has no OpenType mapping but its macrolanguage does, the mapping is copied to - the individual language. For example, als(Tosk Albanian) has no - explicit mapping, so it inherits from sq(Albanian) the mapping + the individual language. For example, als (Tosk Albanian) has no + explicit mapping, so it inherits from sq (Albanian) the mapping to SQI. + However, if an OpenType tag maps to a BCP 47 macrolanguage and + some but not all of its individual languages, the mapping is not + inherited from the macrolanguage to the missing individual + languages. For example, INUK (Nunavik Inuktitut) is mapped to + ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to + ikt (Inuinnaqtun, which is an individual language of iu), so + this method does not add a mapping from ikt to INUK. + + If a BCP 47 tag for a macrolanguage has no OpenType mapping but - all of its individual languages do and they all map to the same - tags, the mapping is copied to the macrolanguage. + some of its individual languages do, their mappings are copied + to the macrolanguage. """ global bcp_47 - original_ot_from_bcp_47 = dict(self.from_bcp_47) + first_time = self.from_bcp_47_uninherited is None + if first_time: + self.from_bcp_47_uninherited = dict(self.from_bcp_47) for macrolanguage, languages in dict(bcp_47.macrolanguages).items(): - ot_macrolanguages = set(original_ot_from_bcp_47.get(macrolanguage, set())) + ot_macrolanguages = { + ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get(macrolanguage, set ()) + } + blocked_ot_macrolanguages = set() + if 'retired code' not in bcp_47.scopes.get(macrolanguage, ''): + for ot_macrolanguage in ot_macrolanguages: + round_trip_macrolanguages = { + l for l in self.to_bcp_47[ot_macrolanguage] + if 'retired code' not in bcp_47.scopes.get(l, '') + } + round_trip_languages = { + l for l in languages + if 'retired code' not in bcp_47.scopes.get(l, '') + } + intersection = round_trip_macrolanguages & round_trip_languages + if intersection and intersection != round_trip_languages: + blocked_ot_macrolanguages.add(ot_macrolanguage) if ot_macrolanguages: for ot_macrolanguage in ot_macrolanguages: - for language in languages: - self.add_language(language, ot_macrolanguage) - self.ranks[ot_macrolanguage] += 1 - else: + if ot_macrolanguage not in blocked_ot_macrolanguages: + for language in languages: + self.add_language(language, ot_macrolanguage) + if not blocked_ot_macrolanguages: + self.ranks[ot_macrolanguage] += 1 + elif first_time: for language in languages: - if language in original_ot_from_bcp_47: - if ot_macrolanguages: - ml = original_ot_from_bcp_47[language] - if ml: - ot_macrolanguages &= ml - else: - pass - else: - ot_macrolanguages |= original_ot_from_bcp_47[language] + if language in self.from_bcp_47_uninherited: + ot_macrolanguages |= self.from_bcp_47_uninherited[language] else: ot_macrolanguages.clear() if not ot_macrolanguages: @@ -561,7 +588,7 @@ def parse(self, filename): if scope == 'macrolanguage': scope = ' [macrolanguage]' elif scope == 'collection': - scope = ' [family]' + scope = ' [collection]' else: continue self.scopes[subtag] = scope @@ -710,6 +737,7 @@ def get_name(self, lt): ot.add_language('oc-provenc', 'PRO') +ot.remove_language_ot('QUZ') ot.add_language('qu', 'QUZ') ot.add_language('qub', 'QWH') ot.add_language('qud', 'QVI') @@ -742,7 +770,6 @@ def get_name(self, lt): ot.add_language('qxt', 'QWH') 
ot.add_language('qxw', 'QWH') -bcp_47.macrolanguages['ro'].remove('mo') bcp_47.macrolanguages['ro-MD'].add('mo') ot.remove_language_ot('SYRE') @@ -993,6 +1020,8 @@ def print_subtag_matches(subtag, new_line): continue for lt, tags in items: + if not tags: + continue if lt.variant in bcp_47.prefixes: expect(next(iter(bcp_47.prefixes[lt.variant])) == lt.language, '%s is not a valid prefix of %s' %(lt.language, lt.variant)) @@ -1022,6 +1051,8 @@ def print_subtag_matches(subtag, new_line): print(" b'%s' => {" % initial) for lt, tags in items: + if not tags: + continue print(' if ', end='') script = lt.script region = lt.region diff --git a/scripts/gen-vowel-constraints.py b/scripts/gen-vowel-constraints.py index 046119f7..b6e78d30 100755 --- a/scripts/gen-vowel-constraints.py +++ b/scripts/gen-vowel-constraints.py @@ -185,6 +185,6 @@ def __str__(self, index=0, depth=4): print(' _ => {}') print(' }') -print(' buffer.swap_buffers();') +print(' buffer.sync();') print('}') print() diff --git a/scripts/ms-use/IndicPositionalCategory-Additional.txt b/scripts/ms-use/IndicPositionalCategory-Additional.txt index 83a164e4..5a338378 100644 --- a/scripts/ms-use/IndicPositionalCategory-Additional.txt +++ b/scripts/ms-use/IndicPositionalCategory-Additional.txt @@ -2,7 +2,7 @@ # Not derivable # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 # Updated for Unicode 10.0 by Andrew Glass 2017-07-25 -# Ammended for Unicode 10.0 by Andrew Glass 2018-09-21 +# Amended for Unicode 10.0 by Andrew Glass 2018-09-21 # Updated for L2/19-083 by Andrew Glass 2019-05-06 # Updated for Unicode 12.1 by Andrew Glass 2019-05-30 # Updated for Unicode 13.0 by Andrew Glass 2020-07-28 @@ -58,16 +58,16 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN # Indic_Positional_Category=Bottom 0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA -10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order +10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overridden, ccc controls order 10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW 10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW -10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order +10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overridden, ccc controls order 10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW -10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order +10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overridden, ccc controls order 10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW -10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order +10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overridden, ccc controls order 10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW -10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order +10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overridden, ccc controls order 10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW 16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI diff --git a/src/aat/extended_kerning.rs b/src/aat/extended_kerning.rs index 6d14ab50..60d6bd32 100644 --- a/src/aat/extended_kerning.rs +++ b/src/aat/extended_kerning.rs @@ -92,6 
+92,8 @@ pub(crate) fn apply(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) -> Optio continue; } + buffer.unsafe_to_concat(None, None); + apply_simple_kerning(&subtable, plan, face, buffer); } kerx::Format::Format4(ref sub) => { @@ -140,7 +142,10 @@ fn apply_simple_kerning( } let mut iter = SkippyIter::new(&ctx, i, 1, false); - if !iter.next() { + + let mut unsafe_to = 0; + if !iter.next(Some(&mut unsafe_to)) { + ctx.buffer.unsafe_to_concat(Some(i), Some(unsafe_to)); i += 1; continue; } @@ -179,7 +184,7 @@ fn apply_simple_kerning( } } - ctx.buffer.unsafe_to_break(i, j + 1) + ctx.buffer.unsafe_to_break(Some(i), Some(j + 1)) } i = j; @@ -235,7 +240,10 @@ fn apply_state_machine_kerning( // If there's no value and we're just epsilon-transitioning to state 0, safe to break. if entry.is_actionable() || !(entry.new_state == START_OF_TEXT && !entry.has_advance()) { - buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1); + buffer.unsafe_to_break_from_outbuffer( + Some(buffer.backtrack_len() - 1), + Some(buffer.idx + 1), + ); } } @@ -249,7 +257,7 @@ fn apply_state_machine_kerning( }; if end_entry.is_actionable() { - buffer.unsafe_to_break(buffer.idx, buffer.idx + 2); + buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); } } diff --git a/src/aat/metamorphosis.rs b/src/aat/metamorphosis.rs index f393231d..eeb19fc0 100644 --- a/src/aat/metamorphosis.rs +++ b/src/aat/metamorphosis.rs @@ -210,7 +210,10 @@ fn drive( }; if !is_safe_to_break() && buffer.backtrack_len() > 0 && buffer.idx < buffer.len { - buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1); + buffer.unsafe_to_break_from_outbuffer( + Some(buffer.backtrack_len() - 1), + Some(buffer.idx + 1), + ); } c.transition(&entry, buffer); @@ -232,7 +235,7 @@ fn drive( } if !c.in_place() { - buffer.swap_buffers(); + buffer.sync(); } } @@ -459,7 +462,7 @@ impl Driver for ContextualCtx<'_> { } if let Some(replacement) = replacement { - buffer.unsafe_to_break(self.mark, (buffer.idx + 1).min(buffer.len)); + buffer.unsafe_to_break(Some(self.mark), Some((buffer.idx + 1).min(buffer.len))); buffer.info[self.mark].glyph_id = u32::from(replacement); if let Some(face) = self.face_if_has_glyph_classes { @@ -565,8 +568,8 @@ impl Driver for InsertionCtx<'_> { buffer.move_to(end + usize::from(count)); buffer.unsafe_to_break_from_outbuffer( - self.mark as usize, - (buffer.idx + 1).min(buffer.len), + Some(self.mark as usize), + Some((buffer.idx + 1).min(buffer.len)), ); } diff --git a/src/buffer.rs b/src/buffer.rs index 52ac68b1..d219fe4b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,8 +1,10 @@ use alloc::{string::String, vec::Vec}; use core::convert::TryFrom; +use std::cmp::min; use ttf_parser::GlyphId; +use crate::buffer::glyph_flag::{UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT}; use crate::unicode::{CharExt, GeneralCategory, GeneralCategoryExt, Space}; use crate::{script, Direction, Face, Language, Mask, Script}; @@ -12,22 +14,80 @@ pub mod glyph_flag { /// Indicates that if input text is broken at the /// beginning of the cluster this glyph is part of, /// then both sides need to be re-shaped, as the - /// result might be different. On the flip side, - /// it means that when this flag is not present, - /// then it's safe to break the glyph-run at the - /// beginning of this cluster, and the two sides - /// represent the exact same result one would get - /// if breaking input text at the beginning of - /// this cluster and shaping the two sides - /// separately. 
This can be used to optimize - /// paragraph layout, by avoiding re-shaping - /// of each line after line-breaking, or limiting - /// the reshaping to a small piece around the - /// breaking point only. + /// result might be different. + /// + /// On the flip side, it means that when + /// this flag is not present, then it is safe + /// to break the glyph-run at the beginning of + /// this cluster, and the two sides will represent + /// the exact same result one would get if breaking + /// input text at the beginning of this cluster and + /// shaping the two sides separately. + /// + /// This can be used to optimize paragraph layout, + /// by avoiding re-shaping of each line after line-breaking. pub const UNSAFE_TO_BREAK: u32 = 0x00000001; + /// Indicates that if input text is changed on one side + /// of the beginning of the cluster this glyph is part + /// of, then the shaping results for the other side + /// might change. + /// + /// Note that the absence of this flag will NOT by + /// itself mean that it IS safe to concat text. Only + /// two pieces of text both of which clear of this + /// flag can be concatenated safely. + /// + /// This can be used to optimize paragraph layout, + /// by avoiding re-shaping of each line after + /// line-breaking, by limiting the reshaping to a + /// small piece around the breaking position only, + /// even if the breaking position carries the + /// UNSAFE_TO_BREAK or when hyphenation or + /// other text transformation happens at + /// line-break position, in the following way: + /// + /// 1. Iterate back from the line-break + /// position until the first cluster + /// start position that is NOT unsafe-to-concat, + /// 2. shape the segment from there till the + /// end of line, 3. check whether the resulting + /// glyph-run also is clear of the unsafe-to-concat + /// at its start-of-text position; if it is, just + /// splice it into place and the line is shaped; + /// If not, move on to a position further back that + /// is clear of unsafe-to-concat and retry from + /// there, and repeat. + /// + /// At the start of next line a similar + /// algorithm can be implemented. + /// That is: 1. Iterate forward from + /// the line-break position until the first cluster + /// start position that is NOT unsafe-to-concat, 2. + /// shape the segment from beginning of the line to + /// that position, 3. check whether the resulting + /// glyph-run also is clear of the unsafe-to-concat + /// at its end-of-text position; if it is, just splice + /// it into place and the beginning is shaped; If not, + /// move on to a position further forward that is clear + /// of unsafe-to-concat and retry up to there, and repeat. + /// + /// A slight complication will arise in the + /// implementation of the algorithm above, + /// because while + /// our buffer API has a way to return flags + /// for position corresponding to + /// start-of-text, there is currently no + /// position corresponding to end-of-text. + /// This limitation can be alleviated by + /// shaping more text than needed and + /// looking for unsafe-to-concat flag + /// within text clusters. + /// + /// The UNSAFE_TO_BREAK flag will always imply this flag. + pub const UNSAFE_TO_CONCAT: u32 = 0x00000002; /// All the currently defined flags. - pub const DEFINED: u32 = 0x00000001; // OR of all defined flags + pub const DEFINED: u32 = 0x00000003; // OR of all defined flags } /// Holds the positions of the glyph in both horizontal and vertical directions. 
@@ -514,11 +574,6 @@ pub struct Buffer { pub flags: BufferFlags, pub cluster_level: BufferClusterLevel, pub invisible: Option<GlyphId>, - pub scratch_flags: BufferScratchFlags, - // Maximum allowed len. - pub max_len: usize, - /// Maximum allowed operations. - pub max_ops: i32, // Buffer contents. pub direction: Direction, @@ -540,13 +595,19 @@ pub struct Buffer { pub info: Vec<GlyphInfo>, pub pos: Vec<GlyphPosition>, - serial: u32, - // Text before / after the main buffer contents. // Always in Unicode, and ordered outward. // Index 0 is for "pre-context", 1 for "post-context". pub context: [[char; CONTEXT_LENGTH]; 2], pub context_len: [usize; 2], + + // Managed by enter / leave + pub serial: u8, + pub scratch_flags: BufferScratchFlags, + /// Maximum allowed len. + pub max_len: usize, + /// Maximum allowed operations. + pub max_ops: i32, } impl Buffer { @@ -656,7 +717,6 @@ impl Buffer { self.direction = Direction::Invalid; self.script = None; self.language = None; - self.scratch_flags = BufferScratchFlags::default(); self.successful = true; self.have_output = false; @@ -669,13 +729,15 @@ impl Buffer { self.out_len = 0; self.have_separate_output = false; - self.serial = 0; - self.context = [ ['\0', '\0', '\0', '\0', '\0'], ['\0', '\0', '\0', '\0', '\0'], ]; self.context_len = [0, 0]; + + self.serial = 0; + self.scratch_flags = BufferScratchFlags::default(); + self.cluster_level = BufferClusterLevel::default(); } #[inline] @@ -693,8 +755,13 @@ impl Buffer { } #[inline] - fn next_serial(&mut self) -> u32 { + fn next_serial(&mut self) -> u8 { self.serial += 1; + + if self.serial == 0 { + self.serial += 1; + } + self.serial } @@ -783,10 +850,6 @@ impl Buffer { start } - pub fn reverse_clusters(&mut self) { - self.reverse_groups(_cluster_group_func, false); - } - #[inline] fn reset_clusters(&mut self) { for (i, info) in self.info.iter_mut().enumerate() { @@ -820,7 +883,7 @@ impl Buffer { // TODO: language must be set } - pub fn swap_buffers(&mut self) { + pub fn sync(&mut self) { assert!(self.have_output); assert!(self.idx <= self.len); @@ -1044,7 +1107,7 @@ impl Buffer { fn merge_clusters_impl(&mut self, mut start: usize, mut end: usize) { if self.cluster_level == BufferClusterLevel::Characters { - self.unsafe_to_break(start, end); + self.unsafe_to_break(Some(start), Some(end)); return; } @@ -1203,44 +1266,104 @@ impl Buffer { self.len = j; } - pub fn unsafe_to_break(&mut self, start: usize, end: usize) { - if end - start < 2 { + pub fn unsafe_to_break(&mut self, start: Option<usize>, end: Option<usize>) { + self._set_glyph_flags( + UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT, + start, + end, + Some(true), + None, + ); + } + + /// Adds glyph flags in mask to infos with clusters between start and end. + /// The start index will be from out-buffer if from_out_buffer is true. + /// If interior is true, then the cluster having the minimum value is skipped.
*/ + fn _set_glyph_flags( + &mut self, + mask: Mask, + start: Option<usize>, + end: Option<usize>, + interior: Option<bool>, + from_out_buffer: Option<bool>, + ) { + let start = start.unwrap_or(0); + let end = min(end.unwrap_or(self.len), self.len); + let interior = interior.unwrap_or(false); + let from_out_buffer = from_out_buffer.unwrap_or(false); + + if interior && !from_out_buffer && end - start < 2 { return; } - self.unsafe_to_break_impl(start, end); - } + self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS; - fn unsafe_to_break_impl(&mut self, start: usize, end: usize) { - let mut cluster = core::u32::MAX; - cluster = Self::_infos_find_min_cluster(&self.info, start, end, cluster); - let unsafe_to_break = Self::_unsafe_to_break_set_mask(&mut self.info, start, end, cluster); - if unsafe_to_break { - self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK; - } - } + if !from_out_buffer || !self.have_output { + if !interior { + for i in start..end { + self.info[i].mask |= mask; + } + } else { + let cluster = Self::_infos_find_min_cluster(&self.info, start, end, None); + if Self::_infos_set_glyph_flags(&mut self.info, start, end, cluster, mask) { + self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS; + } + } + } else { + assert!(start <= self.out_len); + assert!(self.idx <= end); - pub fn unsafe_to_break_from_outbuffer(&mut self, start: usize, end: usize) { - if !self.have_output { - self.unsafe_to_break_impl(start, end); - return; + if !interior { + for i in start..self.out_len { + self.out_info_mut()[i].mask |= mask; + } + + for i in self.idx..end { + self.info[i].mask |= mask; + } + } else { + let mut cluster = Self::_infos_find_min_cluster(&self.info, self.idx, end, None); + cluster = Self::_infos_find_min_cluster( + &self.out_info(), + start, + self.out_len, + Some(cluster), + ); + + let out_len = self.out_len; + let first = Self::_infos_set_glyph_flags( + &mut self.out_info_mut(), + start, + out_len, + cluster, + mask, + ); + let second = + Self::_infos_set_glyph_flags(&mut self.info, self.idx, end, cluster, mask); + + if first || second { + self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS; + } + } } + } - assert!(start <= self.out_len); - assert!(self.idx <= end); + pub fn unsafe_to_concat(&mut self, start: Option<usize>, end: Option<usize>) { + self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(true), None); + } - let mut cluster = core::u32::MAX; - cluster = Self::_infos_find_min_cluster(self.out_info(), start, self.out_len, cluster); - cluster = Self::_infos_find_min_cluster(&self.info, self.idx, end, cluster); - let idx = self.idx; - let out_len = self.out_len; - let unsafe_to_break1 = - Self::_unsafe_to_break_set_mask(self.out_info_mut(), start, out_len, cluster); - let unsafe_to_break2 = Self::_unsafe_to_break_set_mask(&mut self.info, idx, end, cluster); + pub fn unsafe_to_break_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) { + self._set_glyph_flags( + UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT, + start, + end, + Some(true), + Some(true), + ); + } - if unsafe_to_break1 || unsafe_to_break2 { - self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK; - } + pub fn unsafe_to_concat_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) { + self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(false), Some(true)); } pub fn move_to(&mut self, i: usize) -> bool { @@ -1384,22 +1507,43 @@ impl Buffer { pub fn set_cluster(info: &mut GlyphInfo, cluster: u32, mask: Mask) { if info.cluster != cluster { - if mask & glyph_flag::UNSAFE_TO_BREAK != 0 { - info.mask |=
glyph_flag::UNSAFE_TO_BREAK; - } else { - info.mask &= !glyph_flag::UNSAFE_TO_BREAK; - } + info.mask = (info.mask & !glyph_flag::DEFINED) | (mask & glyph_flag::DEFINED); } info.cluster = cluster; } + // Called around shape() + pub(crate) fn enter(&mut self) { + self.serial = 0; + self.scratch_flags = BufferScratchFlags::empty(); + + if let Some(len) = self.len.checked_mul(Buffer::MAX_LEN_FACTOR) { + self.max_len = len.max(Buffer::MAX_LEN_MIN); + } + + if let Ok(len) = i32::try_from(self.len) { + if let Some(ops) = len.checked_mul(Buffer::MAX_OPS_FACTOR) { + self.max_ops = ops.max(Buffer::MAX_OPS_MIN); + } + } + } + + // Called around shape() + pub(crate) fn leave(&mut self) { + self.max_len = Buffer::MAX_LEN_DEFAULT; + self.max_ops = Buffer::MAX_OPS_DEFAULT; + self.serial = 0; + } + fn _infos_find_min_cluster( info: &[GlyphInfo], start: usize, end: usize, - mut cluster: u32, + cluster: Option<u32>, ) -> u32 { + let mut cluster = cluster.unwrap_or(core::u32::MAX); + for glyph_info in &info[start..end] { cluster = core::cmp::min(cluster, glyph_info.cluster); } @@ -1407,17 +1551,23 @@ impl Buffer { cluster } - fn _unsafe_to_break_set_mask( + #[must_use] + fn _infos_set_glyph_flags( info: &mut [GlyphInfo], start: usize, end: usize, cluster: u32, + mask: Mask, ) -> bool { + // NOTE: Because of problems with ownership, we don't pass the scratch flags to this + // function, unlike in harfbuzz. Because of this, each time this function is called, the + // caller needs to set the "BufferScratchFlags::HAS_GLYPH_FLAGS" scratch flag + // themselves if the function returns true. let mut unsafe_to_break = false; for glyph_info in &mut info[start..end] { if glyph_info.cluster != cluster { + glyph_info.mask |= mask; unsafe_to_break = true; - glyph_info.mask |= glyph_flag::UNSAFE_TO_BREAK; } } @@ -1453,20 +1603,6 @@ impl Buffer { } } - pub fn next_cluster(&self, mut start: usize) -> usize { - if start >= self.len { - return start; - } - - let cluster = self.info[start].cluster; - start += 1; - while start < self.len && cluster == self.info[start].cluster { - start += 1; - } - - start - } - pub fn next_syllable(&self, mut start: usize) -> usize { if start >= self.len { return start; } @@ -1481,27 +1617,9 @@ impl Buffer { start } - pub fn next_grapheme(&self, mut start: usize) -> usize { - if start >= self.len { - return start; - } - - start += 1; - while start < self.len && self.info[start].is_continuation() { - start += 1; - } - - start - } - #[inline] pub fn allocate_lig_id(&mut self) -> u8 { - let mut lig_id = self.next_serial() & 0x07; - if lig_id == 0 { - // In case of overflow. - lig_id = self.next_serial() & 0x07; - } - lig_id as u8 + self.next_serial() & 0x07 } } @@ -1600,6 +1718,8 @@ bitflags::bitflags! { const REMOVE_DEFAULT_IGNORABLES = 1 << 4; /// Indicates that a dotted circle should not be inserted in the rendering of incorrect character sequences (such as `<0905 093E>`). const DO_NOT_INSERT_DOTTED_CIRCLE = 1 << 5; + /// Indicates that the shape() call and its variants should perform various verification processes on the results of the shaping operation on the buffer. If the verification fails, then either a buffer message is sent, if a message handler is installed on the buffer, or a message is written to standard error. In either case, the shaping result might be modified to show the failed output. + const VERIFY = 1 << 6; } } @@ -1610,8 +1730,8 @@ bitflags::bitflags!
{ const HAS_DEFAULT_IGNORABLES = 0x00000002; const HAS_SPACE_FALLBACK = 0x00000004; const HAS_GPOS_ATTACHMENT = 0x00000008; - const HAS_UNSAFE_TO_BREAK = 0x00000010; - const HAS_CGJ = 0x00000020; + const HAS_CGJ = 0x00000010; + const HAS_GLYPH_FLAGS = 0x00000020; // Reserved for complex shapers' internal use. const COMPLEX0 = 0x01000000; diff --git a/src/complex/arabic.rs b/src/complex/arabic.rs index da9e44c6..ac57a25e 100644 --- a/src/complex/arabic.rs +++ b/src/complex/arabic.rs @@ -130,7 +130,7 @@ const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[ ], ]; -#[derive(Clone, Copy, PartialEq, Debug)] +#[derive(Clone, Copy, PartialEq, PartialOrd, Debug)] pub enum JoiningType { U = 0, L = 1, @@ -406,7 +406,7 @@ fn apply_stch(face: &Face, buffer: &mut Buffer) { if step == MEASURE { extra_glyphs_needed += (n_copies * n_repeating) as usize; } else { - buffer.unsafe_to_break(context, end); + buffer.unsafe_to_break(Some(context), Some(end)); let mut x_offset = 0; for k in (start + 1..=end).rev() { let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph()) as i32; @@ -513,7 +513,19 @@ fn arabic_joining(buffer: &mut Buffer) { if entry.0 != action::NONE && prev.is_some() { if let Some(prev) = prev { buffer.info[prev].set_arabic_shaping_action(entry.0); - buffer.unsafe_to_break(prev, i + 1); + buffer.unsafe_to_break(Some(prev), Some(i + 1)); + } + } + // States that have a possible prev_action. + else { + if let Some(prev) = prev { + if this_type >= JoiningType::R || (2 <= state && state <= 5) { + buffer.unsafe_to_concat(Some(prev), Some(i + 1)); + } + } else { + if this_type >= JoiningType::R { + buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1)); + } } } @@ -534,6 +546,13 @@ fn arabic_joining(buffer: &mut Buffer) { if entry.0 != action::NONE && prev.is_some() { if let Some(prev) = prev { buffer.info[prev].set_arabic_shaping_action(entry.0); + buffer.unsafe_to_break(Some(prev), Some(buffer.len)); + } + } + // States that have a possible prev_action. + else if 2 <= state && state <= 5 { + if let Some(prev) = prev { + buffer.unsafe_to_concat(Some(prev), Some(buffer.len)); } } diff --git a/src/complex/hangul.rs b/src/complex/hangul.rs index e4f78f59..bfda01bc 100644 --- a/src/complex/hangul.rs +++ b/src/complex/hangul.rs @@ -89,7 +89,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { // // - LV can be precomposed, or decomposed. Lets call those // and , - // - LVT can be fully precomposed, partically precomposed, or + // - LVT can be fully precomposed, partially precomposed, or // fully decomposed. Ie. , , or . // // The composition / decomposition is mechanical. However, not @@ -147,7 +147,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { // I didn't bother for now. if start < end && end == buffer.out_len { // Tone mark follows a valid syllable; move it in front, unless it's zero width. - buffer.unsafe_to_break_from_outbuffer(start, buffer.idx); + buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx)); buffer.next_glyph(); if !is_zero_width_char(face, c) { buffer.merge_out_clusters(start, end + 1); @@ -209,7 +209,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { } let offset = if t != 0 { 3 } else { 2 }; - buffer.unsafe_to_break(buffer.idx, buffer.idx + offset); + buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset)); // We've got a syllable ; see if it can potentially be composed. 
if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) { @@ -267,7 +267,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { continue; } else { // Mark unsafe between LV and T. - buffer.unsafe_to_break(buffer.idx, buffer.idx + 2); + buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); } } @@ -310,7 +310,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { } else if tindex == 0 && buffer.idx + 1 > buffer.len && is_t(buffer.cur(1).glyph_id) { // Mark unsafe between LV and T. - buffer.unsafe_to_break(buffer.idx, buffer.idx + 2); + buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); } } @@ -327,7 +327,7 @@ fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { buffer.next_glyph(); } - buffer.swap_buffers(); + buffer.sync(); } fn is_hangul_tone(u: u32) -> bool { diff --git a/src/complex/indic.rs b/src/complex/indic.rs index 08cde528..e04e7eca 100644 --- a/src/complex/indic.rs +++ b/src/complex/indic.rs @@ -770,7 +770,7 @@ fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { let mut start = 0; let mut end = buffer.next_syllable(0); while start < buffer.len { - buffer.unsafe_to_break(start, end); + buffer.unsafe_to_break(Some(start), Some(end)); start = end; end = buffer.next_syllable(start); } @@ -1951,7 +1951,7 @@ fn final_reordering_impl( { buffer.info[start].mask |= plan.mask_array[indic_feature::INIT]; } else { - buffer.unsafe_to_break(start - 1, start + 1); + buffer.unsafe_to_break(Some(start - 1), Some(start + 1)); } } } diff --git a/src/complex/khmer.rs b/src/complex/khmer.rs index fa788a22..e95a4dc8 100644 --- a/src/complex/khmer.rs +++ b/src/complex/khmer.rs @@ -160,7 +160,7 @@ fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { let mut start = 0; let mut end = buffer.next_syllable(0); while start < buffer.len { - buffer.unsafe_to_break(start, end); + buffer.unsafe_to_break(Some(start), Some(end)); start = end; end = buffer.next_syllable(start); } diff --git a/src/complex/myanmar.rs b/src/complex/myanmar.rs index c2f9b8bb..59720b23 100644 --- a/src/complex/myanmar.rs +++ b/src/complex/myanmar.rs @@ -176,7 +176,7 @@ fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { let mut start = 0; let mut end = buffer.next_syllable(0); while start < buffer.len { - buffer.unsafe_to_break(start, end); + buffer.unsafe_to_break(Some(start), Some(end)); start = end; end = buffer.next_syllable(start); } diff --git a/src/complex/syllabic.rs b/src/complex/syllabic.rs index a47f2fa2..96850794 100644 --- a/src/complex/syllabic.rs +++ b/src/complex/syllabic.rs @@ -72,5 +72,5 @@ pub fn insert_dotted_circles( } } - buffer.swap_buffers(); + buffer.sync(); } diff --git a/src/complex/thai.rs b/src/complex/thai.rs index cbda8f8c..9d73d25e 100644 --- a/src/complex/thai.rs +++ b/src/complex/thai.rs @@ -297,7 +297,7 @@ fn do_pua_shaping(face: &Face, buffer: &mut Buffer) { below_edge.action }; - buffer.unsafe_to_break(base, i); + buffer.unsafe_to_break(Some(base), Some(i)); if action == Action::RD { buffer.info[base].glyph_id = pua_shape(buffer.info[base].glyph_id, action, face); } else { @@ -415,7 +415,7 @@ fn preprocess_text(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { } } - buffer.swap_buffers(); + buffer.sync(); // If font has Thai GSUB, we are done. 
if plan.script == Some(script::THAI) && !plan.ot_map.found_script(TableIndex::GSUB) { diff --git a/src/complex/universal.rs b/src/complex/universal.rs index e12d137c..75b1afac 100644 --- a/src/complex/universal.rs +++ b/src/complex/universal.rs @@ -23,6 +23,7 @@ pub const UNIVERSAL_SHAPER: ComplexShaper = ComplexShaper { }; pub type Category = u8; +#[allow(dead_code)] pub mod category { pub const O: u8 = 0; // OTHER @@ -232,7 +233,7 @@ fn setup_syllables(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { super::universal_machine::find_syllables(buffer); foreach_syllable!(buffer, start, end, { - buffer.unsafe_to_break(start, end); + buffer.unsafe_to_break(Some(start), Some(end)); }); setup_rphf_mask(plan, buffer); diff --git a/src/complex/vowel_constraints.rs b/src/complex/vowel_constraints.rs index 243273e8..4e372bba 100644 --- a/src/complex/vowel_constraints.rs +++ b/src/complex/vowel_constraints.rs @@ -420,5 +420,5 @@ pub fn preprocess_text_vowel_constraints(buffer: &mut Buffer) { _ => {} } - buffer.swap_buffers(); + buffer.sync(); } diff --git a/src/fallback.rs b/src/fallback.rs index f69f63b9..087ccb0f 100644 --- a/src/fallback.rs +++ b/src/fallback.rs @@ -154,7 +154,7 @@ fn position_around_base( adjust_offsets_when_zeroing: bool, ) { let mut horizontal_dir = Direction::Invalid; - buffer.unsafe_to_break(base, end); + buffer.unsafe_to_break(Some(base), Some(end)); let base_info = &buffer.info[base]; let base_pos = &buffer.pos[base]; diff --git a/src/normalize.rs b/src/normalize.rs index d356dc25..54146ad9 100644 --- a/src/normalize.rs +++ b/src/normalize.rs @@ -171,7 +171,7 @@ pub fn normalize(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { } } - buffer.swap_buffers(); + buffer.sync(); } // Second round, reorder (inplace) @@ -285,7 +285,7 @@ pub fn normalize(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { } } - buffer.swap_buffers(); + buffer.sync(); } } @@ -366,7 +366,12 @@ fn decompose_current_character(ctx: &mut ShapeNormalizeContext, shortest: bool) // Handle space characters. if ctx.buffer.cur(0).general_category() == GeneralCategory::SpaceSeparator { if let Some(space_type) = u.space_fallback() { - if let Some(space_glyph) = ctx.face.glyph_index(u32::from(' ')) { + let space_glyph = ctx + .face + .glyph_index(u32::from(' ')) + .or(ctx.buffer.invisible); + + if let Some(space_glyph) = space_glyph { ctx.buffer.cur_mut(0).set_space_fallback(space_type); ctx.buffer.next_char(u32::from(space_glyph.0)); ctx.buffer.scratch_flags |= BufferScratchFlags::HAS_SPACE_FALLBACK; diff --git a/src/ot/apply.rs b/src/ot/apply.rs index 2dbec6e8..d638fe28 100644 --- a/src/ot/apply.rs +++ b/src/ot/apply.rs @@ -152,13 +152,13 @@ impl<'a, 'b> ApplyContext<'a, 'b> { props |= GlyphPropsFlags::SUBSTITUTED.bits(); if ligature { + props |= GlyphPropsFlags::LIGATED.bits(); // In the only place that the MULTIPLIED bit is used, Uniscribe // seems to only care about the "last" transformation between // Ligature and Multiple substitutions. Ie. if you ligate, expand, // and ligate again, it forgives the multiplication and acts as // if only ligation happened. As such, clear MULTIPLIED bit. 
props &= !GlyphPropsFlags::MULTIPLIED.bits(); - props |= GlyphPropsFlags::LIGATED.bits(); } if component { @@ -172,9 +172,13 @@ impl<'a, 'b> ApplyContext<'a, 'b> { .map_or(false, |table| table.has_glyph_classes()); if has_glyph_classes { + props &= GlyphPropsFlags::PRESERVE.bits(); props = (props & !GlyphPropsFlags::CLASS_MASK.bits()) | self.face.glyph_props(glyph_id); } else if !class_guess.is_empty() { + props &= GlyphPropsFlags::PRESERVE.bits(); props = (props & !GlyphPropsFlags::CLASS_MASK.bits()) | class_guess.bits(); + } else { + props = props & !GlyphPropsFlags::CLASS_MASK.bits(); } cur.set_glyph_props(props); diff --git a/src/ot/contextual.rs b/src/ot/contextual.rs index 8bfe902b..d1ee13b6 100644 --- a/src/ot/contextual.rs +++ b/src/ot/contextual.rs @@ -2,9 +2,7 @@ use ttf_parser::opentype_layout::*; use ttf_parser::{GlyphId, LazyArray16}; use super::apply::{Apply, ApplyContext, WouldApply, WouldApplyContext}; -use super::matching::{ - match_backtrack, match_glyph, match_input, match_lookahead, MatchFunc, Matched, -}; +use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead, MatchFunc}; use super::MAX_CONTEXT_LENGTH; impl WouldApply for ContextLookup<'_> { @@ -64,11 +62,32 @@ impl Apply for ContextLookup<'_> { coverage.get(glyph).is_some() }; - match_input(ctx, coverages_len as u16, &match_func).map(|matched| { + let mut match_end = 0; + let mut match_positions = [0; MAX_CONTEXT_LENGTH]; + + if match_input( + ctx, + coverages_len, + &match_func, + &mut match_end, + &mut match_positions, + None, + ) { ctx.buffer - .unsafe_to_break(ctx.buffer.idx, ctx.buffer.idx + matched.len); - apply_lookup(ctx, usize::from(coverages_len), matched, lookups); - }) + .unsafe_to_break(Some(ctx.buffer.idx), Some(match_end)); + apply_lookup( + ctx, + usize::from(coverages_len), + &mut match_positions, + match_end, + lookups, + ); + return Some(()); + } else { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(match_end)); + return None; + } } } } @@ -208,21 +227,56 @@ impl Apply for ChainedContextLookup<'_> { coverage.contains(glyph) }; - if let Some(matched) = match_input(ctx, input_coverages.len(), &input) { - if let Some(start_idx) = match_backtrack(ctx, backtrack_coverages.len(), &back) - { - if let Some(end_idx) = - match_lookahead(ctx, lookahead_coverages.len(), &ahead, matched.len) - { - ctx.buffer - .unsafe_to_break_from_outbuffer(start_idx, end_idx); - apply_lookup(ctx, usize::from(input_coverages.len()), matched, lookups); - return Some(()); - } - } + let mut end_index = ctx.buffer.idx; + let mut match_end = 0; + let mut match_positions = [0; MAX_CONTEXT_LENGTH]; + + let input_matches = match_input( + ctx, + input_coverages.len(), + &input, + &mut match_end, + &mut match_positions, + None, + ); + + if input_matches { + end_index = match_end; + } + + if !(input_matches + && match_lookahead( + ctx, + lookahead_coverages.len(), + &ahead, + match_end, + &mut end_index, + )) + { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(end_index)); + return None; + } + + let mut start_index = ctx.buffer.out_len; + + if !match_backtrack(ctx, backtrack_coverages.len(), &back, &mut start_index) { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index)); + return None; } - None + ctx.buffer + .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index)); + apply_lookup( + ctx, + usize::from(input_coverages.len()), + &mut match_positions, + match_end, + lookups, + ); + + Some(()) } } } @@ -291,11 +345,30 @@ fn apply_context( 
match_func(glyph, value) }; - match_input(ctx, input.len(), &match_func).map(|matched| { + let mut match_end = 0; + let mut match_positions = [0; MAX_CONTEXT_LENGTH]; + + if match_input( + ctx, + input.len(), + &match_func, + &mut match_end, + &mut match_positions, + None, + ) { ctx.buffer - .unsafe_to_break(ctx.buffer.idx, ctx.buffer.idx + matched.len); - apply_lookup(ctx, usize::from(input.len()), matched, lookups); - }) + .unsafe_to_break(Some(ctx.buffer.idx), Some(match_end)); + apply_lookup( + ctx, + usize::from(input.len()), + &mut match_positions, + match_end, + lookups, + ); + return Some(()); + } + + None } fn apply_chain_context( @@ -306,6 +379,8 @@ fn apply_chain_context( match_funcs: [&MatchFunc; 3], lookups: LazyArray16, ) -> Option<()> { + // NOTE: Whenever something in this method changes, we also need to + // change it in the `apply` implementation for ChainedContextLookup. let f1 = |glyph, num_items| { let index = backtrack.len() - num_items; let value = backtrack.get(index).unwrap(); @@ -324,24 +399,55 @@ fn apply_chain_context( match_funcs[1](glyph, value) }; - if let Some(matched) = match_input(ctx, input.len(), &f3) { - if let Some(start_idx) = match_backtrack(ctx, backtrack.len(), &f1) { - if let Some(end_idx) = match_lookahead(ctx, lookahead.len(), &f2, matched.len) { - ctx.buffer - .unsafe_to_break_from_outbuffer(start_idx, end_idx); - apply_lookup(ctx, usize::from(input.len()), matched, lookups); - return Some(()); - } - } + let mut end_index = ctx.buffer.idx; + let mut match_end = 0; + let mut match_positions = [0; MAX_CONTEXT_LENGTH]; + + let input_matches = match_input( + ctx, + input.len(), + &f3, + &mut match_end, + &mut match_positions, + None, + ); + + if input_matches { + end_index = match_end; } - None + if !(input_matches && match_lookahead(ctx, lookahead.len(), &f2, match_end, &mut end_index)) { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(end_index)); + return None; + } + + let mut start_index = ctx.buffer.out_len; + + if !match_backtrack(ctx, backtrack.len(), &f1, &mut start_index) { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index)); + return None; + } + + ctx.buffer + .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index)); + apply_lookup( + ctx, + usize::from(input.len()), + &mut match_positions, + match_end, + lookups, + ); + + Some(()) } fn apply_lookup( ctx: &mut ApplyContext, input_len: usize, - mut matched: Matched, + match_positions: &mut [usize; MAX_CONTEXT_LENGTH], + match_end: usize, lookups: LazyArray16, ) { let mut count = input_len + 1; @@ -354,10 +460,10 @@ fn apply_lookup( // Convert positions to new indexing. for j in 0..count { - matched.positions[j] = (matched.positions[j] as isize + delta) as _; + match_positions[j] = (match_positions[j] as isize + delta) as _; } - backtrack_len + matched.len + backtrack_len + match_end - ctx.buffer.idx }; for record in lookups { @@ -376,7 +482,7 @@ fn apply_lookup( continue; } - if !ctx.buffer.move_to(matched.positions[idx]) { + if !ctx.buffer.move_to(match_positions[idx]) { break; } @@ -419,12 +525,12 @@ fn apply_lookup( // It should be possible to construct tests for both of these cases. end = (end as isize + delta) as _; - if end <= matched.positions[idx] { + if end <= match_positions[idx] { // End might end up being smaller than match_positions[idx] if the recursed // lookup ended up removing many items, more than we have had matched. // Just never rewind end back and get out of here. 
// https://bugs.chromium.org/p/chromium/issues/detail?id=659496 - end = matched.positions[idx]; + end = match_positions[idx]; // There can't be any further changes. break; @@ -444,20 +550,18 @@ fn apply_lookup( } // Shift! - matched - .positions - .copy_within(next..count, (next as isize + delta) as _); + match_positions.copy_within(next..count, (next as isize + delta) as _); next = (next as isize + delta) as _; count = (count as isize + delta) as _; // Fill in new entries. for j in idx + 1..next { - matched.positions[j] = matched.positions[j - 1] + 1; + match_positions[j] = match_positions[j - 1] + 1; } // And fixup the rest. while next < count { - matched.positions[next] = (matched.positions[next] as isize + delta) as _; + match_positions[next] = (match_positions[next] as isize + delta) as _; next += 1; } } diff --git a/src/ot/kerning.rs b/src/ot/kerning.rs index 4d07d1fb..566ac9cc 100644 --- a/src/ot/kerning.rs +++ b/src/ot/kerning.rs @@ -85,6 +85,7 @@ fn machine_kern( cross_stream: bool, get_kerning: impl Fn(u32, u32) -> i32, ) { + buffer.unsafe_to_concat(None, None); let mut ctx = ApplyContext::new(TableIndex::GPOS, face, buffer); ctx.lookup_mask = kern_mask; ctx.lookup_props = u32::from(lookup_flags::IGNORE_MARKS); @@ -99,7 +100,9 @@ fn machine_kern( } let mut iter = SkippyIter::new(&ctx, i, 1, false); - if !iter.next() { + + let mut unsafe_to = 0; + if !iter.next(Some(&mut unsafe_to)) { i += 1; continue; } @@ -135,7 +138,7 @@ fn machine_kern( } } - ctx.buffer.unsafe_to_break(i, j + 1) + ctx.buffer.unsafe_to_break(Some(i), Some(j + 1)) } i = j; @@ -204,7 +207,10 @@ fn apply_state_machine_kerning(subtable: &kern::Subtable, kern_mask: Mask, buffe if entry.has_offset() || !(entry.new_state == apple_layout::state::START_OF_TEXT && !entry.has_advance()) { - buffer.unsafe_to_break_from_outbuffer(buffer.backtrack_len() - 1, buffer.idx + 1); + buffer.unsafe_to_break_from_outbuffer( + Some(buffer.backtrack_len() - 1), + Some(buffer.idx + 1), + ); } } @@ -216,7 +222,7 @@ fn apply_state_machine_kerning(subtable: &kern::Subtable, kern_mask: Mask, buffe }; if end_entry.has_offset() { - buffer.unsafe_to_break(buffer.idx, buffer.idx + 2); + buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); } } diff --git a/src/ot/layout.rs b/src/ot/layout.rs index cf57a829..36be1e14 100644 --- a/src/ot/layout.rs +++ b/src/ot/layout.rs @@ -227,7 +227,7 @@ fn apply_string(ctx: &mut ApplyContext, lookup: &T::Lookup) { apply_forward(ctx, lookup); if !T::IN_PLACE { - ctx.buffer.swap_buffers(); + ctx.buffer.sync(); } } else { // in-place backward substitution/positioning diff --git a/src/ot/matching.rs b/src/ot/matching.rs index 6235ccf3..44d7d030 100644 --- a/src/ot/matching.rs +++ b/src/ot/matching.rs @@ -1,5 +1,6 @@ //! Matching of glyph patterns. +use std::cmp::max; use ttf_parser::GlyphId; use super::apply::ApplyContext; @@ -14,18 +15,14 @@ pub fn match_glyph(glyph: GlyphId, value: u16) -> bool { glyph == GlyphId(value) } -// TODO: Find out whether returning this by value is slow. -pub struct Matched { - pub len: usize, - pub positions: [usize; MAX_CONTEXT_LENGTH], - pub total_component_count: u8, -} - pub fn match_input( - ctx: &ApplyContext, + ctx: &mut ApplyContext, input_len: u16, match_func: &MatchingFunc, -) -> Option { + end_position: &mut usize, + match_positions: &mut [usize; MAX_CONTEXT_LENGTH], + p_total_component_count: Option<&mut u8>, +) -> bool { // This is perhaps the trickiest part of OpenType... 
Remarks: // // - If all components of the ligature were marks, we call this a mark ligature. @@ -57,7 +54,7 @@ pub fn match_input( let count = usize::from(input_len) + 1; if count > MAX_CONTEXT_LENGTH { - return None; + return false; } let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, input_len, false); @@ -66,15 +63,16 @@ pub fn match_input( let first = ctx.buffer.cur(0); let first_lig_id = first.lig_id(); let first_lig_comp = first.lig_comp(); - let mut positions = [0; MAX_CONTEXT_LENGTH]; let mut total_component_count = first.lig_num_comps(); let mut ligbase = Ligbase::NotChecked; - positions[0] = ctx.buffer.idx; + match_positions[0] = ctx.buffer.idx; - for position in &mut positions[1..count] { - if !iter.next() { - return None; + for position in &mut match_positions[1..count] { + let mut unsafe_to = 0; + if !iter.next(Some(&mut unsafe_to)) { + *end_position = unsafe_to; + return false; } *position = iter.index(); @@ -111,7 +109,7 @@ pub fn match_input( } if ligbase == Ligbase::MayNotSkip { - return None; + return false; } } } else { @@ -119,53 +117,63 @@ pub fn match_input( // all subsequent components should also NOT be attached to any ligature // component, unless they are attached to the first component itself! if this_lig_id != 0 && this_lig_comp != 0 && (this_lig_id != first_lig_id) { - return None; + return false; } } total_component_count += this.lig_num_comps(); } - Some(Matched { - len: iter.index() - ctx.buffer.idx + 1, - positions, - total_component_count, - }) + *end_position = iter.index() + 1; + + if let Some(p_total_component_count) = p_total_component_count { + *p_total_component_count = total_component_count; + } + + true } pub fn match_backtrack( - ctx: &ApplyContext, + ctx: &mut ApplyContext, backtrack_len: u16, match_func: &MatchingFunc, -) -> Option { + match_start: &mut usize, +) -> bool { let mut iter = SkippyIter::new(ctx, ctx.buffer.backtrack_len(), backtrack_len, true); iter.enable_matching(match_func); for _ in 0..backtrack_len { - if !iter.prev() { - return None; + let mut unsafe_from = 0; + if !iter.prev(Some(&mut unsafe_from)) { + *match_start = unsafe_from; + return false; } } - Some(iter.index()) + *match_start = iter.index(); + true } pub fn match_lookahead( - ctx: &ApplyContext, + ctx: &mut ApplyContext, lookahead_len: u16, match_func: &MatchingFunc, - offset: usize, -) -> Option { - let mut iter = SkippyIter::new(ctx, ctx.buffer.idx + offset - 1, lookahead_len, true); + start_index: usize, + end_index: &mut usize, +) -> bool { + let mut iter = SkippyIter::new(ctx, start_index - 1, lookahead_len, true); iter.enable_matching(match_func); for _ in 0..lookahead_len { - if !iter.next() { - return None; + let mut unsafe_to = 0; + if !iter.next(Some(&mut unsafe_to)) { + *end_index = unsafe_to; + return false; } } - Some(iter.index() + 1) + *end_index = iter.index() + 1; + true } pub type MatchingFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a; @@ -226,7 +234,7 @@ impl<'a, 'b> SkippyIter<'a, 'b> { self.buf_idx } - pub fn next(&mut self) -> bool { + pub fn next(&mut self, unsafe_to: Option<&mut usize>) -> bool { assert!(self.num_items > 0); while self.buf_idx + usize::from(self.num_items) < self.buf_len { self.buf_idx += 1; @@ -244,14 +252,22 @@ impl<'a, 'b> SkippyIter<'a, 'b> { } if skip == Some(false) { + if let Some(unsafe_to) = unsafe_to { + *unsafe_to = self.buf_idx + 1; + } + return false; } } + if let Some(unsafe_to) = unsafe_to { + *unsafe_to = self.buf_idx + 1; + } + false } - pub fn prev(&mut self) -> bool { + pub fn prev(&mut self, unsafe_from: 
Option<&mut usize>) -> bool { assert!(self.num_items > 0); while self.buf_idx >= usize::from(self.num_items) { self.buf_idx -= 1; @@ -269,10 +285,18 @@ impl<'a, 'b> SkippyIter<'a, 'b> { } if skip == Some(false) { + if let Some(unsafe_from) = unsafe_from { + *unsafe_from = max(1, self.buf_idx) - 1; + } + return false; } } + if let Some(unsafe_from) = unsafe_from { + *unsafe_from = 0; + } + false } diff --git a/src/ot/position.rs b/src/ot/position.rs index da141414..584b2c96 100644 --- a/src/ot/position.rs +++ b/src/ot/position.rs @@ -168,36 +168,78 @@ impl Apply for SingleAdjustment<'_> { impl Apply for PairAdjustment<'_> { fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { - let first = ctx.buffer.cur(0).as_glyph(); - let index = self.coverage().get(first)?; + let first_glyph = ctx.buffer.cur(0).as_glyph(); + let first_glyph_coverage_index = self.coverage().get(first_glyph)?; let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, 1, false); - if !iter.next() { + + let mut unsafe_to = 0; + if !iter.next(Some(&mut unsafe_to)) { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(unsafe_to)); return None; } - let pos = iter.index(); - let second = ctx.buffer.info[pos].as_glyph(); + let second_glyph_index = iter.index(); + let second_glyph = ctx.buffer.info[second_glyph_index].as_glyph(); + + let finish = |ctx: &mut ApplyContext, has_record2| { + ctx.buffer.idx = second_glyph_index; + + if has_record2 { + ctx.buffer.idx += 1; + } + + Some(()) + }; + + let boring = |ctx: &mut ApplyContext, has_record2| { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(second_glyph_index + 1)); + finish(ctx, has_record2) + }; + + let success = |ctx: &mut ApplyContext, flag1, flag2, has_record2| { + if flag1 || flag2 { + ctx.buffer + .unsafe_to_break(Some(ctx.buffer.idx), Some(second_glyph_index + 1)); + finish(ctx, has_record2) + } else { + boring(ctx, has_record2) + } + }; + + let bail = |ctx: &mut ApplyContext, records: (ValueRecord, ValueRecord)| { + let flag1 = records.0.apply(ctx, ctx.buffer.idx); + let flag2 = records.1.apply(ctx, second_glyph_index); + + let has_record2 = !records.1.is_empty(); + success(ctx, flag1, flag2, has_record2) + }; let records = match self { - Self::Format1 { sets, .. } => sets.get(index)?.get(second), + Self::Format1 { sets, .. } => { + sets.get(first_glyph_coverage_index)?.get(second_glyph)? + } Self::Format2 { classes, matrix, .. } => { - let classes = (classes.0.get(first), classes.1.get(second)); - matrix.get(classes) + let classes = (classes.0.get(first_glyph), classes.1.get(second_glyph)); + + let records = match matrix.get(classes) { + Some(v) => v, + None => { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(iter.index() + 1)); + return None; + } + }; + + return bail(ctx, records); } - }?; - - let flag1 = records.0.apply(ctx, ctx.buffer.idx); - let flag2 = records.1.apply(ctx, pos); - // Note the intentional use of "|" instead of short-circuit "||". 
- if flag1 | flag2 { - ctx.buffer.unsafe_to_break(ctx.buffer.idx, pos + 1); - } + }; - ctx.buffer.idx = pos + usize::from(flag2); - Some(()) + bail(ctx, records) } } @@ -209,21 +251,29 @@ impl Apply for CursiveAdjustment<'_> { let entry_this = self.sets.entry(index_this)?; let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, 1, false); - if !iter.prev() { + + let mut unsafe_from = 0; + if !iter.prev(Some(&mut unsafe_from)) { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1)); return None; } let i = iter.index(); let prev = ctx.buffer.info[i].as_glyph(); let index_prev = self.coverage.get(prev)?; - let exit_prev = self.sets.exit(index_prev)?; + let Some(exit_prev) = self.sets.exit(index_prev) else { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(iter.index()), Some(ctx.buffer.idx + 1)); + return None; + }; let (exit_x, exit_y) = exit_prev.get(ctx.face); let (entry_x, entry_y) = entry_this.get(ctx.face); let direction = ctx.buffer.direction; let j = ctx.buffer.idx; - ctx.buffer.unsafe_to_break(i, j); + ctx.buffer.unsafe_to_break(Some(i), Some(j)); let pos = &mut ctx.buffer.pos; match direction { @@ -345,7 +395,10 @@ impl Apply for MarkToBaseAdjustment<'_> { let info = &buffer.info; loop { - if !iter.prev() { + let mut unsafe_from = 0; + if !iter.prev(Some(&mut unsafe_from)) { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1)); return None; } @@ -369,12 +422,16 @@ impl Apply for MarkToBaseAdjustment<'_> { // Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled - let idx = iter.index(); - let base_glyph = info[idx].as_glyph(); - let base_index = self.base_coverage.get(base_glyph)?; + let iter_idx = iter.index(); + let base_glyph = info[iter_idx].as_glyph(); + let Some(base_index) = self.base_coverage.get(base_glyph) else { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1)); + return None; + }; self.marks - .apply(ctx, self.anchors, mark_index, base_index, idx) + .apply(ctx, self.anchors, mark_index, base_index, iter_idx) } } @@ -387,20 +444,30 @@ impl Apply for MarkToLigatureAdjustment<'_> { // Now we search backwards for a non-mark glyph let mut iter = SkippyIter::new(ctx, buffer.idx, 1, false); iter.set_lookup_props(u32::from(lookup_flags::IGNORE_MARKS)); - if !iter.prev() { + + let mut unsafe_from = 0; + if !iter.prev(Some(&mut unsafe_from)) { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1)); return None; } // Checking that matched glyph is actually a ligature by GDEF is too strong; disabled - let idx = iter.index(); - let lig_glyph = buffer.info[idx].as_glyph(); - let lig_index = self.ligature_coverage.get(lig_glyph)?; + let iter_idx = iter.index(); + let lig_glyph = buffer.info[iter_idx].as_glyph(); + let Some(lig_index) = self.ligature_coverage.get(lig_glyph) else { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1)); + return None; + }; let lig_attach = self.ligature_array.get(lig_index)?; // Find component to attach to let comp_count = lig_attach.rows; if comp_count == 0 { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1)); return None; } @@ -408,7 +475,7 @@ impl Apply for MarkToLigatureAdjustment<'_> { // is identical to the ligature ID of the found ligature. If yes, we // can directly use the component index. If not, we attach the mark // glyph to the last component of the ligature. 
- let lig_id = buffer.info[idx].lig_id(); + let lig_id = buffer.info[iter_idx].lig_id(); let mark_id = buffer.cur(0).lig_id(); let mark_comp = u16::from(buffer.cur(0).lig_comp()); let matches = lig_id != 0 && lig_id == mark_id && mark_comp > 0; @@ -419,7 +486,7 @@ impl Apply for MarkToLigatureAdjustment<'_> { } - 1; self.marks - .apply(ctx, lig_attach, mark_index, comp_index, idx) + .apply(ctx, lig_attach, mark_index, comp_index, iter_idx) } } @@ -432,19 +499,25 @@ impl Apply for MarkToMarkAdjustment<'_> { // Now we search backwards for a suitable mark glyph until a non-mark glyph let mut iter = SkippyIter::new(ctx, buffer.idx, 1, false); iter.set_lookup_props(ctx.lookup_props & !u32::from(lookup_flags::IGNORE_FLAGS)); - if !iter.prev() { + + let mut unsafe_from = 0; + if !iter.prev(Some(&mut unsafe_from)) { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(unsafe_from), Some(ctx.buffer.idx + 1)); return None; } - let idx = iter.index(); - if !buffer.info[idx].is_mark() { + let iter_idx = iter.index(); + if !buffer.info[iter_idx].is_mark() { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1)); return None; } let id1 = buffer.cur(0).lig_id(); - let id2 = buffer.info[idx].lig_id(); + let id2 = buffer.info[iter_idx].lig_id(); let comp1 = buffer.cur(0).lig_comp(); - let comp2 = buffer.info[idx].lig_comp(); + let comp2 = buffer.info[iter_idx].lig_comp(); let matches = if id1 == id2 { // Marks belonging to the same base @@ -457,25 +530,46 @@ impl Apply for MarkToMarkAdjustment<'_> { }; if !matches { + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(iter_idx), Some(buffer.idx + 1)); return None; } - let mark2_glyph = buffer.info[idx].as_glyph(); + let mark2_glyph = buffer.info[iter_idx].as_glyph(); let mark2_index = self.mark2_coverage.get(mark2_glyph)?; self.marks - .apply(ctx, self.mark2_matrix, mark1_index, mark2_index, idx) + .apply(ctx, self.mark2_matrix, mark1_index, mark2_index, iter_idx) } } trait ValueRecordExt { + fn is_empty(&self) -> bool; fn apply(&self, ctx: &mut ApplyContext, idx: usize) -> bool; + fn apply_to_pos(&self, ctx: &mut ApplyContext, pos: &mut GlyphPosition) -> bool; } impl ValueRecordExt for ValueRecord<'_> { + fn is_empty(&self) -> bool { + self.x_placement == 0 + && self.y_placement == 0 + && self.x_advance == 0 + && self.y_advance == 0 + && self.x_placement_device.is_none() + && self.y_placement_device.is_none() + && self.x_advance_device.is_none() + && self.y_advance_device.is_none() + } + fn apply(&self, ctx: &mut ApplyContext, idx: usize) -> bool { - let horizontal = ctx.buffer.direction.is_horizontal(); let mut pos = ctx.buffer.pos[idx]; + let worked = self.apply_to_pos(ctx, &mut pos); + ctx.buffer.pos[idx] = pos; + worked + } + + fn apply_to_pos(&self, ctx: &mut ApplyContext, pos: &mut GlyphPosition) -> bool { + let horizontal = ctx.buffer.direction.is_horizontal(); let mut worked = false; if self.x_placement != 0 { @@ -535,7 +629,6 @@ impl ValueRecordExt for ValueRecord<'_> { } } - ctx.buffer.pos[idx] = pos; worked } } @@ -568,7 +661,8 @@ impl MarkArrayExt for MarkArray<'_> { let (mark_x, mark_y) = mark_anchor.get(ctx.face); let (base_x, base_y) = base_anchor.get(ctx.face); - ctx.buffer.unsafe_to_break(glyph_pos, ctx.buffer.idx); + ctx.buffer + .unsafe_to_break(Some(glyph_pos), Some(ctx.buffer.idx + 1)); let idx = ctx.buffer.idx; let pos = ctx.buffer.cur_pos_mut(); diff --git a/src/ot/substitute.rs b/src/ot/substitute.rs index 41f310f1..5b11926d 100644 --- a/src/ot/substitute.rs +++ b/src/ot/substitute.rs @@ -9,9 +9,10 @@ 
use crate::unicode::GeneralCategory; use crate::Face; use super::apply::{Apply, ApplyContext, WouldApply, WouldApplyContext}; -use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead, Matched}; +use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead}; use super::{ - LayoutLookup, LayoutTable, Map, SubstLookup, SubstitutionTable, TableIndex, MAX_NESTING_LEVEL, + LayoutLookup, LayoutTable, Map, SubstLookup, SubstitutionTable, TableIndex, MAX_CONTEXT_LENGTH, + MAX_NESTING_LEVEL, }; use ttf_parser::opentype_layout::LookupIndex; @@ -224,7 +225,7 @@ impl Apply for AlternateSet<'_> { if alt_index == Map::MAX_VALUE && ctx.random { // Maybe we can do better than unsafe-to-break all; but since we are // changing random state, it would be hard to track that. Good 'nough. - ctx.buffer.unsafe_to_break(0, ctx.buffer.len); + ctx.buffer.unsafe_to_break(Some(0), Some(ctx.buffer.len)); alt_index = ctx.random_number() % u32::from(len) + 1; } @@ -296,15 +297,47 @@ impl Apply for Ligature<'_> { match_glyph(glyph, value.0) }; - match_input(ctx, self.components.len(), &f).map(|matched| { - let count = usize::from(self.components.len()) + 1; - ligate(ctx, count, matched, self.glyph); - }) + let mut match_end = 0; + let mut match_positions = [0; MAX_CONTEXT_LENGTH]; + let mut total_component_count = 0; + + if !match_input( + ctx, + self.components.len(), + &f, + &mut match_end, + &mut match_positions, + Some(&mut total_component_count), + ) { + ctx.buffer + .unsafe_to_concat(Some(ctx.buffer.idx), Some(match_end)); + return None; + } + + let count = usize::from(self.components.len()) + 1; + ligate( + ctx, + count, + &match_positions, + match_end, + total_component_count, + self.glyph, + ); + return Some(()); } } } -fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: GlyphId) { +fn ligate( + ctx: &mut ApplyContext, + // Including the first glyph + count: usize, + // Including the first glyph + match_positions: &[usize; MAX_CONTEXT_LENGTH], + match_end: usize, + total_component_count: u8, + lig_glyph: GlyphId, +) { // - If a base and one or more marks ligate, consider that as a base, NOT // ligature, such that all following marks can still attach to it. 
// https://github.com/harfbuzz/harfbuzz/issues/1109 @@ -338,12 +371,12 @@ fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: Gly // let mut buffer = &mut ctx.buffer; - buffer.merge_clusters(buffer.idx, buffer.idx + matched.len); + buffer.merge_clusters(buffer.idx, match_end); - let mut is_base_ligature = buffer.info[matched.positions[0]].is_base_glyph(); - let mut is_mark_ligature = buffer.info[matched.positions[0]].is_mark(); + let mut is_base_ligature = buffer.info[match_positions[0]].is_base_glyph(); + let mut is_mark_ligature = buffer.info[match_positions[0]].is_mark(); for i in 1..count { - if !buffer.info[matched.positions[i]].is_mark() { + if !buffer.info[match_positions[i]].is_mark() { is_base_ligature = false; is_mark_ligature = false; } @@ -366,7 +399,7 @@ fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: Gly let mut comps_so_far = last_num_comps; if is_ligature { - first.set_lig_props_for_ligature(lig_id, matched.total_component_count); + first.set_lig_props_for_ligature(lig_id, total_component_count); if first.general_category() == GeneralCategory::NonspacingMark { first.set_general_category(GeneralCategory::OtherLetter); } @@ -376,7 +409,7 @@ fn ligate(ctx: &mut ApplyContext, count: usize, matched: Matched, lig_glyph: Gly buffer = &mut ctx.buffer; for i in 1..count { - while buffer.idx < matched.positions[i] && buffer.successful { + while buffer.idx < match_positions[i] && buffer.successful { if is_ligature { let cur = buffer.cur_mut(0); let mut this_comp = cur.lig_comp(); @@ -450,10 +483,19 @@ impl Apply for ReverseChainSingleSubstitution<'_> { value.contains(glyph) }; - if let Some(start_idx) = match_backtrack(ctx, self.backtrack_coverages.len(), &f1) { - if let Some(end_idx) = match_lookahead(ctx, self.lookahead_coverages.len(), &f2, 1) { + let mut start_index = 0; + let mut end_index = 0; + + if match_backtrack(ctx, self.backtrack_coverages.len(), &f1, &mut start_index) { + if match_lookahead( + ctx, + self.lookahead_coverages.len(), + &f2, + ctx.buffer.idx + 1, + &mut end_index, + ) { ctx.buffer - .unsafe_to_break_from_outbuffer(start_idx, end_idx); + .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index)); ctx.replace_glyph_inplace(subst); // Note: We DON'T decrease buffer.idx. The main loop does it @@ -463,6 +505,8 @@ impl Apply for ReverseChainSingleSubstitution<'_> { } } - None + ctx.buffer + .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index)); + return None; } } diff --git a/src/shape.rs b/src/shape.rs index 7c3311e0..b8d39d0a 100644 --- a/src/shape.rs +++ b/src/shape.rs @@ -1,5 +1,3 @@ -use core::convert::TryFrom; - use crate::buffer::{ glyph_flag, Buffer, BufferClusterLevel, BufferFlags, BufferScratchFlags, GlyphInfo, GlyphPropsFlags, @@ -77,17 +75,7 @@ struct ShapeContext<'a> { // Pull it all together! 
fn shape_internal(ctx: &mut ShapeContext) { - ctx.buffer.scratch_flags = BufferScratchFlags::empty(); - - if let Some(len) = ctx.buffer.len.checked_mul(Buffer::MAX_LEN_FACTOR) { - ctx.buffer.max_len = len.max(Buffer::MAX_LEN_MIN); - } - - if let Ok(len) = i32::try_from(ctx.buffer.len) { - if let Some(ops) = len.checked_mul(Buffer::MAX_OPS_FACTOR) { - ctx.buffer.max_ops = ops.max(Buffer::MAX_OPS_MIN); - } - } + ctx.buffer.enter(); initialize_masks(ctx); set_unicode_props(ctx.buffer); @@ -108,8 +96,7 @@ fn shape_internal(ctx: &mut ShapeContext) { propagate_flags(ctx.buffer); ctx.buffer.direction = ctx.target_direction; - ctx.buffer.max_len = Buffer::MAX_LEN_DEFAULT; - ctx.buffer.max_ops = Buffer::MAX_OPS_DEFAULT; + ctx.buffer.leave(); } fn substitute_pre(ctx: &mut ShapeContext) { @@ -213,7 +200,7 @@ fn position_complex(ctx: &mut ShapeContext) { // hanging over the next glyph after the final reordering. // // Note: If fallback positioning happens, we don't care about - // this as it will be overriden. + // this as it will be overridden. let adjust_offsets_when_zeroing = ctx.plan.adjust_mark_positioning_when_zeroing && ctx.buffer.direction.is_forward(); @@ -328,7 +315,7 @@ fn setup_masks_fraction(ctx: &mut ShapeContext) { end += 1; } - buffer.unsafe_to_break(start, end); + buffer.unsafe_to_break(Some(start), Some(end)); for info in &mut buffer.info[start..i] { info.mask |= pre_mask; @@ -427,7 +414,7 @@ fn insert_dotted_circle(buffer: &mut Buffer, face: &Face) { info.init_unicode_props(&mut buffer.scratch_flags); buffer.clear_output(); buffer.output_info(info); - buffer.swap_buffers(); + buffer.sync(); } } @@ -440,7 +427,7 @@ fn form_clusters(buffer: &mut Buffer) { foreach_grapheme!(buffer, start, end, { buffer.merge_clusters(start, end) }); } else { foreach_grapheme!(buffer, start, end, { - buffer.unsafe_to_break(start, end); + buffer.unsafe_to_break(Some(start), Some(end)); }); } } @@ -624,15 +611,17 @@ fn propagate_flags(buffer: &mut Buffer) { // Simplifies using them. if buffer .scratch_flags - .contains(BufferScratchFlags::HAS_UNSAFE_TO_BREAK) + .contains(BufferScratchFlags::HAS_GLYPH_FLAGS) { foreach_cluster!(buffer, start, end, { + let mut mask = 0; for info in &buffer.info[start..end] { - if info.mask & glyph_flag::UNSAFE_TO_BREAK != 0 { - for info in &mut buffer.info[start..end] { - info.mask |= glyph_flag::UNSAFE_TO_BREAK; - } - break; + mask |= info.mask & glyph_flag::DEFINED; + } + + if mask != 0 { + for info in &mut buffer.info[start..end] { + info.mask |= mask; } } }); diff --git a/src/tag_table.rs b/src/tag_table.rs index cce46717..75e68820 100644 --- a/src/tag_table.rs +++ b/src/tag_table.rs @@ -62,7 +62,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "an", tag: Tag::from_bytes(b"ARG ") }, // Aragonese // LangTag { language: "ang", tag: Tag::from_bytes(b"ANG ") }, // Old English (ca.
450-1100) -> Anglo-Saxon LangTag { language: "aoa", tag: Tag::from_bytes(b"CPP ") }, // Angolar -> Creoles - LangTag { language: "apa", tag: Tag::from_bytes(b"ATH ") }, // Apache [family] -> Athapaskan + LangTag { language: "apa", tag: Tag::from_bytes(b"ATH ") }, // Apache [collection] -> Athapaskan LangTag { language: "apc", tag: Tag::from_bytes(b"ARA ") }, // Levantine Arabic -> Arabic LangTag { language: "apd", tag: Tag::from_bytes(b"ARA ") }, // Sudanese Arabic -> Arabic LangTag { language: "apj", tag: Tag::from_bytes(b"ATH ") }, // Jicarilla Apache -> Athapaskan @@ -82,7 +82,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "arz", tag: Tag::from_bytes(b"ARA ") }, // Egyptian Arabic -> Arabic LangTag { language: "as", tag: Tag::from_bytes(b"ASM ") }, // Assamese // LangTag { language: "ast", tag: Tag::from_bytes(b"AST ") }, // Asturian -// LangTag { language: "ath", tag: Tag::from_bytes(b"ATH ") }, // Athapascan [family] -> Athapaskan +// LangTag { language: "ath", tag: Tag::from_bytes(b"ATH ") }, // Athapascan [collection] -> Athapaskan LangTag { language: "atj", tag: Tag::from_bytes(b"RCR ") }, // Atikamekw -> R-Cree LangTag { language: "atv", tag: Tag::from_bytes(b"ALT ") }, // Northern Altai -> Altai LangTag { language: "auj", tag: Tag::from_bytes(b"BBR ") }, // Awjilah -> Berber @@ -106,10 +106,10 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "azn", tag: Tag::from_bytes(b"NAH ") }, // Western Durango Nahuatl -> Nahuatl LangTag { language: "azz", tag: Tag::from_bytes(b"NAH ") }, // Highland Puebla Nahuatl -> Nahuatl LangTag { language: "ba", tag: Tag::from_bytes(b"BSH ") }, // Bashkir - LangTag { language: "bad", tag: Tag::from_bytes(b"BAD0") }, // Banda [family] + LangTag { language: "bad", tag: Tag::from_bytes(b"BAD0") }, // Banda [collection] LangTag { language: "bag", tag: Tag(0) }, // Tuki != Baghelkhandi LangTag { language: "bah", tag: Tag::from_bytes(b"CPP ") }, // Bahamas Creole English -> Creoles - LangTag { language: "bai", tag: Tag::from_bytes(b"BML ") }, // Bamileke [family] + LangTag { language: "bai", tag: Tag::from_bytes(b"BML ") }, // Bamileke [collection] LangTag { language: "bal", tag: Tag::from_bytes(b"BLI ") }, // Baluchi [macrolanguage] // LangTag { language: "ban", tag: Tag::from_bytes(b"BAN ") }, // Balinese // LangTag { language: "bar", tag: Tag::from_bytes(b"BAR ") }, // Bavarian @@ -131,7 +131,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "bea", tag: Tag::from_bytes(b"ATH ") }, // Beaver -> Athapaskan LangTag { language: "beb", tag: Tag::from_bytes(b"BTI ") }, // Bebele -> Beti // LangTag { language: "bem", tag: Tag::from_bytes(b"BEM ") }, // Bemba (Zambia) - LangTag { language: "ber", tag: Tag::from_bytes(b"BBR ") }, // Berber [family] + LangTag { language: "ber", tag: Tag::from_bytes(b"BBR ") }, // Berber [collection] LangTag { language: "bew", tag: Tag::from_bytes(b"CPP ") }, // Betawi -> Creoles LangTag { language: "bfl", tag: Tag::from_bytes(b"BAD0") }, // Banda-Ndélé -> Banda LangTag { language: "bfq", tag: Tag::from_bytes(b"BAD ") }, // Badaga @@ -199,7 +199,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "btd", tag: Tag::from_bytes(b"BTK ") }, // Batak Dairi -> Batak LangTag { language: "bti", tag: Tag(0) }, // Burate != Beti LangTag { language: "btj", tag: Tag::from_bytes(b"MLY ") }, // Bacanese Malay -> Malay -// LangTag { language: "btk", tag: Tag::from_bytes(b"BTK ") }, // Batak [family] +// LangTag { language: "btk", tag: Tag::from_bytes(b"BTK ") }, 
// Batak [collection] LangTag { language: "btm", tag: Tag::from_bytes(b"BTM ") }, // Batak Mandailing LangTag { language: "btm", tag: Tag::from_bytes(b"BTK ") }, // Batak Mandailing -> Batak LangTag { language: "bto", tag: Tag::from_bytes(b"BIK ") }, // Rinconada Bikol -> Bikol @@ -252,6 +252,8 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "chh", tag: Tag(0) }, // Chinook != Chattisgarhi LangTag { language: "chj", tag: Tag::from_bytes(b"CCHN") }, // Ojitlán Chinantec -> Chinantec LangTag { language: "chk", tag: Tag::from_bytes(b"CHK0") }, // Chuukese + LangTag { language: "chm", tag: Tag::from_bytes(b"HMA ") }, // Mari (Russia) [macrolanguage] -> High Mari + LangTag { language: "chm", tag: Tag::from_bytes(b"LMA ") }, // Mari (Russia) [macrolanguage] -> Low Mari LangTag { language: "chn", tag: Tag::from_bytes(b"CPP ") }, // Chinook jargon -> Creoles // LangTag { language: "cho", tag: Tag::from_bytes(b"CHO ") }, // Choctaw LangTag { language: "chp", tag: Tag::from_bytes(b"CHP ") }, // Chipewyan @@ -293,10 +295,10 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ // LangTag { language: "cop", tag: Tag::from_bytes(b"COP ") }, // Coptic LangTag { language: "coq", tag: Tag::from_bytes(b"ATH ") }, // Coquille -> Athapaskan LangTag { language: "cpa", tag: Tag::from_bytes(b"CCHN") }, // Palantla Chinantec -> Chinantec - LangTag { language: "cpe", tag: Tag::from_bytes(b"CPP ") }, // English-based creoles and pidgins [family] -> Creoles - LangTag { language: "cpf", tag: Tag::from_bytes(b"CPP ") }, // French-based creoles and pidgins [family] -> Creoles + LangTag { language: "cpe", tag: Tag::from_bytes(b"CPP ") }, // English-based creoles and pidgins [collection] -> Creoles + LangTag { language: "cpf", tag: Tag::from_bytes(b"CPP ") }, // French-based creoles and pidgins [collection] -> Creoles LangTag { language: "cpi", tag: Tag::from_bytes(b"CPP ") }, // Chinese Pidgin English -> Creoles -// LangTag { language: "cpp", tag: Tag::from_bytes(b"CPP ") }, // Portuguese-based creoles and pidgins [family] -> Creoles +// LangTag { language: "cpp", tag: Tag::from_bytes(b"CPP ") }, // Portuguese-based creoles and pidgins [collection] -> Creoles LangTag { language: "cpx", tag: Tag::from_bytes(b"ZHS ") }, // Pu-Xian Chinese -> Chinese, Simplified LangTag { language: "cqd", tag: Tag::from_bytes(b"HMN ") }, // Chuanqiandian Cluster Miao -> Hmong LangTag { language: "cqu", tag: Tag::from_bytes(b"QUH ") }, // Chilean Quechua(retired code) -> Quechua (Bolivia) @@ -316,7 +318,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "crm", tag: Tag::from_bytes(b"MCR ") }, // Moose Cree LangTag { language: "crm", tag: Tag::from_bytes(b"LCR ") }, // Moose Cree -> L-Cree LangTag { language: "crm", tag: Tag::from_bytes(b"CRE ") }, // Moose Cree -> Cree - LangTag { language: "crp", tag: Tag::from_bytes(b"CPP ") }, // Creoles and pidgins [family] -> Creoles + LangTag { language: "crp", tag: Tag::from_bytes(b"CPP ") }, // Creoles and pidgins [collection] -> Creoles LangTag { language: "crr", tag: Tag(0) }, // Carolina Algonquian != Carrier LangTag { language: "crs", tag: Tag::from_bytes(b"CPP ") }, // Seselwa Creole French -> Creoles LangTag { language: "crt", tag: Tag(0) }, // Iyojwa'ja Chorote != Crimean Tatar @@ -427,7 +429,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "et", tag: Tag::from_bytes(b"ETI ") }, // Estonian [macrolanguage] LangTag { language: "eto", tag: Tag::from_bytes(b"BTI ") }, // Eton (Cameroon) -> Beti LangTag { language: "eu", tag: 
Tag::from_bytes(b"EUQ ") }, // Basque - LangTag { language: "euq", tag: Tag(0) }, // Basque [family] != Basque + LangTag { language: "euq", tag: Tag(0) }, // Basque [collection] != Basque LangTag { language: "eve", tag: Tag::from_bytes(b"EVN ") }, // Even LangTag { language: "evn", tag: Tag::from_bytes(b"EVK ") }, // Evenki LangTag { language: "ewo", tag: Tag::from_bytes(b"BTI ") }, // Ewondo -> Beti @@ -616,13 +618,12 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "ijc", tag: Tag::from_bytes(b"IJO ") }, // Izon -> Ijo LangTag { language: "ije", tag: Tag::from_bytes(b"IJO ") }, // Biseni -> Ijo LangTag { language: "ijn", tag: Tag::from_bytes(b"IJO ") }, // Kalabari -> Ijo -// LangTag { language: "ijo", tag: Tag::from_bytes(b"IJO ") }, // Ijo [family] +// LangTag { language: "ijo", tag: Tag::from_bytes(b"IJO ") }, // Ijo [collection] LangTag { language: "ijs", tag: Tag::from_bytes(b"IJO ") }, // Southeast Ijo -> Ijo LangTag { language: "ik", tag: Tag::from_bytes(b"IPK ") }, // Inupiaq [macrolanguage] -> Inupiat LangTag { language: "ike", tag: Tag::from_bytes(b"INU ") }, // Eastern Canadian Inuktitut -> Inuktitut LangTag { language: "ike", tag: Tag::from_bytes(b"INUK") }, // Eastern Canadian Inuktitut -> Nunavik Inuktitut LangTag { language: "ikt", tag: Tag::from_bytes(b"INU ") }, // Inuinnaqtun -> Inuktitut - LangTag { language: "ikt", tag: Tag::from_bytes(b"INUK") }, // Inuinnaqtun -> Nunavik Inuktitut // LangTag { language: "ilo", tag: Tag::from_bytes(b"ILO ") }, // Iloko -> Ilokano LangTag { language: "in", tag: Tag::from_bytes(b"IND ") }, // Indonesian(retired code) LangTag { language: "in", tag: Tag::from_bytes(b"MLY ") }, // Indonesian(retired code) -> Malay @@ -666,7 +667,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "kab", tag: Tag::from_bytes(b"BBR ") }, // Kabyle -> Berber LangTag { language: "kac", tag: Tag(0) }, // Kachin != Kachchi LangTag { language: "kam", tag: Tag::from_bytes(b"KMB ") }, // Kamba (Kenya) - LangTag { language: "kar", tag: Tag::from_bytes(b"KRN ") }, // Karen [family] + LangTag { language: "kar", tag: Tag::from_bytes(b"KRN ") }, // Karen [collection] // LangTag { language: "kaw", tag: Tag::from_bytes(b"KAW ") }, // Kawi (Old Javanese) LangTag { language: "kbd", tag: Tag::from_bytes(b"KAB ") }, // Kabardian LangTag { language: "kby", tag: Tag::from_bytes(b"KNR ") }, // Manga Kanuri -> Kanuri @@ -875,7 +876,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "mam", tag: Tag::from_bytes(b"MAM ") }, // Mam LangTag { language: "mam", tag: Tag::from_bytes(b"MYN ") }, // Mam -> Mayan LangTag { language: "man", tag: Tag::from_bytes(b"MNK ") }, // Mandingo [macrolanguage] -> Maninka - LangTag { language: "map", tag: Tag(0) }, // Austronesian [family] != Mapudungun + LangTag { language: "map", tag: Tag(0) }, // Austronesian [collection] != Mapudungun LangTag { language: "maw", tag: Tag(0) }, // Mampruli != Marwari LangTag { language: "max", tag: Tag::from_bytes(b"MLY ") }, // North Moluccan Malay -> Malay LangTag { language: "max", tag: Tag::from_bytes(b"CPP ") }, // North Moluccan Malay -> Creoles @@ -935,6 +936,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "mnw", tag: Tag::from_bytes(b"MONT") }, // Mon -> Thailand Mon LangTag { language: "mnx", tag: Tag(0) }, // Manikion != Manx LangTag { language: "mo", tag: Tag::from_bytes(b"MOL ") }, // Moldavian(retired code) -> Romanian (Moldova) + LangTag { language: "mo", tag: Tag::from_bytes(b"ROM ") }, // Moldavian(retired code) -> 
Romanian LangTag { language: "mod", tag: Tag::from_bytes(b"CPP ") }, // Mobilian -> Creoles // LangTag { language: "moh", tag: Tag::from_bytes(b"MOH ") }, // Mohawk LangTag { language: "mok", tag: Tag(0) }, // Morori != Moksha @@ -957,7 +959,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "mts", tag: Tag(0) }, // Yora != Maltese LangTag { language: "mud", tag: Tag::from_bytes(b"CPP ") }, // Mednyj Aleut -> Creoles LangTag { language: "mui", tag: Tag::from_bytes(b"MLY ") }, // Musi -> Malay - LangTag { language: "mun", tag: Tag(0) }, // Munda [family] != Mundari + LangTag { language: "mun", tag: Tag(0) }, // Munda [collection] != Mundari LangTag { language: "mup", tag: Tag::from_bytes(b"RAJ ") }, // Malvi -> Rajasthani LangTag { language: "muq", tag: Tag::from_bytes(b"HMN ") }, // Eastern Xiangxi Miao -> Hmong // LangTag { language: "mus", tag: Tag::from_bytes(b"MUS ") }, // Creek -> Muscogee @@ -972,7 +974,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "mww", tag: Tag::from_bytes(b"HMN ") }, // Hmong Daw -> Hmong LangTag { language: "my", tag: Tag::from_bytes(b"BRM ") }, // Burmese LangTag { language: "mym", tag: Tag::from_bytes(b"MEN ") }, // Me’en -// LangTag { language: "myn", tag: Tag::from_bytes(b"MYN ") }, // Mayan [family] +// LangTag { language: "myn", tag: Tag::from_bytes(b"MYN ") }, // Mayan [collection] LangTag { language: "myq", tag: Tag::from_bytes(b"MNK ") }, // Forest Maninka(retired code) -> Maninka LangTag { language: "myv", tag: Tag::from_bytes(b"ERZ ") }, // Erzya LangTag { language: "mzb", tag: Tag::from_bytes(b"BBR ") }, // Tumzabt -> Berber @@ -981,7 +983,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "na", tag: Tag::from_bytes(b"NAU ") }, // Nauru -> Nauruan LangTag { language: "nag", tag: Tag::from_bytes(b"NAG ") }, // Naga Pidgin -> Naga-Assamese LangTag { language: "nag", tag: Tag::from_bytes(b"CPP ") }, // Naga Pidgin -> Creoles -// LangTag { language: "nah", tag: Tag::from_bytes(b"NAH ") }, // Nahuatl [family] +// LangTag { language: "nah", tag: Tag::from_bytes(b"NAH ") }, // Nahuatl [collection] LangTag { language: "nan", tag: Tag::from_bytes(b"ZHS ") }, // Min Nan Chinese -> Chinese, Simplified // LangTag { language: "nap", tag: Tag::from_bytes(b"NAP ") }, // Neapolitan LangTag { language: "nas", tag: Tag(0) }, // Naasioi != Naskapi @@ -1038,7 +1040,6 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "nln", tag: Tag::from_bytes(b"NAH ") }, // Durango Nahuatl(retired code) -> Nahuatl LangTag { language: "nlv", tag: Tag::from_bytes(b"NAH ") }, // Orizaba Nahuatl -> Nahuatl LangTag { language: "nn", tag: Tag::from_bytes(b"NYN ") }, // Norwegian Nynorsk (Nynorsk, Norwegian) - LangTag { language: "nn", tag: Tag::from_bytes(b"NOR ") }, // Norwegian Nynorsk -> Norwegian LangTag { language: "nnh", tag: Tag::from_bytes(b"BML ") }, // Ngiemboon -> Bamileke LangTag { language: "nnz", tag: Tag::from_bytes(b"BML ") }, // Nda'nda' -> Bamileke LangTag { language: "no", tag: Tag::from_bytes(b"NOR ") }, // Norwegian [macrolanguage] @@ -1092,7 +1093,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "otw", tag: Tag::from_bytes(b"OJB ") }, // Ottawa -> Ojibway LangTag { language: "oua", tag: Tag::from_bytes(b"BBR ") }, // Tagargrent -> Berber LangTag { language: "pa", tag: Tag::from_bytes(b"PAN ") }, // Punjabi - LangTag { language: "paa", tag: Tag(0) }, // Papuan [family] != Palestinian Aramaic + LangTag { language: "paa", tag: Tag(0) }, // Papuan [collection] != 
Palestinian Aramaic // LangTag { language: "pag", tag: Tag::from_bytes(b"PAG ") }, // Pangasinan LangTag { language: "pal", tag: Tag(0) }, // Pahlavi != Pali // LangTag { language: "pam", tag: Tag::from_bytes(b"PAM ") }, // Pampanga -> Pampangan @@ -1308,6 +1309,9 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "sgo", tag: Tag(0) }, // Songa(retired code) != Sango // LangTag { language: "sgs", tag: Tag::from_bytes(b"SGS ") }, // Samogitian LangTag { language: "sgw", tag: Tag::from_bytes(b"CHG ") }, // Sebat Bet Gurage -> Chaha Gurage + LangTag { language: "sh", tag: Tag::from_bytes(b"BOS ") }, // Serbo-Croatian [macrolanguage] -> Bosnian + LangTag { language: "sh", tag: Tag::from_bytes(b"HRV ") }, // Serbo-Croatian [macrolanguage] -> Croatian + LangTag { language: "sh", tag: Tag::from_bytes(b"SRB ") }, // Serbo-Croatian [macrolanguage] -> Serbian LangTag { language: "shi", tag: Tag::from_bytes(b"SHI ") }, // Tachelhit LangTag { language: "shi", tag: Tag::from_bytes(b"BBR ") }, // Tachelhit -> Berber LangTag { language: "shl", tag: Tag::from_bytes(b"QIN ") }, // Shendu -> Chin @@ -1329,7 +1333,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "skw", tag: Tag::from_bytes(b"CPP ") }, // Skepi Creole Dutch -> Creoles LangTag { language: "sky", tag: Tag(0) }, // Sikaiana != Slovak LangTag { language: "sl", tag: Tag::from_bytes(b"SLV ") }, // Slovenian - LangTag { language: "sla", tag: Tag(0) }, // Slavic [family] != Slavey + LangTag { language: "sla", tag: Tag(0) }, // Slavic [collection] != Slavey LangTag { language: "sm", tag: Tag::from_bytes(b"SMO ") }, // Samoan LangTag { language: "sma", tag: Tag::from_bytes(b"SSM ") }, // Southern Sami LangTag { language: "smd", tag: Tag::from_bytes(b"MBN ") }, // Sama(retired code) -> Mbundu @@ -1453,7 +1457,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "tpi", tag: Tag::from_bytes(b"CPP ") }, // Tok Pisin -> Creoles LangTag { language: "tr", tag: Tag::from_bytes(b"TRK ") }, // Turkish LangTag { language: "trf", tag: Tag::from_bytes(b"CPP ") }, // Trinidadian Creole English -> Creoles - LangTag { language: "trk", tag: Tag(0) }, // Turkic [family] != Turkish + LangTag { language: "trk", tag: Tag(0) }, // Turkic [collection] != Turkish LangTag { language: "tru", tag: Tag::from_bytes(b"TUA ") }, // Turoyo -> Turoyo Aramaic LangTag { language: "tru", tag: Tag::from_bytes(b"SYR ") }, // Turoyo -> Syriac LangTag { language: "ts", tag: Tag::from_bytes(b"TSG ") }, // Tsonga @@ -1597,7 +1601,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[ LangTag { language: "zlq", tag: Tag::from_bytes(b"ZHA ") }, // Liuqian Zhuang -> Zhuang LangTag { language: "zmi", tag: Tag::from_bytes(b"MLY ") }, // Negeri Sembilan Malay -> Malay LangTag { language: "zmz", tag: Tag::from_bytes(b"BAD0") }, // Mbandja -> Banda - LangTag { language: "znd", tag: Tag(0) }, // Zande [family] != Zande + LangTag { language: "znd", tag: Tag(0) }, // Zande [collection] != Zande LangTag { language: "zne", tag: Tag::from_bytes(b"ZND ") }, // Zande LangTag { language: "zom", tag: Tag::from_bytes(b"QIN ") }, // Zou -> Chin LangTag { language: "zqe", tag: Tag::from_bytes(b"ZHA ") }, // Qiubei Zhuang -> Zhuang @@ -2309,18 +2313,18 @@ pub fn tags_from_complex_language(language: &str, tags: &mut smallvec::SmallVec< } if &language[1..] 
== "o-nyn" { // Norwegian Nynorsk(retired code) - let possible_tags = &[ - Tag::from_bytes(b"NYN "), // Norwegian Nynorsk (Nynorsk, Norwegian) - Tag::from_bytes(b"NOR "), // Norwegian - ]; - tags.extend_from_slice(possible_tags); + tags.push(Tag::from_bytes(b"NYN ")); // Norwegian Nynorsk (Nynorsk, Norwegian) return true; } } b'r' => { if strncmp(&language[1..], "o-", 2) && subtag_matches(language, "-md") { // Romanian; Moldova - tags.push(Tag::from_bytes(b"MOL ")); // Romanian (Moldova) + let possible_tags = &[ + Tag::from_bytes(b"MOL "), // Romanian (Moldova) + Tag::from_bytes(b"ROM "), // Romanian + ]; + tags.extend_from_slice(possible_tags); return true; } } diff --git a/tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf b/tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf new file mode 100644 index 00000000..8508fbee Binary files /dev/null and b/tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf differ diff --git a/tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf b/tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf new file mode 100644 index 00000000..45896c10 Binary files /dev/null and b/tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf differ diff --git a/tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf b/tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf new file mode 100644 index 00000000..cd01de0d Binary files /dev/null and b/tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf differ diff --git a/tests/shaping/aots.rs b/tests/shaping/aots.rs index 8c44a219..4585d366 100644 --- a/tests/shaping/aots.rs +++ b/tests/shaping/aots.rs @@ -832,6 +832,21 @@ fn gpos4_lookupflag_002() { ); } +#[test] +fn gpos4_simple_001() { + assert_eq!( + shape( + "tests/fonts/aots/gpos4_simple_1.otf", + "\u{0011}\u{0012}\u{0013}\u{0011}", + "--features=\"test\" --single-par --no-clusters --no-glyph-names --ned", + ), + "17|\ + 18@1500,0|\ + 19@1400,-80|\ + 17@3000,0" + ); +} + #[test] fn gpos4_simple_002() { assert_eq!( @@ -877,6 +892,21 @@ fn gpos5_001() { ); } +#[test] +fn gpos5_002() { + assert_eq!( + shape( + "tests/fonts/aots/gpos5_font1.otf", + "\u{0011}\u{001E}\u{001F}\u{0013}\u{0011}", + "--features=\"test\" --single-par --no-clusters --no-glyph-names --ned", + ), + "17|\ + 18@1500,0|\ + 19@1401,-79|\ + 17@3000,0" + ); +} + #[test] fn gpos6_002() { assert_eq!( diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index 8ad60296..843e84c1 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -1824,6 +1824,19 @@ fn fallback_positioning_002() { ); } +#[test] +fn glyph_props_no_gdef_001() { + assert_eq!( + shape( + "tests/fonts/in-house/FallbackPlus-Javanese-no-GDEF.otf", + "\u{A995}\u{A9BF}", + "", + ), + "glyph01=0+600|\ + uniA995=0+600" + ); +} + #[test] fn hangul_jamo_001() { assert_eq!( @@ -7323,6 +7336,104 @@ fn myanmar_zawgyi_001() { ); } +#[test] +fn nested_mark_filtering_sets_001() { + assert_eq!( + shape( + "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf", + "\u{0628}\u{0628}\u{6D2}", + "", + ), + "OneDotBelowYB=2@764,-183+0|\ + YBc1=2@764,-282+0|\ + YehBarreeFin_3=2+355|\ + OneDotBelowNS=1@20,-120+0|\ + BehxMed.inT2outD2YB=1@0,349+182|\ + NullMk=0+0|\ + sp10=0+0|\ + BehxIni.outT2=0@0,406+766" + ); +} + +#[test] +fn nested_mark_filtering_sets_002() { + assert_eq!( + shape( + "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf", + "\u{0628}\u{0628}\u{0628}\u{6D2}", + "", + ), + "OneDotBelowYB=3@764,-183+0|\ + YBc1=3@764,-282+0|\ + OneDotBelowYB=3@1098,-60+0|\ + YBc2=3@1098,-159+0|\ + 
YehBarreeFin_4=3+355|\ + OneDotBelowNS=2@20,-120+0|\ + BehxMed.inT2outD2YB=2@0,349+182|\ + NullMk=1+0|\ + BehxMed.inT1outT2=1@0,406+184|\ + NullMk=0+0|\ + sp5=0+0|\ + BehxIni=0@0,471+541" + ); +} + +#[test] +fn nested_mark_filtering_sets_003() { + assert_eq!( + shape( + "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf", + "\u{0628}\u{0628}\u{0628}\u{0628}\u{6D2}", + "", + ), + "OneDotBelowYB=4@659,-192+0|\ + YBc1=4@659,-291+0|\ + OneDotBelowYB=4@966,-55+0|\ + YBc2=4@966,-154+0|\ + OneDotBelowYB=4@1274,-148+0|\ + YBc3=4@1274,-247+0|\ + YehBarreeFin_5=4+355|\ + OneDotBelowNS=3@20,-120+0|\ + BehxMed.inT2outD2YB=3@0,349+182|\ + NullMk=2+0|\ + BehxMed.inT1outT2=2@0,406+184|\ + NullMk=1+0|\ + BehxMed.inT2outT1=1@0,471+267|\ + NullMk=0+0|\ + sp0=0+0|\ + BehxIni.outT2=0@0,616+156" + ); +} + +#[test] +fn nested_mark_filtering_sets_004() { + assert_eq!( + shape( + "tests/fonts/in-house/NotoNastaliqUrdu-Regular.ttf", + "\u{0628}\u{0628}\u{0628}\u{0628}\u{0628}\u{6D2}", + "", + ), + "OneDotBelowYB=5@659,-192+0|\ + YBc1=5@659,-291+0|\ + OneDotBelowYB=5@966,-55+0|\ + YBc2=5@966,-154+0|\ + OneDotBelowYB=5@1274,-148+0|\ + YBc3=5@1274,-247+0|\ + YehBarreeFin_5=5+355|\ + OneDotBelowNS=4@20,-120+0|\ + BehxMed.inT2outD2YB=4@0,349+182|\ + NullMk=3+0|\ + BehxMed.inT1outT2=3@0,406+184|\ + NullMk=2+0|\ + BehxMed.inT2outT1=2@0,471+267|\ + NullMk=1+0|\ + BehxMed.inT1outT2=1@0,616+184|\ + OneDotBelowNS=0@73,516+0|\ + sp0=0+0|\ + BehxIni=0@0,681+236" + ); +} + #[test] fn none_directional_001() { assert_eq!( @@ -9881,6 +9992,19 @@ fn tt_kern_gpos_001() { ); } +#[test] +fn unsafe_to_concat_001() { + assert_eq!( + shape( + "tests/fonts/in-house/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf", + "\u{0628}\u{200C}\u{0628}", + "--show-flags", + ), + "uni0628=1+993#2|\ + uni0628=0+993#2" + ); +} + #[test] fn use_indic3_001() { assert_eq!( diff --git a/tests/shaping/main.rs b/tests/shaping/main.rs index 77fddbfd..a86338b1 100644 --- a/tests/shaping/main.rs +++ b/tests/shaping/main.rs @@ -15,7 +15,7 @@ struct Args { language: Option, script: Option, #[allow(dead_code)] - remove_default_ignorables: bool, // we don't use it, but have to parse it anyway + remove_default_ignorables: bool, cluster_level: rustybuzz::BufferClusterLevel, features: Vec, pre_context: Option, @@ -139,6 +139,10 @@ pub fn shape(font_path: &str, text: &str, options: &str) -> String { let mut buffer_flags = BufferFlags::default(); buffer_flags.set(BufferFlags::BEGINNING_OF_TEXT, args.bot); buffer_flags.set(BufferFlags::END_OF_TEXT, args.eot); + buffer_flags.set( + BufferFlags::REMOVE_DEFAULT_IGNORABLES, + args.remove_default_ignorables, + ); buffer.set_flags(buffer_flags); buffer.set_cluster_level(args.cluster_level); diff --git a/tests/shaping/text_rendering_tests.rs b/tests/shaping/text_rendering_tests.rs index f8436289..58a22115 100644 --- a/tests/shaping/text_rendering_tests.rs +++ b/tests/shaping/text_rendering_tests.rs @@ -662,6 +662,18 @@ fn cmap_1_003() { ); } +#[test] +fn cmap_1_004() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestCMAP14.otf", + "\u{82A6}\u{E0102}", + "--ned --remove-default-ignorables", + ), + "uni82A6_uE0100" + ); +} + #[test] fn cmap_2_001() { assert_eq!( @@ -9897,6 +9909,21 @@ fn shknda_3_030() { ); } +#[test] +fn shknda_3_031() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/NotoSansKannada-Regular.ttf", + "\u{0C86}\u{0CCD}\u{0CAF}\u{0C95}\u{0CCD}\u{0CB7}\u{0CBF}\u{0CB8}\u{0CCD}\u{200C}", + "--ned --remove-default-ignorables", + ), + "gid7|\ + gid122@1717,0|\ + gid285@2249,0|\ + gid200@3425,0" + 
); +} + #[test] fn shlana_1_001() { assert_eq!( @@ -11081,6 +11108,24 @@ fn shlana_10_027() { ); } +#[test] +fn shlana_10_028() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestShapeLana.ttf", + "\u{1A32}\u{1A6C}\u{1A74}\u{1A75}\u{034F}\u{1A6F}\u{1A60}\u{1A36}", + "--ned --remove-default-ignorables", + ), + "uni1A32|\ + uni1A6C.wide@1910,0|\ + uni1A74@1560,0|\ + uni1A75@1560,732|\ + uni1A6F@1910,0|\ + uni25CC@4154,0|\ + uni1A601A36@5366,0" + ); +} + #[test] fn shlana_10_029() { assert_eq!( @@ -11277,6 +11322,21 @@ fn shlana_10_040() { ); } +#[test] +fn shlana_10_041() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestShapeLana.ttf", + "\u{1A32}\u{1A66}\u{034F}\u{1A63}\u{1A60}\u{1A3F}", + "--ned --remove-default-ignorables", + ), + "uni1A32|\ + uni1A66@1560,0|\ + uni1A63@1910,0|\ + uni1A601A3F@3122,0" + ); +} + #[test] fn shlana_10_042() { assert_eq!( @@ -12341,6 +12401,24 @@ fn shlana_5_009() { ); } +#[test] +fn shlana_5_010() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestShapeLana.ttf", + "\u{1A34}\u{1A64}\u{1A74}\u{1A36}\u{1A60}\u{1A45}\u{200C}\u{1A63}\u{1A60}\u{1A3F}", + "--ned --remove-default-ignorables", + ), + "uni1A34|\ + uni1A74@1212,0|\ + uni1A64@1212,0|\ + uni1A36@1676,0|\ + uni1A601A45@2888,0|\ + uni1A63@2888,0|\ + uni1A601A3F@4100,0" + ); +} + #[test] fn shlana_5_011() { assert_eq!( @@ -12359,6 +12437,19 @@ fn shlana_5_011() { ); } +#[test] +fn shlana_5_012() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestShapeLana.ttf", + "\u{1A36}\u{200C}\u{1A63}", + "--ned --remove-default-ignorables", + ), + "uni1A36|\ + uni1A63@1212,0" + ); +} + #[test] fn shlana_5_013() { assert_eq!(