diff --git a/read-fonts/src/tables/cmap.rs b/read-fonts/src/tables/cmap.rs index df71a6bc6..fb5a06ba9 100644 --- a/read-fonts/src/tables/cmap.rs +++ b/read-fonts/src/tables/cmap.rs @@ -4,7 +4,8 @@ include!("../../generated/generated_cmap.rs"); #[cfg(feature = "std")] use crate::collections::IntSet; -use std::ops::{Range, RangeInclusive}; +use crate::{FontRef, TableProvider}; +use std::ops::Range; /// Result of mapping a codepoint with a variation selector. #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -222,8 +223,18 @@ impl<'a> Cmap12<'a> { /// Returns an iterator over all (codepoint, glyph identifier) pairs /// in the subtable. + /// + /// Malicious and malformed fonts can produce a large number of invalid + /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence + /// that is limited to reasonable values. pub fn iter(&self) -> Cmap12Iter<'a> { - Cmap12Iter::new(self.clone()) + Cmap12Iter::new(self.clone(), None) + } + + /// Returns an iterator over all (codepoint, glyph identifier) pairs + /// in the subtable within the given limits. + pub fn iter_with_limits(&self, limits: Cmap12IterLimits) -> Cmap12Iter<'a> { + Cmap12Iter::new(self.clone(), Some(limits)) } /// Does the final phase of glyph id lookup. @@ -240,27 +251,77 @@ impl<'a> Cmap12<'a> { /// Returns the codepoint range and start glyph id for the group /// at the given index. - fn group(&self, index: usize) -> Option { + fn group(&self, index: usize, limits: &Option) -> Option { let group = self.groups().get(index)?; let start_code = group.start_char_code(); - // Limit to the valid range of Unicode characters - // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184 - let end_code = group.end_char_code().min(char::MAX as u32); + // Change to exclusive range. This can never overflow since the source + // is a 32-bit value + let end_code = group.end_char_code() as u64 + 1; + let start_glyph_id = group.start_glyph_id(); + let end_code = if let Some(limits) = limits { + // Set our end code to the minimum of our character and glyph + // count limit + (limits.glyph_count as u64) + .saturating_sub(start_glyph_id as u64) + .saturating_add(start_code as u64) + .min(end_code.min(limits.max_char as u64)) + } else { + end_code + }; Some(Cmap12Group { - range: start_code..=end_code, + range: start_code as u64..end_code, start_code, - start_glyph_id: group.start_glyph_id(), + start_glyph_id, }) } } -#[derive(Clone)] +#[derive(Clone, Debug)] struct Cmap12Group { - range: RangeInclusive, + range: Range, start_code: u32, start_glyph_id: u32, } +/// Character and glyph limits for iterating format 12 subtables. +#[derive(Copy, Clone, Debug)] +pub struct Cmap12IterLimits { + /// The maximum valid character. + pub max_char: u32, + /// The number of glyphs in the font. + pub glyph_count: u32, +} + +impl Cmap12IterLimits { + /// Returns the default limits for the given font. + /// + /// This will limit pairs to `char::MAX` and the number of glyphs contained + /// in the font. If the font is missing a `maxp` table, the number of + /// glyphs will be limited to `u16::MAX`. + pub fn default_for_font(font: &FontRef) -> Self { + let glyph_count = font + .maxp() + .map(|maxp| maxp.num_glyphs()) + .unwrap_or(u16::MAX) as u32; + Self { + // Limit to the valid range of Unicode characters + // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184 + max_char: char::MAX as u32, + glyph_count, + } + } +} + +impl Default for Cmap12IterLimits { + fn default() -> Self { + Self { + max_char: char::MAX as u32, + // Revisit this when we actually support big glyph ids + glyph_count: u16::MAX as u32, + } + } +} + /// Iterator over all (codepoint, glyph identifier) pairs in /// the subtable. #[derive(Clone)] @@ -268,15 +329,17 @@ pub struct Cmap12Iter<'a> { subtable: Cmap12<'a>, cur_group: Option, cur_group_ix: usize, + limits: Option, } impl<'a> Cmap12Iter<'a> { - fn new(subtable: Cmap12<'a>) -> Self { - let cur_group = subtable.group(0); + fn new(subtable: Cmap12<'a>, limits: Option) -> Self { + let cur_group = subtable.group(0, &limits); Self { subtable, cur_group, cur_group_ix: 0, + limits, } } } @@ -288,6 +351,7 @@ impl Iterator for Cmap12Iter<'_> { loop { let group = self.cur_group.as_mut()?; if let Some(codepoint) = group.range.next() { + let codepoint = codepoint as u32; let glyph_id = self.subtable.lookup_glyph_id( codepoint, group.start_code, @@ -301,14 +365,12 @@ impl Iterator for Cmap12Iter<'_> { return Some((codepoint, glyph_id)); } else { self.cur_group_ix += 1; - let mut next_group = self.subtable.group(self.cur_group_ix)?; + let mut next_group = self.subtable.group(self.cur_group_ix, &self.limits)?; // Groups should be in order and non-overlapping so make sure // that the start code of next group is at least - // current_end + 1. - // This ensures we only ever generate a maximum of - // char::MAX + 1 results. - if next_group.range.start() <= group.range.end() { - next_group.range = *group.range.end() + 1..=*next_group.range.end(); + // current_end. + if next_group.range.start < group.range.end { + next_group.range = group.range.end..next_group.range.end; } self.cur_group = Some(next_group); } @@ -710,7 +772,52 @@ mod tests { [170u32, 1330926671, 328960] // group 0 }; let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap(); - assert!(cmap12.iter().count() <= char::MAX as usize + 1); + assert!( + cmap12.iter_with_limits(Cmap12IterLimits::default()).count() <= char::MAX as usize + 1 + ); + } + + // oss-fuzz: timeout in outlines, caused by cmap 12 iter + // ref: + #[test] + fn cmap12_iter_avoid_timeout2() { + let cmap12_data = be_buffer! { + 12u16, // format + 0u16, // reserved, set to 0 + 0u32, // length, ignored + 0u32, // language, ignored + 3u32, // numGroups + // groups: [startCode, endCode, startGlyphID] + [199u32, 16777271, 2], + [262u32, 262, 3], + [268u32, 268, 4] + }; + let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap(); + // In the test case, maxp.numGlyphs = 8 + const MAX_GLYPHS: u32 = 8; + let limits = Cmap12IterLimits { + glyph_count: MAX_GLYPHS, + ..Default::default() + }; + assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize); + } + + #[test] + fn cmap12_iter_glyph_limit() { + let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap(); + let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap(); + let mut limits = Cmap12IterLimits::default_for_font(&font); + // Ensure we obey the glyph count limit. + // This font has 11 glyphs + for glyph_count in 0..=11 { + limits.glyph_count = glyph_count; + assert_eq!( + cmap12.iter_with_limits(limits).count(), + // We always return one less than glyph count limit because + // notdef is not mapped + (glyph_count as usize).saturating_sub(1) + ); + } } #[test] @@ -725,7 +832,11 @@ mod tests { // These groups overlap and extend to the whole u32 range assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]); // But we produce at most char::MAX + 1 results - assert!(cmap12.iter().count() <= char::MAX as usize + 1); + let limits = Cmap12IterLimits { + glyph_count: u32::MAX, + ..Default::default() + }; + assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1); } #[test] diff --git a/skrifa/src/charmap.rs b/skrifa/src/charmap.rs index df87f5675..4ebd24e56 100644 --- a/skrifa/src/charmap.rs +++ b/skrifa/src/charmap.rs @@ -16,11 +16,11 @@ use read_fonts::{ tables::cmap::{ - self, Cmap, Cmap12, Cmap12Iter, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter, CmapSubtable, - EncodingRecord, PlatformId, + self, Cmap, Cmap12, Cmap12Iter, Cmap12IterLimits, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter, + CmapSubtable, EncodingRecord, PlatformId, }, types::GlyphId, - FontData, TableProvider, + FontData, FontRef, TableProvider, }; pub use read_fonts::tables::cmap::MapVariant; @@ -54,15 +54,16 @@ pub use read_fonts::tables::cmap::MapVariant; pub struct Charmap<'a> { codepoint_subtable: Option>, variant_subtable: Option>, + cmap12_limits: Cmap12IterLimits, } impl<'a> Charmap<'a> { /// Creates a new character map from the given font. - pub fn new(font: &impl TableProvider<'a>) -> Self { + pub fn new(font: &FontRef<'a>) -> Self { let Ok(cmap) = font.cmap() else { return Default::default(); }; - let selection = MappingSelection::new(&cmap); + let selection = MappingSelection::new(font, &cmap); Self { codepoint_subtable: selection .codepoint_subtable @@ -71,6 +72,7 @@ impl<'a> Charmap<'a> { is_symbol: selection.mapping_index.codepoint_subtable_is_symbol, }), variant_subtable: selection.variant_subtable, + cmap12_limits: selection.mapping_index.cmap12_limits, } } @@ -107,7 +109,9 @@ impl<'a> Charmap<'a> { .map(|subtable| { Mappings(match &subtable.subtable { SupportedSubtable::Format4(cmap4) => MappingsInner::Format4(cmap4.iter()), - SupportedSubtable::Format12(cmap12) => MappingsInner::Format12(cmap12.iter()), + SupportedSubtable::Format12(cmap12) => { + MappingsInner::Format12(cmap12.iter_with_limits(self.cmap12_limits)) + } }) }) .unwrap_or(Mappings(MappingsInner::None)) @@ -142,23 +146,25 @@ pub struct MappingIndex { codepoint_subtable_is_symbol: bool, /// Index of Unicode variation selector subtable. variant_subtable: Option, + /// Limits for iterating a cmap format 12 subtable. + cmap12_limits: Cmap12IterLimits, } impl MappingIndex { /// Finds the indices of the most suitable Unicode mapping tables in the /// given font. - pub fn new<'a>(font: &impl TableProvider<'a>) -> Self { + pub fn new(font: &FontRef) -> Self { let Ok(cmap) = font.cmap() else { return Default::default(); }; - MappingSelection::new(&cmap).mapping_index + MappingSelection::new(font, &cmap).mapping_index } /// Creates a new character map for the given font using the tables referenced by /// the precomputed indices. /// /// The font should be the same as the one used to construct this object. - pub fn charmap<'a>(&self, font: &impl TableProvider<'a>) -> Charmap<'a> { + pub fn charmap<'a>(&self, font: &FontRef<'a>) -> Charmap<'a> { let Ok(cmap) = font.cmap() else { return Default::default(); }; @@ -180,6 +186,7 @@ impl MappingIndex { CmapSubtable::Format14(cmap14) => Some(cmap14), _ => None, }), + cmap12_limits: Cmap12IterLimits::default_for_font(font), } } } @@ -319,7 +326,7 @@ struct MappingSelection<'a> { } impl<'a> MappingSelection<'a> { - fn new(cmap: &Cmap<'a>) -> Self { + fn new(font: &FontRef<'a>, cmap: &Cmap<'a>) -> Self { const ENCODING_MS_SYMBOL: u16 = 0; const ENCODING_MS_UNICODE_CS: u16 = 1; const ENCODING_APPLE_ID_UNICODE_32: u16 = 4; @@ -380,6 +387,7 @@ impl<'a> MappingSelection<'a> { _ => {} } } + mapping_index.cmap12_limits = Cmap12IterLimits::default_for_font(font); Self { mapping_index, codepoint_subtable,