Skip to content

Commit

Permalink
Apply limits to cmap12 iterator (#1352)
Browse files Browse the repository at this point in the history
Meant to address fuzzing timeouts that all seem to point back to iterating cmap format 12 subtables. We previously limited characters to the max assigned codepoint and this extends that to limit glyphs to `maxp.numGlyphs`.

Reverts `Cmap12::iter()` to yield the raw mappings and adds a new `Cmap12::iter_with_limits()` that allows custom limits for both characters and glyphs. `Cmap12IterLimits::default_for_font(&FontRef)` produces the expected limits.

We _could_ have modified the cmap table to add these limits directly but reasons for not doing so are two-fold:
1. This prevents the read-fonts code from accurately representing the font data
2. Would require a lot of manual definitions and impls for many cmap types

Skrifa changed to use the limited version internally.

Avoids timeout for fuzzing bug: https://issues.oss-fuzz.com/issues/394638728
  • Loading branch information
dfrg authored Feb 12, 2025
1 parent 24fb1c8 commit 652b3ca
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 31 deletions.
151 changes: 131 additions & 20 deletions read-fonts/src/tables/cmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ include!("../../generated/generated_cmap.rs");

#[cfg(feature = "std")]
use crate::collections::IntSet;
use std::ops::{Range, RangeInclusive};
use crate::{FontRef, TableProvider};
use std::ops::Range;

/// Result of mapping a codepoint with a variation selector.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
Expand Down Expand Up @@ -222,8 +223,18 @@ impl<'a> Cmap12<'a> {

/// Returns an iterator over all (codepoint, glyph identifier) pairs
/// in the subtable.
///
/// Malicious and malformed fonts can produce a large number of invalid
/// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
/// that is limited to reasonable values.
pub fn iter(&self) -> Cmap12Iter<'a> {
Cmap12Iter::new(self.clone())
Cmap12Iter::new(self.clone(), None)
}

/// Returns an iterator over all (codepoint, glyph identifier) pairs
/// in the subtable within the given limits.
pub fn iter_with_limits(&self, limits: Cmap12IterLimits) -> Cmap12Iter<'a> {
Cmap12Iter::new(self.clone(), Some(limits))
}

/// Does the final phase of glyph id lookup.
Expand All @@ -240,43 +251,95 @@ impl<'a> Cmap12<'a> {

/// Returns the codepoint range and start glyph id for the group
/// at the given index.
fn group(&self, index: usize) -> Option<Cmap12Group> {
fn group(&self, index: usize, limits: &Option<Cmap12IterLimits>) -> Option<Cmap12Group> {
let group = self.groups().get(index)?;
let start_code = group.start_char_code();
// Limit to the valid range of Unicode characters
// per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
let end_code = group.end_char_code().min(char::MAX as u32);
// Change to exclusive range. This can never overflow since the source
// is a 32-bit value
let end_code = group.end_char_code() as u64 + 1;
let start_glyph_id = group.start_glyph_id();
let end_code = if let Some(limits) = limits {
// Set our end code to the minimum of our character and glyph
// count limit
(limits.glyph_count as u64)
.saturating_sub(start_glyph_id as u64)
.saturating_add(start_code as u64)
.min(end_code.min(limits.max_char as u64))
} else {
end_code
};
Some(Cmap12Group {
range: start_code..=end_code,
range: start_code as u64..end_code,
start_code,
start_glyph_id: group.start_glyph_id(),
start_glyph_id,
})
}
}

#[derive(Clone)]
#[derive(Clone, Debug)]
struct Cmap12Group {
range: RangeInclusive<u32>,
range: Range<u64>,
start_code: u32,
start_glyph_id: u32,
}

/// Character and glyph limits for iterating format 12 subtables.
#[derive(Copy, Clone, Debug)]
pub struct Cmap12IterLimits {
/// The maximum valid character.
pub max_char: u32,
/// The number of glyphs in the font.
pub glyph_count: u32,
}

impl Cmap12IterLimits {
/// Returns the default limits for the given font.
///
/// This will limit pairs to `char::MAX` and the number of glyphs contained
/// in the font. If the font is missing a `maxp` table, the number of
/// glyphs will be limited to `u16::MAX`.
pub fn default_for_font(font: &FontRef) -> Self {
let glyph_count = font
.maxp()
.map(|maxp| maxp.num_glyphs())
.unwrap_or(u16::MAX) as u32;
Self {
// Limit to the valid range of Unicode characters
// per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
max_char: char::MAX as u32,
glyph_count,
}
}
}

impl Default for Cmap12IterLimits {
fn default() -> Self {
Self {
max_char: char::MAX as u32,
// Revisit this when we actually support big glyph ids
glyph_count: u16::MAX as u32,
}
}
}

/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
#[derive(Clone)]
pub struct Cmap12Iter<'a> {
subtable: Cmap12<'a>,
cur_group: Option<Cmap12Group>,
cur_group_ix: usize,
limits: Option<Cmap12IterLimits>,
}

impl<'a> Cmap12Iter<'a> {
fn new(subtable: Cmap12<'a>) -> Self {
let cur_group = subtable.group(0);
fn new(subtable: Cmap12<'a>, limits: Option<Cmap12IterLimits>) -> Self {
let cur_group = subtable.group(0, &limits);
Self {
subtable,
cur_group,
cur_group_ix: 0,
limits,
}
}
}
Expand All @@ -288,6 +351,7 @@ impl Iterator for Cmap12Iter<'_> {
loop {
let group = self.cur_group.as_mut()?;
if let Some(codepoint) = group.range.next() {
let codepoint = codepoint as u32;
let glyph_id = self.subtable.lookup_glyph_id(
codepoint,
group.start_code,
Expand All @@ -301,14 +365,12 @@ impl Iterator for Cmap12Iter<'_> {
return Some((codepoint, glyph_id));
} else {
self.cur_group_ix += 1;
let mut next_group = self.subtable.group(self.cur_group_ix)?;
let mut next_group = self.subtable.group(self.cur_group_ix, &self.limits)?;
// Groups should be in order and non-overlapping so make sure
// that the start code of next group is at least
// current_end + 1.
// This ensures we only ever generate a maximum of
// char::MAX + 1 results.
if next_group.range.start() <= group.range.end() {
next_group.range = *group.range.end() + 1..=*next_group.range.end();
// current_end.
if next_group.range.start < group.range.end {
next_group.range = group.range.end..next_group.range.end;
}
self.cur_group = Some(next_group);
}
Expand Down Expand Up @@ -710,7 +772,52 @@ mod tests {
[170u32, 1330926671, 328960] // group 0
};
let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
assert!(cmap12.iter().count() <= char::MAX as usize + 1);
assert!(
cmap12.iter_with_limits(Cmap12IterLimits::default()).count() <= char::MAX as usize + 1
);
}

// oss-fuzz: timeout in outlines, caused by cmap 12 iter
// ref: <https://issues.oss-fuzz.com/issues/394638728>
#[test]
fn cmap12_iter_avoid_timeout2() {
let cmap12_data = be_buffer! {
12u16, // format
0u16, // reserved, set to 0
0u32, // length, ignored
0u32, // language, ignored
3u32, // numGroups
// groups: [startCode, endCode, startGlyphID]
[199u32, 16777271, 2],
[262u32, 262, 3],
[268u32, 268, 4]
};
let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
// In the test case, maxp.numGlyphs = 8
const MAX_GLYPHS: u32 = 8;
let limits = Cmap12IterLimits {
glyph_count: MAX_GLYPHS,
..Default::default()
};
assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize);
}

#[test]
fn cmap12_iter_glyph_limit() {
let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
let mut limits = Cmap12IterLimits::default_for_font(&font);
// Ensure we obey the glyph count limit.
// This font has 11 glyphs
for glyph_count in 0..=11 {
limits.glyph_count = glyph_count;
assert_eq!(
cmap12.iter_with_limits(limits).count(),
// We always return one less than glyph count limit because
// notdef is not mapped
(glyph_count as usize).saturating_sub(1)
);
}
}

#[test]
Expand All @@ -725,7 +832,11 @@ mod tests {
// These groups overlap and extend to the whole u32 range
assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]);
// But we produce at most char::MAX + 1 results
assert!(cmap12.iter().count() <= char::MAX as usize + 1);
let limits = Cmap12IterLimits {
glyph_count: u32::MAX,
..Default::default()
};
assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1);
}

#[test]
Expand Down
30 changes: 19 additions & 11 deletions skrifa/src/charmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
use read_fonts::{
tables::cmap::{
self, Cmap, Cmap12, Cmap12Iter, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter, CmapSubtable,
EncodingRecord, PlatformId,
self, Cmap, Cmap12, Cmap12Iter, Cmap12IterLimits, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter,
CmapSubtable, EncodingRecord, PlatformId,
},
types::GlyphId,
FontData, TableProvider,
FontData, FontRef, TableProvider,
};

pub use read_fonts::tables::cmap::MapVariant;
Expand All @@ -32,7 +32,7 @@ pub use read_fonts::tables::cmap::MapVariant;
///
/// ## Obtaining a Charmap
///
/// Typically a Charmap is acquired by calling [charmap](crate::MetadataProvider::charmap) on a [FontRef](crate::FontRef).
/// Typically a Charmap is acquired by calling [charmap](crate::MetadataProvider::charmap) on a [FontRef].
///
/// ## Selection strategy
///
Expand All @@ -54,15 +54,16 @@ pub use read_fonts::tables::cmap::MapVariant;
pub struct Charmap<'a> {
codepoint_subtable: Option<CodepointSubtable<'a>>,
variant_subtable: Option<Cmap14<'a>>,
cmap12_limits: Cmap12IterLimits,
}

impl<'a> Charmap<'a> {
/// Creates a new character map from the given font.
pub fn new(font: &impl TableProvider<'a>) -> Self {
pub fn new(font: &FontRef<'a>) -> Self {
let Ok(cmap) = font.cmap() else {
return Default::default();
};
let selection = MappingSelection::new(&cmap);
let selection = MappingSelection::new(font, &cmap);
Self {
codepoint_subtable: selection
.codepoint_subtable
Expand All @@ -71,6 +72,7 @@ impl<'a> Charmap<'a> {
is_symbol: selection.mapping_index.codepoint_subtable_is_symbol,
}),
variant_subtable: selection.variant_subtable,
cmap12_limits: selection.mapping_index.cmap12_limits,
}
}

Expand Down Expand Up @@ -107,7 +109,9 @@ impl<'a> Charmap<'a> {
.map(|subtable| {
Mappings(match &subtable.subtable {
SupportedSubtable::Format4(cmap4) => MappingsInner::Format4(cmap4.iter()),
SupportedSubtable::Format12(cmap12) => MappingsInner::Format12(cmap12.iter()),
SupportedSubtable::Format12(cmap12) => {
MappingsInner::Format12(cmap12.iter_with_limits(self.cmap12_limits))
}
})
})
.unwrap_or(Mappings(MappingsInner::None))
Expand Down Expand Up @@ -142,23 +146,25 @@ pub struct MappingIndex {
codepoint_subtable_is_symbol: bool,
/// Index of Unicode variation selector subtable.
variant_subtable: Option<u16>,
/// Limits for iterating a cmap format 12 subtable.
cmap12_limits: Cmap12IterLimits,
}

impl MappingIndex {
/// Finds the indices of the most suitable Unicode mapping tables in the
/// given font.
pub fn new<'a>(font: &impl TableProvider<'a>) -> Self {
pub fn new(font: &FontRef) -> Self {
let Ok(cmap) = font.cmap() else {
return Default::default();
};
MappingSelection::new(&cmap).mapping_index
MappingSelection::new(font, &cmap).mapping_index
}

/// Creates a new character map for the given font using the tables referenced by
/// the precomputed indices.
///
/// The font should be the same as the one used to construct this object.
pub fn charmap<'a>(&self, font: &impl TableProvider<'a>) -> Charmap<'a> {
pub fn charmap<'a>(&self, font: &FontRef<'a>) -> Charmap<'a> {
let Ok(cmap) = font.cmap() else {
return Default::default();
};
Expand All @@ -180,6 +186,7 @@ impl MappingIndex {
CmapSubtable::Format14(cmap14) => Some(cmap14),
_ => None,
}),
cmap12_limits: Cmap12IterLimits::default_for_font(font),
}
}
}
Expand Down Expand Up @@ -319,7 +326,7 @@ struct MappingSelection<'a> {
}

impl<'a> MappingSelection<'a> {
fn new(cmap: &Cmap<'a>) -> Self {
fn new(font: &FontRef<'a>, cmap: &Cmap<'a>) -> Self {
const ENCODING_MS_SYMBOL: u16 = 0;
const ENCODING_MS_UNICODE_CS: u16 = 1;
const ENCODING_APPLE_ID_UNICODE_32: u16 = 4;
Expand Down Expand Up @@ -380,6 +387,7 @@ impl<'a> MappingSelection<'a> {
_ => {}
}
}
mapping_index.cmap12_limits = Cmap12IterLimits::default_for_font(font);
Self {
mapping_index,
codepoint_subtable,
Expand Down

0 comments on commit 652b3ca

Please sign in to comment.