Skip to content

Commit 652b3ca

Browse files
authored
Apply limits to cmap12 iterator (#1352)
Meant to address fuzzing timeouts that all seem to point back to iterating cmap format 12 subtables. We previously limited characters to the max assigned codepoint and this extends that to limit glyphs to `maxp.numGlyphs`. Reverts `Cmap12::iter()` to yield the raw mappings and adds a new `Cmap12::iter_with_limits()` that allows custom limits for both characters and glyphs. `Cmap12IterLimits::default_for_font(&FontRef)` produces the expected limits. We _could_ have modified the cmap table to add these limits directly but reasons for not doing so are two-fold: 1. This prevents the read-fonts code from accurately representing the font data 2. Would require a lot of manual definitions and impls for many cmap types Skrifa changed to use the limited version internally. Avoids timeout for fuzzing bug: https://issues.oss-fuzz.com/issues/394638728
1 parent 24fb1c8 commit 652b3ca

File tree

2 files changed

+150
-31
lines changed

2 files changed

+150
-31
lines changed

read-fonts/src/tables/cmap.rs

Lines changed: 131 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ include!("../../generated/generated_cmap.rs");
44

55
#[cfg(feature = "std")]
66
use crate::collections::IntSet;
7-
use std::ops::{Range, RangeInclusive};
7+
use crate::{FontRef, TableProvider};
8+
use std::ops::Range;
89

910
/// Result of mapping a codepoint with a variation selector.
1011
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -222,8 +223,18 @@ impl<'a> Cmap12<'a> {
222223

223224
/// Returns an iterator over all (codepoint, glyph identifier) pairs
224225
/// in the subtable.
226+
///
227+
/// Malicious and malformed fonts can produce a large number of invalid
228+
/// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
229+
/// that is limited to reasonable values.
225230
pub fn iter(&self) -> Cmap12Iter<'a> {
226-
Cmap12Iter::new(self.clone())
231+
Cmap12Iter::new(self.clone(), None)
232+
}
233+
234+
/// Returns an iterator over all (codepoint, glyph identifier) pairs
235+
/// in the subtable within the given limits.
236+
pub fn iter_with_limits(&self, limits: Cmap12IterLimits) -> Cmap12Iter<'a> {
237+
Cmap12Iter::new(self.clone(), Some(limits))
227238
}
228239

229240
/// Does the final phase of glyph id lookup.
@@ -240,43 +251,95 @@ impl<'a> Cmap12<'a> {
240251

241252
/// Returns the codepoint range and start glyph id for the group
242253
/// at the given index.
243-
fn group(&self, index: usize) -> Option<Cmap12Group> {
254+
fn group(&self, index: usize, limits: &Option<Cmap12IterLimits>) -> Option<Cmap12Group> {
244255
let group = self.groups().get(index)?;
245256
let start_code = group.start_char_code();
246-
// Limit to the valid range of Unicode characters
247-
// per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
248-
let end_code = group.end_char_code().min(char::MAX as u32);
257+
// Change to exclusive range. This can never overflow since the source
258+
// is a 32-bit value
259+
let end_code = group.end_char_code() as u64 + 1;
260+
let start_glyph_id = group.start_glyph_id();
261+
let end_code = if let Some(limits) = limits {
262+
// Set our end code to the minimum of our character and glyph
263+
// count limit
264+
(limits.glyph_count as u64)
265+
.saturating_sub(start_glyph_id as u64)
266+
.saturating_add(start_code as u64)
267+
.min(end_code.min(limits.max_char as u64))
268+
} else {
269+
end_code
270+
};
249271
Some(Cmap12Group {
250-
range: start_code..=end_code,
272+
range: start_code as u64..end_code,
251273
start_code,
252-
start_glyph_id: group.start_glyph_id(),
274+
start_glyph_id,
253275
})
254276
}
255277
}
256278

257-
#[derive(Clone)]
279+
#[derive(Clone, Debug)]
258280
struct Cmap12Group {
259-
range: RangeInclusive<u32>,
281+
range: Range<u64>,
260282
start_code: u32,
261283
start_glyph_id: u32,
262284
}
263285

286+
/// Character and glyph limits for iterating format 12 subtables.
287+
#[derive(Copy, Clone, Debug)]
288+
pub struct Cmap12IterLimits {
289+
/// The maximum valid character.
290+
pub max_char: u32,
291+
/// The number of glyphs in the font.
292+
pub glyph_count: u32,
293+
}
294+
295+
impl Cmap12IterLimits {
296+
/// Returns the default limits for the given font.
297+
///
298+
/// This will limit pairs to `char::MAX` and the number of glyphs contained
299+
/// in the font. If the font is missing a `maxp` table, the number of
300+
/// glyphs will be limited to `u16::MAX`.
301+
pub fn default_for_font(font: &FontRef) -> Self {
302+
let glyph_count = font
303+
.maxp()
304+
.map(|maxp| maxp.num_glyphs())
305+
.unwrap_or(u16::MAX) as u32;
306+
Self {
307+
// Limit to the valid range of Unicode characters
308+
// per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
309+
max_char: char::MAX as u32,
310+
glyph_count,
311+
}
312+
}
313+
}
314+
315+
impl Default for Cmap12IterLimits {
316+
fn default() -> Self {
317+
Self {
318+
max_char: char::MAX as u32,
319+
// Revisit this when we actually support big glyph ids
320+
glyph_count: u16::MAX as u32,
321+
}
322+
}
323+
}
324+
264325
/// Iterator over all (codepoint, glyph identifier) pairs in
265326
/// the subtable.
266327
#[derive(Clone)]
267328
pub struct Cmap12Iter<'a> {
268329
subtable: Cmap12<'a>,
269330
cur_group: Option<Cmap12Group>,
270331
cur_group_ix: usize,
332+
limits: Option<Cmap12IterLimits>,
271333
}
272334

273335
impl<'a> Cmap12Iter<'a> {
274-
fn new(subtable: Cmap12<'a>) -> Self {
275-
let cur_group = subtable.group(0);
336+
fn new(subtable: Cmap12<'a>, limits: Option<Cmap12IterLimits>) -> Self {
337+
let cur_group = subtable.group(0, &limits);
276338
Self {
277339
subtable,
278340
cur_group,
279341
cur_group_ix: 0,
342+
limits,
280343
}
281344
}
282345
}
@@ -288,6 +351,7 @@ impl Iterator for Cmap12Iter<'_> {
288351
loop {
289352
let group = self.cur_group.as_mut()?;
290353
if let Some(codepoint) = group.range.next() {
354+
let codepoint = codepoint as u32;
291355
let glyph_id = self.subtable.lookup_glyph_id(
292356
codepoint,
293357
group.start_code,
@@ -301,14 +365,12 @@ impl Iterator for Cmap12Iter<'_> {
301365
return Some((codepoint, glyph_id));
302366
} else {
303367
self.cur_group_ix += 1;
304-
let mut next_group = self.subtable.group(self.cur_group_ix)?;
368+
let mut next_group = self.subtable.group(self.cur_group_ix, &self.limits)?;
305369
// Groups should be in order and non-overlapping so make sure
306370
// that the start code of next group is at least
307-
// current_end + 1.
308-
// This ensures we only ever generate a maximum of
309-
// char::MAX + 1 results.
310-
if next_group.range.start() <= group.range.end() {
311-
next_group.range = *group.range.end() + 1..=*next_group.range.end();
371+
// current_end.
372+
if next_group.range.start < group.range.end {
373+
next_group.range = group.range.end..next_group.range.end;
312374
}
313375
self.cur_group = Some(next_group);
314376
}
@@ -710,7 +772,52 @@ mod tests {
710772
[170u32, 1330926671, 328960] // group 0
711773
};
712774
let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
713-
assert!(cmap12.iter().count() <= char::MAX as usize + 1);
775+
assert!(
776+
cmap12.iter_with_limits(Cmap12IterLimits::default()).count() <= char::MAX as usize + 1
777+
);
778+
}
779+
780+
// oss-fuzz: timeout in outlines, caused by cmap 12 iter
781+
// ref: <https://issues.oss-fuzz.com/issues/394638728>
782+
#[test]
783+
fn cmap12_iter_avoid_timeout2() {
784+
let cmap12_data = be_buffer! {
785+
12u16, // format
786+
0u16, // reserved, set to 0
787+
0u32, // length, ignored
788+
0u32, // language, ignored
789+
3u32, // numGroups
790+
// groups: [startCode, endCode, startGlyphID]
791+
[199u32, 16777271, 2],
792+
[262u32, 262, 3],
793+
[268u32, 268, 4]
794+
};
795+
let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
796+
// In the test case, maxp.numGlyphs = 8
797+
const MAX_GLYPHS: u32 = 8;
798+
let limits = Cmap12IterLimits {
799+
glyph_count: MAX_GLYPHS,
800+
..Default::default()
801+
};
802+
assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize);
803+
}
804+
805+
#[test]
806+
fn cmap12_iter_glyph_limit() {
807+
let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
808+
let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
809+
let mut limits = Cmap12IterLimits::default_for_font(&font);
810+
// Ensure we obey the glyph count limit.
811+
// This font has 11 glyphs
812+
for glyph_count in 0..=11 {
813+
limits.glyph_count = glyph_count;
814+
assert_eq!(
815+
cmap12.iter_with_limits(limits).count(),
816+
// We always return one less than glyph count limit because
817+
// notdef is not mapped
818+
(glyph_count as usize).saturating_sub(1)
819+
);
820+
}
714821
}
715822

716823
#[test]
@@ -725,7 +832,11 @@ mod tests {
725832
// These groups overlap and extend to the whole u32 range
726833
assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]);
727834
// But we produce at most char::MAX + 1 results
728-
assert!(cmap12.iter().count() <= char::MAX as usize + 1);
835+
let limits = Cmap12IterLimits {
836+
glyph_count: u32::MAX,
837+
..Default::default()
838+
};
839+
assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1);
729840
}
730841

731842
#[test]

skrifa/src/charmap.rs

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
1717
use read_fonts::{
1818
tables::cmap::{
19-
self, Cmap, Cmap12, Cmap12Iter, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter, CmapSubtable,
20-
EncodingRecord, PlatformId,
19+
self, Cmap, Cmap12, Cmap12Iter, Cmap12IterLimits, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter,
20+
CmapSubtable, EncodingRecord, PlatformId,
2121
},
2222
types::GlyphId,
23-
FontData, TableProvider,
23+
FontData, FontRef, TableProvider,
2424
};
2525

2626
pub use read_fonts::tables::cmap::MapVariant;
@@ -32,7 +32,7 @@ pub use read_fonts::tables::cmap::MapVariant;
3232
///
3333
/// ## Obtaining a Charmap
3434
///
35-
/// Typically a Charmap is acquired by calling [charmap](crate::MetadataProvider::charmap) on a [FontRef](crate::FontRef).
35+
/// Typically a Charmap is acquired by calling [charmap](crate::MetadataProvider::charmap) on a [FontRef].
3636
///
3737
/// ## Selection strategy
3838
///
@@ -54,15 +54,16 @@ pub use read_fonts::tables::cmap::MapVariant;
5454
pub struct Charmap<'a> {
5555
codepoint_subtable: Option<CodepointSubtable<'a>>,
5656
variant_subtable: Option<Cmap14<'a>>,
57+
cmap12_limits: Cmap12IterLimits,
5758
}
5859

5960
impl<'a> Charmap<'a> {
6061
/// Creates a new character map from the given font.
61-
pub fn new(font: &impl TableProvider<'a>) -> Self {
62+
pub fn new(font: &FontRef<'a>) -> Self {
6263
let Ok(cmap) = font.cmap() else {
6364
return Default::default();
6465
};
65-
let selection = MappingSelection::new(&cmap);
66+
let selection = MappingSelection::new(font, &cmap);
6667
Self {
6768
codepoint_subtable: selection
6869
.codepoint_subtable
@@ -71,6 +72,7 @@ impl<'a> Charmap<'a> {
7172
is_symbol: selection.mapping_index.codepoint_subtable_is_symbol,
7273
}),
7374
variant_subtable: selection.variant_subtable,
75+
cmap12_limits: selection.mapping_index.cmap12_limits,
7476
}
7577
}
7678

@@ -107,7 +109,9 @@ impl<'a> Charmap<'a> {
107109
.map(|subtable| {
108110
Mappings(match &subtable.subtable {
109111
SupportedSubtable::Format4(cmap4) => MappingsInner::Format4(cmap4.iter()),
110-
SupportedSubtable::Format12(cmap12) => MappingsInner::Format12(cmap12.iter()),
112+
SupportedSubtable::Format12(cmap12) => {
113+
MappingsInner::Format12(cmap12.iter_with_limits(self.cmap12_limits))
114+
}
111115
})
112116
})
113117
.unwrap_or(Mappings(MappingsInner::None))
@@ -142,23 +146,25 @@ pub struct MappingIndex {
142146
codepoint_subtable_is_symbol: bool,
143147
/// Index of Unicode variation selector subtable.
144148
variant_subtable: Option<u16>,
149+
/// Limits for iterating a cmap format 12 subtable.
150+
cmap12_limits: Cmap12IterLimits,
145151
}
146152

147153
impl MappingIndex {
148154
/// Finds the indices of the most suitable Unicode mapping tables in the
149155
/// given font.
150-
pub fn new<'a>(font: &impl TableProvider<'a>) -> Self {
156+
pub fn new(font: &FontRef) -> Self {
151157
let Ok(cmap) = font.cmap() else {
152158
return Default::default();
153159
};
154-
MappingSelection::new(&cmap).mapping_index
160+
MappingSelection::new(font, &cmap).mapping_index
155161
}
156162

157163
/// Creates a new character map for the given font using the tables referenced by
158164
/// the precomputed indices.
159165
///
160166
/// The font should be the same as the one used to construct this object.
161-
pub fn charmap<'a>(&self, font: &impl TableProvider<'a>) -> Charmap<'a> {
167+
pub fn charmap<'a>(&self, font: &FontRef<'a>) -> Charmap<'a> {
162168
let Ok(cmap) = font.cmap() else {
163169
return Default::default();
164170
};
@@ -180,6 +186,7 @@ impl MappingIndex {
180186
CmapSubtable::Format14(cmap14) => Some(cmap14),
181187
_ => None,
182188
}),
189+
cmap12_limits: Cmap12IterLimits::default_for_font(font),
183190
}
184191
}
185192
}
@@ -319,7 +326,7 @@ struct MappingSelection<'a> {
319326
}
320327

321328
impl<'a> MappingSelection<'a> {
322-
fn new(cmap: &Cmap<'a>) -> Self {
329+
fn new(font: &FontRef<'a>, cmap: &Cmap<'a>) -> Self {
323330
const ENCODING_MS_SYMBOL: u16 = 0;
324331
const ENCODING_MS_UNICODE_CS: u16 = 1;
325332
const ENCODING_APPLE_ID_UNICODE_32: u16 = 4;
@@ -380,6 +387,7 @@ impl<'a> MappingSelection<'a> {
380387
_ => {}
381388
}
382389
}
390+
mapping_index.cmap12_limits = Cmap12IterLimits::default_for_font(font);
383391
Self {
384392
mapping_index,
385393
codepoint_subtable,

0 commit comments

Comments
 (0)