@@ -4,7 +4,8 @@ include!("../../generated/generated_cmap.rs");
4
4
5
5
#[ cfg( feature = "std" ) ]
6
6
use crate :: collections:: IntSet ;
7
- use std:: ops:: { Range , RangeInclusive } ;
7
+ use crate :: { FontRef , TableProvider } ;
8
+ use std:: ops:: Range ;
8
9
9
10
/// Result of mapping a codepoint with a variation selector.
10
11
#[ derive( Copy , Clone , PartialEq , Eq , Debug ) ]
@@ -222,8 +223,18 @@ impl<'a> Cmap12<'a> {
222
223
223
224
/// Returns an iterator over all (codepoint, glyph identifier) pairs
224
225
/// in the subtable.
226
+ ///
227
+ /// Malicious and malformed fonts can produce a large number of invalid
228
+ /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
229
+ /// that is limited to reasonable values.
225
230
pub fn iter ( & self ) -> Cmap12Iter < ' a > {
226
- Cmap12Iter :: new ( self . clone ( ) )
231
+ Cmap12Iter :: new ( self . clone ( ) , None )
232
+ }
233
+
234
+ /// Returns an iterator over all (codepoint, glyph identifier) pairs
235
+ /// in the subtable within the given limits.
236
+ pub fn iter_with_limits ( & self , limits : Cmap12IterLimits ) -> Cmap12Iter < ' a > {
237
+ Cmap12Iter :: new ( self . clone ( ) , Some ( limits) )
227
238
}
228
239
229
240
/// Does the final phase of glyph id lookup.
@@ -240,43 +251,95 @@ impl<'a> Cmap12<'a> {
240
251
241
252
/// Returns the codepoint range and start glyph id for the group
242
253
/// at the given index.
243
- fn group ( & self , index : usize ) -> Option < Cmap12Group > {
254
+ fn group ( & self , index : usize , limits : & Option < Cmap12IterLimits > ) -> Option < Cmap12Group > {
244
255
let group = self . groups ( ) . get ( index) ?;
245
256
let start_code = group. start_char_code ( ) ;
246
- // Limit to the valid range of Unicode characters
247
- // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
248
- let end_code = group. end_char_code ( ) . min ( char:: MAX as u32 ) ;
257
+ // Change to exclusive range. This can never overflow since the source
258
+ // is a 32-bit value
259
+ let end_code = group. end_char_code ( ) as u64 + 1 ;
260
+ let start_glyph_id = group. start_glyph_id ( ) ;
261
+ let end_code = if let Some ( limits) = limits {
262
+ // Set our end code to the minimum of our character and glyph
263
+ // count limit
264
+ ( limits. glyph_count as u64 )
265
+ . saturating_sub ( start_glyph_id as u64 )
266
+ . saturating_add ( start_code as u64 )
267
+ . min ( end_code. min ( limits. max_char as u64 ) )
268
+ } else {
269
+ end_code
270
+ } ;
249
271
Some ( Cmap12Group {
250
- range : start_code..= end_code,
272
+ range : start_code as u64 .. end_code,
251
273
start_code,
252
- start_glyph_id : group . start_glyph_id ( ) ,
274
+ start_glyph_id,
253
275
} )
254
276
}
255
277
}
256
278
257
- #[ derive( Clone ) ]
279
+ #[ derive( Clone , Debug ) ]
258
280
struct Cmap12Group {
259
- range : RangeInclusive < u32 > ,
281
+ range : Range < u64 > ,
260
282
start_code : u32 ,
261
283
start_glyph_id : u32 ,
262
284
}
263
285
286
+ /// Character and glyph limits for iterating format 12 subtables.
287
+ #[ derive( Copy , Clone , Debug ) ]
288
+ pub struct Cmap12IterLimits {
289
+ /// The maximum valid character.
290
+ pub max_char : u32 ,
291
+ /// The number of glyphs in the font.
292
+ pub glyph_count : u32 ,
293
+ }
294
+
295
+ impl Cmap12IterLimits {
296
+ /// Returns the default limits for the given font.
297
+ ///
298
+ /// This will limit pairs to `char::MAX` and the number of glyphs contained
299
+ /// in the font. If the font is missing a `maxp` table, the number of
300
+ /// glyphs will be limited to `u16::MAX`.
301
+ pub fn default_for_font ( font : & FontRef ) -> Self {
302
+ let glyph_count = font
303
+ . maxp ( )
304
+ . map ( |maxp| maxp. num_glyphs ( ) )
305
+ . unwrap_or ( u16:: MAX ) as u32 ;
306
+ Self {
307
+ // Limit to the valid range of Unicode characters
308
+ // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
309
+ max_char : char:: MAX as u32 ,
310
+ glyph_count,
311
+ }
312
+ }
313
+ }
314
+
315
+ impl Default for Cmap12IterLimits {
316
+ fn default ( ) -> Self {
317
+ Self {
318
+ max_char : char:: MAX as u32 ,
319
+ // Revisit this when we actually support big glyph ids
320
+ glyph_count : u16:: MAX as u32 ,
321
+ }
322
+ }
323
+ }
324
+
264
325
/// Iterator over all (codepoint, glyph identifier) pairs in
265
326
/// the subtable.
266
327
#[ derive( Clone ) ]
267
328
pub struct Cmap12Iter < ' a > {
268
329
subtable : Cmap12 < ' a > ,
269
330
cur_group : Option < Cmap12Group > ,
270
331
cur_group_ix : usize ,
332
+ limits : Option < Cmap12IterLimits > ,
271
333
}
272
334
273
335
impl < ' a > Cmap12Iter < ' a > {
274
- fn new ( subtable : Cmap12 < ' a > ) -> Self {
275
- let cur_group = subtable. group ( 0 ) ;
336
+ fn new ( subtable : Cmap12 < ' a > , limits : Option < Cmap12IterLimits > ) -> Self {
337
+ let cur_group = subtable. group ( 0 , & limits ) ;
276
338
Self {
277
339
subtable,
278
340
cur_group,
279
341
cur_group_ix : 0 ,
342
+ limits,
280
343
}
281
344
}
282
345
}
@@ -288,6 +351,7 @@ impl Iterator for Cmap12Iter<'_> {
288
351
loop {
289
352
let group = self . cur_group . as_mut ( ) ?;
290
353
if let Some ( codepoint) = group. range . next ( ) {
354
+ let codepoint = codepoint as u32 ;
291
355
let glyph_id = self . subtable . lookup_glyph_id (
292
356
codepoint,
293
357
group. start_code ,
@@ -301,14 +365,12 @@ impl Iterator for Cmap12Iter<'_> {
301
365
return Some ( ( codepoint, glyph_id) ) ;
302
366
} else {
303
367
self . cur_group_ix += 1 ;
304
- let mut next_group = self . subtable . group ( self . cur_group_ix ) ?;
368
+ let mut next_group = self . subtable . group ( self . cur_group_ix , & self . limits ) ?;
305
369
// Groups should be in order and non-overlapping so make sure
306
370
// that the start code of next group is at least
307
- // current_end + 1.
308
- // This ensures we only ever generate a maximum of
309
- // char::MAX + 1 results.
310
- if next_group. range . start ( ) <= group. range . end ( ) {
311
- next_group. range = * group. range . end ( ) + 1 ..=* next_group. range . end ( ) ;
371
+ // current_end.
372
+ if next_group. range . start < group. range . end {
373
+ next_group. range = group. range . end ..next_group. range . end ;
312
374
}
313
375
self . cur_group = Some ( next_group) ;
314
376
}
@@ -710,7 +772,52 @@ mod tests {
710
772
[ 170u32 , 1330926671 , 328960 ] // group 0
711
773
} ;
712
774
let cmap12 = Cmap12 :: read ( cmap12_data. data ( ) . into ( ) ) . unwrap ( ) ;
713
- assert ! ( cmap12. iter( ) . count( ) <= char :: MAX as usize + 1 ) ;
775
+ assert ! (
776
+ cmap12. iter_with_limits( Cmap12IterLimits :: default ( ) ) . count( ) <= char :: MAX as usize + 1
777
+ ) ;
778
+ }
779
+
780
+ // oss-fuzz: timeout in outlines, caused by cmap 12 iter
781
+ // ref: <https://issues.oss-fuzz.com/issues/394638728>
782
+ #[ test]
783
+ fn cmap12_iter_avoid_timeout2 ( ) {
784
+ let cmap12_data = be_buffer ! {
785
+ 12u16 , // format
786
+ 0u16 , // reserved, set to 0
787
+ 0u32 , // length, ignored
788
+ 0u32 , // language, ignored
789
+ 3u32 , // numGroups
790
+ // groups: [startCode, endCode, startGlyphID]
791
+ [ 199u32 , 16777271 , 2 ] ,
792
+ [ 262u32 , 262 , 3 ] ,
793
+ [ 268u32 , 268 , 4 ]
794
+ } ;
795
+ let cmap12 = Cmap12 :: read ( cmap12_data. data ( ) . into ( ) ) . unwrap ( ) ;
796
+ // In the test case, maxp.numGlyphs = 8
797
+ const MAX_GLYPHS : u32 = 8 ;
798
+ let limits = Cmap12IterLimits {
799
+ glyph_count : MAX_GLYPHS ,
800
+ ..Default :: default ( )
801
+ } ;
802
+ assert_eq ! ( cmap12. iter_with_limits( limits) . count( ) , MAX_GLYPHS as usize ) ;
803
+ }
804
+
805
+ #[ test]
806
+ fn cmap12_iter_glyph_limit ( ) {
807
+ let font = FontRef :: new ( font_test_data:: CMAP12_FONT1 ) . unwrap ( ) ;
808
+ let cmap12 = find_cmap12 ( & font. cmap ( ) . unwrap ( ) ) . unwrap ( ) ;
809
+ let mut limits = Cmap12IterLimits :: default_for_font ( & font) ;
810
+ // Ensure we obey the glyph count limit.
811
+ // This font has 11 glyphs
812
+ for glyph_count in 0 ..=11 {
813
+ limits. glyph_count = glyph_count;
814
+ assert_eq ! (
815
+ cmap12. iter_with_limits( limits) . count( ) ,
816
+ // We always return one less than glyph count limit because
817
+ // notdef is not mapped
818
+ ( glyph_count as usize ) . saturating_sub( 1 )
819
+ ) ;
820
+ }
714
821
}
715
822
716
823
#[ test]
@@ -725,7 +832,11 @@ mod tests {
725
832
// These groups overlap and extend to the whole u32 range
726
833
assert_eq ! ( ranges, & [ ( 0 , 16777215 ) , ( 255 , u32 :: MAX ) ] ) ;
727
834
// But we produce at most char::MAX + 1 results
728
- assert ! ( cmap12. iter( ) . count( ) <= char :: MAX as usize + 1 ) ;
835
+ let limits = Cmap12IterLimits {
836
+ glyph_count : u32:: MAX ,
837
+ ..Default :: default ( )
838
+ } ;
839
+ assert ! ( cmap12. iter_with_limits( limits) . count( ) <= char :: MAX as usize + 1 ) ;
729
840
}
730
841
731
842
#[ test]
0 commit comments