@@ -420,20 +420,42 @@ struct CCITTStatus {
420420 bool has_reached_eol { false };
421421};
422422
// Confirms that a mode lookup which produced an InvalidResult really was the start of an
// end-of-line marker, and returns an error otherwise.
// (Diff hunk: the `-` lines are the previous version, the `+` lines the new one, which takes
// an extra `options` parameter.)
//
// Succeeds when `partially_read_eol` is 0 and either:
//  - the next 5 bits read from `input_bit_stream` equal 1, or
//  - `options.has_end_of_block` is Yes, those 5 bits are 0, and the bits that follow match
//    the tail of an EOFB (see below).
// Any other input yields "Unable to find the correct mode". Errors from read_bits() are
// propagated through TRY.
423- ErrorOr<void > ensure_invalid_result_is_actually_eol (BigEndianInputBitStream& input_bit_stream, InvalidResult partially_read_eol)
423+ ErrorOr<void > ensure_invalid_result_is_actually_eol (BigEndianInputBitStream& input_bit_stream, InvalidResult partially_read_eol, Group4Options const & options )
424424{
425425 if (partially_read_eol != 0 )
426426 return Error::from_string_literal (" CCITTDecoder: Unable to find the correct mode" );
427427
428428 auto const remaining_eol = TRY (input_bit_stream.read_bits (5 ));
429- if (remaining_eol != 1 ) {
429+ if (options.has_end_of_block == Group4Options::HasEndOfBlock::Yes
430+ && remaining_eol == 0 ) {
431+ // Some PDFs, like 00000337.pdf, end with an EOFB [1] that is byte aligned. This is
432+ // what we are trying to detect/read here. As we have already read 12 bits across
433+ // partially_read_eol and remaining_eol, we need to realign ourselves first.
434+ // [1] 2.4.1.1 End-of-facsimile block
435+
// As a 24-bit value, 0x001001 is the 12-bit pattern 000000000001 repeated twice.
436+ static constexpr u32 EOFB = 0x001001 ;
437+
// Assuming the EOFB starts on a byte boundary, this recovers how many of the 12 zero
// bits consumed so far were padding in front of it; exactly that many EOFB bits, plus
// its final 12, are then still unread.
438+ u8 fill_bits_length = (12 + input_bit_stream.bits_until_next_byte_boundary ()) % 8 ;
439+ u8 to_read = fill_bits_length + 12 ;
440+ auto potential_eofb = TRY (input_bit_stream.read_bits (to_read));
441+
442+ // We already checked that the first 12 bits were zeroes, so here we check that the
443+ // remaining to_read bits end like an EOFB. The leading bits of the EOFB are zero, so
444+ // comparing against the full constant works for any to_read.
444+ if (potential_eofb != EOFB)
445+ return Error::from_string_literal (" CCITTDecoder: Unable to find the correct mode" );
446+ } else if (remaining_eol != 1 ) {
430447 return Error::from_string_literal (" CCITTDecoder: Unable to find the correct mode" );
431448 }
432449
433450 return {};
434451}
435452
436- ErrorOr<CCITTStatus> decode_single_ccitt_2d_line (BigEndianInputBitStream& input_bit_stream, BigEndianOutputBitStream& decoded_bits, ReferenceLine&& reference_line, u32 image_width)
453+ ErrorOr<CCITTStatus> decode_single_ccitt_2d_line (
454+ BigEndianInputBitStream& input_bit_stream,
455+ BigEndianOutputBitStream& decoded_bits,
456+ ReferenceLine&& reference_line,
457+ u32 image_width,
458+ Group4Options const & options = {})
437459{
438460 CCITTStatus status {};
439461 Color current_color { ccitt_white };
@@ -483,7 +505,7 @@ ErrorOr<CCITTStatus> decode_single_ccitt_2d_line(BigEndianInputBitStream& input_
483505 auto const maybe_mode = TRY (read_mode (input_bit_stream));
484506
485507 if (maybe_mode.has <InvalidResult>()) {
486- TRY (ensure_invalid_result_is_actually_eol (input_bit_stream, maybe_mode.get <InvalidResult>()));
508+ TRY (ensure_invalid_result_is_actually_eol (input_bit_stream, maybe_mode.get <InvalidResult>(), options ));
487509
488510 // We reached EOL
489511 status.has_reached_eol = true ;
@@ -621,7 +643,7 @@ ErrorOr<ByteBuffer> decode_ccitt_group3(ReadonlyBytes bytes, u32 image_width, u3
621643 return decoded_bytes;
622644}
623645
624- ErrorOr<ByteBuffer> decode_ccitt_group4 (ReadonlyBytes bytes, u32 image_width, u32 image_height)
646+ ErrorOr<ByteBuffer> decode_ccitt_group4 (ReadonlyBytes bytes, u32 image_width, u32 image_height, Group4Options const & options )
625647{
626648 auto strip_stream = make<FixedMemoryStream>(bytes);
627649 auto bit_stream = make<BigEndianInputBitStream>(MaybeOwned<Stream>(*strip_stream));
@@ -636,7 +658,7 @@ ErrorOr<ByteBuffer> decode_ccitt_group4(ReadonlyBytes bytes, u32 image_width, u3
636658
637659 u32 i {};
638660 while (!status.has_reached_eol && (image_height == 0 || i < image_height)) {
639- status = TRY (decode_single_ccitt_2d_line (*bit_stream, *decoded_bits, move (status.current_line ), image_width));
661+ status = TRY (decode_single_ccitt_2d_line (*bit_stream, *decoded_bits, move (status.current_line ), image_width, options ));
640662 ++i;
641663 }
642664
0 commit comments