Skip to content

Commit

Permalink
Overhaul the serializer pipeline (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenzV authored Aug 29, 2024
1 parent c347503 commit bff21be
Show file tree
Hide file tree
Showing 36 changed files with 673 additions and 647 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
image = { version = "0.25.1", default-features = false, features = ["jpeg", "png", "gif", "webp"] }
miniz_oxide = "0.7.4"
once_cell = "1.19.0"
pdf-writer = {git = "https://github.com/LaurenzV/pdf-writer", rev = "f83b414"}
pdf-writer = {git = "https://github.com/LaurenzV/pdf-writer", rev = "34c991f5"}
resvg = {git = "https://github.com/LaurenzV/resvg", rev = "1c2b6bd0"}
siphasher = "1.0.1"
skrifa = {git="https://github.com/LaurenzV/fontations", rev="0ed7955"}
Expand Down
151 changes: 151 additions & 0 deletions src/chunk_container.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
use crate::serialize::SerializeSettings;
use pdf_writer::{Chunk, Finish, Name, Pdf, Ref};
use std::collections::HashMap;

/// Holds the chunks of a document, grouped by category, until
/// `ChunkContainer::finish` renumbers them and writes them into one `Pdf`.
pub struct ChunkContainer {
// Root objects: each entry is a `Ref` together with a `Chunk`. `finish`
// hands the `Ref` to the document catalog and writes the `Chunk` right
// after the catalog.
/// Page label tree; `finish` stores its reference under the catalog's
/// `PageLabels` key.
pub(crate) page_label_tree: Option<(Ref, Chunk)>,
/// Page tree; `finish` registers its reference as the catalog's pages entry.
pub(crate) page_tree: Option<(Ref, Chunk)>,
/// Outline; `finish` registers its reference as the catalog's outlines entry.
pub(crate) outline: Option<(Ref, Chunk)>,

// Remaining chunks, one list per category. `finish` assigns new references
// to them and writes them in the order the fields are listed here.
pub(crate) pages: Vec<Chunk>,
pub(crate) page_labels: Vec<Chunk>,
pub(crate) annotations: Vec<Chunk>,
pub(crate) fonts: Vec<Chunk>,
pub(crate) color_spaces: Vec<Chunk>,
pub(crate) destinations: Vec<Chunk>,
pub(crate) ext_g_states: Vec<Chunk>,
pub(crate) images: Vec<Chunk>,
pub(crate) masks: Vec<Chunk>,
pub(crate) x_objects: Vec<Chunk>,
pub(crate) shading_functions: Vec<Chunk>,
pub(crate) patterns: Vec<Chunk>,
}

impl ChunkContainer {
    /// Creates an empty container: no root objects and no chunks.
    pub fn new() -> Self {
        Self {
            page_tree: None,
            outline: None,
            page_label_tree: None,

            pages: vec![],
            page_labels: vec![],
            annotations: vec![],
            fonts: vec![],
            color_spaces: vec![],
            destinations: vec![],
            ext_g_states: vec![],
            images: vec![],
            masks: vec![],
            x_objects: vec![],
            shading_functions: vec![],
            patterns: vec![],
        }
    }

    /// Consumes the container and assembles the final [`Pdf`].
    ///
    /// All objects are renumbered in the order in which they are written
    /// (catalog, root objects, then every chunk list), so the resulting PDF
    /// uses monotonically increasing object numbers starting at 1. While this
    /// is not a strict requirement for a valid PDF, it makes the output a lot
    /// cleaner and might make features like object streams easier to
    /// implement down the road.
    ///
    /// If `serialize_settings.ascii_compatible` is set, the binary marker in
    /// the header is replaced by the ASCII bytes `AAAA`.
    ///
    /// # Panics
    ///
    /// Panics if the reference stored next to a root chunk is not defined
    /// inside that chunk, or if a chunk refers to an object that is not
    /// defined in any chunk of this container.
    pub fn finish(mut self, serialize_settings: SerializeSettings) -> Pdf {
        let mut remapped_ref = Ref::new(1);
        let mut remapper = HashMap::new();
        // Sum of the lengths of all chunks, used to estimate the capacity of
        // the final PDF.
        let mut chunks_len = 0;

        // A catalog is only written if at least one root object exists. Every
        // valid PDF must have a page tree and krilla ensures that there always
        // is one, but for snapshot tests it can be useful to not write a
        // document catalog if it is not needed for the test.
        let has_root_object =
            self.page_tree.is_some() || self.outline.is_some() || self.page_label_tree.is_some();
        // The catalog is written first, so it receives the first number.
        let catalog_ref = has_root_object.then(|| remapped_ref.bump());

        // Assign new references to the root objects, in the order in which
        // they are written below.
        for (root_ref, chunk) in [
            &mut self.page_tree,
            &mut self.outline,
            &mut self.page_label_tree,
        ]
        .into_iter()
        .flatten()
        {
            chunks_len += chunk.len();
            for object_ref in chunk.object_refs() {
                debug_assert!(!remapper.contains_key(&object_ref));

                remapper.insert(object_ref, remapped_ref.bump());
            }

            // Only now look up the root itself. A root chunk may contain
            // several objects, and the root is not necessarily the last one.
            *root_ref = *remapper
                .get(&*root_ref)
                .expect("the root object must be defined in its own chunk");
        }

        // Assign new references to all remaining objects. The order of this
        // list must match the order used for writing below.
        for chunk in [
            &self.pages,
            &self.page_labels,
            &self.annotations,
            &self.fonts,
            &self.color_spaces,
            &self.destinations,
            &self.ext_g_states,
            &self.images,
            &self.masks,
            &self.x_objects,
            &self.shading_functions,
            &self.patterns,
        ]
        .into_iter()
        .flatten()
        {
            chunks_len += chunk.len();
            for object_ref in chunk.object_refs() {
                debug_assert!(!remapper.contains_key(&object_ref));

                remapper.insert(object_ref, remapped_ref.bump());
            }
        }

        // The PDF is created only after all chunk lengths have been summed
        // up, otherwise the estimate would always be based on a length of 0.
        // The chunk length is not an exact number because the length might
        // change as we renumber, so we add 10% of padding. The 200 is
        // additional padding for the document catalog. This hopefully allows
        // us to avoid reallocations in the general case, and thus gives us
        // better performance.
        let mut pdf = Pdf::with_capacity(chunks_len + chunks_len / 10 + 200);

        if serialize_settings.ascii_compatible {
            pdf.set_binary_marker(b"AAAA");
        }

        if let Some(catalog_ref) = catalog_ref {
            let mut catalog = pdf.catalog(catalog_ref);

            if let Some((page_tree_ref, _)) = &self.page_tree {
                catalog.pages(*page_tree_ref);
            }

            if let Some((page_label_tree_ref, _)) = &self.page_label_tree {
                catalog.pair(Name(b"PageLabels"), *page_label_tree_ref);
            }

            if let Some((outline_ref, _)) = &self.outline {
                catalog.outlines(*outline_ref);
            }

            catalog.finish();
        }

        // Write everything in exactly the order in which the new references
        // were assigned above.
        let root_chunks = [self.page_tree, self.outline, self.page_label_tree]
            .into_iter()
            .flatten()
            .map(|(_, chunk)| chunk);
        let object_chunks = [
            self.pages,
            self.page_labels,
            self.annotations,
            self.fonts,
            self.color_spaces,
            self.destinations,
            self.ext_g_states,
            self.images,
            self.masks,
            self.x_objects,
            self.shading_functions,
            self.patterns,
        ]
        .into_iter()
        .flatten();

        for chunk in root_chunks.chain(object_chunks) {
            chunk.renumber_into(&mut pdf, |old| {
                *remapper
                    .get(&old)
                    .expect("every referenced object must be defined in some chunk")
            });
        }

        pdf
    }
}

impl Default for ChunkContainer {
    /// Equivalent to [`ChunkContainer::new`].
    fn default() -> Self {
        Self::new()
    }
}
5 changes: 5 additions & 0 deletions src/font/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::chunk_container::ChunkContainer;
use crate::serialize::{Object, SerializerContext, SvgSettings};
use crate::surface::Surface;
use crate::type3_font::Type3ID;
Expand Down Expand Up @@ -302,6 +303,10 @@ impl FontIdentifier {

// TODO: Remove?
impl Object for FontIdentifier {
/// Not meant to be called for a `FontIdentifier`: the body is
/// `unreachable!()`, so invoking this method panics.
fn chunk_container<'a>(&self, _: &'a mut ChunkContainer) -> &'a mut Vec<Chunk> {
unreachable!()
}

/// Not meant to be called for a `FontIdentifier`: the body is
/// `unreachable!()`, so invoking this method panics.
fn serialize_into(&self, _: &mut SerializerContext, _: Ref) -> Chunk {
unreachable!()
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod chunk_container;
pub mod document;
pub mod font;
mod graphics_state;
Expand Down
6 changes: 2 additions & 4 deletions src/object/cid_font.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::font::{CIDIdentifer, Font, FontIdentifier};
use crate::serialize::{Object, SerializerContext, SipHashable};
use crate::serialize::{SerializerContext, SipHashable};
use crate::util::{RectExt, SliceExt};
use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
use pdf_writer::{Chunk, Filter, Finish, Name, Ref, Str};
Expand Down Expand Up @@ -90,10 +90,8 @@ impl CIDFont {
/// Returns the `FontIdentifier::Cid` identifier of this font, built from a
/// clone of `self.font`.
pub fn identifier(&self) -> FontIdentifier {
FontIdentifier::Cid(CIDIdentifer(self.font.clone()))
}
}

impl Object for CIDFont {
fn serialize_into(&self, sc: &mut SerializerContext, root_ref: Ref) -> Chunk {
pub(crate) fn serialize_into(&self, sc: &mut SerializerContext, root_ref: Ref) -> Chunk {
let mut chunk = Chunk::new();

let cid_ref = sc.new_ref();
Expand Down
Loading

0 comments on commit bff21be

Please sign in to comment.