Skip to content

Commit bff21be

Browse files
authored
Overhaul the serializer pipeline (#23)
1 parent c347503 commit bff21be

36 files changed

+673
-647
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ edition = "2021"
77
image = { version = "0.25.1", default-features = false, features = ["jpeg", "png", "gif", "webp"] }
88
miniz_oxide = "0.7.4"
99
once_cell = "1.19.0"
10-
pdf-writer = {git = "https://github.com/LaurenzV/pdf-writer", rev = "f83b414"}
10+
pdf-writer = {git = "https://github.com/LaurenzV/pdf-writer", rev = "34c991f5"}
1111
resvg = {git = "https://github.com/LaurenzV/resvg", rev = "1c2b6bd0"}
1212
siphasher = "1.0.1"
1313
skrifa = {git="https://github.com/LaurenzV/fontations", rev="0ed7955"}

src/chunk_container.rs

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
use crate::serialize::SerializeSettings;
2+
use pdf_writer::{Chunk, Finish, Name, Pdf, Ref};
3+
use std::collections::HashMap;
4+
5+
pub struct ChunkContainer {
6+
pub(crate) page_label_tree: Option<(Ref, Chunk)>,
7+
pub(crate) page_tree: Option<(Ref, Chunk)>,
8+
pub(crate) outline: Option<(Ref, Chunk)>,
9+
10+
pub(crate) pages: Vec<Chunk>,
11+
pub(crate) page_labels: Vec<Chunk>,
12+
pub(crate) annotations: Vec<Chunk>,
13+
pub(crate) fonts: Vec<Chunk>,
14+
pub(crate) color_spaces: Vec<Chunk>,
15+
pub(crate) destinations: Vec<Chunk>,
16+
pub(crate) ext_g_states: Vec<Chunk>,
17+
pub(crate) images: Vec<Chunk>,
18+
pub(crate) masks: Vec<Chunk>,
19+
pub(crate) x_objects: Vec<Chunk>,
20+
pub(crate) shading_functions: Vec<Chunk>,
21+
pub(crate) patterns: Vec<Chunk>,
22+
}
23+
24+
impl ChunkContainer {
25+
pub fn new() -> Self {
26+
Self {
27+
page_tree: None,
28+
outline: None,
29+
page_label_tree: None,
30+
31+
pages: vec![],
32+
page_labels: vec![],
33+
annotations: vec![],
34+
fonts: vec![],
35+
color_spaces: vec![],
36+
destinations: vec![],
37+
ext_g_states: vec![],
38+
images: vec![],
39+
masks: vec![],
40+
x_objects: vec![],
41+
shading_functions: vec![],
42+
patterns: vec![],
43+
}
44+
}
45+
46+
pub fn finish(mut self, serialize_settings: SerializeSettings) -> Pdf {
47+
let mut remapped_ref = Ref::new(1);
48+
let mut remapper = HashMap::new();
49+
50+
// Two utility macros, that basically traverses the fields in the order that we
51+
// will write them to the PDF and assigns new references as we go.
52+
// This gives us the advantage that the PDF will be numbered with
53+
// monotonically increasing numbers, which, while it is not a strict requirement
54+
// for a valid PDF, makes it a lot cleaner and might make implementing features
55+
// like object streams easier down the road.
56+
//
57+
// It also allows us to estimate the capacity we will need for the new PDF.
58+
let mut chunks_len = 0;
59+
macro_rules! remap_field {
60+
($self:expr, $remapper:expr, $remapped_ref:expr; $($field:ident),+) => {
61+
$(
62+
if let Some((original_ref, chunk)) = &mut $self.$field {
63+
chunks_len += chunk.len();
64+
for object_ref in chunk.object_refs() {
65+
debug_assert!(!remapper.contains_key(&object_ref));
66+
67+
$remapper.insert(object_ref, $remapped_ref.bump());
68+
*original_ref = *remapper.get(&object_ref).unwrap();
69+
}
70+
}
71+
)+
72+
};
73+
}
74+
75+
macro_rules! remap_fields {
76+
($self:expr, $remapper:expr, $remapped_ref:expr; $($field:ident),+) => {
77+
$(
78+
for chunk in &$self.$field {
79+
chunks_len += chunk.len();
80+
for ref_ in chunk.object_refs() {
81+
debug_assert!(!remapper.contains_key(&ref_));
82+
83+
$remapper.insert(ref_, $remapped_ref.bump());
84+
}
85+
}
86+
)+
87+
};
88+
}
89+
90+
// Chunk length is not an exact number because the length might change as we renumber,
91+
// so we add a bit of a padding by multiplying with 1.1. The 200 is additional padding
92+
// for the document catalog. This hopefully allows us to avoid realloactions in the general
93+
// case, and thus give us better performance.
94+
let mut pdf = Pdf::with_capacity((chunks_len as f32 * 1.1 + 200.0) as usize);
95+
96+
if serialize_settings.ascii_compatible {
97+
pdf.set_binary_marker(&[b'A', b'A', b'A', b'A'])
98+
}
99+
100+
// We only write a catalog if a page tree exists. Every valid PDF must have one
101+
// and krilla ensures that there always is one, but for snapshot tests, it can be
102+
// useful to not write a document catalog if we don't actually need it for the test.
103+
if self.page_tree.is_some() || self.outline.is_some() || self.page_label_tree.is_some() {
104+
let catalog_ref = remapped_ref.bump();
105+
106+
let mut catalog = pdf.catalog(catalog_ref);
107+
remap_field!(self, remapper, remapped_ref; page_tree, outline, page_label_tree);
108+
109+
if let Some(pt) = &self.page_tree {
110+
catalog.pages(pt.0);
111+
}
112+
113+
if let Some(pl) = &self.page_label_tree {
114+
catalog.pair(Name(b"PageLabels"), pl.0);
115+
}
116+
117+
if let Some(ol) = &self.outline {
118+
catalog.outlines(ol.0);
119+
}
120+
121+
catalog.finish();
122+
}
123+
124+
remap_fields!(self, remapper, remapped_ref; pages, page_labels, annotations, fonts, color_spaces, destinations, ext_g_states, images, masks, x_objects, shading_functions, patterns);
125+
126+
macro_rules! write_field {
127+
($self:expr, $remapper:expr, $pdf:expr; $($field:ident),+) => {
128+
$(
129+
if let Some((_, chunk)) = $self.$field {
130+
chunk.renumber_into($pdf, |old| *$remapper.get(&old).unwrap());
131+
}
132+
)+
133+
};
134+
}
135+
136+
macro_rules! write_fields {
137+
($self:expr, $remapper:expr, $pdf:expr; $($field:ident),+) => {
138+
$(
139+
for chunk in $self.$field {
140+
chunk.renumber_into($pdf, |old| *$remapper.get(&old).unwrap());
141+
}
142+
)+
143+
};
144+
}
145+
146+
write_field!(self, remapper, &mut pdf; page_tree, outline, page_label_tree);
147+
write_fields!(self, remapper, &mut pdf; pages, page_labels, annotations, fonts, color_spaces, destinations, ext_g_states, images, masks, x_objects, shading_functions, patterns);
148+
149+
pdf
150+
}
151+
}

src/font/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::chunk_container::ChunkContainer;
12
use crate::serialize::{Object, SerializerContext, SvgSettings};
23
use crate::surface::Surface;
34
use crate::type3_font::Type3ID;
@@ -302,6 +303,10 @@ impl FontIdentifier {
302303

303304
// TODO: Remove?
304305
impl Object for FontIdentifier {
306+
fn chunk_container<'a>(&self, _: &'a mut ChunkContainer) -> &'a mut Vec<Chunk> {
307+
unreachable!()
308+
}
309+
305310
fn serialize_into(&self, _: &mut SerializerContext, _: Ref) -> Chunk {
306311
unreachable!()
307312
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub mod chunk_container;
12
pub mod document;
23
pub mod font;
34
mod graphics_state;

src/object/cid_font.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::font::{CIDIdentifer, Font, FontIdentifier};
2-
use crate::serialize::{Object, SerializerContext, SipHashable};
2+
use crate::serialize::{SerializerContext, SipHashable};
33
use crate::util::{RectExt, SliceExt};
44
use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
55
use pdf_writer::{Chunk, Filter, Finish, Name, Ref, Str};
@@ -90,10 +90,8 @@ impl CIDFont {
9090
pub fn identifier(&self) -> FontIdentifier {
9191
FontIdentifier::Cid(CIDIdentifer(self.font.clone()))
9292
}
93-
}
9493

95-
impl Object for CIDFont {
96-
fn serialize_into(&self, sc: &mut SerializerContext, root_ref: Ref) -> Chunk {
94+
pub(crate) fn serialize_into(&self, sc: &mut SerializerContext, root_ref: Ref) -> Chunk {
9795
let mut chunk = Chunk::new();
9896

9997
let cid_ref = sc.new_ref();

0 commit comments

Comments
 (0)