Skip to content

Commit

Permalink
Improve text spanner
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenzV committed Dec 18, 2024
1 parent d4dedf7 commit 3834a6e
Show file tree
Hide file tree
Showing 12 changed files with 1,486 additions and 497 deletions.
2 changes: 1 addition & 1 deletion crates/krilla/src/chunk_container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ impl ChunkContainer {
// TODO: Replace with `is_none_or` once MSRV allows to.
let missing_title = match self.metadata.as_ref() {
None => true,
Some(m) => m.title.is_none()
Some(m) => m.title.is_none(),
};

if missing_title {
Expand Down
78 changes: 51 additions & 27 deletions crates/krilla/src/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,15 @@ impl ContentBuilder {

// Separate into distinct glyph runs that either are encoded using actual text, or are
// not.
let spanned = TextSpanner::new(glyphs, text, paint_mode, font_container.clone());
let spanned = TextSpanner::new(
glyphs,
text,
sc.serialize_settings()
.validator
.requires_codepoint_mappings(),
paint_mode,
font_container.clone(),
);

for fragment in spanned {
if let Some(text) = fragment.actual_text() {
Expand Down Expand Up @@ -1094,6 +1102,7 @@ where
{
slice: &'a [T],
paint_mode: PaintMode<'a>,
forbid_invalid_codepoints: bool,
font_container: Rc<RefCell<FontContainer>>,
text: &'a str,
}
Expand All @@ -1105,12 +1114,14 @@ where
pub(crate) fn new(
slice: &'a [T],
text: &'a str,
forbid_invalid_codepoints: bool,
paint_mode: PaintMode<'a>,
font_container: Rc<RefCell<FontContainer>>,
) -> Self {
Self {
slice,
paint_mode,
forbid_invalid_codepoints,
text,
font_container,
}
Expand All @@ -1128,6 +1139,8 @@ where
fn func<U>(
g: &U,
paint_mode: PaintMode,
previous_range: Option<Range<usize>>,
forbid_invalid_codepoints: bool,
mut font_container: RefMut<FontContainer>,
text: &str,
) -> (Range<usize>, bool)
Expand All @@ -1145,11 +1158,26 @@ where
let codepoints = pdf_font.get_codepoints(pdf_glyph);
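// Check if the glyph has already been assigned codepoints that don't match the
// one we are seeing right now.
// NOTE(review): the closure parameter used in the `is_some_and` check below must not be
// named `text`. Doing so shadows the outer `text`, so `codepoints` ends up being compared
// against its own contents and the result is always `false`. The intended comparison is
// between the previously stored codepoints and the outer `text`.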
let incompatible_codepoint = codepoints.is_some() && codepoints != Some(text);

// Only set the codepoint if there isn't a previous one.
let incompatible_codepoint = codepoints.is_some_and(|text| codepoints != Some(text));

// Only set the codepoint if there isn't a previous, different mapping.
//
// If we could set it, we only want to insert a codepoint if we are not already
// building a spanned run (which is the case if the previous range is the same).
// If we are building a spanned run, it means that the glyphs are part of the same
// cluster, in which case only the first glyph should be assigned the codepoint,
// while all other glyphs in the same cluster should not be assigned anything.
// Otherwise, when copying text from the PDF, we will get the same codepoint multiple
// times in viewers that don't support `ActualText`.
//
// However, when exporting to PDF/UA, for example, every glyph is required
// to have a valid codepoint mapping. So in this case, we still add the codepoints
// to each glyph in the cluster, even though this will result in worse copy-pasting
// in viewers that don't support `ActualText`.
if !incompatible_codepoint {
pdf_font.set_codepoints(pdf_glyph, text.to_string());
if previous_range != Some(range.clone()) || forbid_invalid_codepoints {
pdf_font.set_codepoints(pdf_glyph, text.to_string());
}
}

(range, incompatible_codepoint)
Expand All @@ -1165,6 +1193,8 @@ where
let (first_range, first_incompatible) = func(
iter.next()?,
self.paint_mode,
None,
self.forbid_invalid_codepoints,
self.font_container.borrow_mut(),
self.text,
);
Expand All @@ -1175,6 +1205,8 @@ where
let (next_range, next_incompatible) = func(
next,
self.paint_mode,
Some(last_range.clone()),
self.forbid_invalid_codepoints,
self.font_container.borrow_mut(),
self.text,
);
Expand All @@ -1183,33 +1215,24 @@ where
// In this case, we just started and we are looking at the first two glyphs.
// This decides whether the current run will be spanned, or not.
None => {
// The first glyph is incompatible, so we definitely need actual text.
if first_incompatible {
// The two glyphs are in the same range, so we definitely want this run
// to be spanned, and also want to include both glyphs in that run.
if last_range == next_range {
use_span = Some(true);

// If the range of the next one is the same, it means they are
// part of the same cluster, meaning that we need to include it
// in the actual text. If not, we abort and only wrap the first
// glyph in actual text.
if last_range != next_range {
} else {
// Else, whether we use a span depends on whether the first glyph
// is incompatible.
use_span = Some(first_incompatible);

// If either the first glyph or the second glyph are incompatible, they
// need to be in separate runs, since they are not part of the same cluster.
if first_incompatible || next_incompatible {
break;
}
}

// If the next is incompatible but not part of the current cluster,
// then it will need a dedicated spanned range, and
// we can't include it in the current text span. So we abort and
// create a spanned element with just the first glyph.
if next_incompatible && last_range != next_range {
break;
// If none are incompatible, then `use_span` is false, and we can also
// include the next glyph in that unspanned run.
}

// If they have the same range, they are part of the same cluster,
// and thus we started a spanned range with actual text.
//
// Otherwise, they are part of a different cluster, and we
// start a spanned range with no actual text (common case).
use_span = Some(last_range == next_range);
}
// We are currently building a spanned range, and all glyphs
// are part of the same cluster.
Expand Down Expand Up @@ -1251,6 +1274,7 @@ where
true => TextSpan::Spanned(head, &self.text[first_range]),
false => TextSpan::Unspanned(head),
};

Some(fragment)
}
}
Expand Down
27 changes: 16 additions & 11 deletions crates/krilla/src/surface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ fn naive_shape(
mod tests {
use crate::font::Font;
use crate::mask::MaskType;
use crate::page::Page;
use crate::paint::{LinearGradient, Paint, SpreadMethod};
use crate::path::Fill;
use crate::surface::Surface;
Expand Down Expand Up @@ -811,8 +812,9 @@ mod tests {
);
}

#[snapshot(stream)]
fn stream_complex_text(surface: &mut Surface) {
#[snapshot(single_page)]
fn complex_text(page: &mut Page) {
let mut surface = page.surface();
surface.fill_text(
Point::from_xy(0.0, 50.0),
Fill::default(),
Expand All @@ -825,41 +827,44 @@ mod tests {
);
}

#[snapshot(stream)]
fn stream_complex_text_2(surface: &mut Surface) {
#[snapshot(single_page)]
fn complex_text_2(page: &mut Page) {
let mut surface = page.surface();
surface.fill_text(
Point::from_xy(0.0, 50.0),
Fill::default(),
Font::new(NOTO_SANS_DEVANAGARI.clone(), 0, true).unwrap(),
16.0,
&[],
"यु॒धा नर॑ ऋ॒ष्वा ",
"यु॒धा नर॑ ऋ॒ष्वा",
false,
TextDirection::Auto,
);
}

#[snapshot(stream)]
fn stream_complex_text_3(surface: &mut Surface) {
#[snapshot(single_page)]
fn complex_text_3(page: &mut Page) {
let mut surface = page.surface();
surface.fill_text(
Point::from_xy(0.0, 50.0),
Fill::default(),
Font::new(NOTO_SANS_DEVANAGARI.clone(), 0, true).unwrap(),
16.0,
12.0,
&[],
"आ रु॒क्मैरा यु॒धा नर॑ ऋ॒ष्वा ऋ॒ष्टीर॑सृक्षत ।",
false,
TextDirection::Auto,
);
}

#[snapshot(stream)]
fn stream_complex_text_4(surface: &mut Surface) {
#[snapshot(single_page)]
fn complex_text_4(page: &mut Page) {
let mut surface = page.surface();
surface.fill_text(
Point::from_xy(0.0, 50.0),
Fill::default(),
Font::new(NOTO_SANS_DEVANAGARI.clone(), 0, true).unwrap(),
16.0,
10.0,
&[],
"अन्वे॑नाँ॒ अह॑ वि॒द्युतो॑ म॒रुतो॒ जज्झ॑तीरव भनर॑र्त॒ त्मना॑ दि॒वः ॥",
false,
Expand Down
32 changes: 22 additions & 10 deletions crates/krilla/src/validation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ impl Validator {
ValidationError::ContainsPostScript => true,
ValidationError::MissingCMYKProfile => true,
ValidationError::ContainsNotDefGlyph => false,
ValidationError::InvalidCodepointMapping(_, _) => false,
ValidationError::InvalidCodepointMapping(_, _) => {
self.requires_codepoint_mappings()
}
ValidationError::UnicodePrivateArea(_, _) => false,
ValidationError::NoDocumentLanguage => *self == Validator::A1_A,
ValidationError::NoDocumentTitle => false,
Expand All @@ -276,11 +278,10 @@ impl Validator {
ValidationError::ContainsPostScript => true,
ValidationError::MissingCMYKProfile => true,
ValidationError::ContainsNotDefGlyph => true,
// Only applies for PDF/A2-U and PDF/A2-A
ValidationError::InvalidCodepointMapping(_, _) => *self != Validator::A2_B,
// Only applies to PDF/A2-A
ValidationError::InvalidCodepointMapping(_, _) => {
self.requires_codepoint_mappings()
}
ValidationError::UnicodePrivateArea(_, _) => *self == Validator::A2_A,
// Only applies to PDF/A2-A
ValidationError::NoDocumentLanguage => *self == Validator::A2_A,
ValidationError::NoDocumentTitle => false,
ValidationError::MissingAltText => false,
Expand All @@ -300,11 +301,10 @@ impl Validator {
ValidationError::ContainsPostScript => true,
ValidationError::MissingCMYKProfile => true,
ValidationError::ContainsNotDefGlyph => true,
// Only applies for PDF/A3-U and PDF/A3-A
ValidationError::InvalidCodepointMapping(_, _) => *self != Validator::A3_B,
// Only applies to PDF/A3-A
ValidationError::InvalidCodepointMapping(_, _) => {
self.requires_codepoint_mappings()
}
ValidationError::UnicodePrivateArea(_, _) => *self == Validator::A3_A,
// Only applies to PDF/A3-A
ValidationError::NoDocumentLanguage => *self == Validator::A3_A,
ValidationError::NoDocumentTitle => false,
ValidationError::MissingAltText => false,
Expand All @@ -324,7 +324,9 @@ impl Validator {
ValidationError::ContainsPostScript => false,
ValidationError::MissingCMYKProfile => false,
ValidationError::ContainsNotDefGlyph => true,
ValidationError::InvalidCodepointMapping(_, _) => true,
ValidationError::InvalidCodepointMapping(_, _) => {
self.requires_codepoint_mappings()
}
ValidationError::UnicodePrivateArea(_, _) => false,
ValidationError::NoDocumentLanguage => false,
ValidationError::NoDocumentTitle => true,
Expand Down Expand Up @@ -424,6 +426,16 @@ impl Validator {
}
}

/// Reports whether this validator requires codepoint mappings.
///
/// Returns `true` for `A2_A`, `A2_U`, `A3_A`, `A3_U` and `UA1`, and `false`
/// for `None`, `A1_A`, `A1_B`, `A2_B` and `A3_B`.
pub(crate) fn requires_codepoint_mappings(&self) -> bool {
    // Every variant is listed explicitly so that adding a new validator
    // forces a decision here instead of silently falling into a default.
    match self {
        Validator::A2_A
        | Validator::A2_U
        | Validator::A3_A
        | Validator::A3_U
        | Validator::UA1 => true,
        Validator::None
        | Validator::A1_A
        | Validator::A1_B
        | Validator::A2_B
        | Validator::A3_B => false,
    }
}

pub(crate) fn requires_display_doc_title(&self) -> bool {
match self {
Validator::None => false,
Expand Down
Loading

0 comments on commit 3834a6e

Please sign in to comment.