Skip to content

Commit d7c07a4

Browse files
committed
Add support for overlapping codepoints
1 parent 0d7c57c commit d7c07a4

File tree

2 files changed

+85
-61
lines changed

2 files changed

+85
-61
lines changed

src/font/simple_shape.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ mod tests {
3535
// ),
3636
(
3737
"NotoSans-Regular.ttf",
38-
" birth\u{ad}day",
38+
" birth\u{ad}day ",
3939
Direction::LeftToRight,
4040
14.0,
4141
),

src/stream.rs

Lines changed: 84 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use std::cell::RefCell;
21
use crate::font::{Font, FontIdentifier};
32
use crate::graphics_state::GraphicsStates;
43
use crate::object::cid_font::CIDFont;
@@ -22,6 +21,7 @@ use float_cmp::approx_eq;
2221
use pdf_writer::types::TextRenderingMode;
2322
use pdf_writer::{Content, Finish, Name, Str, TextStr};
2423
use skrifa::GlyphId;
24+
use std::cell::RefCell;
2525
use std::ops::Range;
2626
use std::sync::Arc;
2727
use tiny_skia_path::{FiniteF32, NormalizedF32, Path, PathSegment, Rect, Size, Transform};
@@ -369,7 +369,7 @@ impl ContentBuilder {
369369

370370
let font_container = sc.create_or_get_font_container(font.clone());
371371

372-
let spanned = TextSpanner::new(glyphs, text);
372+
let spanned = TextSpanner::new(glyphs, text, font_container);
373373

374374
for fragment in spanned {
375375
if let Some(text) = fragment.actual_text() {
@@ -379,11 +379,7 @@ impl ContentBuilder {
379379
actual_text.properties().actual_text(TextStr(text));
380380
}
381381

382-
let segmented = GlyphGrouper::new(
383-
font_container,
384-
fragment.glyphs(),
385-
text,
386-
);
382+
let segmented = GlyphGrouper::new(font_container, fragment.glyphs());
387383

388384
for glyph_group in segmented {
389385
sb.encode_consecutive_glyph_run(
@@ -770,58 +766,54 @@ impl Glyph {
770766
}
771767
}
772768

773-
pub enum PdfFont<'a> {
774-
Type3(&'a Type3Font),
775-
CID(&'a CIDFont),
769+
pub enum PdfFontMut<'a> {
770+
Type3(&'a mut Type3Font),
771+
CID(&'a mut CIDFont),
776772
}
777773

778-
impl PdfFont<'_> {
774+
impl<'a> PdfFontMut<'a> {
779775
pub fn identifier(&self) -> FontIdentifier {
780776
match self {
781-
PdfFont::Type3(t3) => t3.identifier(),
782-
PdfFont::CID(cid) => cid.identifier(),
777+
PdfFontMut::Type3(t3) => t3.identifier(),
778+
PdfFontMut::CID(cid) => cid.identifier(),
783779
}
784780
}
785781

786782
pub fn to_font_units(&self, val: f32) -> f32 {
787783
match self {
788-
PdfFont::Type3(t3) => t3.to_pdf_font_units(val),
789-
PdfFont::CID(cid) => cid.to_pdf_font_units(val),
784+
PdfFontMut::Type3(t3) => t3.to_pdf_font_units(val),
785+
PdfFontMut::CID(cid) => cid.to_pdf_font_units(val),
790786
}
791787
}
792788

793789
pub fn advance_width(&self, pdf_glyph: PDFGlyph) -> Option<f32> {
794790
match (self, pdf_glyph) {
795-
(PdfFont::Type3(t3), PDFGlyph::Type3(gid)) => t3.advance_width(gid),
796-
(PdfFont::CID(cid_font), PDFGlyph::CID(cid)) => cid_font.advance_width(cid),
791+
(PdfFontMut::Type3(t3), PDFGlyph::Type3(gid)) => t3.advance_width(gid),
792+
(PdfFontMut::CID(cid_font), PDFGlyph::CID(cid)) => cid_font.advance_width(cid),
797793
_ => None,
798794
}
799795
}
800-
}
801796

802-
pub enum PdfFontMut<'a> {
803-
Type3(&'a mut Type3Font),
804-
CID(&'a mut CIDFont),
805-
}
806-
807-
impl PdfFontMut<'_> {
808-
fn pdf_font(&self) -> PdfFont {
809-
match self {
810-
PdfFontMut::Type3(t3) => PdfFont::Type3(t3),
811-
PdfFontMut::CID(cid) => PdfFont::CID(cid),
797+
pub fn get_codepoints(&self, pdf_glyph: PDFGlyph) -> Option<&str> {
798+
match (self, pdf_glyph) {
799+
(PdfFontMut::Type3(t3), PDFGlyph::Type3(gid)) => t3.get_codepoints(gid),
800+
(PdfFontMut::CID(cid_font), PDFGlyph::CID(cid)) => cid_font.get_codepoints(cid),
801+
_ => None,
812802
}
813803
}
814804

815-
pub fn identifier(&self) -> FontIdentifier {
816-
self.pdf_font().identifier()
817-
}
818-
819-
pub fn to_font_units(&self, val: f32) -> f32 {
820-
self.pdf_font().to_font_units(val)
821-
}
822-
823-
pub fn advance_width(&self, pdf_glyph: PDFGlyph) -> Option<f32> {
824-
self.pdf_font().advance_width(pdf_glyph)
805+
pub fn set_codepoints(&mut self, pdf_glyph: PDFGlyph, text: String) -> Option<()> {
806+
match (self, pdf_glyph) {
807+
(PdfFontMut::Type3(t3), PDFGlyph::Type3(gid)) => {
808+
t3.set_codepoints(gid, text);
809+
Some(())
810+
}
811+
(PdfFontMut::CID(cid_font), PDFGlyph::CID(cid)) => {
812+
cid_font.set_codepoints(cid, text);
813+
Some(())
814+
}
815+
_ => None,
816+
}
825817
}
826818
}
827819

@@ -846,43 +838,80 @@ impl TextSpan<'_> {
846838
}
847839
}
848840

849-
pub struct TextSpanner<'a> {
841+
pub struct TextSpanner<'a, 'b> {
850842
slice: &'a [Glyph],
843+
font_container: &'b RefCell<FontContainer>,
851844
text: &'a str,
852845
}
853846

854-
impl<'a> TextSpanner<'a> {
855-
pub fn new(slice: &'a [Glyph], text: &'a str) -> Self {
856-
Self { slice, text }
847+
impl<'a, 'b> TextSpanner<'a, 'b> {
848+
pub fn new(
849+
slice: &'a [Glyph],
850+
text: &'a str,
851+
font_container: &'b RefCell<FontContainer>,
852+
) -> Self {
853+
Self {
854+
slice,
855+
text,
856+
font_container,
857+
}
857858
}
858859
}
859860

860-
impl<'a> Iterator for TextSpanner<'a> {
861+
impl<'a> Iterator for TextSpanner<'a, '_> {
861862
type Item = TextSpan<'a>;
862863

863864
fn next(&mut self) -> Option<Self::Item> {
864-
let func = |g: &Glyph| g.range.clone();
865+
let func = |g: &Glyph| {
866+
let mut font_container = self.font_container.borrow_mut();
867+
let pdf_glyph = font_container.add_glyph(g.glyph_id);
868+
let font_identifier = font_container.font_identifier(g.glyph_id).unwrap();
869+
let mut pdf_font = font_container
870+
.get_from_identifier_mut(font_identifier.clone())
871+
.unwrap();
872+
873+
let range = g.range.clone();
874+
let text = &self.text[range.clone()];
875+
let codepoints = pdf_font.get_codepoints(pdf_glyph);
876+
let incompatible_codepoint = codepoints.is_some() && codepoints != Some(text);
877+
878+
if !incompatible_codepoint {
879+
pdf_font.set_codepoints(pdf_glyph, text.to_string());
880+
}
865881

866-
let mut same_range = None;
882+
(range, incompatible_codepoint)
883+
};
884+
885+
let mut use_span = None;
867886
let mut count = 1;
868887

869888
let mut iter = self.slice.iter();
870-
let first = (func)(iter.next()?);
871-
let mut last_range = first.clone();
889+
let (first_range, first_incompatible) = (func)(iter.next()?);
890+
891+
let mut last_range = first_range.clone();
872892

873893
while let Some(next) = iter.next() {
874-
let next_range = func(next);
894+
let (next_range, next_incompatible) = func(next);
875895

876-
match same_range {
896+
match use_span {
877897
None => {
878-
same_range = Some(last_range == next_range);
898+
if first_incompatible {
899+
use_span = Some(true);
900+
break;
901+
}
902+
903+
use_span = Some(last_range == next_range);
879904
}
880905
Some(true) => {
881-
if last_range != next_range {
906+
if next_incompatible || last_range != next_range {
882907
break;
883908
}
884909
}
885910
Some(false) => {
911+
if next_incompatible {
912+
break;
913+
}
914+
886915
if last_range == next_range {
887916
count -= 1;
888917
break;
@@ -897,8 +926,8 @@ impl<'a> Iterator for TextSpanner<'a> {
897926
let (head, tail) = self.slice.split_at(count);
898927
self.slice = tail;
899928

900-
let fragment = match same_range.unwrap_or(false) {
901-
true => TextSpan::Spanned(head, &self.text[first]),
929+
let fragment = match use_span.unwrap_or(false) {
930+
true => TextSpan::Spanned(head, &self.text[first_range]),
902931
false => TextSpan::Unspanned(head),
903932
};
904933
Some(fragment)
@@ -931,15 +960,13 @@ impl GlyphGroup {
931960
pub struct GlyphGrouper<'a, 'b> {
932961
font_container: &'b RefCell<FontContainer>,
933962
slice: &'a [Glyph],
934-
text: &'a str,
935963
}
936964

937965
impl<'a, 'b> GlyphGrouper<'a, 'b> {
938-
pub fn new(font_container: &'b RefCell<FontContainer>, slice: &'a [Glyph], text: &'a str) -> Self {
966+
pub fn new(font_container: &'b RefCell<FontContainer>, slice: &'a [Glyph]) -> Self {
939967
Self {
940968
font_container,
941969
slice,
942-
text,
943970
}
944971
}
945972
}
@@ -956,8 +983,7 @@ impl<'a> Iterator for GlyphGrouper<'a, '_> {
956983
}
957984

958985
let func = |g: &Glyph| {
959-
let mut font_container = self.font_container.borrow_mut();
960-
font_container.add_glyph(g.glyph_id);
986+
let font_container = self.font_container.borrow_mut();
961987
let font_identifier = font_container.font_identifier(g.glyph_id).unwrap();
962988

963989
GlyphProps {
@@ -1003,12 +1029,10 @@ impl<'a> Iterator for GlyphGrouper<'a, '_> {
10031029
let pdf_glyph = match pdf_font {
10041030
PdfFontMut::Type3(ref mut t3) => {
10051031
let gid = t3.get_gid(g.glyph_id).unwrap();
1006-
t3.set_codepoints(gid, self.text[g.range.clone()].to_string());
10071032
PDFGlyph::Type3(gid)
10081033
}
10091034
PdfFontMut::CID(ref mut cid_font) => {
10101035
let cid = cid_font.get_cid(g.glyph_id).unwrap();
1011-
cid_font.set_codepoints(cid, self.text[g.range.clone()].to_string());
10121036
PDFGlyph::CID(cid)
10131037
}
10141038
};

0 commit comments

Comments
 (0)