|
31 | 31 | from playa.data_structures import NameTree, NumberTree |
32 | 32 | from playa.document import Document as PDFDocument |
33 | 33 | from playa.exceptions import PDFException |
34 | | -from playa.page import ( |
| 34 | +from playa.content import ( |
35 | 35 | ContentObject, |
36 | 36 | GlyphObject, |
37 | 37 | GraphicState, |
|
52 | 52 | apply_matrix_pt, |
53 | 53 | decode_text, |
54 | 54 | get_bound, |
| 55 | + transform_bbox, |
55 | 56 | ) |
| 57 | +from paves.compat import subpaths |
56 | 58 |
|
57 | 59 | PSException = Exception |
58 | 60 | __all__ = [ |
@@ -519,33 +521,45 @@ def __init__( |
519 | 521 | glyph: GlyphObject, |
520 | 522 | ) -> None: |
521 | 523 | LTText.__init__(self) |
522 | | - textstate = glyph.textstate |
523 | 524 | gstate = glyph.gstate |
524 | 525 | matrix = glyph.matrix |
| 526 | + font = glyph.font |
525 | 527 | if glyph.text is None: |
526 | | - logger.debug("undefined: %r, %r", textstate.font, glyph.cid) |
| 528 | + logger.debug("undefined: %r, %r", font, glyph.cid) |
527 | 529 | # Horrible awful pdfminer.six behaviour |
528 | 530 | self._text = "(cid:%d)" % glyph.cid |
529 | 531 | else: |
530 | 532 | self._text = glyph.text |
531 | | - self.matrix = matrix |
532 | 533 | self.mcstack = glyph.mcstack |
533 | | - font = textstate.font |
534 | | - assert font is not None |
535 | 534 | self.fontname = font.fontname |
536 | | - self.render_mode = textstate.render_mode |
537 | 535 | self.graphicstate = gstate |
| 536 | + self.render_mode = gstate.render_mode |
538 | 537 | self.stroking_color = gstate.scolor |
539 | 538 | self.non_stroking_color = gstate.ncolor |
540 | 539 | self.scs = gstate.scs |
541 | 540 | self.ncs = gstate.ncs |
542 | | - self.adv = glyph.adv |
| 541 | + scaling = gstate.scaling * 0.01 |
| 542 | + fontsize = gstate.fontsize |
543 | 543 | (a, b, c, d, e, f) = matrix |
544 | | - scaling = textstate.scaling |
545 | 544 | # FIXME: Still really not sure what this means |
546 | 545 | self.upright = a * d * scaling > 0 and b * c <= 0 |
547 | | - LTComponent.__init__(self, glyph.bbox, glyph.mcstack) |
548 | | - # FIXME: This is now quite wrong for rotated glyphs |
| 546 | + # Unscale the matrix to match pdfminer.six |
| 547 | + xscale = 1 / (fontsize * scaling) |
| 548 | + yscale = 1 / fontsize |
| 549 | + self.matrix = (a * xscale, b * yscale, c * xscale, d * yscale, e, f) |
| 550 | + # Recreate pdfminer.six's bogus bboxes |
| 551 | + if font.vertical: |
| 552 | + vdisp = font.vdisp(glyph.cid) |
| 553 | + self.adv = vdisp * fontsize |
| 554 | + vx, vy = font.position(glyph.cid) |
| 555 | + textbox = (-vx, vy + vdisp, -vx + 1, vy) |
| 556 | + else: |
| 557 | + textwidth = font.char_width(glyph.cid) |
| 558 | + self.adv = textwidth * fontsize * scaling |
| 559 | + textbox = (0, font.get_descent(), textwidth, font.get_descent() + 1) |
| 560 | + miner_box = transform_bbox(glyph.matrix, textbox) |
| 561 | + LTComponent.__init__(self, miner_box, glyph.mcstack) |
| 562 | + # FIXME: This is quite wrong for rotated glyphs, but so is pdfminer.six |
549 | 563 | if font.vertical: |
550 | 564 | self.size = self.width |
551 | 565 | else: |
@@ -1157,7 +1171,7 @@ def process_object(obj: ContentObject) -> Iterator[LTComponent]: |
1157 | 1171 |
|
1158 | 1172 | @process_object.register |
1159 | 1173 | def _(obj: PathObject) -> Iterator[LTComponent]: |
1160 | | - for path in obj: |
| 1174 | + for path in subpaths(obj): |
1161 | 1175 | ops = [] |
1162 | 1176 | pts: List[Point] = [] |
1163 | 1177 | for seg in path.raw_segments: |
|
0 commit comments