Skip to content

Commit 38767ab

Browse files
lambdalemon authored and David Huggins-Daines committed
cid2gid
1 parent 9b786d7 commit 38767ab

File tree

3 files changed

+81
-24
lines changed

3 files changed

+81
-24
lines changed

playa/font.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
"""
66

77
import logging
8+
import struct
89
from io import BytesIO
910
from pathlib import Path
1011
from typing import (
1112
Dict,
1213
Iterable,
1314
List,
15+
Mapping,
1416
Optional,
1517
Tuple,
1618
Union,
@@ -61,10 +63,12 @@
6163
choplist,
6264
decode_text,
6365
transform_bbox,
66+
IDENTITY_MAPPING,
6467
)
6568

6669
log = logging.getLogger(__name__)
6770
LITERAL_STANDARD_ENCODING = LIT("StandardEncoding")
71+
LITERAL_CIDFONT_TYPE0 = LIT("CIDFontType0")
6872

6973

7074
class Font:
@@ -292,12 +296,13 @@ def get_implicit_encoding(
292296
self.fontfile3 = stream_value(descriptor.get("FontFile3"))
293297
try:
294298
cfffont = CFFFontProgram(self.basefont, BytesIO(self.fontfile3.buffer))
295-
self.cfffont = cfffont
296-
return {
297-
cid: cfffont.gid2name[gid]
298-
for cid, gid in cfffont.code2gid.items()
299-
if gid in cfffont.gid2name
300-
}
299+
assert not cfffont.is_cidfont
300+
return cfffont.code2name
301+
except AssertionError:
302+
log.warning(
303+
"Embedded CFFFont %r for Type1 font is a CIDFont", self.fontfile3
304+
)
305+
return LITERAL_STANDARD_ENCODING
301306
except Exception:
302307
log.debug("Failed to parse CFFFont %r", self.fontfile3, exc_info=True)
303308
return LITERAL_STANDARD_ENCODING
@@ -515,6 +520,8 @@ def __init__(
515520
self,
516521
spec: Dict[str, PDFObject],
517522
) -> None:
523+
self.spec = spec
524+
self.subtype = resolve1(spec.get("Subtype"))
518525
self.basefont = get_basefont(spec)
519526
self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {}))
520527
# These are *supposed* to be ASCII (PDF 1.7 section 9.7.3),
@@ -621,8 +628,44 @@ def __init__(
621628
self.default_vdisp = 0
622629
self.positions = {}
623630
self.vdisps = {}
631+
624632
Font.__init__(self, descriptor, widths, default_width=default_width)
625633

634+
@property
635+
def cid2gid(self) -> Optional[Mapping[int, int]]:
636+
"""According to PDF 2.0 Sec 9.7.4.2 Glyph selection in CIDFonts:
637+
The CID to glyph id mapping, or None in the case of external TrueType
638+
font program (Type2 CIDFont), because "...In this case, CIDs shall not
639+
participate in glyph selection..."
640+
Note that this is not exactly equivalent to the CIDToGIDMap entry,
641+
despite what the name might suggest.
642+
"""
643+
if "FontFile2" in self.descriptor:
644+
# Type 2, embedded
645+
cid2gidmap = resolve1(self.spec.get("CIDToGIDMap"))
646+
if isinstance(cid2gidmap, ContentStream):
647+
buffer = cid2gidmap.buffer
648+
return dict(
649+
enumerate(struct.unpack(">" + "H" * (len(buffer) // 2), buffer))
650+
)
651+
else:
652+
return IDENTITY_MAPPING
653+
elif "FontFile3" in self.descriptor:
654+
# Type 0, embedded
655+
try:
656+
fontfile3 = stream_value(self.descriptor.get("FontFile3"))
657+
cfffont = CFFFontProgram(self.basefont, BytesIO(fontfile3.buffer))
658+
return cfffont.cid2gid if cfffont.is_cidfont else IDENTITY_MAPPING
659+
except Exception:
660+
log.debug("Failed to parse CFFFont %r", fontfile3, exc_info=True)
661+
return IDENTITY_MAPPING
662+
elif self.subtype == LITERAL_CIDFONT_TYPE0:
663+
# Type 0, external
664+
return IDENTITY_MAPPING
665+
else:
666+
# Type 2, external
667+
return None
668+
626669
def get_cmap_from_spec(self, spec: Dict[str, PDFObject]) -> CMapBase:
627670
"""Get cmap from font specification
628671

playa/fontprogram.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,8 +1471,7 @@ def __init__(self, name: str, fp: BinaryIO) -> None:
14711471
self.subr_index = self.INDEX(self.fp)
14721472
# Top DICT DATA
14731473
self.top_dict = self.getdict(self.dict_index[0])
1474-
if (12, 30) in self.top_dict:
1475-
raise NotImplementedError("CFF CIDFont not implemented")
1474+
self.is_cidfont = (12, 30) in self.top_dict
14761475
(charset_pos,) = self.top_dict.get(15, [0])
14771476
(encoding_pos,) = self.top_dict.get(16, [0])
14781477
(charstring_pos,) = self.top_dict.get(17, [0])
@@ -1481,17 +1480,24 @@ def __init__(self, name: str, fp: BinaryIO) -> None:
14811480
self.charstring = self.INDEX(self.fp)
14821481
self.nglyphs = len(self.charstring)
14831482
self._parse_charset(int(charset_pos))
1484-
self._parse_encoding(int(encoding_pos))
1483+
if not self.is_cidfont:
1484+
self.name2gid = {self.getstr(sid): gid for gid, sid in self.gid2sid.items()}
1485+
self._parse_encoding(int(encoding_pos))
1486+
self.code2name = {
1487+
code: self.getstr(self.gid2sid[gid])
1488+
for code, gid in self.code2gid.items()
1489+
if gid in self.gid2sid
1490+
}
1491+
else:
1492+
self.cid2gid = {sid: gid for gid, sid in self.gid2sid.items()}
14851493

14861494
def _parse_encoding(self, encoding_pos: int) -> None:
14871495
# Encodings
14881496
self.code2gid = {}
1489-
self.gid2code = {}
14901497
if encoding_pos in (0, 1):
14911498
for code, sid in enumerate(self.PREDEFINED_ENCODINGS[encoding_pos]):
14921499
if gid := self.name2gid.get(self.getstr(sid)):
14931500
self.code2gid[code] = gid
1494-
self.gid2code[gid] = code
14951501
return
14961502
self.fp.seek(encoding_pos)
14971503
(format,) = self.fp.read(1)
@@ -1504,7 +1510,6 @@ def _parse_encoding(self, encoding_pos: int) -> None:
15041510
struct.unpack("B" * n, self.fp.read(n)), start=1
15051511
):
15061512
self.code2gid[code] = gid
1507-
self.gid2code[gid] = code
15081513
elif format == 1:
15091514
# Format 1
15101515
(n,) = struct.unpack("B", self.fp.read(1))
@@ -1513,7 +1518,6 @@ def _parse_encoding(self, encoding_pos: int) -> None:
15131518
(first, nleft) = struct.unpack("BB", self.fp.read(2))
15141519
for code in range(first, first + nleft + 1):
15151520
self.code2gid[code] = gid
1516-
self.gid2code[gid] = code
15171521
gid += 1
15181522
else:
15191523
raise ValueError("unsupported encoding format: %r" % format)
@@ -1526,13 +1530,12 @@ def _parse_encoding(self, encoding_pos: int) -> None:
15261530

15271531
def _parse_charset(self, charset_pos: int) -> None:
15281532
# Charsets
1529-
self.name2gid = {}
1530-
self.gid2name = {}
1533+
self.gid2sid = {}
15311534
if charset_pos in (0, 1, 2):
1535+
if self.is_cidfont:
1536+
raise ValueError("no predefined charsets for CID CFF fonts")
15321537
for gid, sid in enumerate(self.PREDEFINED_CHARSETS[charset_pos], start=1):
1533-
sidname = self.getstr(sid)
1534-
self.name2gid[sidname] = gid
1535-
self.gid2name[gid] = sidname
1538+
self.gid2sid[gid] = sid
15361539
return
15371540
self.fp.seek(charset_pos)
15381541
(format,) = self.fp.read(1)
@@ -1543,9 +1546,7 @@ def _parse_charset(self, charset_pos: int) -> None:
15431546
for gid, sid in enumerate(
15441547
struct.unpack(">" + "H" * n, self.fp.read(2 * n)), start=1
15451548
):
1546-
sidname = self.getstr(sid)
1547-
self.name2gid[sidname] = gid
1548-
self.gid2name[gid] = sidname
1549+
self.gid2sid[gid] = sid
15491550
elif format in (1, 2):
15501551
# Format 1 & 2
15511552
range_f = ">HB" if format == 1 else ">HH"
@@ -1554,9 +1555,7 @@ def _parse_charset(self, charset_pos: int) -> None:
15541555
while gid < self.nglyphs:
15551556
(first, nleft) = struct.unpack(range_f, self.fp.read(range_f_size))
15561557
for sid in range(first, first + nleft + 1):
1557-
sidname = self.getstr(sid)
1558-
self.name2gid[sidname] = gid
1559-
self.gid2name[gid] = sidname
1558+
self.gid2sid[gid] = sid
15601559
gid += 1
15611560
else:
15621561
raise ValueError("unsupported charset format: %r" % format)

playa/utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Iterable,
99
Iterator,
1010
List,
11+
Mapping,
1112
Tuple,
1213
TypeVar,
1314
Union,
@@ -687,3 +688,17 @@ def format_int_alpha(value: int) -> str:
687688

688689
result.reverse()
689690
return "".join(result)
691+
692+
693+
class IdentityMapping(Mapping[_T, _T]):
    """Read-only mapping in which every key maps to itself.

    The key space is unbounded, so it cannot be enumerated: iteration
    yields nothing and ``len()`` is 0, while lookup (``m[key]``,
    ``m.get(key)``) and membership (``key in m``) succeed for any key.
    """

    def __getitem__(self, key: _T) -> _T:
        """Return `key` itself."""
        return key

    def __contains__(self, key: object) -> bool:
        """Every key is present (same answer as the Mapping mixin, which
        would get there by calling __getitem__)."""
        return True

    def __iter__(self) -> Iterator[_T]:
        """Yield nothing: the keys cannot be enumerated."""
        return iter(())

    def __len__(self) -> int:
        """Return 0, consistent with the empty iteration."""
        return 0

    def __bool__(self) -> bool:
        # Without this, truthiness falls back to __len__() == 0 and the
        # mapping is falsy, so `if mapping:` could not tell it apart from
        # None or from a genuinely empty dict.
        return True

    def __repr__(self) -> str:
        return "%s()" % type(self).__name__


# Shared instance; the class holds no state, so one object is enough.
IDENTITY_MAPPING: IdentityMapping = IdentityMapping()

0 commit comments

Comments
 (0)