Skip to content

Commit 81f35f9

Browse files
authored
ENH: Accept ETen-B5 and UniCNS-UTF16 encodings (#2721)
Related to #2356.
1 parent e0a92e2 commit 81f35f9

File tree

2 files changed

+19
-7
lines changed

2 files changed

+19
-7
lines changed

pypdf/_cmap.py

+12 -7
Original file line number | Diff line number | Diff line change
@@ -113,18 +113,23 @@ def build_char_map_from_dict(
113113
_predefined_cmap: Dict[str, str] = {
114114
"/Identity-H": "utf-16-be",
115115
"/Identity-V": "utf-16-be",
116-
"/GB-EUC-H": "gbk", # TBC
117-
"/GB-EUC-V": "gbk", # TBC
118-
"/GBpc-EUC-H": "gb2312", # TBC
119-
"/GBpc-EUC-V": "gb2312", # TBC
120-
"/GBK-EUC-H": "gbk", # TBC
121-
"/GBK-EUC-V": "gbk", # TBC
116+
"/GB-EUC-H": "gbk",
117+
"/GB-EUC-V": "gbk",
118+
"/GBpc-EUC-H": "gb2312",
119+
"/GBpc-EUC-V": "gb2312",
120+
"/GBK-EUC-H": "gbk",
121+
"/GBK-EUC-V": "gbk",
122122
"/GBK2K-H": "gb18030",
123123
"/GBK2K-V": "gb18030",
124+
"/ETen-B5-H": "cp950",
125+
"/ETen-B5-V": "cp950",
126+
"/ETenms-B5-H": "cp950",
127+
"/ETenms-B5-V": "cp950",
128+
"/UniCNS-UTF16-H": "utf-16-be",
129+
"/UniCNS-UTF16-V": "utf-16-be",
124130
# UCS2 in code
125131
}
126132

127-
128133
# manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
129134
_default_fonts_space_width: Dict[str, int] = {
130135
"/Courier": 600,

tests/test_cmap.py

+7
Original file line number | Diff line number | Diff line change
@@ -199,3 +199,10 @@ def test_ignoring_non_put_entries():
199199
"""Issue #2290"""
200200
reader = PdfReader(BytesIO(get_data_from_url(name="iss2290.pdf")))
201201
reader.pages[0].extract_text()
202+
203+
204+
@pytest.mark.enable_socket()
205+
def test_eten_b5():
206+
"""Issue #2356"""
207+
reader = PdfReader(BytesIO(get_data_from_url(name="iss2290.pdf")))
208+
reader.pages[0].extract_text().startswith("1/7 \n富邦新終身壽險")

0 commit comments

Comments
 (0)