1111
1212import gzip
1313import logging
14- import os
15- import os .path
1614import pickle as pickle
1715import struct
1816import sys
1917from bisect import bisect_left
18+ from pathlib import Path
2019from typing import (
2120 Any ,
2221 Dict ,
@@ -48,6 +47,7 @@ def cache(func): # type: ignore
4847
4948
log = logging.getLogger(__name__)
# Absolute, symlink-resolved directory that sits next to this module and
# holds the bundled "<name>.pickle.gz" CMap files.  It is resolved once here
# so that resolved candidate paths can be compared against it directly.
CMAP_DIR = (Path(__file__).parent / "cmap").resolve()
5151
5252
5353class CMapError (Exception ):
@@ -175,21 +175,21 @@ def dump(self, out: TextIO = sys.stdout) -> None:
175175
176176
class PyCMap(CMap):
    """CMap whose code-to-CID table comes from a pre-built data mapping.

    Args:
        name: CMap name, forwarded to the base class as ``CMapName``.
        data: Mapping that provides the ``"CODE2CID"`` table and the
            ``"IS_VERTICAL"`` flag.
            NOTE(review): presumably the object returned by
            ``CMapDB._load_data`` -- confirm against the caller.
    """

    def __init__(self, name: str, data: Dict[str, Any]) -> None:
        super().__init__(CMapName=name)
        self.code2cid = data["CODE2CID"]
        # A truthy IS_VERTICAL flag is recorded as writing mode 1.
        if data["IS_VERTICAL"]:
            self.attrs["WMode"] = 1
183183
184184
class PyUnicodeMap(UnicodeMap):
    """Unicode map whose CID-to-character table comes from a data mapping.

    Args:
        name: Map name, forwarded to the base class as ``CMapName``.
        data: Mapping that provides the ``"CID2UNICHR_H"`` and
            ``"CID2UNICHR_V"`` tables.
            NOTE(review): presumably the object returned by
            ``CMapDB._load_data`` -- confirm against the caller.
        vertical: When true, use the ``"CID2UNICHR_V"`` table and record
            writing mode 1; otherwise use ``"CID2UNICHR_H"``.
    """

    def __init__(self, name: str, data: Dict[str, Any], vertical: bool) -> None:
        super().__init__(CMapName=name)
        if vertical:
            self.cid2unichr = data["CID2UNICHR_V"]
            self.attrs["WMode"] = 1
        else:
            self.cid2unichr = data["CID2UNICHR_H"]
193193
194194
195195class CMapDB :
@@ -200,19 +200,14 @@ class CMapDB:
def _load_data(cls, name: str) -> Any:
    """Load the pickled data for CMap *name* from the bundled directory.

    NUL characters are stripped from *name*, which is then mapped to
    ``<name>.pickle.gz`` under ``CMAP_DIR``.  The candidate path is
    resolved and rejected unless it still lies inside ``CMAP_DIR``, so a
    name containing ``..`` segments or an absolute path cannot select a
    file elsewhere on disk.

    NOTE(review): takes ``cls``; presumably decorated with
    ``@classmethod`` on ``CMapDB`` just above the visible hunk -- confirm.

    Args:
        name: CMap name to look up.

    Returns:
        Whatever object the gzip-compressed pickle contains.

    Raises:
        KeyError: If the resolved path escapes ``CMAP_DIR``, or if no file
            can be opened for *name*.
    """
    name = name.replace("\0", "")
    filename = "%s.pickle.gz" % name
    pklpath = (CMAP_DIR / filename).resolve()
    if not pklpath.is_relative_to(CMAP_DIR):
        raise KeyError(f"Ignoring malicious or malformed CMap {name}")
    try:
        # Only the open is guarded.  Besides a missing file, a name can be
        # unopenable because it is too long, passes through a regular file,
        # or contains characters the platform rejects; callers handle
        # KeyError, so every such case is reported as "not found".
        gzfile = gzip.open(pklpath)
    except OSError as e:
        raise KeyError(f"CMap {name} not found in CMapDB") from e
    # SECURITY: unpickling runs code embedded in the file.  The containment
    # check above is what limits this to files inside CMAP_DIR; do not relax
    # it without replacing pickle.  Errors raised while reading (e.g. a
    # corrupt archive) are deliberately left to propagate.
    with gzfile:
        return pickle.load(gzfile)
216211
217212 @classmethod
218213 def get_cmap (cls , name : str ) -> CMapBase :
0 commit comments