|
25 | 25 | from itertools import chain |
26 | 26 | from urllib.error import HTTPError, URLError |
27 | 27 |
|
| 28 | +import magic as python_magic |
| 29 | + |
28 | 30 | from mathics.builtin.pymimesniffer import magic |
29 | 31 | from mathics.core.atoms import ByteArray |
30 | 32 | from mathics.core.attributes import A_NO_ATTRIBUTES, A_PROTECTED, A_READ_PROTECTED |
|
106 | 108 | "application/x-tex": "TeX", # Also TeX |
107 | 109 | "application/xhtml+xml": "XHTML", |
108 | 110 | "application/xml": "XML", |
| 111 | + "application/zip": "ZIP", |
109 | 112 | "audio/aiff": "AIFF", |
110 | 113 | "audio/basic": "AU", # Also SND |
111 | 114 | "audio/midi": "MIDI", |
@@ -2080,20 +2083,39 @@ def eval(self, filename: String, evaluation: Evaluation): |
2080 | 2083 | return findfile |
2081 | 2084 |
|
2082 | 2085 | path = findfile.value |
2083 | | - if not FileFormat.detector: |
2084 | | - loader = magic.MagicLoader() |
2085 | | - loader.load() |
2086 | | - FileFormat.detector = magic.MagicDetector(loader.mimetypes) |
2087 | | - |
2088 | | - mime = set(FileFormat.detector.match(path)) |
2089 | | - |
2090 | | - # If match fails match on extension only |
2091 | | - if mime == set(): |
2092 | | - mime, encoding = mimetypes.guess_type(path) |
2093 | | - if mime is None: |
2094 | | - mime = set() |
2095 | | - else: |
2096 | | - mime = set([mime]) |
| 2086 | + |
| 2087 | + # FileFormat classifies by getting a MIME type for the file,
| 2088 | + # even though the path doesn't have to be something received |
| 2089 | + # or transmitted over HTTP. |
| 2090 | + |
| 2091 | + if os.path.exists(path): |
| 2092 | + try: |
| 2093 | + # Use python_magic to determine the file type. |
| 2094 | + # This is the most accurate method since it looks inside the file |
| 2095 | + # for magic numbers. Therefore, if a JPEG file has been renamed with the |
| 2096 | + # file extension .txt, this will still figure out what's up. |
| 2097 | + mimetype = python_magic.from_file(path, mime=True) |
| 2098 | + if mimetype in mimetype_dict: |
| 2099 | + return String(mimetype_dict[mimetype]) |
| 2100 | + |
| 2101 | + except Exception: |
| 2102 | + pass |
| 2103 | + else: |
| 2104 | + if not FileFormat.detector: |
| 2105 | + loader = magic.MagicLoader() |
| 2106 | + loader.load() |
| 2107 | + FileFormat.detector = magic.MagicDetector(loader.mimetypes) |
| 2108 | + |
| 2109 | + mime = set(FileFormat.detector.match(path)) |
| 2110 | + |
| 2111 | + # If match fails match on extension only |
| 2112 | + if mime == set(): |
| 2113 | + mime, _ = mimetypes.guess_type(path) |
| 2114 | + if mime is None: |
| 2115 | + mime = set() |
| 2116 | + else: |
| 2117 | + mime = set([mime]) |
| 2118 | + |
2097 | 2119 | result = [] |
2098 | 2120 | for key in mimetype_dict.keys(): |
2099 | 2121 | if key in mime: |
|
0 commit comments