Skip to content

Commit a97841a

Browse files
committed
Replace imghdr by filetype for python 3.11
Keep imghdr for Python 2.7 to avoid adding a dependency there. imghdr is deprecated as of Python 3.11 (https://docs.python.org/3/library/imghdr.html), so use filetype as the alternative on Python 3. Add tests.
1 parent 0c774c0 commit a97841a

3 files changed

Lines changed: 129 additions & 5 deletions

File tree

flanker/mime/message/part.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import base64
2-
import imghdr
32
import logging
3+
import sys
44
import mimetypes
55
import quopri
66
from contextlib import closing
@@ -9,6 +9,11 @@
99
import six
1010
from six.moves import StringIO
1111

12+
if sys.version_info[0] >= 3:
13+
import filetype as _filetype
14+
else:
15+
import imghdr as _imghdr
16+
1217
from flanker import metrics, _email
1318
from flanker.mime import bounce
1419
from flanker.mime.message import headers, charsets
@@ -113,9 +118,16 @@ def adjust_content_type(content_type, body=None, filename=None):
113118
if six.PY3 and isinstance(body, six.text_type):
114119
image_preamble = image_preamble.encode('utf-8', 'ignore')
115120

116-
sub = imghdr.what(None, image_preamble)
117-
if sub:
118-
content_type = ContentType('image', sub)
121+
if sys.version_info[0] >= 3:
122+
kind = _filetype.guess(image_preamble)
123+
if kind and kind.mime.startswith('image/'):
124+
sub = kind.extension
125+
sub = {'jpg': 'jpeg', 'tif': 'tiff'}.get(sub, sub)
126+
content_type = ContentType('image', sub)
127+
else:
128+
sub = _imghdr.what(None, image_preamble)
129+
if sub:
130+
content_type = ContentType('image', sub)
119131

120132
elif content_type.main == 'audio' and body:
121133
sub = _email.detect_audio_type(body)

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
'regex>=0.1.20110315',
4242
'six',
4343
'tld',
44-
'WebOb>=0.9.8'],
44+
'WebOb>=0.9.8',
45+
'filetype; python_version >= "3"'],
4546
extras_require={
4647
'validator': [
4748
'dnsq>=1.1.6',
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# coding:utf-8
2+
"""
3+
Tests for adjust_content_type() in flanker/mime/message/part.py.
4+
5+
These tests specifically cover the filetype-based image detection that
6+
replaced the deprecated imghdr module.
7+
"""
8+
import pytest
9+
10+
from flanker.mime.message.part import adjust_content_type
11+
from flanker.mime.message.headers import ContentType
12+
from tests import MAILGUN_PNG
13+
14+
15+
# Minimal file headers ("magic bytes") for common image formats.  Each value
# is the format signature right-padded with NUL bytes to a fixed total length.
JPEG_MAGIC = b'\xff\xd8\xff\xe0'.ljust(32, b'\x00')
PNG_MAGIC = b'\x89PNG\r\n\x1a\n'.ljust(32, b'\x00')
GIF_MAGIC = b'GIF89a'.ljust(32, b'\x00')
BMP_MAGIC = b'BM'.ljust(32, b'\x00')
# WebP layout: "RIFF" + 4-byte size + "WEBP" + 4-byte chunk type ("VP8 ")
# + 4-byte chunk size, then padding (36 bytes in total).
WEBP_MAGIC = b'RIFF\x28\x00\x00\x00WEBPVP8 \x1c\x00\x00\x00'.ljust(36, b'\x00')
# TIFF comes in two byte orders: little-endian ("II") and big-endian ("MM").
TIFF_LE_MAGIC = b'II\x2a\x00'.ljust(32, b'\x00')
TIFF_BE_MAGIC = b'MM\x00\x2a'.ljust(32, b'\x00')
25+
26+
27+
class TestAdjustContentTypeImageDetection:
    """Tests for image subtype detection from body bytes.

    On Python 3 the detection is done by the ``filetype`` package; on
    Python 2 the code under test still uses ``imghdr``.  Tests whose
    fixtures can only be recognised by ``filetype`` are skipped on Python 2
    so the suite stays green on both interpreters.
    """

    def _image_ct(self, sub='octet-stream'):
        """Build an ``image/<sub>`` ContentType to pass to the function."""
        return ContentType('image', sub)

    # String conditions are evaluated by pytest with ``sys`` in scope, so no
    # extra import is needed in this module.
    @pytest.mark.skipif(
        "sys.version_info[0] < 3",
        reason="Python 2 imghdr needs a JFIF/Exif marker to recognise JPEG")
    def test_jpeg_detected_and_mapped(self):
        """filetype returns 'jpg', must be mapped to 'jpeg' for MIME."""
        ct = adjust_content_type(self._image_ct(), body=JPEG_MAGIC)
        assert str(ct) == 'image/jpeg'

    def test_png_detected(self):
        """A PNG signature yields image/png."""
        ct = adjust_content_type(self._image_ct(), body=PNG_MAGIC)
        assert str(ct) == 'image/png'

    def test_gif_detected(self):
        """A GIF89a signature yields image/gif."""
        ct = adjust_content_type(self._image_ct(), body=GIF_MAGIC)
        assert str(ct) == 'image/gif'

    def test_bmp_detected(self):
        """A BMP signature yields image/bmp."""
        ct = adjust_content_type(self._image_ct(), body=BMP_MAGIC)
        assert str(ct) == 'image/bmp'

    @pytest.mark.skipif(
        "sys.version_info[0] < 3",
        reason="Python 2 imghdr has no WebP detection")
    def test_webp_detected(self):
        """A RIFF/WEBP header yields image/webp."""
        ct = adjust_content_type(self._image_ct(), body=WEBP_MAGIC)
        assert str(ct) == 'image/webp'

    def test_tiff_le_detected_and_mapped(self):
        """filetype returns 'tif', must be mapped to 'tiff' for MIME."""
        ct = adjust_content_type(self._image_ct(), body=TIFF_LE_MAGIC)
        assert str(ct) == 'image/tiff'

    def test_tiff_be_detected_and_mapped(self):
        """Big-endian TIFF is mapped to image/tiff as well."""
        ct = adjust_content_type(self._image_ct(), body=TIFF_BE_MAGIC)
        assert str(ct) == 'image/tiff'

    def test_unknown_body_keeps_original_subtype(self):
        """Non-image bytes: content type should not change."""
        ct = adjust_content_type(self._image_ct('jpeg'), body=b'notanimage' * 4)
        assert str(ct) == 'image/jpeg'

    def test_no_body_keeps_original_subtype(self):
        """Without a body there is nothing to sniff; the type is unchanged."""
        ct = adjust_content_type(self._image_ct('jpeg'), body=None)
        assert str(ct) == 'image/jpeg'

    def test_real_png_fixture(self):
        """Detection works on a real PNG binary (mailgun.png fixture)."""
        ct = adjust_content_type(self._image_ct('octet-stream'), body=MAILGUN_PNG)
        assert str(ct) == 'image/png'

    def test_only_preamble_used(self):
        """Detection is based on first 32 bytes; trailing garbage is ignored."""
        png_with_garbage = PNG_MAGIC + b'\xff' * 10000
        ct = adjust_content_type(self._image_ct(), body=png_with_garbage)
        assert str(ct) == 'image/png'

    def test_non_image_content_type_not_affected(self):
        """adjust_content_type should not touch non-image/non-audio content types."""
        ct = adjust_content_type(ContentType('text', 'plain'), body=PNG_MAGIC)
        assert str(ct) == 'text/plain'
87+
88+
89+
class TestAdjustContentTypeFilename:
    """Filename-driven content type guessing (behaviour predating this change)."""

    def test_bz2_filename(self):
        """An octet-stream part named ``*.bz2`` becomes application/x-bzip2."""
        generic = ContentType('application', 'octet-stream')
        result = adjust_content_type(generic, filename='archive.bz2')
        assert str(result) == 'application/x-bzip2'

    def test_gz_filename(self):
        """An octet-stream part named ``*.gz`` becomes application/x-gzip."""
        generic = ContentType('application', 'octet-stream')
        result = adjust_content_type(generic, filename='archive.gz')
        assert str(result) == 'application/x-gzip'

    def test_png_filename(self):
        """An octet-stream part named ``*.png`` becomes image/png."""
        generic = ContentType('application', 'octet-stream')
        result = adjust_content_type(generic, filename='photo.png')
        assert str(result) == 'image/png'

    def test_filename_ignored_when_not_octet_stream(self):
        """Filename guessing only triggers for application/octet-stream."""
        declared = ContentType('image', 'jpeg')
        result = adjust_content_type(declared, filename='photo.png')
        assert str(result) == 'image/jpeg'

0 commit comments

Comments
 (0)