Skip to content

Commit 134dd31

Browse files
author
User
committed
Add new errors argument to decode_content()
1 parent b9be324 commit 134dd31

2 files changed

Lines changed: 35 additions & 19 deletions

File tree

tests/test_decode_html.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,74 @@
11
# coding: utf-8
2+
import pytest
3+
24
from unicodec import decode_content
35

46

5-
def test_basic_usage():
6-
# type: () -> None
7+
def test_basic_usage(): # type: () -> None
78
assert decode_content(b"asdf") == "asdf"
89

910

10-
def test_input_bytess():
11-
# type: () -> None
11+
def test_input_bytess(): # type: () -> None
1212
# fmt: off
1313
assert decode_content(u"крокодил".encode("utf-8")) == u"крокодил"
1414
# fmt: on
1515

1616

17-
def test_input_str():
18-
# type: () -> None
17+
def test_input_str(): # type: () -> None
1918
# fmt: off
2019
assert decode_content(u"крокодил") == u"крокодил"
2120
# fmt: on
2221

2322

24-
def test_arg_decode_entities_default():
25-
# type: () -> None
23+
def test_arg_decode_entities_default(): # type: () -> None
2624
# fmt: off
2725
assert decode_content("&copy;") == u"©"
2826
# fmt: on
2927

3028

31-
def test_arg_decode_entities_false():
32-
# type: () -> None
29+
def test_arg_decode_entities_false(): # type: () -> None
3330
assert decode_content("&copy;", decode_entities=False) == "&copy;"
3431

3532

36-
def test_arg_remove_null_bytes_default():
37-
# type: () -> None
33+
def test_arg_remove_null_bytes_default(): # type: () -> None
3834
assert decode_content("as\x00df") == "asdf"
3935

4036

41-
def test_arg_remove_null_bytes_false():
42-
# type: () -> None
37+
def test_arg_remove_null_bytes_false(): # type: () -> None
4338
assert decode_content("as\x00df", remove_null_bytes=False) == "as\x00df"
4439

4540

46-
def test_arg_encoding_default():
47-
# type: () -> None
41+
def test_arg_encoding_default(): # type: () -> None
4842
# fmt: off
4943
assert decode_content(u"крокодил".encode("utf-8")) == u"крокодил"
5044
# fmt: on
5145

5246

53-
def test_arg_encoding_explicit():
54-
# type: () -> None
47+
def test_arg_encoding_explicit(): # type: () -> None
5548
# fmt: off
5649
assert decode_content(u"крокодил".encode("cp1251"), encoding="cp1251") == (
5750
u"крокодил"
5851
)
5952
# fmt: on
53+
54+
55+
def test_errors_strict_default(): # type: () -> None
56+
with pytest.raises(UnicodeDecodeError):
57+
decode_content(b"\x80", encoding="utf-8")
58+
59+
60+
def test_errors_strict(): # type: () -> None
61+
with pytest.raises(UnicodeDecodeError):
62+
decode_content(b"\x80", encoding="utf-8", errors="strict")
63+
64+
65+
def test_errors_replace(): # type: () -> None
66+
# fmt: off
67+
assert decode_content(b"\x80", encoding="utf-8", errors="replace") == u"�"
68+
# fmt: on
69+
70+
71+
def test_errors_ignore(): # type: () -> None
72+
# fmt: off
73+
assert decode_content(b"\x80", encoding="utf-8", errors="ignore") == u""
74+
# fmt: on

unicodec/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,15 @@ def decode_content( # pylint: disable=R0917
4949
encoding=None, # type: None | str
5050
content_type_header=None, # type: None | str
5151
markup="html", # type: Literal["html", "xml"]
52+
errors="strict", # type: Literal["strict", "ignore", "replace"]
5253
):
5354
# type: (...) -> str
5455
if isinstance(data, bytes):
5556
if encoding is None:
5657
encoding = detect_content_encoding(
5758
data, content_type_header=content_type_header, markup=markup
5859
)
59-
data = data.decode(encoding)
60+
data = data.decode(encoding, errors=errors)
6061
# Remove BOM, it might be at the start of decoded unicode text
6162
if data.startswith(BOM_UNICODE):
6263
data = data[len(BOM_UNICODE) :]

0 commit comments

Comments
 (0)