-
-
Notifications
You must be signed in to change notification settings - Fork 730
Avoid deep copy on lz4 decompression #7437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,7 +50,6 @@ def test_compression_config(config, default): | |
|
||
|
||
def test_compression_1(): | ||
pytest.importorskip("lz4") | ||
np = pytest.importorskip("numpy") | ||
x = np.ones(1000000) | ||
b = x.tobytes() | ||
|
@@ -61,7 +60,6 @@ def test_compression_1(): | |
|
||
|
||
def test_compression_2(): | ||
pytest.importorskip("lz4") | ||
np = pytest.importorskip("numpy") | ||
x = np.random.random(10000) | ||
msg = dumps(to_serialize(x.data)) | ||
|
@@ -70,7 +68,6 @@ def test_compression_2(): | |
|
||
|
||
def test_compression_3(): | ||
pytest.importorskip("lz4") | ||
np = pytest.importorskip("numpy") | ||
x = np.ones(1000000) | ||
frames = dumps({"x": Serialize(x.data)}) | ||
|
@@ -80,7 +77,6 @@ def test_compression_3(): | |
|
||
|
||
def test_compression_without_deserialization(): | ||
pytest.importorskip("lz4") | ||
np = pytest.importorskip("numpy") | ||
x = np.ones(1000000) | ||
|
||
|
@@ -91,6 +87,18 @@ def test_compression_without_deserialization(): | |
assert all(len(frame) < 1000000 for frame in msg["x"].frames) | ||
|
||
|
||
def test_lz4_decompression_avoids_deep_copy(): | ||
"""Test that lz4 output is a bytearray, not bytes, so that numpy deserialization is | ||
not forced to perform a deep copy to obtain a writeable array. | ||
Note that zlib, zstandard, and snappy don't have this option. | ||
""" | ||
pytest.importorskip("lz4") | ||
a = bytearray(1_000_000) | ||
b = compressions["lz4"]["compress"](a) | ||
c = compressions["lz4"]["decompress"](b) | ||
assert isinstance(c, bytearray) | ||
|
||
|
||
def test_small(): | ||
assert sum(map(nbytes, dumps(b""))) < 10 | ||
assert sum(map(nbytes, dumps(1))) < 10 | ||
|
@@ -106,7 +114,13 @@ def test_small_and_big(): | |
|
||
@pytest.mark.parametrize( | ||
"lib,compression", | ||
[(None, None), ("zlib", "zlib"), ("lz4", "lz4"), ("zstandard", "zstd")], | ||
[ | ||
(None, None), | ||
("zlib", "zlib"), | ||
("lz4", "lz4"), | ||
("snappy", "snappy"), | ||
("zstandard", "zstd"), | ||
], | ||
) | ||
def test_maybe_compress(lib, compression): | ||
if lib: | ||
|
@@ -126,7 +140,13 @@ def test_maybe_compress(lib, compression): | |
|
||
@pytest.mark.parametrize( | ||
"lib,compression", | ||
[(None, None), ("zlib", "zlib"), ("lz4", "lz4"), ("zstandard", "zstd")], | ||
[ | ||
(None, None), | ||
("zlib", "zlib"), | ||
("lz4", "lz4"), | ||
("snappy", "snappy"), | ||
("zstandard", "zstd"), | ||
], | ||
) | ||
def test_compression_thread_safety(lib, compression): | ||
if lib: | ||
|
@@ -164,7 +184,13 @@ def test_compress_decompress(fn): | |
|
||
@pytest.mark.parametrize( | ||
"lib,compression", | ||
[(None, None), ("zlib", "zlib"), ("lz4", "lz4"), ("zstandard", "zstd")], | ||
[ | ||
(None, None), | ||
("zlib", "zlib"), | ||
("lz4", "lz4"), | ||
("snappy", "snappy"), | ||
("zstandard", "zstd"), | ||
], | ||
) | ||
def test_maybe_compress_config_default(lib, compression): | ||
if lib: | ||
|
@@ -185,7 +211,6 @@ def test_maybe_compress_config_default(lib, compression): | |
|
||
def test_maybe_compress_sample(): | ||
np = pytest.importorskip("numpy") | ||
lz4 = pytest.importorskip("lz4") | ||
payload = np.random.randint(0, 255, size=10000).astype("u1").tobytes() | ||
fmt, compressed = maybe_compress(payload) | ||
assert fmt is None | ||
|
@@ -202,10 +227,8 @@ def test_large_bytes(): | |
assert len(frames[1]) < 1000 | ||
|
||
|
||
@pytest.mark.slow | ||
def test_large_messages(): | ||
np = pytest.importorskip("numpy") | ||
pytest.importorskip("lz4") | ||
if MEMORY_LIMIT < 8e9: | ||
pytest.skip("insufficient memory") | ||
|
||
|
@@ -249,7 +272,6 @@ def test_loads_deserialize_False(): | |
|
||
|
||
def test_loads_without_deserialization_avoids_compression(): | ||
pytest.importorskip("lz4") | ||
b = b"0" * 100000 | ||
|
||
msg = {"x": 1, "data": to_serialize(b)} | ||
|
@@ -313,10 +335,9 @@ def test_dumps_loads_Serialized(): | |
|
||
def test_maybe_compress_memoryviews(): | ||
np = pytest.importorskip("numpy") | ||
pytest.importorskip("lz4") | ||
x = np.arange(1000000, dtype="int64") | ||
compression, payload = maybe_compress(x.data) | ||
assert compression == "lz4" | ||
assert compression in {"lz4", "snappy", "zstd", "zlib"} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I would be sad if we used [inline code lost in extraction; from context, probably `zlib`]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, you're right. I misread the code; default compression is lz4 -> snappy -> None. |
||
assert len(payload) < x.nbytes * 0.75 | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm curious, why this change? If we didn't have lz4, snappy, or zstandard installed (all of which are optional, I think), then I'd expect this to fail.
The only compressor we have by default, I think, is zlib, and we don't compress with that by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, if you have snappy but not lz4, it will succeed.
zstandard does not install itself as a default compressor.
Amended the tests to reflect this.