Skip to content

Commit 968d5e2

Browse files
committed
Fix ZLibDecompressor dropping data past the first gzip member
When a response body contains concatenated gzip members (RFC 1952 §2.2), zlib sets eof and moves the remaining bytes to unused_data once the first member is fully consumed. decompress_sync() was not checking unused_data, so every member after the first was silently discarded.

Apply the same while-eof-and-unused_data loop that ZSTDDecompressor already uses for multi-frame zstd streams. Add unused_data to ZLibDecompressObjProtocol so the attribute is typed. Include three tests mirroring the existing ZSTD multi-frame test suite.

Fixes #7157

Signed-off-by: Ashutosh Kumar Singh <ahutoshhjp1067@gmail.com>
1 parent a0a03ca commit 968d5e2

4 files changed

Lines changed: 53 additions & 0 deletions

File tree

CHANGES/7157.bugfix.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fixed :class:`~aiohttp.ZLibDecompressor` silently dropping data past the first
2+
member when decompressing concatenated gzip/deflate streams. Each subsequent
3+
member is now handed to a fresh decompressor, matching the behaviour already
4+
implemented for ZSTD multi-frame streams.
5+
6+
-- by :user:`Ashutosh-177`

CONTRIBUTORS.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ Andrew Top
4949
Andrew Zhou
5050
Andrii Soldatenko
5151
Anes Abismail
52+
Ashutosh Kumar Singh
5253
Antoine Pietri
5354
Anton Kasyanov
5455
Anton Zhdan-Pushkin

aiohttp/compression_utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ def eof(self) -> bool: ...
5555
@property
5656
def unconsumed_tail(self) -> bytes: ...
5757

58+
@property
59+
def unused_data(self) -> bytes: ...
60+
5861

5962
class ZLibBackendProtocol(Protocol):
6063
MAX_WBITS: int
@@ -284,6 +287,24 @@ def decompress_sync(
284287
)
285288
# Only way to know that isal has no further data is checking we get no output
286289
self._last_empty = result == b""
290+
291+
# Handle concatenated gzip/deflate streams (multi-member).
292+
# After a member ends, unused_data holds the start of the next member.
293+
# Create a fresh decompressor for each subsequent member.
294+
while self._decompressor.eof and self._decompressor.unused_data:
295+
unused = self._decompressor.unused_data
296+
self._decompressor = self._zlib_backend.decompressobj(wbits=self._mode)
297+
remaining = (
298+
max_length - len(result)
299+
if max_length != ZLIB_MAX_LENGTH_UNLIMITED
300+
else ZLIB_MAX_LENGTH_UNLIMITED
301+
)
302+
if max_length != ZLIB_MAX_LENGTH_UNLIMITED and remaining <= 0:
303+
break
304+
chunk = self._decompressor.decompress(unused, remaining)
305+
self._last_empty = chunk == b""
306+
result += chunk
307+
287308
return result
288309

289310
def flush(self, length: int = 0) -> bytes:

tests/test_compression_utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Tests for compression utils."""
22

3+
import gzip
34
import sys
45

56
import pytest
@@ -87,3 +88,27 @@ def test_zstd_multi_frame_max_length_exhausted_preserves_unused_data() -> None:
8788
assert result1 == b"AAAA"
8889
result2 = d.decompress_sync(frame3)
8990
assert result2 == b"BBBBCCCC"
91+
92+
93+
def test_zlib_gzip_multi_member_unlimited() -> None:
94+
d = ZLibDecompressor(encoding="gzip")
95+
member1 = gzip.compress(b"AAAA")
96+
member2 = gzip.compress(b"BBBB")
97+
result = d.decompress_sync(member1 + member2)
98+
assert result == b"AAAABBBB"
99+
100+
101+
def test_zlib_gzip_multi_member_max_length_partial() -> None:
102+
d = ZLibDecompressor(encoding="gzip")
103+
member1 = gzip.compress(b"AAAA")
104+
member2 = gzip.compress(b"BBBB")
105+
result = d.decompress_sync(member1 + member2, max_length=6)
106+
assert result == b"AAAABB"
107+
108+
109+
def test_zlib_gzip_multi_member_max_length_exhausted() -> None:
110+
d = ZLibDecompressor(encoding="gzip")
111+
member1 = gzip.compress(b"AAAA")
112+
member2 = gzip.compress(b"BBBB")
113+
result = d.decompress_sync(member1 + member2, max_length=4)
114+
assert result == b"AAAA"

0 commit comments

Comments
 (0)