Skip to content

Commit 6f0ae29

Browse files
Change .decode_async() to .decode_iter() (#12028) (#12034)
(cherry picked from commit 4bb9e6e)
1 parent b7b17ff commit 6f0ae29

File tree

5 files changed

+48
-35
lines changed

5 files changed

+48
-35
lines changed

CHANGES/11898.bugfix.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
Restored :py:meth:`~aiohttp.BodyPartReader.decode` as a synchronous method
22
for backward compatibility. The method was inadvertently changed to async
33
in 3.13.3 as part of the decompression bomb security fix. A new
4-
:py:meth:`~aiohttp.BodyPartReader.decode_async` method is now available
5-
for non-blocking decompression of large payloads. Internal aiohttp code
6-
uses the async variant to maintain security protections -- by :user:`bdraco`.
4+
:py:meth:`~aiohttp.BodyPartReader.decode_iter` method is now available
5+
for non-blocking decompression of large payloads using an async generator.
6+
Internal aiohttp code uses the async variant to maintain security protections.
7+
8+
Changed multipart processing chunk sizes from 64 KiB to 256 KiB, to better
9+
match aiohttp internals
10+
-- by :user:`bdraco` and :user:`Dreamsorcerer`.

aiohttp/multipart.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import uuid
77
import warnings
88
from collections import deque
9-
from collections.abc import Iterator, Mapping, Sequence
9+
from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
1010
from types import TracebackType
1111
from typing import TYPE_CHECKING, Any, Union, cast
1212
from urllib.parse import parse_qsl, unquote, urlencode
@@ -313,7 +313,10 @@ async def read(self, *, decode: bool = False) -> bytes:
313313
while not self._at_eof:
314314
data.extend(await self.read_chunk(self.chunk_size))
315315
if decode:
316-
return await self.decode_async(data)
316+
decoded_data = bytearray()
317+
async for d in self.decode_iter(data):
318+
decoded_data.extend(d)
319+
return decoded_data
317320
return data
318321

319322
async def read_chunk(self, size: int = chunk_size) -> bytes:
@@ -508,16 +511,16 @@ def decode(self, data: bytes) -> bytes:
508511
Decodes data according to the specified Content-Encoding
509512
or Content-Transfer-Encoding headers value.
510513
511-
Note: For large payloads, consider using decode_async() instead
514+
Note: For large payloads, consider using decode_iter() instead
512515
to avoid blocking the event loop during decompression.
513516
"""
514517
data = self._apply_content_transfer_decoding(data)
515518
if self._needs_content_decoding():
516519
return self._decode_content(data)
517520
return data
518521

519-
async def decode_async(self, data: bytes) -> bytes:
520-
"""Decodes data asynchronously.
522+
async def decode_iter(self, data: bytes) -> AsyncIterator[bytes]:
523+
"""Async generator that yields decoded data chunks.
521524
522525
Decodes data according to the specified Content-Encoding
523526
or Content-Transfer-Encoding headers value.
@@ -527,8 +530,10 @@ async def decode_async(self, data: bytes) -> bytes:
527530
"""
528531
data = self._apply_content_transfer_decoding(data)
529532
if self._needs_content_decoding():
530-
return await self._decode_content_async(data)
531-
return data
533+
async for d in self._decode_content_async(data):
534+
yield d
535+
else:
536+
yield data
532537

533538
def _decode_content(self, data: bytes) -> bytes:
534539
encoding = self.headers.get(CONTENT_ENCODING, "").lower()
@@ -542,17 +547,18 @@ def _decode_content(self, data: bytes) -> bytes:
542547

543548
raise RuntimeError(f"unknown content encoding: {encoding}")
544549

545-
async def _decode_content_async(self, data: bytes) -> bytes:
550+
async def _decode_content_async(self, data: bytes) -> AsyncIterator[bytes]:
546551
encoding = self.headers.get(CONTENT_ENCODING, "").lower()
547552
if encoding == "identity":
548-
return data
549-
if encoding in {"deflate", "gzip"}:
550-
return await ZLibDecompressor(
553+
yield data
554+
elif encoding in {"deflate", "gzip"}:
555+
d = ZLibDecompressor(
551556
encoding=encoding,
552557
suppress_deflate_header=True,
553-
).decompress(data, max_length=self._max_decompress_size)
554-
555-
raise RuntimeError(f"unknown content encoding: {encoding}")
558+
)
559+
yield await d.decompress(data, max_length=self._max_decompress_size)
560+
else:
561+
raise RuntimeError(f"unknown content encoding: {encoding}")
556562

557563
def _decode_content_transfer(self, data: bytes) -> bytes:
558564
encoding = self.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
@@ -623,10 +629,9 @@ async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> byt
623629

624630
async def write(self, writer: AbstractStreamWriter) -> None:
625631
field = self._value
626-
chunk = await field.read_chunk(size=2**16)
627-
while chunk:
628-
await writer.write(await field.decode_async(chunk))
629-
chunk = await field.read_chunk(size=2**16)
632+
while chunk := await field.read_chunk(size=2**18):
633+
async for d in field.decode_iter(chunk):
634+
await writer.write(d)
630635

631636

632637
class MultipartReader:

aiohttp/web_request.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -747,17 +747,17 @@ async def post(self) -> "MultiDictProxy[str | bytes | FileField]":
747747
tmp = await self._loop.run_in_executor(
748748
None, tempfile.TemporaryFile
749749
)
750-
chunk = await field.read_chunk(size=2**16)
751-
while chunk:
752-
chunk = await field.decode_async(chunk)
753-
await self._loop.run_in_executor(None, tmp.write, chunk)
754-
size += len(chunk)
755-
if 0 < max_size < size:
756-
await self._loop.run_in_executor(None, tmp.close)
757-
raise HTTPRequestEntityTooLarge(
758-
max_size=max_size, actual_size=size
750+
while chunk := await field.read_chunk(size=2**18):
751+
async for decoded_chunk in field.decode_iter(chunk):
752+
await self._loop.run_in_executor(
753+
None, tmp.write, decoded_chunk
759754
)
760-
chunk = await field.read_chunk(size=2**16)
755+
size += len(decoded_chunk)
756+
if 0 < max_size < size:
757+
await self._loop.run_in_executor(None, tmp.close)
758+
raise HTTPRequestEntityTooLarge(
759+
max_size=max_size, actual_size=size
760+
)
761761
await self._loop.run_in_executor(None, tmp.seek, 0)
762762

763763
if field_ct is None:

docs/multipart_reference.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,18 @@ Multipart reference
119119

120120
.. note::
121121

122-
For large payloads, consider using :meth:`decode_async` instead
122+
For large payloads, consider using :meth:`decode_iter` instead
123123
to avoid blocking the event loop during decompression.
124124

125-
.. method:: decode_async(data)
125+
.. method:: decode_iter(data)
126126
:async:
127127

128128
Decodes data asynchronously according to the specified ``Content-Encoding``
129129
or ``Content-Transfer-Encoding`` headers value.
130130

131+
This is an async iterator and will return decoded data in chunks. This
132+
can be used to avoid loading large payloads into memory.
133+
131134
This method offloads decompression to an executor for large payloads
132135
to avoid blocking the event loop.
133136

tests/test_multipart.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,14 +401,15 @@ async def test_decode_with_content_transfer_encoding_base64(self) -> None:
401401
result += obj.decode(chunk)
402402
assert b"Time to Relax!" == result
403403

404-
async def test_decode_async_with_content_transfer_encoding_base64(self) -> None:
404+
async def test_decode_iter_with_content_transfer_encoding_base64(self) -> None:
405405
h = CIMultiDictProxy(CIMultiDict({CONTENT_TRANSFER_ENCODING: "base64"}))
406406
with Stream(b"VG\r\r\nltZSB0byBSZ\r\nWxheCE=\r\n--:--") as stream:
407407
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
408408
result = b""
409409
while not obj.at_eof():
410410
chunk = await obj.read_chunk(size=6)
411-
result += await obj.decode_async(chunk)
411+
async for decoded_chunk in obj.decode_iter(chunk):
412+
result += decoded_chunk
412413
assert b"Time to Relax!" == result
413414

414415
async def test_decode_with_content_encoding_deflate(self) -> None:

0 commit comments

Comments
 (0)