From b79ba135b5bdfbc6193febdd624a0ee797210520 Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Wed, 9 Oct 2024 13:26:37 +0200 Subject: [PATCH 1/7] async file in multipart file upload --- httpx/_multipart.py | 31 ++++++++++++++++++++++++++- httpx/_types.py | 2 +- tests/test_multipart.py | 46 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index b4761af9b2..8ca25acdfc 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -5,6 +5,7 @@ import os import re import typing +from collections.abc import AsyncIterable from pathlib import Path from ._types import ( @@ -201,6 +202,8 @@ def render_headers(self) -> bytes: return self._headers def render_data(self) -> typing.Iterator[bytes]: + if isinstance(self.file, AsyncIterable): + raise TypeError("Invalid type for file. AsyncIterable is not supported.") if isinstance(self.file, (str, bytes)): yield to_bytes(self.file) return @@ -216,10 +219,24 @@ def render_data(self) -> typing.Iterator[bytes]: yield to_bytes(chunk) chunk = self.file.read(self.CHUNK_SIZE) + async def arender_data(self) -> typing.AsyncIterator[bytes]: + if not isinstance(self.file, AsyncIterable): + for chunk in self.render_data(): + yield chunk + return + + async for achunk in self.file: + yield to_bytes(achunk) + def render(self) -> typing.Iterator[bytes]: yield self.render_headers() yield from self.render_data() + async def arender(self) -> typing.AsyncIterator[bytes]: + yield self.render_headers() + async for chunk in self.arender_data(): + yield chunk + class MultipartStream(SyncByteStream, AsyncByteStream): """ @@ -262,6 +279,18 @@ def iter_chunks(self) -> typing.Iterator[bytes]: yield b"\r\n" yield b"--%s--\r\n" % self.boundary + async def aiter_chunks(self) -> typing.AsyncIterator[bytes]: + for field in self.fields: + yield b"--%s\r\n" % self.boundary + if isinstance(field, FileField): + async for chunk in field.arender(): + yield chunk + else: + for chunk in field.render(): + yield chunk + yield b"\r\n" + yield b"--%s--\r\n" % self.boundary + def get_content_length(self) -> int | None: """ Return the length of the multipart encoded content, or `None` if @@ -296,5 +325,5 @@ def __iter__(self) -> typing.Iterator[bytes]: yield chunk async def __aiter__(self) -> typing.AsyncIterator[bytes]: - for chunk in self.iter_chunks(): + async for chunk in self.aiter_chunks(): yield chunk diff --git a/httpx/_types.py b/httpx/_types.py index 704dfdffc8..7253a52205 100644 --- a/httpx/_types.py +++ b/httpx/_types.py @@ -71,7 +71,7 @@ RequestData = Mapping[str, Any] -FileContent = Union[IO[bytes], bytes, str] +FileContent = Union[IO[bytes], bytes, str, AsyncIterable[bytes], AsyncIterable[str]] FileTypes = Union[ # file (or bytes) FileContent, diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 764f85a253..421ee1dfd9 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -4,7 +4,9 @@ import tempfile import typing +import anyio import pytest +import trio import httpx @@ -41,6 +43,50 @@ def test_multipart(value, output): ) +@pytest.mark.parametrize(("value,output"), (("abc", b"abc"), (b"abc", b"abc"))) +async def test_async_multipart_streaming( + value, output, tmp_path, server, anyio_backend +): + data = {"text": value} + to_upload = tmp_path / "test.txt" + to_upload.write_bytes(b"") + opener: typing.Coroutine[ + typing.Any, typing.Any, typing.Union[anyio.AsyncFile[bytes], trio.AsyncBinaryIO] + ] + if anyio_backend == "trio": + opener = trio.open_file(to_upload, "rb") + else: + opener = anyio.open_file(to_upload, "rb") + url = server.url.copy_with(path="/echo_body") + async with await opener as fp: + files = {"file": fp} + async with httpx.AsyncClient() as client: + response = await client.post(url, data=data, files=files) + boundary = response.request.headers["Content-Type"].split("boundary=")[-1] + boundary_bytes = boundary.encode("ascii") + + assert response.status_code == 200 + assert response.content == b"".join( + [ + b"--" + boundary_bytes + b"\r\n", + b'Content-Disposition: form-data; name="text"\r\n', + b"\r\n", + b"abc\r\n", + b"--" + boundary_bytes + b"\r\n", + b'Content-Disposition: form-data; name="file";' + b' filename="test.txt"\r\n', + b"Content-Type: text/plain\r\n", + b"\r\n", + b"\r\n", + b"--" + boundary_bytes + b"--\r\n", + ] + ) + + with httpx.Client() as sync_client: + with pytest.raises(TypeError, match="AsyncIterable is not supported"): + sync_client.post(url, data=data, files=files) + + @pytest.mark.parametrize( "header", [ From 91a40336770f658d9e98400be490bd69c1b4742d Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Thu, 10 Oct 2024 20:32:56 +0200 Subject: [PATCH 2/7] add AsyncIterable[str] test --- tests/test_multipart.py | 51 ++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 421ee1dfd9..d43f42583d 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -43,35 +43,39 @@ def test_multipart(value, output): ) -@pytest.mark.parametrize(("value,output"), (("abc", b"abc"), (b"abc", b"abc"))) -async def test_async_multipart_streaming( - value, output, tmp_path, server, anyio_backend -): - data = {"text": value} +async def test_async_multipart_streaming(tmp_path, server, anyio_backend): to_upload = tmp_path / "test.txt" to_upload.write_bytes(b"") - opener: typing.Coroutine[ - typing.Any, typing.Any, typing.Union[anyio.AsyncFile[bytes], trio.AsyncBinaryIO] - ] + if typing.TYPE_CHECKING: + opener_t = typing.Coroutine[ # pragma: no cover + typing.Any, + typing.Any, + typing.Union[ + anyio.AsyncFile[bytes], + anyio.AsyncFile[str], + trio.AsyncBinaryIO, + trio.AsyncTextIO, + ], + ] + opener: opener_t + text_opener: opener_t if anyio_backend == "trio": opener = trio.open_file(to_upload, "rb") + text_opener = trio.open_file(to_upload, "rt") else: opener = anyio.open_file(to_upload, "rb") + text_opener = anyio.open_file(to_upload, "rt") url = server.url.copy_with(path="/echo_body") async with await opener as fp: files = {"file": fp} async with httpx.AsyncClient() as client: - response = await client.post(url, data=data, files=files) + response = await client.post(url, files=files) boundary = response.request.headers["Content-Type"].split("boundary=")[-1] boundary_bytes = boundary.encode("ascii") assert response.status_code == 200 assert response.content == b"".join( [ - b"--" + boundary_bytes + b"\r\n", - b'Content-Disposition: form-data; name="text"\r\n', - b"\r\n", - b"abc\r\n", b"--" + boundary_bytes + b"\r\n", b'Content-Disposition: form-data; name="file";' b' filename="test.txt"\r\n', @@ -84,7 +88,26 @@ async def test_async_multipart_streaming( with httpx.Client() as sync_client: with pytest.raises(TypeError, match="AsyncIterable is not supported"): - sync_client.post(url, data=data, files=files) + sync_client.post(url, files=files) + + async with await text_opener as fp: + files = {"file": fp} + async with httpx.AsyncClient() as client: + response = await client.post(url, files=files) + boundary = response.request.headers["Content-Type"].split("boundary=")[-1] + boundary_bytes = boundary.encode("ascii") + assert response.status_code == 200 + assert response.content == b"".join( + [ + b"--" + boundary_bytes + b"\r\n", + b'Content-Disposition: form-data; name="file";' + b' filename="test.txt"\r\n', + b"Content-Type: text/plain\r\n", + b"\r\n", + b"\r\n", + b"--" + boundary_bytes + b"--\r\n", + ] + ) @pytest.mark.parametrize( From 19a70478dcd486262ac0b58262436fe5deb306a3 Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Fri, 11 Oct 2024 13:59:36 +0200 Subject: [PATCH 3/7] only AsyncIterable[bytes] is allowed for multipart --- httpx/_multipart.py | 7 +++- httpx/_types.py | 2 +- tests/test_multipart.py | 81 +++++++++++++++-------------------------- 3 files changed, 36 insertions(+), 54 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 8ca25acdfc..94dc2dafd1 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -226,7 +226,12 @@ async def arender_data(self) -> typing.AsyncIterator[bytes]: return async for achunk in self.file: - yield to_bytes(achunk) + if not isinstance(achunk, bytes): + raise TypeError( + "Multipart file uploads must be opened in binary mode," + " not text mode." + ) + yield achunk def render(self) -> typing.Iterator[bytes]: yield self.render_headers() diff --git a/httpx/_types.py b/httpx/_types.py index 7253a52205..2f78d30839 100644 --- a/httpx/_types.py +++ b/httpx/_types.py @@ -71,7 +71,7 @@ RequestData = Mapping[str, Any] -FileContent = Union[IO[bytes], bytes, str, AsyncIterable[bytes], AsyncIterable[str]] +FileContent = Union[IO[bytes], bytes, str, AsyncIterable[bytes]] FileTypes = Union[ # file (or bytes) FileContent, diff --git a/tests/test_multipart.py b/tests/test_multipart.py index d43f42583d..5dd82a273d 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -46,68 +46,45 @@ def test_multipart(value, output): async def test_async_multipart_streaming(tmp_path, server, anyio_backend): to_upload = tmp_path / "test.txt" to_upload.write_bytes(b"") - if typing.TYPE_CHECKING: - opener_t = typing.Coroutine[ # pragma: no cover - typing.Any, - typing.Any, - typing.Union[ - anyio.AsyncFile[bytes], - anyio.AsyncFile[str], - trio.AsyncBinaryIO, - trio.AsyncTextIO, - ], - ] - opener: opener_t - text_opener: opener_t + opener: typing.Any + text_opener: typing.Any if anyio_backend == "trio": - opener = trio.open_file(to_upload, "rb") - text_opener = trio.open_file(to_upload, "rt") + opener = trio.open_file(to_upload, "b+r") + text_opener = trio.open_file(to_upload, "t+r") else: - opener = anyio.open_file(to_upload, "rb") - text_opener = anyio.open_file(to_upload, "rt") + opener = anyio.open_file(to_upload, "b+r") + text_opener = anyio.open_file(to_upload, "t+r") url = server.url.copy_with(path="/echo_body") - async with await opener as fp: + async with await opener as fp, httpx.AsyncClient() as client: files = {"file": fp} - async with httpx.AsyncClient() as client: - response = await client.post(url, files=files) - boundary = response.request.headers["Content-Type"].split("boundary=")[-1] - boundary_bytes = boundary.encode("ascii") - - assert response.status_code == 200 - assert response.content == b"".join( - [ - b"--" + boundary_bytes + b"\r\n", - b'Content-Disposition: form-data; name="file";' - b' filename="test.txt"\r\n', - b"Content-Type: text/plain\r\n", - b"\r\n", - b"\r\n", - b"--" + boundary_bytes + b"--\r\n", - ] - ) + response = await client.post(url, files=files) + boundary = response.request.headers["Content-Type"].split("boundary=")[-1] + boundary_bytes = boundary.encode("ascii") + + assert response.status_code == 200 + assert response.content == b"".join( + [ + b"--" + boundary_bytes + b"\r\n", + b'Content-Disposition: form-data; name="file";' + b' filename="test.txt"\r\n', + b"Content-Type: text/plain\r\n", + b"\r\n", + b"\r\n", + b"--" + boundary_bytes + b"--\r\n", + ] + ) with httpx.Client() as sync_client: with pytest.raises(TypeError, match="AsyncIterable is not supported"): sync_client.post(url, files=files) - async with await text_opener as fp: + async with await text_opener as fp, httpx.AsyncClient() as client: files = {"file": fp} - async with httpx.AsyncClient() as client: - response = await client.post(url, files=files) - boundary = response.request.headers["Content-Type"].split("boundary=")[-1] - boundary_bytes = boundary.encode("ascii") - assert response.status_code == 200 - assert response.content == b"".join( - [ - b"--" + boundary_bytes + b"\r\n", - b'Content-Disposition: form-data; name="file";' - b' filename="test.txt"\r\n', - b"Content-Type: text/plain\r\n", - b"\r\n", - b"\r\n", - b"--" + boundary_bytes + b"--\r\n", - ] - ) + with pytest.raises( + TypeError, + match="Multipart file uploads must be opened in binary mode", + ): + await client.post(url, files=files) @pytest.mark.parametrize( From ce2513a7fc63859f6ced114dd28b91cfd54c7f60 Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Wed, 20 Nov 2024 11:23:56 +0100 Subject: [PATCH 4/7] multipart: check only first chunk in async file --- httpx/_multipart.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 94dc2dafd1..333b43f923 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -225,12 +225,22 @@ async def arender_data(self) -> typing.AsyncIterator[bytes]: yield chunk return - async for achunk in self.file: - if not isinstance(achunk, bytes): - raise TypeError( - "Multipart file uploads must be opened in binary mode," - " not text mode." - ) + file_aiter = self.file.__aiter__() + + try: + achunk = await file_aiter.__anext__() + except StopIteration: + return + + if not isinstance(achunk, bytes): + raise TypeError( + "Multipart file uploads must be opened in binary mode," + " not text mode." + ) + + yield achunk + + async for achunk in file_aiter: yield achunk def render(self) -> typing.Iterator[bytes]: From a2d4b2babad90f0d8cd668253fad9ddab951eada Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Wed, 20 Nov 2024 11:38:38 +0100 Subject: [PATCH 5/7] multipart: fix the coverage for empty file --- httpx/_multipart.py | 2 +- tests/test_multipart.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 333b43f923..412f4ca156 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -229,7 +229,7 @@ async def arender_data(self) -> typing.AsyncIterator[bytes]: try: achunk = await file_aiter.__anext__() - except StopIteration: + except StopAsyncIteration: return if not isinstance(achunk, bytes): diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 5dd82a273d..993a7832bf 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -46,14 +46,19 @@ def test_multipart(value, output): async def test_async_multipart_streaming(tmp_path, server, anyio_backend): to_upload = tmp_path / "test.txt" to_upload.write_bytes(b"") + empty_file = tmp_path / "empty.txt" + empty_file.write_bytes(b"") opener: typing.Any text_opener: typing.Any + empty_opener: typing.Any if anyio_backend == "trio": opener = trio.open_file(to_upload, "b+r") text_opener = trio.open_file(to_upload, "t+r") + empty_opener = trio.open_file(empty_file, "b+r") else: opener = anyio.open_file(to_upload, "b+r") text_opener = anyio.open_file(to_upload, "t+r") + empty_opener = anyio.open_file(empty_file, "b+r") url = server.url.copy_with(path="/echo_body") async with await opener as fp, httpx.AsyncClient() as client: files = {"file": fp} @@ -86,6 +91,10 @@ async def test_async_multipart_streaming(tmp_path, server, anyio_backend): ): await client.post(url, files=files) + async with await empty_opener as fp, httpx.AsyncClient() as client: + files = {"file": fp} + await client.post(url, files=files) + @pytest.mark.parametrize( "header", From 15de00ca20c329f360e3438108df09ff046886c3 Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Wed, 20 Nov 2024 13:36:28 +0100 Subject: [PATCH 6/7] multipart: multiline file for multiple chunks --- tests/test_multipart.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 993a7832bf..d6283b9c72 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -44,8 +44,9 @@ def test_multipart(value, output): async def test_async_multipart_streaming(tmp_path, server, anyio_backend): + content = b"\n".join([b"a" * io.DEFAULT_BUFFER_SIZE] * 3) to_upload = tmp_path / "test.txt" - to_upload.write_bytes(b"") + to_upload.write_bytes(content) empty_file = tmp_path / "empty.txt" empty_file.write_bytes(b"") opener: typing.Any @@ -74,7 +75,8 @@ async def test_async_multipart_streaming(tmp_path, server, anyio_backend): b' filename="test.txt"\r\n', b"Content-Type: text/plain\r\n", b"\r\n", - b"\r\n", + content, + b"\r\n", b"--" + boundary_bytes + b"--\r\n", ] ) From a6ea8b4589f8ccdc36d9da5f5a03bd679f10c84d Mon Sep 17 00:00:00 2001 From: Vitali Tsimoshka Date: Sun, 1 Dec 2024 16:15:51 +0100 Subject: [PATCH 7/7] multipart: aclosing support --- httpx/_compat.py | 22 ++++++++++++++++++++++ httpx/_multipart.py | 22 +++++++++++++--------- pyproject.toml | 2 +- 3 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 httpx/_compat.py diff --git a/httpx/_compat.py b/httpx/_compat.py new file mode 100644 index 0000000000..d310afbd7d --- /dev/null +++ b/httpx/_compat.py @@ -0,0 +1,22 @@ +import sys + +if sys.version_info >= (3, 10): + from contextlib import aclosing +else: + from contextlib import asynccontextmanager + from typing import Any, AsyncIterator, Awaitable, Protocol, TypeVar + + class _SupportsAclose(Protocol): + def aclose(self) -> Awaitable[object]: ... + + _SupportsAcloseT = TypeVar("_SupportsAcloseT", bound=_SupportsAclose) + + @asynccontextmanager + async def aclosing(thing: _SupportsAcloseT) -> AsyncIterator[Any]: + try: + yield thing + finally: + await thing.aclose() + + +__all__ = ["aclosing"] diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 412f4ca156..3da9e9c409 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -8,6 +8,7 @@ from collections.abc import AsyncIterable from pathlib import Path +from ._compat import aclosing from ._types import ( AsyncByteStream, FileContent, @@ -219,7 +220,7 @@ def render_data(self) -> typing.Iterator[bytes]: yield to_bytes(chunk) chunk = self.file.read(self.CHUNK_SIZE) - async def arender_data(self) -> typing.AsyncIterator[bytes]: + async def arender_data(self) -> typing.AsyncGenerator[bytes]: if not isinstance(self.file, AsyncIterable): for chunk in self.render_data(): yield chunk @@ -247,10 +248,11 @@ def render(self) -> typing.Iterator[bytes]: yield self.render_headers() yield from self.render_data() - async def arender(self) -> typing.AsyncIterator[bytes]: + async def arender(self) -> typing.AsyncGenerator[bytes]: yield self.render_headers() - async for chunk in self.arender_data(): - yield chunk + async with aclosing(self.arender_data()) as data: + async for chunk in data: + yield chunk class MultipartStream(SyncByteStream, AsyncByteStream): @@ -294,12 +296,13 @@ def iter_chunks(self) -> typing.Iterator[bytes]: yield b"\r\n" yield b"--%s--\r\n" % self.boundary - async def aiter_chunks(self) -> typing.AsyncIterator[bytes]: + async def aiter_chunks(self) -> typing.AsyncGenerator[bytes]: for field in self.fields: yield b"--%s\r\n" % self.boundary if isinstance(field, FileField): - async for chunk in field.arender(): - yield chunk + async with aclosing(field.arender()) as data: + async for chunk in data: + yield chunk else: for chunk in field.render(): yield chunk @@ -340,5 +343,6 @@ def __iter__(self) -> typing.Iterator[bytes]: yield chunk async def __aiter__(self) -> typing.AsyncIterator[bytes]: - async for chunk in self.aiter_chunks(): - yield chunk + async with aclosing(self.aiter_chunks()) as data: + async for chunk in data: + yield chunk diff --git a/pyproject.toml b/pyproject.toml index 9e67191135..2b83de5f12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,5 +128,5 @@ markers = [ ] [tool.coverage.run] -omit = ["venv/*"] +omit = ["venv/*", "httpx/_compat.py"] include = ["httpx/*", "tests/*"]