From 8d270f90b95ceea455340be733cae9890462fd0d Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 09:54:09 +0000 Subject: [PATCH 01/22] fix: guard JSON parsing, avoid header mutation, handle timeouts --- authcaptureproxy/auth_capture_proxy.py | 28 +++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 7fca362..05c0ccf 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -3,6 +3,7 @@ import asyncio import logging import re +from json import JSONDecodeError from functools import partial from ssl import SSLContext, create_default_context from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union @@ -382,8 +383,15 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - else: data = convert_multidict_to_dict(await request.post()) json_data = None - if request.has_body: - json_data = await request.json() + # Only attempt JSON decoding for JSON requests; avoid raising for form posts. + if request.has_body and ( + request.content_type == "application/json" + or request.content_type.endswith("+json") + ): + try: + json_data = await request.json() + except (JSONDecodeError, ValueError): + json_data = None if data: self.data.update(data) _LOGGER.debug("Storing data %s", data) @@ -426,33 +434,35 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - if skip_auto_headers: _LOGGER.debug("Discovered skip_auto_headers %s", skip_auto_headers) headers.pop(SKIP_AUTO_HEADERS) + # Avoid accidental header mutation across branches/calls + req_headers: dict[str, Any] = dict(headers) _LOGGER.debug( "Attempting %s to %s\nheaders: %s \ncookies: %s", method, site, - headers, + req_headers, self.session.cookies.jar, ) try: if mpwriter: resp = await getattr(self.session, method)( - site, data=mpwriter, headers=headers, follow_redirects=True + site, data=mpwriter, headers=req_headers, follow_redirects=True ) elif data: resp = await getattr(self.session, method)( - site, data=data, headers=headers, follow_redirects=True + site, data=data, headers=req_headers, follow_redirects=True ) elif json_data: for item in ["Host", "Origin", "User-Agent", "dnt", "Accept-Encoding"]: # remove proxy headers - if headers.get(item): - headers.pop(item) + if req_headers.get(item): + req_headers.pop(item) resp = await getattr(self.session, method)( - site, json=json_data, headers=headers, follow_redirects=True + site, json=json_data, headers=req_headers, follow_redirects=True ) else: resp = await getattr(self.session, method)( - site, headers=headers, follow_redirects=True + site, headers=req_headers, follow_redirects=True ) except ClientConnectionError as ex: return await self._build_response( From 1a2fd760f43f61b52a02d2ea21e4a49924f61fb5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 02:55:32 -0800 Subject: [PATCH 02/22] Improve error handling in auth_capture_proxy.py Refactor error handling for connection issues in auth_capture_proxy.py to provide more specific error messages. --- authcaptureproxy/auth_capture_proxy.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 05c0ccf..74b51fc 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -464,14 +464,18 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - resp = await getattr(self.session, method)( site, headers=req_headers, follow_redirects=True ) - except ClientConnectionError as ex: + except ClientConnectionError as ex: + return await self._build_response( + text=f"Error connecting to {site}; please retry: {ex}" + ) + except httpx.TimeoutException as ex: return await self._build_response( - text=f"Error connecting to {site}; please retry: {ex}" - ) - except TooManyRedirects as ex: - return await self._build_response( - text=f"Error connecting to {site}; too may redirects: {ex}" + text=f"Error connecting to {site}; request timed out: {ex}" ) + except TooManyRedirects as ex: + return await self._build_response( + text=f"Error connecting to {site}; too may redirects: {ex}" + ) if resp is None: return await self._build_response(text=f"Error connecting to {site}; please retry") self.last_resp = resp From 95277d580e5bc80c9418791b6da70fbfb99cb3e1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 03:08:34 -0800 Subject: [PATCH 03/22] Update auth_capture_proxy.py with new implementation. --- authcaptureproxy/auth_capture_proxy.py | 46 +++++++++++++------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 74b51fc..ce3eb86 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -10,10 +10,8 @@ import httpx from aiohttp import ( - ClientConnectionError, MultipartReader, MultipartWriter, - TooManyRedirects, hdrs, web, ) @@ -48,8 +46,7 @@ class AuthCaptureProxy: This class relies on tests to be provided to indicate the proxy has completed. At proxy completion all data can be found in self.session, self.data, and self.query. """ - def __init__( - self, + def __init__(self, proxy_url: URL, host_url: URL, session: Optional[httpx.AsyncClient] = None, @@ -59,7 +56,7 @@ def __init__( """Initialize proxy object. Args: - proxy_url (URL): url for proxy location. e.g., http://192.168.1.1/. If there is any path, the path is considered part of the base url. If no explicit port is specified, a random port will be generated. If https is passed in, ssl_context must be provided at start_proxy() or the url will be downgraded to http. + proxy_url (URL): url for proxy location. e.g., http://192.168.1.1/. If there is any path, the path is considered part of the base url. If no explicit port is specified, a random port w[...] host_url (URL): original url for login, e.g., http://amazon.com session (httpx.AsyncClient): httpx client to make queries. Optional session_factory (lambda: httpx.AsyncClient): factory to create the aforementioned httpx client if having one fixed session is insufficient. @@ -120,7 +117,7 @@ def port(self) -> int: def tests(self) -> Dict[Text, Callable]: """Return tests setting. - :setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary of post variables, and a dictioary of query variables and returns a URL or string. See :mod:`authcaptureproxy.examples.testers` for examples. + :setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary o[...] """ return self._tests @@ -139,7 +136,7 @@ def tests(self, value: Dict[Text, Callable]) -> None: def modifiers(self) -> Dict[Text, Union[Callable, Dict[Text, Callable]]]: """Return modifiers setting. - :setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key shoud be a MIME type and the value should be a dictionary of modifiers for that MIME type where the key should be the name of the modifier and the value should be a function or couroutine that takes a string and returns a modified string. If parameters are necessary, functools.partial should be used. See :mod:`authcaptureproxy.examples.modifiers` for examples. + :setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key shoud be a MIME type and the value should be a dictionary of modifiers for that MIME type where[...] """ return self._modifiers @@ -277,7 +274,7 @@ async def _build_response( async def all_handler(self, request: web.Request, **kwargs) -> web.Response: """Handle all requests. - This handler will exit on succesful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_handler_active. + This handler will exit on succesful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_ha[...] Args request (web.Request): The request to process @@ -317,7 +314,7 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - break if isinstance(part, MultipartReader): await _process_multipart(part, writer) - elif part.headers.get("hdrs.CONTENT_TYPE"): + elif hdrs.CONTENT_TYPE in part.headers: if part.headers[hdrs.CONTENT_TYPE] == "application/json": part_data: Optional[ Union[Text, Dict[Text, Any], List[Tuple[Text, Text]], bytes] @@ -399,7 +396,7 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - self.data.update(json_data) _LOGGER.debug("Storing json %s", json_data) if URL(str(request.url)).path == re.sub( - r"/+", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/stop").path + r"/ +", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/stop").path ): self.all_handler_active = False if self.active: @@ -407,7 +404,7 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - return await self._build_response(text="Proxy stopped.") elif ( URL(str(request.url)).path - == re.sub(r"/+", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path) + == re.sub(r"/ +", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path) and self.last_resp and isinstance(self.last_resp, httpx.Response) ): @@ -418,7 +415,7 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - if URL(str(request.url)).path in [ self._proxy_url.path, re.sub( - r"/+", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path + r"/ +", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path ), ]: # either base path or resume without anything to resume @@ -464,18 +461,22 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - resp = await getattr(self.session, method)( site, headers=req_headers, follow_redirects=True ) - except ClientConnectionError as ex: - return await self._build_response( - text=f"Error connecting to {site}; please retry: {ex}" - ) + except httpx.ConnectError as ex: + return await self._build_response( + text=f"Error connecting to {site}; please retry: {ex}" + ) except httpx.TimeoutException as ex: return await self._build_response( text=f"Error connecting to {site}; request timed out: {ex}" ) - except TooManyRedirects as ex: - return await self._build_response( - text=f"Error connecting to {site}; too may redirects: {ex}" - ) + except httpx.TooManyRedirects as ex: + return await self._build_response( + text=f"Error connecting to {site}; too many redirects: {ex}" + ) + except httpx.HTTPError as ex: + return await self._build_response( + text=f"Error connecting to {site}: {ex}" + ) if resp is None: return await self._build_response(text=f"Error connecting to {site}; please retry") self.last_resp = resp @@ -613,8 +614,7 @@ def _swap_proxy_and_host(self, text: Text, domain_only: bool = False) -> Text: """ host_string: Text = str(self._host_url.with_path("/")) proxy_string: Text = str( - self.access_url() if not domain_only else self.access_url().with_path("/") - ) + self.access_url() if not domain_only else self.access_url().with_path("/")) if str(self.access_url().with_path("/")).replace("https", "http") in text: _LOGGER.debug( "Replacing %s with %s", @@ -744,4 +744,4 @@ def check_redirects(self) -> None: item.url.host, resp.url.host, ) - self._host_url = self._host_url.with_host(resp.url.host) + self._host_url = self._host_url.with_host(resp.url.host) \ No newline at end of file From b66fd37f6b6a87d28d9b8aa3df2c005118b1b028 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 03:33:31 -0800 Subject: [PATCH 04/22] Apply proposed fixes - Fix regex pattern for path normalization: r"/ +" should be r"/+". - Add an instance variable to hold background tasks. --- authcaptureproxy/auth_capture_proxy.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index ce3eb86..ccac6b3 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -92,6 +92,7 @@ def __init__(self, self.redirect_filters: Dict[Text, List[Text]] = { "url": [] } # dictionary of lists of regex strings to filter against + self._background_tasks: set[asyncio.Task] = set() @property def active(self) -> bool: @@ -396,15 +397,17 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - self.data.update(json_data) _LOGGER.debug("Storing json %s", json_data) if URL(str(request.url)).path == re.sub( - r"/ +", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/stop").path + r"/+", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/stop").path ): self.all_handler_active = False if self.active: - asyncio.create_task(self.stop_proxy(3)) + task = asyncio.create_task(self.stop_proxy(3)) + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) return await self._build_response(text="Proxy stopped.") elif ( URL(str(request.url)).path - == re.sub(r"/ +", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path) + == re.sub(r"/+", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path) and self.last_resp and isinstance(self.last_resp, httpx.Response) ): @@ -415,7 +418,7 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - if URL(str(request.url)).path in [ self._proxy_url.path, re.sub( - r"/ +", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path + r"/+", "/", self._proxy_url.with_path(f"{self._proxy_url.path}/resume").path ), ]: # either base path or resume without anything to resume @@ -744,4 +747,4 @@ def check_redirects(self) -> None: item.url.host, resp.url.host, ) - self._host_url = self._host_url.with_host(resp.url.host) \ No newline at end of file + self._host_url = self._host_url.with_host(resp.url.host) From 4f1d27fb696286fbdeca7b07bc7b493265ca4f7d Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 04:40:48 -0800 Subject: [PATCH 05/22] Use a more forgiving timeout for proxying browser-driven auth traffic --- authcaptureproxy/auth_capture_proxy.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index ccac6b3..8b486a5 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -3,6 +3,7 @@ import asyncio import logging import re +from datetime import timedelta from json import JSONDecodeError from functools import partial from ssl import SSLContext, create_default_context @@ -37,6 +38,10 @@ # Pre-configure SSL context ssl_context = create_default_context() +# Amazon login / verify flows can be slow; httpx defaults are often too short (~5s). +# Use a more forgiving timeout for proxying browser-driven auth traffic. +DEFAULT_HTTPX_TIMEOUT = httpx.Timeout(60.0) + _LOGGER = logging.getLogger(__name__) @@ -64,8 +69,10 @@ def __init__(self, """ self._preserve_headers = preserve_headers self.session_factory: Callable[[], httpx.AsyncClient] = session_factory or ( - lambda: httpx.AsyncClient(verify=ssl_context) - ) + lambda: httpx.AsyncClient( + verify=ssl_context, + timeout=DEFAULT_HTTPX_TIMEOUT, + ) self.session: httpx.AsyncClient = session if session else self.session_factory() self._proxy_url: URL = proxy_url self._host_url: URL = host_url From 4711a883c847958aebc81733faf2944f6ad01e82 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 04:47:37 -0800 Subject: [PATCH 06/22] Remove unused import: from datetime import timedelta --- authcaptureproxy/auth_capture_proxy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 8b486a5..2527249 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -3,7 +3,6 @@ import asyncio import logging import re -from datetime import timedelta from json import JSONDecodeError from functools import partial from ssl import SSLContext, create_default_context From 905c4f0c9cddac0324ed5f6c8cbb8f8c2208d161 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 29 Jan 2026 04:56:40 -0800 Subject: [PATCH 07/22] Syntax error --- authcaptureproxy/auth_capture_proxy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 2527249..7d5e471 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -72,6 +72,7 @@ def __init__(self, verify=ssl_context, timeout=DEFAULT_HTTPX_TIMEOUT, ) + ) self.session: httpx.AsyncClient = session if session else self.session_factory() self._proxy_url: URL = proxy_url self._host_url: URL = host_url From 20ae1ba00ec6b083c10b3648a7967c674842dd85 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 00:19:53 -0800 Subject: [PATCH 08/22] Remove unused DEFAULT_HTTPX_TIMEOUT --- authcaptureproxy/auth_capture_proxy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 0ed2ed5..635c0bb 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -37,10 +37,6 @@ # Pre-configure SSL context ssl_context = create_default_context() -# Amazon login / verify flows can be slow; httpx defaults are often too short (~5s). -# Use a more forgiving timeout for proxying browser-driven auth traffic. -DEFAULT_HTTPX_TIMEOUT = httpx.Timeout(60.0) - _LOGGER = logging.getLogger(__name__) From 253c5dc0ef477c9a1ea477783cdcc60e0469a1ea Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 01:57:59 -0800 Subject: [PATCH 09/22] Update auth_capture_proxy.py (3 changes) - 59: Fixed "Args: proxy_url (URL):" truncation generated by AI. - 326-336: Normalize multipart part content-type before comparison. - 475-504: Removed duplicate `TimeoutException` handler (line 491 is unreachable). --- authcaptureproxy/auth_capture_proxy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 635c0bb..e3f728b 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -56,7 +56,10 @@ def __init__(self, """Initialize proxy object. Args: - proxy_url (URL): url for proxy location. e.g., http://192.168.1.1/. If there is any path, the path is considered part of the base url. If no explicit port is specified, a random port w[...] + proxy_url (URL): url for proxy location. e.g., http://192.168.1.1/. + If there is any path, the path is considered part of the base url. + If no explicit port is specified, a random port will be generated. + If https is passed in, ssl_context must be provided at start_proxy() or the url will be downgraded to http. host_url (URL): original url for login, e.g., http://amazon.com session (httpx.AsyncClient): httpx client to make queries. Optional session_factory (lambda: httpx.AsyncClient): factory to create the aforementioned httpx client if having one fixed session is insufficient. @@ -324,7 +327,8 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - if isinstance(part, MultipartReader): await _process_multipart(part, writer) elif hdrs.CONTENT_TYPE in part.headers: - if part.headers[hdrs.CONTENT_TYPE] == "application/json": + content_type = part.headers.get(hdrs.CONTENT_TYPE, "") + if content_type.split(";", 1)[0].strip() == "application/json": part_data: Optional[ Union[Text, Dict[Text, Any], List[Tuple[Text, Text]], bytes] ] = await part.json() @@ -476,10 +480,6 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - return await self._build_response( text=f"Error connecting to {site}; please retry: {ex}" ) - except httpx.TimeoutException as ex: - return await self._build_response( - text=f"Error connecting to {site}; request timed out: {ex}" - ) except httpx.TooManyRedirects as ex: return await self._build_response( text=f"Error connecting to {site}; too many redirects: {ex}" From 512f15af0baa46ba7f56707506a2e6262a203e61 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 02:11:37 -0800 Subject: [PATCH 10/22] Implemented nitpick suggested fix for consistency 329-344: Inconsistent content-type normalization --- authcaptureproxy/auth_capture_proxy.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index e3f728b..8147cfa 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -328,15 +328,16 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - await _process_multipart(part, writer) elif hdrs.CONTENT_TYPE in part.headers: content_type = part.headers.get(hdrs.CONTENT_TYPE, "") - if content_type.split(";", 1)[0].strip() == "application/json": + mime_type = content_type.split(";", 1)[0].strip() + if mime_type == "application/json": part_data: Optional[ Union[Text, Dict[Text, Any], List[Tuple[Text, Text]], bytes] ] = await part.json() writer.append_json(part_data) - elif part.headers[hdrs.CONTENT_TYPE].startswith("text"): + elif mime_type.startswith("text"): part_data = await part.text() writer.append(part_data) - elif part.headers[hdrs.CONTENT_TYPE] == "application/www-urlform-encode": + elif mime_type == "application/x-www-form-urlencoded": part_data = await part.form() writer.append_form(part_data) else: From 76f5f3bb68863ee35425de58ff26a00b46a55504 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 02:19:15 -0800 Subject: [PATCH 11/22] Fixed three more `[...]` truncations --- authcaptureproxy/auth_capture_proxy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 8147cfa..223c7ee 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -129,7 +129,7 @@ def port(self) -> int: def tests(self) -> Dict[Text, Callable]: """Return tests setting. - :setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary o[...] + :setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary oof post variables, and a dictioary of query variables and returns a URL or string. See :mod:`authcaptureproxy.examples.testers` for examples. """ return self._tests @@ -148,7 +148,7 @@ def tests(self, value: Dict[Text, Callable]) -> None: def modifiers(self) -> Dict[Text, Union[Callable, Dict[Text, Callable]]]: """Return modifiers setting. - :setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key shoud be a MIME type and the value should be a dictionary of modifiers for that MIME type where[...] + :setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key shoud be a MIME type and the value should be a dictionary of modifiers for that MIME type where the key should be the name of the modifier and the value should be a function or couroutine that takes a string and returns a modified string. If parameters are necessary, functools.partial should be used. See :mod:`authcaptureproxy.examples.modifiers` for examples. """ return self._modifiers @@ -286,7 +286,7 @@ async def _build_response( async def all_handler(self, request: web.Request, **kwargs) -> web.Response: """Handle all requests. - This handler will exit on succesful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_ha[...] + This handler will exit on succesful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_haandler_active. Args request (web.Request): The request to process From 6a0302db28b69cd82109d9ebabe2d11c51ee02d6 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 02:22:59 -0800 Subject: [PATCH 12/22] Fix: Unreachable exception handler: reorder to catch TimeoutException before HTTPError. --- authcaptureproxy/auth_capture_proxy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 223c7ee..04e8e2d 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -485,10 +485,6 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - return await self._build_response( text=f"Error connecting to {site}; too many redirects: {ex}" ) - except httpx.HTTPError as ex: - return await self._build_response( - text=f"Error connecting to {site}: {ex}" - ) except httpx.TimeoutException as ex: _LOGGER.warning( "Timeout during proxy request to %s: %s", @@ -503,6 +499,10 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - "and that the service endpoint is reachable from this host." ) ) + except httpx.HTTPError as ex: + return await self._build_response( + text=f"Error connecting to {site}: {ex}" + ) if resp is None: return await self._build_response(text=f"Error connecting to {site}; please retry") self.last_resp = resp From bf0e54117df862073301a08f66b6d6c61bf69909 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 02:32:16 -0800 Subject: [PATCH 13/22] Fix docstring typos. --- authcaptureproxy/auth_capture_proxy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index 04e8e2d..d056fb0 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -129,7 +129,7 @@ def port(self) -> int: def tests(self) -> Dict[Text, Callable]: """Return tests setting. - :setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary oof post variables, and a dictioary of query variables and returns a URL or string. See :mod:`authcaptureproxy.examples.testers` for examples. + :setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary of post variables, and a dictioary of query variables and returns a URL or string. See :mod:`authcaptureproxy.examples.testers` for examples. """ return self._tests @@ -286,7 +286,7 @@ async def _build_response( async def all_handler(self, request: web.Request, **kwargs) -> web.Response: """Handle all requests. - This handler will exit on succesful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_haandler_active. + This handler will exit on successful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_haandler_active. Args request (web.Request): The request to process From 415c0e795763d71cbba0ba32ec40e43a153799bd Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 03:31:34 -0800 Subject: [PATCH 14/22] fix: Incomplete exception handling in JSON parsing fallback. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uses “text first, bytes last” fallback for best match to the stated requirement (and better debugging/user experience). --- authcaptureproxy/auth_capture_proxy.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index d056fb0..e5a376a 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -148,7 +148,7 @@ def tests(self, value: Dict[Text, Callable]) -> None: def modifiers(self) -> Dict[Text, Union[Callable, Dict[Text, Callable]]]: """Return modifiers setting. - :setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key shoud be a MIME type and the value should be a dictionary of modifiers for that MIME type where the key should be the name of the modifier and the value should be a function or couroutine that takes a string and returns a modified string. If parameters are necessary, functools.partial should be used. See :mod:`authcaptureproxy.examples.modifiers` for examples. + :setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key should be a MIME type and the value should be a dictionary of modifiers for that MIME type where the key should be the name of the modifier and the value should be a function or coroutine that takes a string and returns a modified string. If parameters are necessary, functools.partial should be used. See :mod:`authcaptureproxy.examples.modifiers` for examples. """ return self._modifiers @@ -330,10 +330,19 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) - content_type = part.headers.get(hdrs.CONTENT_TYPE, "") mime_type = content_type.split(";", 1)[0].strip() if mime_type == "application/json": - part_data: Optional[ - Union[Text, Dict[Text, Any], List[Tuple[Text, Text]], bytes] - ] = await part.json() - writer.append_json(part_data) + try: + part_data: Optional[ + Union[Text, Dict[Text, Any], List[Tuple[Text, Text]], bytes] + ] = await part.json() + writer.append_json(part_data) + except Exception: + # Best-effort fallback: text, then bytes + try: + part_text = await part.text() + writer.append(part_text) + except Exception: + part_data = await part.read() + writer.append(part_data) elif mime_type.startswith("text"): part_data = await part.text() writer.append(part_data) From 658af909d9cd98e00722fb6762a9af928e5e7018 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 03:45:03 -0800 Subject: [PATCH 15/22] Changed generic set[asyncio.Task] to Set[asyncio.Task] from typing Line 106 used builtin generic set[asyncio.Task], but the file uses Dict, List, and other generics from typing elsewhere (lines 86-103). --- authcaptureproxy/auth_capture_proxy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/authcaptureproxy/auth_capture_proxy.py b/authcaptureproxy/auth_capture_proxy.py index e5a376a..8ae6836 100644 --- a/authcaptureproxy/auth_capture_proxy.py +++ b/authcaptureproxy/auth_capture_proxy.py @@ -6,7 +6,7 @@ from json import JSONDecodeError from functools import partial from ssl import SSLContext, create_default_context -from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Set, Text, Tuple, Union import httpx from aiohttp import ( @@ -103,7 +103,7 @@ def __init__(self, self.redirect_filters: Dict[Text, List[Text]] = { "url": [] } # dictionary of lists of regex strings to filter against - self._background_tasks: set[asyncio.Task] = set() + self._background_tasks: Set[asyncio.Task] = set() @property def active(self) -> bool: From 27bde5c0cd79581bd374df57e2e555c17a2f3aac Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 04:29:54 -0800 Subject: [PATCH 16/22] Create test_regression_headers_and_json_parsing.py --- ...est_regression_headers_and_json_parsing.py | 306 ++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 tests/test_regression_headers_and_json_parsing.py diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py new file mode 100644 index 0000000..3c59adc --- /dev/null +++ b/tests/test_regression_headers_and_json_parsing.py @@ -0,0 +1,306 @@ +import pytest +import httpx + +from aiohttp.test_utils import make_mocked_request +from multidict import CIMultiDict +from yarl import URL + + +class DummyAsyncClient: + """Capture outbound requests without real network I/O.""" + + def __init__(self) -> None: + self.calls = [] + # mimic httpx cookies jar access used in debug logging + self.cookies = type("Cookies", (), {"jar": {}})() + + async def aclose(self) -> None: + return + + async def post(self, url: str, **kwargs): + self.calls.append( + { + "method": "POST", + "url": url, + "headers": dict(kwargs.get("headers") or {}), + "json": kwargs.get("json"), + "data": kwargs.get("data"), + } + ) + req = httpx.Request("POST", url) + return httpx.Response( + 200, + request=req, + text="ok", + headers={"Content-Type": "text/plain"}, + ) + + +class ReusedHeadersProxyMixin: + """ + A proxy variant that intentionally returns THE SAME headers dict instance + from modify_headers on every request. + + This is the smallest, most direct way to prove the bug your fix addresses: + if all_handler mutates headers in-place for JSON requests, that mutation + persists into the next request and can cause subtle/invalid header sets. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._shared_headers = { + # include the ones JSON branch strips: + "Host": "example.com", + "Origin": "https://example.com", + "User-Agent": "ua", + "dnt": "1", + "Accept-Encoding": "gzip", + # include a header we must preserve: + "X-Custom": "keep", + } + + async def modify_headers(self, site: URL, request): # type: ignore[override] + # NOTE: return the exact same dict each time. + return self._shared_headers + + +@pytest.fixture +def proxy(monkeypatch): + """ + Regression note: + + These tests cover cross-request header contamination caused by in-place mutation + of the headers mapping inside AuthCaptureProxy.all_handler(). + + Specifically, the JSON request path removes proxy-related headers before sending + the upstream request: + + for item in ["Host", "Origin", "User-Agent", "dnt", "Accept-Encoding"]: + if req_headers.get(item): + req_headers.pop(item) + + Prior to the fix, this mutation could occur on a shared headers dict returned + from modify_headers(), leaking into subsequent requests. The fix copies the + headers mapping (req_headers = dict(headers)) before mutation. + + These tests fail on the pre-fix behavior and pass once the copy is introduced. + """ + + from authcaptureproxy.auth_capture_proxy import AuthCaptureProxy + + class Proxy(ReusedHeadersProxyMixin, AuthCaptureProxy): + pass + + p = Proxy( + proxy_url=URL("http://127.0.0.1:12345"), + host_url=URL("https://example.com"), + session=DummyAsyncClient(), + ) + + # Make the rest of all_handler deterministic and avoid exercising + # unrelated paths. + monkeypatch.setattr("authcaptureproxy.auth_capture_proxy.print_resp", lambda *_: None) + monkeypatch.setattr("authcaptureproxy.auth_capture_proxy.get_content_type", lambda *_: "text/plain") + monkeypatch.setattr(p, "check_redirects", lambda: None) + monkeypatch.setattr(p, "refresh_tests", lambda: None) + monkeypatch.setattr(p, "refresh_modifiers", lambda *_: None) + + # Disable tests/modifiers so handler returns pass-through response + p._tests = {} + p._modifiers = {} + + return p + + +def _make_request( + *, + method: str, + path: str, + content_type: str, + body: bytes = b"", + headers: dict | None = None, +): + hdrs = CIMultiDict(headers or {}) + # aiohttp stores content-type in headers; also expose request.content_type + hdrs["Content-Type"] = content_type + return make_mocked_request( + method, + path, + headers=hdrs, + payload=body, + ) + + +@pytest.mark.asyncio +async def test_cross_request_header_contamination_across_json_posts(proxy): + """ + Primary regression: JSON path strips proxy-ish headers before sending. + + Before fix (no req_headers copy): + - JSON branch pops keys from 'headers' in-place + - because modify_headers returned a shared dict, those keys disappear + for the next request, producing an inconsistent/invalid header set. + + After fix (req_headers = dict(headers)): + - shared headers remain intact across requests + - outbound request headers still have the stripped keys removed + """ + # JSON request #1 + req1 = _make_request( + method="POST", + path="/login", + content_type="application/json", + body=b'{"a": 1}', + ) + # make_mocked_request doesn't implement .json(); force the code path + # by providing request.json via attribute. + async def _json1(): + return {"a": 1} + req1.json = _json1 # type: ignore[attr-defined] + req1.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req1) + + # Shared headers must still contain the stripped keys AFTER the request. + shared = proxy._shared_headers # from mixin + assert "Host" in shared + assert "Origin" in shared + assert "User-Agent" in shared + assert "dnt" in shared + assert "Accept-Encoding" in shared + assert shared["X-Custom"] == "keep" + + # JSON request #2 + req2 = _make_request( + method="POST", + path="/login", + content_type="application/json", + body=b'{"b": 2}', + ) + async def _json2(): + return {"b": 2} + req2.json = _json2 # type: ignore[attr-defined] + req2.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req2) + + # Outbound headers for BOTH JSON requests must have those keys removed. + calls = proxy.session.calls # type: ignore[attr-defined] + assert len(calls) >= 2 + for call in calls[-2:]: + out = call["headers"] + assert "Host" not in out + assert "Origin" not in out + assert "User-Agent" not in out + assert "dnt" not in out + assert "Accept-Encoding" not in out + assert out.get("X-Custom") == "keep" + + +@pytest.mark.asyncio +async def test_cross_request_header_contamination_between_request_types(proxy): + """ + JSON request must not "poison" the next form request by mutating shared headers. + """ + # First JSON request triggers stripping (on a copy, after fix) + req_json = _make_request( + method="POST", + path="/login", + content_type="application/json", + body=b'{"a": 1}', + ) + async def _json(): + return {"a": 1} + req_json.json = _json # type: ignore[attr-defined] + req_json.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req_json) + + # Next form request should still have full shared headers available + req_form = _make_request( + method="POST", + path="/login", + content_type="application/x-www-form-urlencoded", + body=b"field=value", + headers={"Content-Length": "11"}, + ) + async def _post(): + return {"field": "value"} + req_form.post = _post # type: ignore[attr-defined] + req_form.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req_form) + + # For form posts, the JSON stripping logic does not run. + form_out = proxy.session.calls[-1]["headers"] # type: ignore[attr-defined] + assert form_out.get("User-Agent") == "ua" + assert form_out.get("X-Custom") == "keep" + + +@pytest.mark.asyncio +async def test_json_parsing_guards_on_non_json_content(proxy): + """ + Regression: JSON parsing must NOT be attempted for form posts. + + Before fix: if code unconditionally calls request.json() when has_body, + this will raise and break processing. + After fix: request.json() is only called for JSON content-types. + """ + req_form = _make_request( + method="POST", + path="/login", + content_type="application/x-www-form-urlencoded", + body=b"field=value", + ) + # If handler calls json() on a form post, explode + async def _json_raises(): + raise RuntimeError("json() should not be called for form posts") + req_form.json = _json_raises # type: ignore[attr-defined] + async def _post(): + return {"field": "value"} + req_form.post = _post # type: ignore[attr-defined] + req_form.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req_form) + + +@pytest.mark.asyncio +async def test_json_parsing_only_for_json_content_types(proxy): + req_json = _make_request( + method="POST", + path="/login", + content_type="application/json", + body=b'{"ok": true}', + ) + called = {"count": 0} + + async def _json(): + called["count"] += 1 + return {"ok": True} + + req_json.json = _json # type: ignore[attr-defined] + req_json.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req_json) + assert called["count"] == 1 + + +@pytest.mark.asyncio +async def test_json_parsing_for_json_plus_suffix_content_types(proxy): + req_json = _make_request( + method="POST", + path="/login", + content_type="application/vnd.api+json", + body=b'{"v": 1}', + ) + called = {"count": 0} + + async def _json(): + called["count"] += 1 + return {"v": 1} + + req_json.json = _json # type: ignore[attr-defined] + req_json.has_body = True # type: ignore[attr-defined] + + await proxy.all_handler(req_json) + assert called["count"] == 1 From 8ded996afc6b390a799ba7c0ac7cd53216c20744 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 04:44:10 -0800 Subject: [PATCH 17/22] Fixed syntax/indentation problem --- ...est_regression_headers_and_json_parsing.py | 178 +++++++----------- 1 file changed, 69 insertions(+), 109 deletions(-) diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py index 3c59adc..063d0d6 100644 --- a/tests/test_regression_headers_and_json_parsing.py +++ b/tests/test_regression_headers_and_json_parsing.py @@ -11,7 +11,7 @@ class DummyAsyncClient: def __init__(self) -> None: self.calls = [] - # mimic httpx cookies jar access used in debug logging + # match attribute access used in logging self.cookies = type("Cookies", (), {"jar": {}})() async def aclose(self) -> None: @@ -29,67 +29,56 @@ async def post(self, url: str, **kwargs): ) req = httpx.Request("POST", url) return httpx.Response( - 200, - request=req, - text="ok", - headers={"Content-Type": "text/plain"}, + 200, request=req, text="ok", headers={"Content-Type": "text/plain"} ) -class ReusedHeadersProxyMixin: - """ - A proxy variant that intentionally returns THE SAME headers dict instance - from modify_headers on every request. +def _make_request( + *, method: str, path: str, content_type: str, headers=None, body: bytes = b"" +): + hdrs = CIMultiDict(headers or {}) + hdrs["Content-Type"] = content_type + return make_mocked_request(method, path, headers=hdrs, payload=body) - This is the smallest, most direct way to prove the bug your fix addresses: - if all_handler mutates headers in-place for JSON requests, that mutation - persists into the next request and can cause subtle/invalid header sets. + +@pytest.fixture +def proxy(monkeypatch): """ + Regression note: - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._shared_headers = { - # include the ones JSON branch strips: - "Host": "example.com", - "Origin": "https://example.com", - "User-Agent": "ua", - "dnt": "1", - "Accept-Encoding": "gzip", - # include a header we must preserve: - "X-Custom": "keep", - } + These tests cover cross-request header contamination caused by in-place mutation + of the headers mapping inside AuthCaptureProxy.all_handler(). - async def modify_headers(self, site: URL, request): # type: ignore[override] - # NOTE: return the exact same dict each time. - return self._shared_headers + Specifically, the JSON request path removes proxy-related headers before sending + the upstream request: + for item in ["Host", "Origin", "User-Agent", "dnt", "Accept-Encoding"]: + if req_headers.get(item): + req_headers.pop(item) -@pytest.fixture -def proxy(monkeypatch): - """ - Regression note: - - These tests cover cross-request header contamination caused by in-place mutation - of the headers mapping inside AuthCaptureProxy.all_handler(). - - Specifically, the JSON request path removes proxy-related headers before sending - the upstream request: - - for item in ["Host", "Origin", "User-Agent", "dnt", "Accept-Encoding"]: - if req_headers.get(item): - req_headers.pop(item) - - Prior to the fix, this mutation could occur on a shared headers dict returned - from modify_headers(), leaking into subsequent requests. The fix copies the - headers mapping (req_headers = dict(headers)) before mutation. - - These tests fail on the pre-fix behavior and pass once the copy is introduced. - """ - - from authcaptureproxy.auth_capture_proxy import AuthCaptureProxy - - class Proxy(ReusedHeadersProxyMixin, AuthCaptureProxy): - pass + Prior to the fix, this mutation could occur on a shared headers dict returned + from modify_headers(), leaking into subsequent requests. The fix copies the + headers mapping (req_headers = dict(headers)) before mutation. + + These tests fail on the pre-fix behavior and pass once the copy is introduced. + """ + from authcaptureproxy.auth_capture_proxy import AuthCaptureProxy + + class Proxy(AuthCaptureProxy): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.shared_headers = { + "Host": "example.com", + "Origin": "https://example.com", + "User-Agent": "ua", + "dnt": "1", + "Accept-Encoding": "gzip", + "X-Custom": "keep", + } + + async def modify_headers(self, site: URL, request): # type: ignore[override] + # Return the same dict instance every time to expose in-place mutation leaks. + return self.shared_headers p = Proxy( proxy_url=URL("http://127.0.0.1:12345"), @@ -97,53 +86,23 @@ class Proxy(ReusedHeadersProxyMixin, AuthCaptureProxy): session=DummyAsyncClient(), ) - # Make the rest of all_handler deterministic and avoid exercising - # unrelated paths. + # Keep output quiet and avoid side-effects not relevant to regression monkeypatch.setattr("authcaptureproxy.auth_capture_proxy.print_resp", lambda *_: None) - monkeypatch.setattr("authcaptureproxy.auth_capture_proxy.get_content_type", lambda *_: "text/plain") - monkeypatch.setattr(p, "check_redirects", lambda: None) - monkeypatch.setattr(p, "refresh_tests", lambda: None) - monkeypatch.setattr(p, "refresh_modifiers", lambda *_: None) - # Disable tests/modifiers so handler returns pass-through response + # Keep behavior focused (tests/modifiers are unrelated to the regression) p._tests = {} p._modifiers = {} return p -def _make_request( - *, - method: str, - path: str, - content_type: str, - body: bytes = b"", - headers: dict | None = None, -): - hdrs = CIMultiDict(headers or {}) - # aiohttp stores content-type in headers; also expose request.content_type - hdrs["Content-Type"] = content_type - return make_mocked_request( - method, - path, - headers=hdrs, - payload=body, - ) - - @pytest.mark.asyncio async def test_cross_request_header_contamination_across_json_posts(proxy): """ - Primary regression: JSON path strips proxy-ish headers before sending. - - Before fix (no req_headers copy): - - JSON branch pops keys from 'headers' in-place - - because modify_headers returned a shared dict, those keys disappear - for the next request, producing an inconsistent/invalid header set. + Fails before fix: JSON branch pops headers from shared_headers in-place; + next request starts from an already-mutated header set. - After fix (req_headers = dict(headers)): - - shared headers remain intact across requests - - outbound request headers still have the stripped keys removed + Passes after fix: JSON branch only mutates a copy; shared_headers remains unchanged. """ # JSON request #1 req1 = _make_request( @@ -152,17 +111,17 @@ async def test_cross_request_header_contamination_across_json_posts(proxy): content_type="application/json", body=b'{"a": 1}', ) - # make_mocked_request doesn't implement .json(); force the code path - # by providing request.json via attribute. + async def _json1(): return {"a": 1} + req1.json = _json1 # type: ignore[attr-defined] req1.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req1) - # Shared headers must still contain the stripped keys AFTER the request. - shared = proxy._shared_headers # from mixin + # Shared dict must remain intact after request #1 (core regression assertion) + shared = proxy.shared_headers assert "Host" in shared assert "Origin" in shared assert "User-Agent" in shared @@ -177,14 +136,16 @@ async def _json1(): content_type="application/json", body=b'{"b": 2}', ) + async def _json2(): return {"b": 2} + req2.json = _json2 # type: ignore[attr-defined] req2.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req2) - # Outbound headers for BOTH JSON requests must have those keys removed. + # Both outbound requests must have proxy headers stripped calls = proxy.session.calls # type: ignore[attr-defined] assert len(calls) >= 2 for call in calls[-2:]: @@ -200,38 +161,38 @@ async def _json2(): @pytest.mark.asyncio async def test_cross_request_header_contamination_between_request_types(proxy): """ - JSON request must not "poison" the next form request by mutating shared headers. + JSON request must not poison later non-JSON requests by mutating shared headers. """ - # First JSON request triggers stripping (on a copy, after fix) + # First JSON request req_json = _make_request( method="POST", path="/login", content_type="application/json", body=b'{"a": 1}', ) + async def _json(): return {"a": 1} + req_json.json = _json # type: ignore[attr-defined] req_json.has_body = True # type: ignore[attr-defined] - await proxy.all_handler(req_json) - # Next form request should still have full shared headers available + # Then a form post; provide post() to keep it on the form path. req_form = _make_request( method="POST", path="/login", content_type="application/x-www-form-urlencoded", body=b"field=value", - headers={"Content-Length": "11"}, ) + async def _post(): return {"field": "value"} + req_form.post = _post # type: ignore[attr-defined] req_form.has_body = True # type: ignore[attr-defined] - await proxy.all_handler(req_form) - # For form posts, the JSON stripping logic does not run. form_out = proxy.session.calls[-1]["headers"] # type: ignore[attr-defined] assert form_out.get("User-Agent") == "ua" assert form_out.get("X-Custom") == "keep" @@ -240,11 +201,8 @@ async def _post(): @pytest.mark.asyncio async def test_json_parsing_guards_on_non_json_content(proxy): """ - Regression: JSON parsing must NOT be attempted for form posts. - - Before fix: if code unconditionally calls request.json() when has_body, - this will raise and break processing. - After fix: request.json() is only called for JSON content-types. + Fails before fix: if handler calls request.json() on a form request, this raises. + Passes after fix: request.json() is only called for JSON content-types. """ req_form = _make_request( method="POST", @@ -252,12 +210,14 @@ async def test_json_parsing_guards_on_non_json_content(proxy): content_type="application/x-www-form-urlencoded", body=b"field=value", ) - # If handler calls json() on a form post, explode + async def _json_raises(): - raise RuntimeError("json() should not be called for form posts") - req_form.json = _json_raises # type: ignore[attr-defined] + raise RuntimeError("json() must not be called for form posts") + async def _post(): return {"field": "value"} + + req_form.json = _json_raises # type: ignore[attr-defined] req_form.post = _post # type: ignore[attr-defined] req_form.has_body = True # type: ignore[attr-defined] @@ -265,7 +225,7 @@ async def _post(): @pytest.mark.asyncio -async def test_json_parsing_only_for_json_content_types(proxy): +async def test_json_parsing_for_json_content_types(proxy): req_json = _make_request( method="POST", path="/login", From 9d944d1043058f56eda146b3805376d6b378a5da Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 04:51:53 -0800 Subject: [PATCH 18/22] Removed all has_body = True assignments (since aiohttp.web.Request.has_body is read-only). --- tests/test_regression_headers_and_json_parsing.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py index 063d0d6..9256b0d 100644 --- a/tests/test_regression_headers_and_json_parsing.py +++ b/tests/test_regression_headers_and_json_parsing.py @@ -38,6 +38,9 @@ def _make_request( ): hdrs = CIMultiDict(headers or {}) hdrs["Content-Type"] = content_type + # make it explicit that there is a body when provided + if body: + hdrs.setdefault("Content-Length", str(len(body))) return make_mocked_request(method, path, headers=hdrs, payload=body) @@ -116,7 +119,6 @@ async def _json1(): return {"a": 1} req1.json = _json1 # type: ignore[attr-defined] - req1.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req1) @@ -141,7 +143,6 @@ async def _json2(): return {"b": 2} req2.json = _json2 # type: ignore[attr-defined] - req2.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req2) @@ -175,7 +176,6 @@ async def _json(): return {"a": 1} req_json.json = _json # type: ignore[attr-defined] - req_json.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req_json) # Then a form post; provide post() to keep it on the form path. @@ -190,7 +190,6 @@ async def _post(): return {"field": "value"} req_form.post = _post # type: ignore[attr-defined] - req_form.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req_form) form_out = proxy.session.calls[-1]["headers"] # type: ignore[attr-defined] @@ -219,7 +218,6 @@ async def _post(): req_form.json = _json_raises # type: ignore[attr-defined] req_form.post = _post # type: ignore[attr-defined] - req_form.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req_form) @@ -239,7 +237,6 @@ async def _json(): return {"ok": True} req_json.json = _json # type: ignore[attr-defined] - req_json.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req_json) assert called["count"] == 1 @@ -260,7 +257,6 @@ async def _json(): return {"v": 1} req_json.json = _json # type: ignore[attr-defined] - req_json.has_body = True # type: ignore[attr-defined] await proxy.all_handler(req_json) assert called["count"] == 1 From 25fde9bf73971d022c011aca8dde95a21675644c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 04:56:50 -0800 Subject: [PATCH 19/22] Fix: pass a real aiohttp StreamReader-like payload, not raw bytes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit make_mocked_request(..., payload=body_bytes) is giving aiohttp.web.Request a _payload that is literally a bytes object. Then request.has_body tries to call: ``` self._payload.at_eof() ``` …but bytes has no at_eof(), so you get: AttributeError: 'bytes' object has no attribute 'at_eof' ✅ Fix: pass a real aiohttp StreamReader-like payload, not raw bytes. The cleanest way in tests is to create an aiohttp.streams.StreamReader, feed it the bytes, and pass that as payload=. Because StreamReader needs a running loop, we’ll make _make_request async and call it with await. --- ...est_regression_headers_and_json_parsing.py | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py index 9256b0d..4c25376 100644 --- a/tests/test_regression_headers_and_json_parsing.py +++ b/tests/test_regression_headers_and_json_parsing.py @@ -1,6 +1,9 @@ +import asyncio + import pytest import httpx +from aiohttp.streams import StreamReader from aiohttp.test_utils import make_mocked_request from multidict import CIMultiDict from yarl import URL @@ -33,15 +36,30 @@ async def post(self, url: str, **kwargs): ) -def _make_request( - *, method: str, path: str, content_type: str, headers=None, body: bytes = b"" +async def _make_request( + *, + method: str, + path: str, + content_type: str, + headers=None, + body: bytes = b"", ): + """ + Build a mocked aiohttp Request with a real StreamReader payload so + Request.has_body works (it calls request._payload.at_eof()). + """ hdrs = CIMultiDict(headers or {}) hdrs["Content-Type"] = content_type - # make it explicit that there is a body when provided + hdrs.setdefault("Content-Length", str(len(body))) + + # StreamReader requires a running loop + loop = asyncio.get_running_loop() + payload = StreamReader(protocol=None, loop=loop) # type: ignore[arg-type] if body: - hdrs.setdefault("Content-Length", str(len(body))) - return make_mocked_request(method, path, headers=hdrs, payload=body) + payload.feed_data(body) + payload.feed_eof() + + return make_mocked_request(method, path, headers=hdrs, payload=payload) @pytest.fixture @@ -101,14 +119,8 @@ async def modify_headers(self, site: URL, request): # type: ignore[override] @pytest.mark.asyncio async def test_cross_request_header_contamination_across_json_posts(proxy): - """ - Fails before fix: JSON branch pops headers from shared_headers in-place; - next request starts from an already-mutated header set. - - Passes after fix: JSON branch only mutates a copy; shared_headers remains unchanged. - """ # JSON request #1 - req1 = _make_request( + req1 = await _make_request( method="POST", path="/login", content_type="application/json", @@ -119,7 +131,6 @@ async def _json1(): return {"a": 1} req1.json = _json1 # type: ignore[attr-defined] - await proxy.all_handler(req1) # Shared dict must remain intact after request #1 (core regression assertion) @@ -132,7 +143,7 @@ async def _json1(): assert shared["X-Custom"] == "keep" # JSON request #2 - req2 = _make_request( + req2 = await _make_request( method="POST", path="/login", content_type="application/json", @@ -143,7 +154,6 @@ async def _json2(): return {"b": 2} req2.json = _json2 # type: ignore[attr-defined] - await proxy.all_handler(req2) # Both outbound requests must have proxy headers stripped @@ -161,11 +171,8 @@ async def _json2(): @pytest.mark.asyncio async def test_cross_request_header_contamination_between_request_types(proxy): - """ - JSON request must not poison later non-JSON requests by mutating shared headers. - """ # First JSON request - req_json = _make_request( + req_json = await _make_request( method="POST", path="/login", content_type="application/json", @@ -179,7 +186,7 @@ async def _json(): await proxy.all_handler(req_json) # Then a form post; provide post() to keep it on the form path. - req_form = _make_request( + req_form = await _make_request( method="POST", path="/login", content_type="application/x-www-form-urlencoded", @@ -199,11 +206,7 @@ async def _post(): @pytest.mark.asyncio async def test_json_parsing_guards_on_non_json_content(proxy): - """ - Fails before fix: if handler calls request.json() on a form request, this raises. - Passes after fix: request.json() is only called for JSON content-types. - """ - req_form = _make_request( + req_form = await _make_request( method="POST", path="/login", content_type="application/x-www-form-urlencoded", @@ -224,7 +227,7 @@ async def _post(): @pytest.mark.asyncio async def test_json_parsing_for_json_content_types(proxy): - req_json = _make_request( + req_json = await _make_request( method="POST", path="/login", content_type="application/json", @@ -237,14 +240,13 @@ async def _json(): return {"ok": True} req_json.json = _json # type: ignore[attr-defined] - await proxy.all_handler(req_json) assert called["count"] == 1 @pytest.mark.asyncio async def test_json_parsing_for_json_plus_suffix_content_types(proxy): - req_json = _make_request( + req_json = await _make_request( method="POST", path="/login", content_type="application/vnd.api+json", @@ -257,6 +259,5 @@ async def _json(): return {"v": 1} req_json.json = _json # type: ignore[attr-defined] - await proxy.all_handler(req_json) assert called["count"] == 1 From 272d48bdaed7f0fb10bfae4230b186bc54bf7fdd Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 05:06:00 -0800 Subject: [PATCH 20/22] fix: TypeError: StreamReader.__init__() missing 1 required positional argument: 'limit' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the aiohttp version used in CI (it downgrades to aiohttp==3.9.0), StreamReader’s constructor requires a limit argument, so CI raises: ``` TypeError: StreamReader.__init__() missing 1 required positional argument: 'limit' ``` All 5 tests then fail immediately when trying to create the request. --- tests/test_regression_headers_and_json_parsing.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py index 4c25376..dd08a39 100644 --- a/tests/test_regression_headers_and_json_parsing.py +++ b/tests/test_regression_headers_and_json_parsing.py @@ -47,14 +47,22 @@ async def _make_request( """ Build a mocked aiohttp Request with a real StreamReader payload so Request.has_body works (it calls request._payload.at_eof()). + + CI uses aiohttp 3.9.x where StreamReader requires a `limit` argument. """ hdrs = CIMultiDict(headers or {}) hdrs["Content-Type"] = content_type hdrs.setdefault("Content-Length", str(len(body))) - # StreamReader requires a running loop loop = asyncio.get_running_loop() - payload = StreamReader(protocol=None, loop=loop) # type: ignore[arg-type] + + # aiohttp 3.9: StreamReader(protocol, limit, loop) + # newer aiohttp: signature varies; keep this compatible. + try: + payload = StreamReader(None, 2**16, loop=loop) # type: ignore[arg-type] + except TypeError: + payload = StreamReader(protocol=None, limit=2**16, loop=loop) # type: ignore[arg-type] + if body: payload.feed_data(body) payload.feed_eof() From 7b17af3da49bd63c1e9f94909356d0cdc5b32f60 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 05:19:33 -0800 Subject: [PATCH 21/22] fix failing lint tox env; it's running flake8 with bizarre docstring rules. Added terminating "." to the first line in docstring --- tests/test_regression_headers_and_json_parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py index dd08a39..7ddca81 100644 --- a/tests/test_regression_headers_and_json_parsing.py +++ b/tests/test_regression_headers_and_json_parsing.py @@ -45,7 +45,7 @@ async def _make_request( body: bytes = b"", ): """ - Build a mocked aiohttp Request with a real StreamReader payload so + Build a mocked aiohttp Request with a real StreamReader payload. Request.has_body works (it calls request._payload.at_eof()). CI uses aiohttp 3.9.x where StreamReader requires a `limit` argument. @@ -73,7 +73,7 @@ async def _make_request( @pytest.fixture def proxy(monkeypatch): """ - Regression note: + Regression note. These tests cover cross-request header contamination caused by in-place mutation of the headers mapping inside AuthCaptureProxy.all_handler(). From 24f1db5743e2dbe5a6336ad6c3c4ea87a274e82d Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 31 Jan 2026 05:29:26 -0800 Subject: [PATCH 22/22] Add a concrete type annotation for calls in DummyAsyncClient.__init__ ``` + from typing import Any import pytest import httpx ... class DummyAsyncClient: def __init__(self) -> None: - self.calls = [] + self.calls: list[dict[str, Any]] = [] self.cookies = type("Cookies", (), {"jar": {}})() ``` --- tests/test_regression_headers_and_json_parsing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_regression_headers_and_json_parsing.py b/tests/test_regression_headers_and_json_parsing.py index 7ddca81..de4c213 100644 --- a/tests/test_regression_headers_and_json_parsing.py +++ b/tests/test_regression_headers_and_json_parsing.py @@ -1,5 +1,6 @@ import asyncio +from typing import Any import pytest import httpx @@ -13,7 +14,7 @@ class DummyAsyncClient: """Capture outbound requests without real network I/O.""" def __init__(self) -> None: - self.calls = [] + self.calls: list[dict[str, Any]] = [] # match attribute access used in logging self.cookies = type("Cookies", (), {"jar": {}})()