Skip to content
Merged
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8d270f9
fix: guard JSON parsing, avoid header mutation, handle timeouts
danielbrunt57 Jan 29, 2026
1a2fd76
Improve error handling in auth_capture_proxy.py
danielbrunt57 Jan 29, 2026
95277d5
Update auth_capture_proxy.py with new implementation.
danielbrunt57 Jan 29, 2026
b66fd37
Apply proposed fixes
danielbrunt57 Jan 29, 2026
4f1d27f
Use a more forgiving timeout for proxying browser-driven auth traffic
danielbrunt57 Jan 29, 2026
4711a88
Remove unused import: from datetime import timedelta
danielbrunt57 Jan 29, 2026
905c4f0
Syntax error
danielbrunt57 Jan 29, 2026
9fe551e
Merge branch 'main' into Prevent-header-“growth”-by-mutation
danielbrunt57 Jan 31, 2026
20ae1ba
Remove unused DEFAULT_HTTPX_TIMEOUT
danielbrunt57 Jan 31, 2026
253c5dc
Update auth_capture_proxy.py (3 changes)
danielbrunt57 Jan 31, 2026
512f15a
Implemented nitpick suggested fix for consistency
danielbrunt57 Jan 31, 2026
76f5f3b
Fixed three more `[...]` truncations
danielbrunt57 Jan 31, 2026
6a0302d
Fix: Unreachable exception handler: reorder to catch TimeoutException…
danielbrunt57 Jan 31, 2026
bf0e541
Fix docstring typos.
danielbrunt57 Jan 31, 2026
415c0e7
fix: Incomplete exception handling in JSON parsing fallback.
danielbrunt57 Jan 31, 2026
658af90
Changed generic set[asyncio.Task] to Set[asyncio.Task] from typing
danielbrunt57 Jan 31, 2026
27bde5c
Create test_regression_headers_and_json_parsing.py
danielbrunt57 Jan 31, 2026
8ded996
Fixed syntax/indentation problem
danielbrunt57 Jan 31, 2026
9d944d1
Removed all has_body = True assignments (since aiohttp.web.Request.ha…
danielbrunt57 Jan 31, 2026
25fde9b
Fix: pass a real aiohttp StreamReader-like payload, not raw bytes.
danielbrunt57 Jan 31, 2026
272d48b
fix: TypeError: StreamReader.__init__() missing 1 required positional…
danielbrunt57 Jan 31, 2026
7b17af3
fix failing lint tox env; it's running flake8 with bizarre docstring …
danielbrunt57 Jan 31, 2026
24f1db5
Add a concrete type annotation for calls in DummyAsyncClient.__init__
danielbrunt57 Jan 31, 2026
f691876
Merge branch 'alandtse:main' into Prevent-header-growth-by-mutation
danielbrunt57 Jan 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 45 additions & 27 deletions authcaptureproxy/auth_capture_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
import asyncio
import logging
import re
from json import JSONDecodeError
from functools import partial
from ssl import SSLContext, create_default_context
from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union

import httpx
from aiohttp import (
ClientConnectionError,
MultipartReader,
MultipartWriter,
TooManyRedirects,
hdrs,
web,
)
Expand Down Expand Up @@ -47,8 +46,7 @@ class AuthCaptureProxy:
This class relies on tests to be provided to indicate the proxy has completed. At proxy completion all data can be found in self.session, self.data, and self.query.
"""

def __init__(
self,
def __init__(self,
proxy_url: URL,
host_url: URL,
session: Optional[httpx.AsyncClient] = None,
Expand All @@ -58,7 +56,10 @@ def __init__(
"""Initialize proxy object.

Args:
proxy_url (URL): url for proxy location. e.g., http://192.168.1.1/. If there is any path, the path is considered part of the base url. If no explicit port is specified, a random port will be generated. If https is passed in, ssl_context must be provided at start_proxy() or the url will be downgraded to http.
proxy_url (URL): url for proxy location. e.g., http://192.168.1.1/.
If there is any path, the path is considered part of the base url.
If no explicit port is specified, a random port will be generated.
If https is passed in, ssl_context must be provided at start_proxy() or the url will be downgraded to http.
host_url (URL): original url for login, e.g., http://amazon.com
session (httpx.AsyncClient): httpx client to make queries. Optional
session_factory (lambda: httpx.AsyncClient): factory to create the aforementioned httpx client if having one fixed session is insufficient.
Expand Down Expand Up @@ -102,6 +103,7 @@ def __init__(
self.redirect_filters: Dict[Text, List[Text]] = {
"url": []
} # dictionary of lists of regex strings to filter against
self._background_tasks: set[asyncio.Task] = set()

@property
def active(self) -> bool:
Expand All @@ -127,7 +129,7 @@ def port(self) -> int:
def tests(self) -> Dict[Text, Callable]:
"""Return tests setting.

:setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary of post variables, and a dictioary of query variables and returns a URL or string. See :mod:`authcaptureproxy.examples.testers` for examples.
:setter: value (Dict[Text, Any]): A dictionary of tests. The key should be the name of the test and the value should be a function or coroutine that takes a httpx.Response, a dictionary of post variables, and a dictionary of query variables and returns a URL or string. See :mod:`authcaptureproxy.examples.testers` for examples.
"""
return self._tests

Expand All @@ -146,7 +148,7 @@ def tests(self, value: Dict[Text, Callable]) -> None:
def modifiers(self) -> Dict[Text, Union[Callable, Dict[Text, Callable]]]:
"""Return modifiers setting.

:setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key shoud be a MIME type and the value should be a dictionary of modifiers for that MIME type where the key should be the name of the modifier and the value should be a function or couroutine that takes a string and returns a modified string. If parameters are necessary, functools.partial should be used. See :mod:`authcaptureproxy.examples.modifiers` for examples.
:setter: value (Dict[Text, Dict[Text, Callable]): A nested dictionary of modifiers. The key should be a MIME type and the value should be a dictionary of modifiers for that MIME type where the key should be the name of the modifier and the value should be a function or coroutine that takes a string and returns a modified string. If parameters are necessary, functools.partial should be used. See :mod:`authcaptureproxy.examples.modifiers` for examples.
"""
return self._modifiers

Expand Down Expand Up @@ -284,7 +286,7 @@ async def _build_response(
async def all_handler(self, request: web.Request, **kwargs) -> web.Response:
"""Handle all requests.

This handler will exit on succesful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_handler_active.
This handler will exit on successful test found in self.tests or if a /stop url is seen. This handler can be used with any aiohttp webserver and disabled after registered using self.all_handler_active.

Args
request (web.Request): The request to process
Expand Down Expand Up @@ -324,16 +326,18 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) -
break
if isinstance(part, MultipartReader):
await _process_multipart(part, writer)
elif part.headers.get("hdrs.CONTENT_TYPE"):
if part.headers[hdrs.CONTENT_TYPE] == "application/json":
elif hdrs.CONTENT_TYPE in part.headers:
content_type = part.headers.get(hdrs.CONTENT_TYPE, "")
mime_type = content_type.split(";", 1)[0].strip()
if mime_type == "application/json":
part_data: Optional[
Union[Text, Dict[Text, Any], List[Tuple[Text, Text]], bytes]
] = await part.json()
writer.append_json(part_data)
elif part.headers[hdrs.CONTENT_TYPE].startswith("text"):
elif mime_type.startswith("text"):
part_data = await part.text()
writer.append(part_data)
elif part.headers[hdrs.CONTENT_TYPE] == "application/www-urlform-encode":
elif mime_type == "application/x-www-form-urlencoded":
part_data = await part.form()
writer.append_form(part_data)
else:
Expand Down Expand Up @@ -390,8 +394,15 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) -
else:
data = convert_multidict_to_dict(await request.post())
json_data = None
if request.has_body:
json_data = await request.json()
# Only attempt JSON decoding for JSON requests; avoid raising for form posts.
if request.has_body and (
request.content_type == "application/json"
or request.content_type.endswith("+json")
):
try:
json_data = await request.json()
except (JSONDecodeError, ValueError):
json_data = None
if data:
self.data.update(data)
_LOGGER.debug("Storing data %s", data)
Expand All @@ -403,7 +414,9 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) -
):
self.all_handler_active = False
if self.active:
asyncio.create_task(self.stop_proxy(3))
task = asyncio.create_task(self.stop_proxy(3))
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
return await self._build_response(text="Proxy stopped.")
elif (
URL(str(request.url)).path
Expand Down Expand Up @@ -434,41 +447,47 @@ async def _process_multipart(reader: MultipartReader, writer: MultipartWriter) -
if skip_auto_headers:
_LOGGER.debug("Discovered skip_auto_headers %s", skip_auto_headers)
headers.pop(SKIP_AUTO_HEADERS)
# Avoid accidental header mutation across branches/calls
req_headers: dict[str, Any] = dict(headers)
_LOGGER.debug(
"Attempting %s to %s\nheaders: %s \ncookies: %s",
method,
site,
headers,
req_headers,
self.session.cookies.jar,
)
try:
if mpwriter:
resp = await getattr(self.session, method)(
site, data=mpwriter, headers=headers, follow_redirects=True
site, data=mpwriter, headers=req_headers, follow_redirects=True
)
elif data:
resp = await getattr(self.session, method)(
site, data=data, headers=headers, follow_redirects=True
site, data=data, headers=req_headers, follow_redirects=True
)
elif json_data:
for item in ["Host", "Origin", "User-Agent", "dnt", "Accept-Encoding"]:
# remove proxy headers
if headers.get(item):
headers.pop(item)
if req_headers.get(item):
req_headers.pop(item)
resp = await getattr(self.session, method)(
site, json=json_data, headers=headers, follow_redirects=True
site, json=json_data, headers=req_headers, follow_redirects=True
)
else:
resp = await getattr(self.session, method)(
site, headers=headers, follow_redirects=True
site, headers=req_headers, follow_redirects=True
)
except ClientConnectionError as ex:
except httpx.ConnectError as ex:
return await self._build_response(
text=f"Error connecting to {site}; please retry: {ex}"
)
except TooManyRedirects as ex:
except httpx.TooManyRedirects as ex:
return await self._build_response(
text=f"Error connecting to {site}; too may redirects: {ex}"
text=f"Error connecting to {site}; too many redirects: {ex}"
)
except httpx.HTTPError as ex:
return await self._build_response(
text=f"Error connecting to {site}: {ex}"
)
except httpx.TimeoutException as ex:
_LOGGER.warning(
Expand Down Expand Up @@ -621,8 +640,7 @@ def _swap_proxy_and_host(self, text: Text, domain_only: bool = False) -> Text:
"""
host_string: Text = str(self._host_url.with_path("/"))
proxy_string: Text = str(
self.access_url() if not domain_only else self.access_url().with_path("/")
)
self.access_url() if not domain_only else self.access_url().with_path("/"))
if str(self.access_url().with_path("/")).replace("https", "http") in text:
_LOGGER.debug(
"Replacing %s with %s",
Expand Down
Loading