Skip to content

Commit ae13bab

Browse files
committed
feat(firefox): add expect_download to the BiDi tab (portable download API)
Brings Firefox/BiDi to parity with CDP's Tab.expect_download and makes the download API portable across both backends.

- BiDiTab.expect_download(keep_file_at, timeout): async context manager that routes downloads to a managed directory (temp by default, cleaned up on exit) and yields a handle. BiDi reports the saved path on browsingContext.downloadEnd, so completion needs no progress polling — it filters events to this browsing context and resolves on status 'complete'.
- _BiDiDownloadHandle mirrors CDP's _DownloadHandle: file_path, wait_started, wait_finished, read_bytes, read_base64.
- New DownloadHandleProtocol + expect_download on TabProtocol, so callers get the same surface regardless of protocol; _conformance.py asserts both concrete handles satisfy it.

Verified e2e on real Firefox (temp-dir read, base64, keep_file_at persist); CDP expect_download still green. mypy/ruff clean.
1 parent 0655bf6 commit ae13bab

4 files changed

Lines changed: 301 additions & 4 deletions

File tree

pydoll/browser/_conformance.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313

1414
if TYPE_CHECKING:
1515
from pydoll.browser.chromium.base import Browser
16-
from pydoll.browser.chromium.tab import Tab
16+
from pydoll.browser.chromium.tab import Tab, _DownloadHandle
1717
from pydoll.browser.firefox.base import FirefoxBrowser
18-
from pydoll.browser.firefox.tab import BiDiTab
19-
from pydoll.browser.protocols import BrowserProtocol, TabProtocol
18+
from pydoll.browser.firefox.tab import BiDiTab, _BiDiDownloadHandle
19+
from pydoll.browser.protocols import BrowserProtocol, DownloadHandleProtocol, TabProtocol
2020
from pydoll.elements.bidi.shadow_root import BiDiShadowRoot
2121
from pydoll.elements.bidi.web_element import BiDiWebElement
2222
from pydoll.elements.cdp.shadow_root import ShadowRoot
@@ -29,6 +29,12 @@ def _cdp_tab_conforms(tab: Tab) -> TabProtocol:
2929
def _bidi_tab_conforms(tab: BiDiTab) -> TabProtocol:
    """Static conformance check: a ``BiDiTab`` must be usable as a ``TabProtocol``.

    Returns the argument unchanged; the value of this function is that the
    type checker rejects it if ``BiDiTab`` stops satisfying the protocol.
    """
    conforming: TabProtocol = tab
    return conforming
3131

32+
def _cdp_download_handle_conforms(handle: _DownloadHandle) -> DownloadHandleProtocol:
    """Static conformance check: CDP's ``_DownloadHandle`` must satisfy ``DownloadHandleProtocol``.

    Returns the argument unchanged; the type checker rejects this function if
    the concrete handle drifts away from the protocol.
    """
    conforming: DownloadHandleProtocol = handle
    return conforming
34+
35+
def _bidi_download_handle_conforms(handle: _BiDiDownloadHandle) -> DownloadHandleProtocol:
    """Static conformance check: ``_BiDiDownloadHandle`` must satisfy ``DownloadHandleProtocol``.

    Returns the argument unchanged; the type checker rejects this function if
    the concrete handle drifts away from the protocol.
    """
    conforming: DownloadHandleProtocol = handle
    return conforming
37+
3238
def _cdp_browser_conforms(browser: Browser) -> BrowserProtocol:
    """Static conformance check: the CDP ``Browser`` must be usable as a ``BrowserProtocol``.

    Returns the argument unchanged; the type checker rejects this function if
    ``Browser`` stops satisfying the protocol.
    """
    conforming: BrowserProtocol = browser
    return conforming
3440

pydoll/browser/firefox/tab.py

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@
33
import asyncio
44
import base64 as _b64
55
import logging
6+
import shutil
67
from collections.abc import Mapping
78
from contextlib import asynccontextmanager
89
from pathlib import Path
10+
from tempfile import mkdtemp
911
from typing import TYPE_CHECKING, Optional, Union, overload
1012

1113
import aiofiles
1214

15+
from pydoll.commands.bidi.browser_commands import BrowserCommands
1316
from pydoll.commands.bidi.browsing_context_commands import BrowsingContextCommands
1417
from pydoll.commands.bidi.emulation_commands import EmulationCommands
1518
from pydoll.commands.bidi.input_commands import InputCommands as BiDiInputCommands
@@ -18,11 +21,12 @@
1821
from pydoll.commands.bidi.storage_commands import StorageCommands
1922
from pydoll.connection.bidi_connection_handler import BiDiConnectionHandler
2023
from pydoll.elements.mixins.bidi_find_elements_mixin import BidiFindElementsMixin
21-
from pydoll.exceptions import ScriptExecutionError
24+
from pydoll.exceptions import DownloadTimeout, ScriptExecutionError
2225
from pydoll.interactions.keyboard import BiDiKeyboard
2326
from pydoll.interactions.mouse import BiDiMouse
2427
from pydoll.interactions.scroll import BiDiScroll
2528
from pydoll.protocol.bidi.base import Command, T_CommandParams, T_CommandResult
29+
from pydoll.protocol.bidi.browser.types import DownloadBehaviorAllowed
2630
from pydoll.protocol.bidi.browsing_context.types import ImageFormat, ReadinessState
2731
from pydoll.protocol.bidi.network.types import Cookie as BiDiCookie
2832
from pydoll.protocol.bidi.network.types import SameSite, StringValue
@@ -413,6 +417,118 @@ async def event_handler(event: dict) -> None:
413417
finally:
414418
await self.remove_callback(callback_id)
415419

420+
@asynccontextmanager
async def expect_download(
    self,
    keep_file_at: Optional[Union[str, Path]] = None,
    timeout: Optional[float] = None,
) -> AsyncGenerator[_BiDiDownloadHandle, None]:
    """Capture a file download triggered inside the block (mirrors the CDP API).

    Routes downloads to a directory and yields a handle to await completion
    and read the file. The saved path is taken from the ``filepath`` field of
    the download-completed event, so no progress polling is needed; only
    events for this tab's browsing context with status ``'complete'`` resolve
    the download.

    Args:
        keep_file_at: Directory to persist the file. If None, a temporary
            directory is used and removed when the block exits.
        timeout: Max seconds to wait for the download to finish (default 60).

    Yields:
        _BiDiDownloadHandle: reads the downloaded file (bytes/base64) and its path.

    Raises:
        DownloadTimeout: If the block exits normally but the download does not
            complete within ``timeout`` seconds.
    """
    download_timeout = 60.0 if timeout is None else float(timeout)

    cleanup_dir = keep_file_at is None
    if keep_file_at is None:
        download_dir = mkdtemp(prefix='pydoll-download-')
    else:
        download_dir = str(Path(keep_file_at))
        Path(download_dir).mkdir(parents=True, exist_ok=True)

    logger.info(f'Expecting download (dir={download_dir}, timeout={download_timeout}s)')

    # We are inside a coroutine, so the running loop is the one to use.
    loop = asyncio.get_running_loop()
    will_begin: asyncio.Future[bool] = loop.create_future()
    done: asyncio.Future[bool] = loop.create_future()
    state: dict[str, Optional[str]] = {
        'url': None,
        'suggestedFilename': None,
        'filePath': None,
    }

    async def on_will_begin(event: dict) -> None:
        params = event['params']
        # Ignore downloads that belong to other browsing contexts.
        if params.get('context') != self._context_id:
            return
        state['url'] = params.get('url')
        state['suggestedFilename'] = params.get('suggestedFilename')
        if not will_begin.done():
            will_begin.set_result(True)
            logger.info(
                f'Download will begin: url={state["url"]}, filename={state["suggestedFilename"]}'
            )

    async def on_end(event: dict) -> None:
        params = event['params']
        if params.get('context') != self._context_id or params.get('status') != 'complete':
            return
        file_path = params.get('filepath')
        suggested = state.get('suggestedFilename')
        # Fall back to the suggested name inside the managed directory when
        # the event carries no path.
        if not file_path and suggested:
            file_path = str(Path(download_dir) / suggested)
        state['filePath'] = file_path
        if not done.done():
            done.set_result(True)
            logger.info(f'Download completed: {file_path}')

    # Everything that changes browser/tab state happens inside the try so a
    # failure half-way through setup still unregisters what was registered,
    # resets the download behavior and removes the temporary directory.
    callback_ids: list[int] = []
    try:
        await self._execute_command(
            BrowserCommands.set_download_behavior(
                download_behavior=DownloadBehaviorAllowed(
                    type='allowed', destinationFolder=download_dir
                )
            )
        )
        callback_ids.append(await self.on(Event.DOWNLOAD_STARTED, on_will_begin))
        callback_ids.append(await self.on(Event.DOWNLOAD_COMPLETED, on_end))

        yield _BiDiDownloadHandle(
            state=state,
            will_begin_future=will_begin,
            done_future=done,
            timeout=download_timeout,
        )

        # asyncio.wait_for cancels the future it awaits when it times out, so
        # an earlier timed-out wait on this future leaves it cancelled.
        # Awaiting it again would surface as a bare CancelledError; report the
        # condition as the documented timeout instead.
        if done.cancelled():
            raise DownloadTimeout()
        try:
            await asyncio.wait_for(done, timeout=download_timeout)
        except asyncio.TimeoutError as exc:
            raise DownloadTimeout() from exc
    finally:
        await self._cleanup_download_context(
            callback_ids, cleanup_dir, state, download_dir
        )
513+
514+
async def _cleanup_download_context(
    self,
    callback_ids: list[int],
    cleanup_dir: bool,
    state: dict[str, Optional[str]],
    download_dir: str,
) -> None:
    """Undo everything ``expect_download`` set up.

    Removes the registered event callbacks, resets the download behavior and,
    when the directory was a temporary one, deletes the downloaded file and the
    directory. The steps are nested in ``finally`` blocks so that a failure in
    an earlier step does not skip the later ones; the first error still
    propagates to the caller.

    Args:
        callback_ids: Ids of the callbacks to remove.
        cleanup_dir: Whether ``download_dir`` is temporary and must be deleted.
        state: Shared download state; its ``'filePath'`` entry is the saved file.
        download_dir: Directory the download was routed to.
    """
    try:
        for callback_id in callback_ids:
            await self.remove_callback(callback_id)
    finally:
        try:
            await self._execute_command(
                BrowserCommands.set_download_behavior(download_behavior=None)
            )
        finally:
            if cleanup_dir:
                file_path = state.get('filePath')
                if file_path:
                    # Best effort: a failure here must not prevent the
                    # directory removal below.
                    try:
                        Path(file_path).unlink(missing_ok=True)
                    except OSError:
                        logger.warning(f'Could not remove downloaded file: {file_path}')
                shutil.rmtree(download_dir, ignore_errors=True)
531+
416532
@property
417533
def network_logs(self) -> list:
418534
"""Access captured network logs."""
@@ -637,3 +753,41 @@ def _deserialize_remote_pairs(value: object) -> dict[object, object]:
637753
else raw_value
638754
)
639755
return result
756+
757+
758+
class _BiDiDownloadHandle:
    """Handle returned by BiDiTab.expect_download to access the downloaded file.

    The handle shares its futures and state dict with the context manager that
    created it, so its waits must never cancel those futures.
    """

    def __init__(
        self,
        state: dict[str, Optional[str]],
        will_begin_future: asyncio.Future[bool],
        done_future: asyncio.Future[bool],
        timeout: float,
    ) -> None:
        self._state = state
        self._will_begin_future = will_begin_future
        self._done_future = done_future
        self._timeout = timeout

    @property
    def file_path(self) -> Optional[str]:
        """Path of the downloaded file, or None while it is not known."""
        return self._state.get('filePath')

    def _resolve_timeout(self, timeout: Optional[float]) -> float:
        """Return ``timeout`` unless it is None, in which case use the default.

        An explicit ``0`` is honoured (do not wait) rather than being replaced
        by the default.
        """
        return self._timeout if timeout is None else timeout

    async def wait_started(self, timeout: Optional[float] = None) -> None:
        """Wait until the download has started.

        Raises:
            asyncio.TimeoutError: If it does not start within the timeout.
        """
        # shield(): wait_for cancels what it awaits on timeout; the shared
        # future must stay usable for later waits.
        await asyncio.wait_for(
            asyncio.shield(self._will_begin_future),
            timeout=self._resolve_timeout(timeout),
        )

    async def wait_finished(self, timeout: Optional[float] = None) -> None:
        """Wait until the download has completed.

        Raises:
            asyncio.TimeoutError: If it does not finish within the timeout.
        """
        # shield(): see wait_started; the context manager also awaits this
        # future on exit, so it must not be cancelled by a timed-out wait here.
        await asyncio.wait_for(
            asyncio.shield(self._done_future),
            timeout=self._resolve_timeout(timeout),
        )

    async def read_bytes(self) -> bytes:
        """Wait for completion and return the file's content.

        Raises:
            FileNotFoundError: If no file path was recorded for the download.
        """
        await self.wait_finished()
        file_path = self.file_path
        if not file_path:
            raise FileNotFoundError('Download file path not available')
        async with aiofiles.open(file_path, 'rb') as f:
            return await f.read()

    async def read_base64(self) -> str:
        """Wait for completion and return the file's content base64-encoded."""
        data = await self.read_bytes()
        return _b64.b64encode(data).decode('ascii')

pydoll/browser/protocols.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,26 @@ async def intercept_requests(
180180
async def remove_intercept(self, intercept_id: str) -> None: ...
181181

182182

183+
@runtime_checkable
class DownloadHandleProtocol(Protocol):
    """Protocol-independent handle to a download captured by ``Tab.expect_download``.

    The handle is yielded inside the ``expect_download`` block (CDP and BiDi
    alike) and is used to await the download and read the downloaded bytes.
    """

    @property
    def file_path(self) -> Optional[str]:
        """Path of the downloaded file, if available."""

    async def wait_started(self, timeout: Optional[float] = None) -> None:
        """Wait for the download to start."""

    async def wait_finished(self, timeout: Optional[float] = None) -> None:
        """Wait for the download to finish."""

    async def read_bytes(self) -> bytes:
        """Return the downloaded content as bytes."""

    async def read_base64(self) -> str:
        """Return the downloaded content as a base64 string."""
201+
202+
183203
@runtime_checkable
184204
class TabProtocol(Protocol):
185205
"""Portable tab-level contract shared by Tab (CDP) and BiDiTab (BiDi).
@@ -283,6 +303,12 @@ def expect_file_chooser(
283303
self, files: Union[str, Path, list[Union[str, Path]]]
284304
) -> AbstractAsyncContextManager[None]: ...
285305

306+
def expect_download(
    self,
    keep_file_at: Optional[Union[str, Path]] = None,
    timeout: Optional[float] = None,
) -> AbstractAsyncContextManager[DownloadHandleProtocol]:
    """Return an async context manager that yields a download handle.

    Args:
        keep_file_at: Optional directory for the downloaded file.
        timeout: Optional time limit, in seconds, for the download.
    """
311+
286312
async def on(
287313
self,
288314
event_name: Event | str,
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""Real-Firefox (WebDriver BiDi) integration tests for Tab.expect_download.
2+
3+
A throwaway ``ThreadingHTTPServer`` serves a page with a downloadable attachment.
4+
Clicking the link inside the ``expect_download`` block must route the file to the
5+
managed directory and expose it through the handle — the BiDi counterpart of
6+
tests/cdp/integration/test_tab_io_integration.py::TestExpectDownload. Assertions
7+
look at the downloaded bytes and the saved file, not the BiDi commands used.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import base64
13+
import socket
14+
import threading
15+
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
16+
17+
import pytest
18+
import pytest_asyncio
19+
20+
from pydoll.browser import Firefox
21+
22+
DOWNLOAD_BODY = b'downloaded content from server'
23+
24+
_INDEX_HTML = (
25+
'<!DOCTYPE html><html><head><meta charset="utf-8"></head>'
26+
'<body><a id="download-link" href="/download" download="hello.txt">download</a>'
27+
'</body></html>'
28+
)
29+
30+
31+
def _find_free_port() -> int:
    """Return a TCP port on 127.0.0.1 that the OS reported as free.

    The probe socket is closed before returning, so the port is only free at
    the moment of the call.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('127.0.0.1', 0))
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()
35+
36+
37+
class _DownloadHandler(BaseHTTPRequestHandler):
    """Serves the index page at ``/`` and an attachment at ``/download``."""

    def do_GET(self):
        """Answer GET requests; any path other than the two known ones gets a 404."""
        if self.path == '/':
            self._reply(_INDEX_HTML.encode(), [('Content-Type', 'text/html')])
            return
        if self.path == '/download':
            self._reply(
                DOWNLOAD_BODY,
                [
                    ('Content-Type', 'application/octet-stream'),
                    ('Content-Disposition', 'attachment; filename="hello.txt"'),
                ],
            )
            return
        self.send_response(404)
        self.end_headers()

    def _reply(self, payload, headers):
        """Send a 200 response with ``headers``, a Content-Length and ``payload``."""
        self.send_response(200)
        for name, value in headers:
            self.send_header(name, value)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, *args):
        """Silence the default per-request logging."""
        return None
59+
60+
61+
@pytest_asyncio.fixture
async def served_tab(ci_firefox_options):
    """Yield ``(tab, base_url)`` for a Firefox tab opened on a local test server.

    A ``ThreadingHTTPServer`` running ``_DownloadHandler`` is started on a
    background thread, Firefox navigates to its index page, and the server is
    shut down once the test is done.
    """
    # Bind to port 0 so the OS assigns the port to this very socket. Probing
    # for a free port first and binding later leaves a window in which another
    # process can take the port.
    server = ThreadingHTTPServer(('127.0.0.1', 0), _DownloadHandler)
    port = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    base = f'http://127.0.0.1:{port}'
    try:
        async with Firefox(options=ci_firefox_options) as browser:
            tab = await browser.start()
            await tab.go_to(f'{base}/')
            yield tab, base
    finally:
        server.shutdown()
        server.server_close()
        thread.join(timeout=5)
77+
78+
79+
@pytest.mark.asyncio
async def test_download_to_temp_dir_is_readable_inside_context(served_tab):
    """With no ``keep_file_at``, the file can be read while the block is open."""
    tab = served_tab[0]
    async with tab.expect_download(timeout=30) as handle:
        anchor = await tab.find(id='download-link')
        await anchor.click()
        payload = await handle.read_bytes()
        assert payload == DOWNLOAD_BODY
        assert handle.file_path is not None
88+
89+
90+
@pytest.mark.asyncio
async def test_download_base64_matches_bytes(served_tab):
    """``read_base64`` decodes back to the bytes the server sent."""
    tab = served_tab[0]
    async with tab.expect_download(timeout=30) as handle:
        anchor = await tab.find(id='download-link')
        await anchor.click()
        encoded = await handle.read_base64()
        assert base64.b64decode(encoded) == DOWNLOAD_BODY
98+
99+
100+
@pytest.mark.asyncio
async def test_keep_file_at_persists_after_context(served_tab, tmp_path):
    """With ``keep_file_at``, the file is still on disk after the block exits."""
    tab = served_tab[0]
    target = tmp_path / 'downloads'
    async with tab.expect_download(keep_file_at=target, timeout=30) as handle:
        anchor = await tab.find(id='download-link')
        await anchor.click()
        await handle.wait_finished()

    persisted = target / 'hello.txt'
    assert persisted.exists()
    assert persisted.read_bytes() == DOWNLOAD_BODY

0 commit comments

Comments
 (0)