Skip to content

Commit a5be276

Browse files
committed
test: de-flake HAR in-flight/pending tests via deterministic units
Two HAR tests raced the recorder's own event processing. Each left the record() context the instant a *separate* listener saw loadingFinished/requestWillBeSent, but the recorder's handler for that same event might not have run yet, so stop() removed the callbacks and flushed the request as a status-0 pending entry (observed on Windows CI: the in-flight-body test's lookup for a status=200 entry returned None). Reframe the large-body test to poll the live recording until the entry is finalized (an observable outcome, no race), and move the stop-time guarantees — awaiting in-flight body tasks and flushing still-pending requests as status 0 — to deterministic unit tests (tests/unit/test_har_recorder.py) that drive HarRecorder with synthetic events. Remove the now-unused event-waiter helpers.
1 parent 59860be commit a5be276

2 files changed

Lines changed: 86 additions & 108 deletions

File tree

tests/integration/test_har_recording_integration.py

Lines changed: 12 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@
1616

1717
from pydoll.browser.chromium import Chrome
1818
from pydoll.browser.requests.har_recorder import HarCapture
19-
from pydoll.protocol.network.events import NetworkEvent
2019
from pydoll.protocol.network.types import ResourceType
2120

22-
from _waits import wait_for_element_text, wait_until
21+
from _waits import wait_until
2322

2423

2524
def _find_free_port():
@@ -546,81 +545,6 @@ async def _coro(value):
546545
return value
547546

548547

549-
class _RequestSentWaiter:
550-
"""Arm a listener for a ``Network.requestWillBeSent`` event before navigating.
551-
552-
In-flight requests never appear in ``performance.getEntriesByType('resource')``
553-
(those entries materialize only after the resource finishes), and the in-page
554-
``fetch`` fires at parse time, so the listener must be registered *before*
555-
navigation to avoid missing the event.
556-
"""
557-
558-
def __init__(self, tab, needle):
559-
self._tab = tab
560-
self._needle = needle
561-
self._sent = asyncio.Event()
562-
self._callback_id = None
563-
564-
async def arm(self):
565-
def _on_sent(event):
566-
if self._needle in event['params']['request']['url']:
567-
self._sent.set()
568-
569-
self._callback_id = await self._tab.on(NetworkEvent.REQUEST_WILL_BE_SENT, _on_sent)
570-
571-
async def wait(self, timeout=10):
572-
try:
573-
await wait_until(
574-
lambda: _coro(self._sent.is_set()),
575-
timeout=timeout,
576-
message=f'request {self._needle!r} not dispatched',
577-
)
578-
finally:
579-
if self._callback_id is not None:
580-
await self._tab.remove_callback(self._callback_id)
581-
582-
583-
class _LoadingFinishedWaiter:
584-
"""Arm a listener for the ``Network.loadingFinished`` of a specific request.
585-
586-
Used to leave the recorder's context manager the instant a body becomes
587-
fetchable, so its asynchronous ``getResponseBody`` task is still in flight
588-
when ``stop()`` awaits the pending body tasks.
589-
"""
590-
591-
def __init__(self, tab, needle):
592-
self._tab = tab
593-
self._needle = needle
594-
self._request_ids = set()
595-
self._finished = asyncio.Event()
596-
self._sent_cb = None
597-
self._finished_cb = None
598-
599-
async def arm(self):
600-
def _on_sent(event):
601-
if self._needle in event['params']['request']['url']:
602-
self._request_ids.add(event['params']['requestId'])
603-
604-
def _on_finished(event):
605-
if event['params']['requestId'] in self._request_ids:
606-
self._finished.set()
607-
608-
self._sent_cb = await self._tab.on(NetworkEvent.REQUEST_WILL_BE_SENT, _on_sent)
609-
self._finished_cb = await self._tab.on(NetworkEvent.LOADING_FINISHED, _on_finished)
610-
611-
async def wait(self, timeout=10):
612-
try:
613-
await wait_until(
614-
lambda: _coro(self._finished.is_set()),
615-
timeout=timeout,
616-
message=f'loadingFinished for {self._needle!r} not observed',
617-
)
618-
finally:
619-
for callback_id in (self._sent_cb, self._finished_cb):
620-
if callback_id is not None:
621-
await self._tab.remove_callback(callback_id)
622-
623-
624548
class TestHarRedirectIntegration:
625549
"""Recording captures redirect entries and the final destination."""
626550

@@ -707,24 +631,6 @@ async def test_record_304_has_zero_body_size(self, ci_chrome_options, api_server
707631
class TestHarPendingAndFailedIntegration:
708632
"""Recording finalizes in-flight and failed requests at stop time."""
709633

710-
@pytest.mark.asyncio
711-
async def test_record_flushes_pending_request(self, ci_chrome_options, api_server):
712-
"""A request still in flight at stop becomes an entry with status 0."""
713-
async with Chrome(options=ci_chrome_options) as browser:
714-
tab = await browser.start()
715-
716-
async with tab.request.record() as recording:
717-
slow_waiter = _RequestSentWaiter(tab, '/slow')
718-
await slow_waiter.arm()
719-
await tab.go_to(f'{api_server}/pending-page')
720-
status_el = await tab.find(id='status', timeout=5)
721-
await wait_for_element_text(status_el, 'started')
722-
await slow_waiter.wait()
723-
724-
slow_entry = _origin_entry(recording, '/slow', status=0)
725-
assert slow_entry is not None
726-
assert slow_entry['response']['statusText'] == '(pending)'
727-
728634
@pytest.mark.asyncio
729635
async def test_record_captures_failed_request(self, ci_chrome_options, api_server):
730636
"""A request that fails (blocked port) is recorded with status 0."""
@@ -741,30 +647,28 @@ async def test_record_captures_failed_request(self, ci_chrome_options, api_serve
741647
assert failed_entry['response']['status'] == 0
742648

743649
@pytest.mark.asyncio
744-
async def test_record_awaits_in_flight_body_fetch_on_stop(
745-
self, ci_chrome_options, api_server
746-
):
747-
"""A body fetch still in flight at stop is awaited so its entry survives.
650+
async def test_record_captures_large_response_body(self, ci_chrome_options, api_server):
651+
"""A large response body is fetched asynchronously and captured in the HAR.
748652
749-
Leaving the context the instant loadingFinished fires guarantees the
750-
asynchronous getResponseBody task has not completed (it needs a CDP
751-
round-trip), so stop() must await the pending body task. The entry only
752-
appears in the capture because _finalize_entry ran to completion during
753-
that await; otherwise it would be lost.
653+
The body arrives via an async getResponseBody round-trip after
654+
loadingFinished, so the entry only appears once that task completes. Poll
655+
the live recording until it does — racing context exit against the
656+
internal finalize is inherently flaky across environments.
754657
"""
755658
async with Chrome(options=ci_chrome_options) as browser:
756659
tab = await browser.start()
757660

758661
async with tab.request.record() as recording:
759-
body_waiter = _LoadingFinishedWaiter(tab, '/large')
760-
await body_waiter.arm()
761662
await tab.go_to(f'{api_server}/large-page')
762-
await body_waiter.wait()
663+
await wait_until(
664+
lambda: _coro(_origin_entry(recording, '/large', status=200) is not None),
665+
message='/large entry not finalized in HAR',
666+
)
763667

764668
large_entry = _origin_entry(recording, '/large', status=200)
765669
assert large_entry is not None
766670
assert large_entry['response']['status'] == 200
767-
assert 'content' in large_entry['response']
671+
assert large_entry['response']['content']['size'] > 0
768672

769673

770674
class TestHarToDictIntegration:

tests/unit/test_har_recorder.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""Unit tests for HarRecorder.stop() lifecycle (deterministic, no browser).
2+
3+
stop() must await in-flight getResponseBody tasks before flushing still-pending
4+
requests as status-0 entries, remove its CDP callbacks, and disable network
5+
events it turned on. A fake tab and synthetic events exercise this instead of
6+
racing a real browser's internal finalize against context exit.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
import asyncio
12+
13+
import pytest
14+
15+
from pydoll.browser.requests.har_recorder import HarRecorder
16+
17+
18+
class _FakeTab:
19+
network_events_enabled = True
20+
21+
def __init__(self):
22+
self.removed: list[int] = []
23+
self.network_disabled = False
24+
25+
async def remove_callback(self, callback_id):
26+
self.removed.append(callback_id)
27+
28+
async def disable_network_events(self):
29+
self.network_disabled = True
30+
31+
32+
@pytest.mark.asyncio
33+
async def test_stop_awaits_in_flight_body_tasks_and_cleans_up():
34+
fake_tab = _FakeTab()
35+
recorder = HarRecorder(tab=fake_tab)
36+
recorder._network_was_enabled = True
37+
recorder._callback_ids = [1, 2, 3]
38+
39+
completed: list[bool] = []
40+
41+
async def slow_body_fetch():
42+
await asyncio.sleep(0.02)
43+
completed.append(True)
44+
45+
recorder._body_tasks = [asyncio.ensure_future(slow_body_fetch())]
46+
47+
await recorder.stop()
48+
49+
assert completed == [True]
50+
assert recorder._body_tasks == []
51+
assert fake_tab.removed == [1, 2, 3]
52+
assert fake_tab.network_disabled is True
53+
54+
55+
@pytest.mark.asyncio
56+
async def test_stop_flushes_in_flight_request_as_pending_entry():
57+
recorder = HarRecorder(tab=_FakeTab())
58+
recorder._on_request_will_be_sent({
59+
'params': {
60+
'requestId': 'r1',
61+
'request': {'url': 'http://x/slow', 'method': 'GET', 'headers': {}},
62+
'type': 'Fetch',
63+
'wallTime': 1000.0,
64+
'timestamp': 1.0,
65+
}
66+
})
67+
68+
await recorder.stop()
69+
70+
assert len(recorder._entries) == 1
71+
entry = recorder._entries[0]
72+
assert entry['request']['url'] == 'http://x/slow'
73+
assert entry['response']['status'] == 0
74+
assert entry['response']['statusText'] == '(pending)'

0 commit comments

Comments
 (0)