Skip to content

Commit 45c7e65

Browse files
committed
Resolve upstream conflicts
2 parents 3803ee7 + d12ed3a commit 45c7e65

9 files changed

Lines changed: 74 additions & 153 deletions

File tree

.github/workflows/renovate.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ jobs:
1717
sudo apt-get install chromium-browser
1818
- name: Install uv
1919
uses: astral-sh/setup-uv@v5
20+
- name: Install Deno
21+
uses: denoland/setup-deno@v2
22+
with:
23+
deno-version: v2.x
2024
- name: Test new yt-dlp
2125
run: |
2226
set -euo pipefail

.github/workflows/setup/action.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ runs:
2828
sudo /etc/init.d/rethinkdb restart
2929
shell: bash
3030

31+
- name: Install Deno
32+
uses: denoland/setup-deno@v2
33+
with:
34+
deno-version: v2.x
35+
3136
- name: Install pip dependencies
3237
run: |
3338
uv sync --python ${{ inputs.python-version }} --extra rethinkdb --extra warcprox --extra yt-dlp

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
runs-on: ubuntu-latest
1717
strategy:
1818
matrix:
19-
version: ['3.9', '3.12', '3.14']
19+
version: ['3.10', '3.12', '3.14']
2020
steps:
2121
- uses: actions/checkout@v4
2222

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,8 @@ check-format:
5959
format:
6060
$(VIRTUAL_ENV_DIR)/bin/ruff check $(OUTPUT_FLAGS) --select I --fix .
6161
$(VIRTUAL_ENV_DIR)/bin/ruff format .
62+
63+
.PHONY: test
64+
test:
65+
uv sync --all-extras
66+
uv run py.test tests

brozzler/browser.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,10 @@ def __init__(self, **kwargs):
382382
self.is_browsing = False
383383
self._command_id = Counter()
384384
self._wait_interval = 0.5
385+
self._max_screenshot_width = kwargs.get("max_screenshot_width", 2000)
386+
self._max_screenshot_height = kwargs.get("max_screenshot_height", 20000)
385387
self.session_id = None
388+
386389
# Set default configuration in case the caller doesn't use
387390
# configure_browser or browse_page
388391
self.configure_browser()
@@ -886,8 +889,12 @@ def screenshot(self, full_page=False, timeout=45):
886889
lambda: self.websock_thread.received_result(msg_id), timeout=timeout
887890
)
888891
message = self.websock_thread.pop_result(msg_id)
889-
width = message["result"]["contentSize"]["width"]
890-
height = message["result"]["contentSize"]["height"]
892+
width = min(
893+
message["result"]["contentSize"]["width"], self._max_screenshot_width
894+
)
895+
height = min(
896+
message["result"]["contentSize"]["height"], self._max_screenshot_height
897+
)
891898
clip = dict(x=0, y=0, width=width, height=height, scale=1)
892899
deviceScaleFactor = 1
893900
screenOrientation = {"angle": 0, "type": "portraitPrimary"}

brozzler/frontier.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,6 @@ def filter_claimable_site_ids(
6565
if is_claimable:
6666
claimable_sites.append(site)
6767

68-
if len(claimable_sites) >= max_sites_to_claim:
69-
break
70-
7168
site_ids_to_claim = []
7269
# gather sites that are under the max without going over
7370
for site in claimable_sites:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "brozzler"
3-
version = "1.8.0"
3+
version = "1.8.1"
44
authors = [
55
{ name="Noah Levitt", email="nlevitt@archive.org" },
66
]
@@ -13,7 +13,7 @@ maintainers = [
1313
]
1414
description = "Distributed web crawling with browsers"
1515
readme = "README.rst"
16-
requires-python = ">=3.9"
16+
requires-python = ">=3.10"
1717
classifiers = [
1818
"Development Status :: 5 - Production/Stable",
1919
"Programming Language :: Python :: 3",

tests/test_frontier.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,45 @@ def test_max_claimed_sites_cross_job(rethinker):
10521052
rr.table("sites").delete().run()
10531053

10541054

1055+
def test_many_active_claimed_sites_cross_job(rethinker):
1056+
rr = rethinker
1057+
frontier = brozzler.RethinkDbFrontier(rr)
1058+
1059+
# clean slate
1060+
rr.table("jobs").delete().run()
1061+
rr.table("sites").delete().run()
1062+
1063+
job_conf_1 = {
1064+
"id": 1,
1065+
"seeds": [{"url": f"http://example.com/{i}"} for i in range(0, 2000)],
1066+
"max_claimed_sites": 3,
1067+
}
1068+
job_conf_2 = {
1069+
"id": 2,
1070+
"seeds": [
1071+
{"url": "http://example.com/1"},
1072+
{"url": "http://example.com/2"},
1073+
{"url": "http://example.com/3"},
1074+
{"url": "http://example.com/4"},
1075+
{"url": "http://example.com/5"},
1076+
],
1077+
"max_claimed_sites": 5,
1078+
}
1079+
1080+
brozzler.new_job(frontier, job_conf_1)
1081+
1082+
# Claim all possible sites from job 1. We should only get 3 due to max_claimed_sites
1083+
claimed_sites_1 = frontier.claim_sites(4)
1084+
assert len(claimed_sites_1) == 3
1085+
1086+
# Add 5 more seeds
1087+
brozzler.new_job(frontier, job_conf_2)
1088+
1089+
# We shouldn't have trouble getting seeds from job 2
1090+
claimed_sites_1 = frontier.claim_sites(5)
1091+
assert len(claimed_sites_1) == 5
1092+
1093+
10551094
# Works locally, but reliably fails in CI.
10561095
@pytest.mark.xfail
10571096
def test_max_claimed_sites_load_perf(rethinker):

uv.lock

Lines changed: 9 additions & 145 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)