Skip to content

fix: Adapt to CrawlerMonitor constructor change #1106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 4 additions & 12 deletions deploy/docker/c4ai-code-context.md
Original file line number Diff line number Diff line change
Expand Up @@ -8898,9 +8898,7 @@ async def memory_adaptive(urls, browser_config, run_config):
dispatcher = MemoryAdaptiveDispatcher(
memory_threshold_percent=70.0,
max_session_permit=10,
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand All @@ -8919,9 +8917,7 @@ async def memory_adaptive_with_rate_limit(urls, browser_config, run_config):
rate_limiter=RateLimiter(
base_delay=(1.0, 2.0), max_delay=30.0, max_retries=2
),
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand All @@ -8936,9 +8932,7 @@ async def semaphore(urls, browser_config, run_config):
async with AsyncWebCrawler(config=browser_config) as crawler:
dispatcher = SemaphoreDispatcher(
semaphore_count=5,
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand All @@ -8956,9 +8950,7 @@ async def semaphore_with_rate_limit(urls, browser_config, run_config):
rate_limiter=RateLimiter(
base_delay=(1.0, 2.0), max_delay=30.0, max_retries=2
),
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand Down
23 changes: 4 additions & 19 deletions deploy/docker/c4ai-doc-context.md
Original file line number Diff line number Diff line change
Expand Up @@ -6653,13 +6653,7 @@ The CrawlerMonitor provides real-time visibility into crawling operations:

```python
from crawl4ai import CrawlerMonitor, DisplayMode
monitor = CrawlerMonitor(
# Maximum rows in live display
max_visible_rows=15,

# DETAILED or AGGREGATED view
display_mode=DisplayMode.DETAILED
)
monitor = CrawlerMonitor()
```

**Display Modes**:
Expand Down Expand Up @@ -6687,10 +6681,7 @@ dispatcher = MemoryAdaptiveDispatcher(
max_delay=30.0,
max_retries=2
),
monitor=CrawlerMonitor( # Optional monitoring
max_visible_rows=15,
display_mode=DisplayMode.DETAILED
)
monitor=CrawlerMonitor() # Optional monitoring
)
```

Expand Down Expand Up @@ -6729,10 +6720,7 @@ dispatcher = SemaphoreDispatcher(
base_delay=(0.5, 1.0),
max_delay=10.0
),
monitor=CrawlerMonitor( # Optional monitoring
max_visible_rows=15,
display_mode=DisplayMode.DETAILED
)
monitor=CrawlerMonitor() # Optional monitoring
)
```

Expand Down Expand Up @@ -6848,10 +6836,7 @@ async def crawl_with_semaphore(urls):
base_delay=(0.5, 1.0),
max_delay=10.0
),
monitor=CrawlerMonitor(
max_visible_rows=15,
display_mode=DisplayMode.DETAILED
)
monitor=CrawlerMonitor()
)

async with AsyncWebCrawler(config=browser_config) as crawler:
Expand Down
16 changes: 4 additions & 12 deletions docs/examples/dispatcher_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ async def memory_adaptive(urls, browser_config, run_config):
dispatcher = MemoryAdaptiveDispatcher(
memory_threshold_percent=70.0,
max_session_permit=10,
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand All @@ -44,9 +42,7 @@ async def memory_adaptive_with_rate_limit(urls, browser_config, run_config):
rate_limiter=RateLimiter(
base_delay=(1.0, 2.0), max_delay=30.0, max_retries=2
),
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand All @@ -61,9 +57,7 @@ async def semaphore(urls, browser_config, run_config):
async with AsyncWebCrawler(config=browser_config) as crawler:
dispatcher = SemaphoreDispatcher(
semaphore_count=5,
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand All @@ -81,9 +75,7 @@ async def semaphore_with_rate_limit(urls, browser_config, run_config):
rate_limiter=RateLimiter(
base_delay=(1.0, 2.0), max_delay=30.0, max_retries=2
),
monitor=CrawlerMonitor(
max_visible_rows=15, display_mode=DisplayMode.DETAILED
),
monitor=CrawlerMonitor(),
)
results = await crawler.arun_many(
urls, config=run_config, dispatcher=dispatcher
Expand Down
5 changes: 1 addition & 4 deletions docs/examples/proxy_rotation_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,7 @@ async def demo_proxy_rotation_batch():

print("\n📈 Initializing crawler with proxy rotation...")
async with AsyncWebCrawler(config=browser_config) as crawler:
monitor = CrawlerMonitor(
max_visible_rows=10,
display_mode=DisplayMode.DETAILED
)
monitor = CrawlerMonitor()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Unused variable detected: `monitor`.

The `monitor` variable is created but never used in this function. Either pass it to the `MemoryAdaptiveDispatcher` by uncommenting the `# monitor=monitor` line (line 112), or remove the assignment entirely.

Either:

-            monitor = CrawlerMonitor()

Or:

             monitor = CrawlerMonitor()
             
             dispatcher = MemoryAdaptiveDispatcher(
                 memory_threshold_percent=80.0,
                 check_interval=0.5,
                 max_session_permit=1, #len(proxies),  # Match concurrent sessions to proxy count
-                # monitor=monitor
+                monitor=monitor
             )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
monitor = CrawlerMonitor()
dispatcher = MemoryAdaptiveDispatcher(
memory_threshold_percent=80.0,
check_interval=0.5,
max_session_permit=1, # Match concurrent sessions to proxy count
# monitor=monitor
)
Suggested change
monitor = CrawlerMonitor()
monitor = CrawlerMonitor()
dispatcher = MemoryAdaptiveDispatcher(
memory_threshold_percent=80.0,
check_interval=0.5,
max_session_permit=1, # Match concurrent sessions to proxy count
monitor=monitor
)
🧰 Tools
🪛 Ruff (0.8.2)

106-106: Local variable monitor is assigned to but never used

Remove assignment to unused variable monitor

(F841)


dispatcher = MemoryAdaptiveDispatcher(
memory_threshold_percent=80.0,
Expand Down
23 changes: 4 additions & 19 deletions docs/md_v2/advanced/multi-url-crawling.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,7 @@ The CrawlerMonitor provides real-time visibility into crawling operations:

```python
from crawl4ai import CrawlerMonitor, DisplayMode
monitor = CrawlerMonitor(
# Maximum rows in live display
max_visible_rows=15,

# DETAILED or AGGREGATED view
display_mode=DisplayMode.DETAILED
)
monitor = CrawlerMonitor()
```

**Display Modes**:
Expand Down Expand Up @@ -154,10 +148,7 @@ dispatcher = MemoryAdaptiveDispatcher(
max_delay=30.0,
max_retries=2
),
monitor=CrawlerMonitor( # Optional monitoring
max_visible_rows=15,
display_mode=DisplayMode.DETAILED
)
monitor=CrawlerMonitor() # Optional monitoring
)
```

Expand Down Expand Up @@ -196,10 +187,7 @@ dispatcher = SemaphoreDispatcher(
base_delay=(0.5, 1.0),
max_delay=10.0
),
monitor=CrawlerMonitor( # Optional monitoring
max_visible_rows=15,
display_mode=DisplayMode.DETAILED
)
monitor=CrawlerMonitor() # Optional monitoring
)
```

Expand Down Expand Up @@ -315,10 +303,7 @@ async def crawl_with_semaphore(urls):
base_delay=(0.5, 1.0),
max_delay=10.0
),
monitor=CrawlerMonitor(
max_visible_rows=15,
display_mode=DisplayMode.DETAILED
)
monitor=CrawlerMonitor()
)

async with AsyncWebCrawler(config=browser_config) as crawler:
Expand Down
4 changes: 1 addition & 3 deletions tests/async/test_dispatchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,7 @@ async def test_rate_limit_backoff(self, browser_config, run_config):

async def test_monitor_integration(self, browser_config, run_config, test_urls):
async with AsyncWebCrawler(config=browser_config) as crawler:
monitor = CrawlerMonitor(
max_visible_rows=5, display_mode=DisplayMode.DETAILED
)
monitor = CrawlerMonitor()
dispatcher = MemoryAdaptiveDispatcher(max_session_permit=2, monitor=monitor)
results = await crawler.arun_many(
test_urls, config=run_config, dispatcher=dispatcher
Expand Down
2 changes: 0 additions & 2 deletions tests/memory/test_dispatcher_stress.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,6 @@ async def run_memory_stress_test(
# Create monitor with reference to test results
monitor = StressTestMonitor(
test_results=test_results,
display_mode=DisplayMode.DETAILED,
max_visible_rows=20,
total_urls=url_count # Pass total URLs count
)

Expand Down