Skip to content

Commit 593dda7

Browse files
committed
Merge branch 'feat/strands-core-apify-tools' into feat/strands-search-crawling-actor-tools
2 parents 04e8fd3 + b1a792c commit 593dda7

3 files changed

Lines changed: 87 additions & 13 deletions

File tree

README.md

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -99,11 +99,7 @@ Below is a comprehensive table of all available tools, how to use them with an a
9999
| Tool | Agent Usage | Use Case |
100100
|------|-------------|----------|
101101
| a2a_client | `provider = A2AClientToolProvider(known_agent_urls=["http://localhost:9000"]); agent = Agent(tools=provider.tools)` | Discover and communicate with A2A-compliant agents, send messages between agents |
102-
| apify_run_actor | `agent.tool.apify_run_actor(actor_id="apify/website-content-crawler", run_input={"startUrls": [{"url": "https://example.com"}]})` | Run any Apify Actor by ID with arbitrary input |
103-
| apify_get_dataset_items | `agent.tool.apify_get_dataset_items(dataset_id="abc123", limit=50)` | Fetch items from an Apify dataset |
104-
| apify_run_actor_and_get_dataset | `agent.tool.apify_run_actor_and_get_dataset(actor_id="apify/website-content-crawler", run_input={"startUrls": [{"url": "https://example.com"}]})` | Run an Actor and fetch its dataset results in one step |
105-
| apify_run_task | `agent.tool.apify_run_task(task_id="user/my-task")` | Run a saved Apify task by ID with optional input overrides |
106-
| apify_run_task_and_get_dataset | `agent.tool.apify_run_task_and_get_dataset(task_id="user/my-task", dataset_items_limit=50)` | Run a task and fetch its dataset results in one step |
102+
| apify_run_actor | `agent.tool.apify_run_actor(actor_id="apify/website-content-crawler", run_input={"startUrls": [{"url": "https://example.com"}]})` | Run any Apify Actor with arbitrary input |
107103
| apify_scrape_url | `agent.tool.apify_scrape_url(url="https://example.com")` | Scrape a URL and return its content as markdown |
108104
| apify_google_search_scraper | `agent.tool.apify_google_search_scraper(search_query="best AI frameworks")` | Search Google and return structured results |
109105
| apify_google_places_scraper | `agent.tool.apify_google_places_scraper(search_query="restaurants in Prague")` | Search Google Maps for businesses and places |

src/strands_tools/apify.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@
9090
import json
9191
import logging
9292
import os
93-
from typing import Any, Dict, List, Optional
93+
from typing import Any, Dict, List, Literal, Optional, get_args
9494
from urllib.parse import urlparse
9595

9696
from rich.panel import Panel
@@ -110,13 +110,16 @@
110110
except ImportError:
111111
HAS_APIFY_CLIENT = False
112112

113-
WEBSITE_CONTENT_CRAWLER = "apify/website-content-crawler"
113+
# Attribution header - lets Apify track usage originating from strands-agents (analytics only)
114114
TRACKING_HEADER = {"x-apify-integration-platform": "strands-agents"}
115115
ERROR_PANEL_TITLE = "[bold red]Apify Error[/bold red]"
116116
DEFAULT_TIMEOUT_SECS = 300
117117
DEFAULT_SCRAPE_TIMEOUT_SECS = 120
118118
DEFAULT_DATASET_ITEMS_LIMIT = 100
119-
VALID_CRAWLER_TYPES = ("playwright:adaptive", "playwright:firefox", "cheerio")
119+
120+
WEBSITE_CONTENT_CRAWLER = "apify/website-content-crawler"
121+
CrawlerType = Literal["playwright:adaptive", "playwright:firefox", "cheerio"]
122+
WEBSITE_CONTENT_CRAWLER_TYPES = get_args(CrawlerType)
120123

121124

122125
# --- Helper functions ---
@@ -149,6 +152,8 @@ def _format_error(e: Exception) -> str:
149152
"Rate limit exceeded. The Apify client retries automatically; "
150153
"if this persists, reduce request frequency."
151154
)
155+
case None:
156+
return f"Apify API error: {msg}"
152157
case _:
153158
return f"Apify API error ({status_code}): {msg}"
154159
return str(e)
@@ -230,7 +235,7 @@ def run_actor(
230235
self._validate_positive(memory_mbytes, "memory_mbytes")
231236

232237
call_kwargs: Dict[str, Any] = {
233-
"run_input": run_input or {},
238+
"run_input": run_input if run_input is not None else {},
234239
"timeout_secs": timeout_secs,
235240
"logger": None, # Suppress verbose apify-client logging not useful to end users
236241
}
@@ -354,14 +359,14 @@ def scrape_url(
354359
self,
355360
url: str,
356361
timeout_secs: int = DEFAULT_SCRAPE_TIMEOUT_SECS,
357-
crawler_type: str = "cheerio",
362+
crawler_type: CrawlerType = "cheerio",
358363
) -> str:
359364
"""Scrape a single URL using Website Content Crawler and return markdown."""
360365
self._validate_url(url)
361366
self._validate_positive(timeout_secs, "timeout_secs")
362-
if crawler_type not in VALID_CRAWLER_TYPES:
367+
if crawler_type not in WEBSITE_CONTENT_CRAWLER_TYPES:
363368
raise ValueError(
364-
f"Invalid crawler_type '{crawler_type}'. Must be one of: {', '.join(VALID_CRAWLER_TYPES)}."
369+
f"Invalid crawler_type '{crawler_type}'. Must be one of: {', '.join(WEBSITE_CONTENT_CRAWLER_TYPES)}."
365370
)
366371

367372
run_input: Dict[str, Any] = {
@@ -374,9 +379,13 @@ def scrape_url(
374379
timeout_secs=timeout_secs,
375380
logger=None, # Suppress verbose apify-client logging not useful to end users
376381
)
382+
if actor_run is None:
383+
raise RuntimeError("Website Content Crawler returned no run data (possible wait timeout).")
377384
self._check_run_status(actor_run, "Website Content Crawler")
378385

379386
dataset_id = actor_run.get("defaultDatasetId")
387+
if not dataset_id:
388+
raise RuntimeError("Website Content Crawler run has no default dataset.")
380389
result = self.client.dataset(dataset_id).list_items(limit=1)
381390
items = list(result.items)
382391

@@ -640,7 +649,7 @@ def apify_run_task_and_get_dataset(
640649
def apify_scrape_url(
641650
url: str,
642651
timeout_secs: int = DEFAULT_SCRAPE_TIMEOUT_SECS,
643-
crawler_type: str = "cheerio",
652+
crawler_type: CrawlerType = "cheerio",
644653
) -> Dict[str, Any]:
645654
"""Scrape a single URL and return its content as markdown.
646655

tests/test_apify.py

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,17 @@ def test_run_actor_default_input(mock_apify_env, mock_apify_client):
155155
assert call_kwargs["run_input"] == {}
156156

157157

158+
def test_run_actor_explicit_empty_input(mock_apify_env, mock_apify_client):
159+
"""Actor run passes through an explicitly empty dict instead of treating it as falsy."""
160+
empty_input: dict = {}
161+
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
162+
result = apify_run_actor(actor_id="actor/my-scraper", run_input=empty_input)
163+
164+
assert result["status"] == "success"
165+
call_kwargs = mock_apify_client.actor.return_value.call.call_args.kwargs
166+
assert call_kwargs["run_input"] is empty_input
167+
168+
158169
def test_run_actor_with_memory(mock_apify_env, mock_apify_client):
159170
"""Actor run passes memory_mbytes when provided."""
160171
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
@@ -197,6 +208,17 @@ def test_run_actor_api_exception(mock_apify_env, mock_apify_client):
197208
assert "Connection failed" in result["content"][0]["text"]
198209

199210

211+
def test_run_actor_none_response(mock_apify_env, mock_apify_client):
212+
"""Actor run returns error dict when ActorClient.call() returns None."""
213+
mock_apify_client.actor.return_value.call.return_value = None
214+
215+
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
216+
result = apify_run_actor(actor_id="actor/my-scraper")
217+
218+
assert result["status"] == "error"
219+
assert "no run data" in result["content"][0]["text"]
220+
221+
200222
def test_run_actor_apify_api_error_401(mock_apify_env, mock_apify_client):
201223
"""Actor run returns friendly message for 401 authentication errors."""
202224
error = _make_apify_api_error(401, "Unauthorized")
@@ -280,6 +302,18 @@ def test_run_actor_and_get_dataset_success(mock_apify_env, mock_apify_client):
280302
assert data["items"][0]["title"] == "Widget A"
281303

282304

305+
def test_run_actor_and_get_dataset_no_dataset_id(mock_apify_env, mock_apify_client):
306+
"""Combined tool returns error when the Actor run has no default dataset."""
307+
run_no_dataset = {**MOCK_ACTOR_RUN, "defaultDatasetId": None}
308+
mock_apify_client.actor.return_value.call.return_value = run_no_dataset
309+
310+
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
311+
result = apify_run_actor_and_get_dataset(actor_id="actor/my-scraper")
312+
313+
assert result["status"] == "error"
314+
assert "no default dataset" in result["content"][0]["text"]
315+
316+
283317
def test_run_actor_and_get_dataset_actor_failure(mock_apify_env, mock_apify_client):
284318
"""Combined tool returns error dict when the Actor fails."""
285319
mock_apify_client.actor.return_value.call.return_value = MOCK_FAILED_RUN
@@ -379,6 +413,18 @@ def test_run_task_and_get_dataset_success(mock_apify_env, mock_apify_client):
379413
assert data["items"][0]["title"] == "Widget A"
380414

381415

416+
def test_run_task_and_get_dataset_no_dataset_id(mock_apify_env, mock_apify_client):
417+
"""Combined task tool returns error when the task run has no default dataset."""
418+
run_no_dataset = {**MOCK_ACTOR_RUN, "defaultDatasetId": None}
419+
mock_apify_client.task.return_value.call.return_value = run_no_dataset
420+
421+
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
422+
result = apify_run_task_and_get_dataset(task_id="user~my-task")
423+
424+
assert result["status"] == "error"
425+
assert "no default dataset" in result["content"][0]["text"]
426+
427+
382428
def test_run_task_and_get_dataset_task_failure(mock_apify_env, mock_apify_client):
383429
"""Combined task tool returns error dict when the task fails."""
384430
mock_apify_client.task.return_value.call.return_value = MOCK_FAILED_RUN
@@ -407,6 +453,29 @@ def test_scrape_url_success(mock_apify_env, mock_apify_client):
407453
mock_apify_client.actor.assert_called_once_with("apify/website-content-crawler")
408454

409455

456+
def test_scrape_url_none_response(mock_apify_env, mock_apify_client):
457+
"""Scrape URL returns error dict when ActorClient.call() returns None."""
458+
mock_apify_client.actor.return_value.call.return_value = None
459+
460+
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
461+
result = apify_scrape_url(url="https://example.com")
462+
463+
assert result["status"] == "error"
464+
assert "no run data" in result["content"][0]["text"]
465+
466+
467+
def test_scrape_url_no_dataset_id(mock_apify_env, mock_apify_client):
468+
"""Scrape URL returns error when the crawler run has no default dataset."""
469+
run_no_dataset = {**MOCK_ACTOR_RUN, "defaultDatasetId": None}
470+
mock_apify_client.actor.return_value.call.return_value = run_no_dataset
471+
472+
with patch("strands_tools.apify.ApifyClient", return_value=mock_apify_client):
473+
result = apify_scrape_url(url="https://example.com")
474+
475+
assert result["status"] == "error"
476+
assert "no default dataset" in result["content"][0]["text"]
477+
478+
410479
def test_scrape_url_no_content(mock_apify_env, mock_apify_client):
411480
"""Scrape URL returns error dict when no content is returned."""
412481
mock_list_result = MagicMock()

0 commit comments

Comments
 (0)