Skip to content

Commit df7e5f6

Browse files
committed
fix: tighten Instagram URL detection and propagate Actor statusMessage
1 parent 6a0d2ea commit df7e5f6

4 files changed

Lines changed: 94 additions & 7 deletions

File tree

src/strands_apify/social_media.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,20 @@
4444
VALID_LINKEDIN_SCRAPER_MODES = ("Short", "Full")
4545

4646

47+
def _looks_like_instagram_url(value: str) -> bool:
48+
"""Return True if ``value`` is a well-formed http(s) URL whose host is instagram.com.
49+
50+
Used by ``apify_instagram_scraper`` to decide whether a ``search_query`` should
51+
be treated as a direct URL or as a plain search term. Stricter than a substring
52+
check — e.g. it rejects ``"why-instagram.com-matters"`` and ``"http"``.
53+
"""
54+
parsed = urlparse(value)
55+
if parsed.scheme not in ("http", "https"):
56+
return False
57+
host = (parsed.hostname or "").lower()
58+
return host == "instagram.com" or host.endswith(".instagram.com")
59+
60+
4761
def _extract_linkedin_username(profile_url: str) -> str:
4862
"""Extract a LinkedIn username from a profile URL, or return the value as-is if already a username."""
4963
parsed = urlparse(profile_url)
@@ -136,7 +150,7 @@ def apify_instagram_scraper(
136150

137151
if urls:
138152
run_input["directUrls"] = urls
139-
elif search_query and ("instagram.com" in search_query or search_query.startswith("http")):
153+
elif search_query and _looks_like_instagram_url(search_query):
140154
run_input["directUrls"] = [search_query]
141155
else:
142156
run_input["search"] = search_query

src/strands_apify/utils.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,21 @@ def _success_result(text: str, panel_body: str, panel_title: str) -> dict[str, A
106106

107107

108108
def _check_run_status(actor_run: dict[str, Any], label: str) -> None:
109-
"""Raise RuntimeError if the Actor run did not succeed."""
109+
"""Raise RuntimeError if the Actor run did not succeed.
110+
111+
Includes the Apify-provided ``statusMessage`` in the error when present so
112+
callers can see why a run failed without having to look up the run in the
113+
Apify Console.
114+
"""
110115
status = actor_run.get("status", "UNKNOWN")
111-
if status != "SUCCEEDED":
112-
run_id = actor_run.get("id", "N/A")
113-
raise RuntimeError(f"{label} finished with status {status}. Run ID: {run_id}")
116+
if status == "SUCCEEDED":
117+
return
118+
run_id = actor_run.get("id", "N/A")
119+
status_msg = actor_run.get("statusMessage")
120+
parts = [f"{label} finished with status {status}", f"Run ID: {run_id}"]
121+
if status_msg:
122+
parts.append(f"Message: {status_msg}")
123+
raise RuntimeError(". ".join(parts))
114124

115125

116126
def _validate_url(url: str) -> None:

tests/test_core.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,14 +171,33 @@ def test_run_actor_with_memory(mock_apify_env, mock_apify_client):
171171

172172

173173
def test_run_actor_failure(mock_apify_env, mock_apify_client):
    """A failed Actor run produces an error result that carries Apify's statusMessage."""
    actor_client = mock_apify_client.actor.return_value
    actor_client.call.return_value = MOCK_FAILED_RUN

    with patch("strands_apify.utils.ApifyClient", return_value=mock_apify_client):
        outcome = apify_run_actor(actor_id="actor/my-scraper")

    assert outcome["status"] == "error"
    error_text = outcome["content"][0]["text"]
    assert "FAILED" in error_text
    # The statusMessage from MOCK_FAILED_RUN has to reach the user-facing error.
    assert "Actor failed with an error" in error_text
185+
186+
187+
def test_run_actor_failure_without_status_message(mock_apify_env, mock_apify_client):
    """The failure error is still produced when the Apify run carries no statusMessage."""
    # Copy the failed-run fixture, leaving out only the statusMessage entry.
    failed_run = {key: val for key, val in MOCK_FAILED_RUN.items() if key != "statusMessage"}
    mock_apify_client.actor.return_value.call.return_value = failed_run

    with patch("strands_apify.utils.ApifyClient", return_value=mock_apify_client):
        outcome = apify_run_actor(actor_id="actor/my-scraper")

    assert outcome["status"] == "error"
    error_text = outcome["content"][0]["text"]
    for expected in ("FAILED", "Run ID"):
        assert expected in error_text
    assert "Message:" not in error_text
182201

183202

184203
def test_run_actor_timeout(mock_apify_env, mock_apify_client):

tests/test_social_media.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from strands_apify.social_media import (
88
_extract_linkedin_username,
9+
_looks_like_instagram_url,
910
apify_facebook_posts_scraper,
1011
apify_instagram_scraper,
1112
apify_linkedin_profile_detail,
@@ -161,6 +162,49 @@ def test_instagram_scraper_url_in_search_query(mock_apify_env, mock_apify_client
161162
assert "search" not in run_input
162163

163164

165+
@pytest.mark.parametrize(
    "value",
    (
        "https://www.instagram.com/apify/",
        "https://instagram.com/apify",
        "http://instagram.com/explore/tags/cooking/",
        "https://www.instagram.com/p/AbCdEfG/",
    ),
)
def test_looks_like_instagram_url_true(value):
    """Genuine Instagram URLs are accepted, both with and without a subdomain."""
    verdict = _looks_like_instagram_url(value)
    assert verdict is True
177+
178+
179+
@pytest.mark.parametrize(
    "value",
    (
        "apify",  # plain handle
        "#cooking",  # hashtag
        "why-instagram.com-matters",  # substring trap the old heuristic fell into
        "http",  # bare scheme prefix the old heuristic accepted
        "https://example.com/instagram.com",  # not actually hosted on instagram.com
        "ftp://instagram.com/apify",  # wrong scheme
        "javascript:alert(1)",
        "",
    ),
)
def test_looks_like_instagram_url_false(value):
    """Plain queries, lookalike strings, and non-http(s) schemes are all rejected."""
    verdict = _looks_like_instagram_url(value)
    assert verdict is False
195+
196+
197+
def test_instagram_scraper_search_query_with_instagram_substring(mock_apify_env, mock_apify_client):
    """A query that only contains the text 'instagram.com' is sent as a search, not as a URL."""
    query = "why-instagram.com-matters"

    with patch("strands_apify.utils.ApifyClient", return_value=mock_apify_client):
        outcome = apify_instagram_scraper(search_query=query)

    assert outcome["status"] == "success"
    call_kwargs = mock_apify_client.actor.return_value.call.call_args.kwargs
    run_input = call_kwargs["run_input"]
    assert run_input["search"] == query
    assert "directUrls" not in run_input
206+
207+
164208
def test_instagram_scraper_missing_params(mock_apify_env):
165209
"""Instagram scraper returns error when neither search_query nor urls provided."""
166210
result = apify_instagram_scraper()

0 commit comments

Comments
 (0)