|
6 | 6 |
|
7 | 7 | from strands_apify.social_media import ( |
8 | 8 | _extract_linkedin_username, |
| 9 | + _looks_like_instagram_url, |
9 | 10 | apify_facebook_posts_scraper, |
10 | 11 | apify_instagram_scraper, |
11 | 12 | apify_linkedin_profile_detail, |
@@ -161,6 +162,49 @@ def test_instagram_scraper_url_in_search_query(mock_apify_env, mock_apify_client |
161 | 162 | assert "search" not in run_input |
162 | 163 |
|
163 | 164 |
|
| 165 | +@pytest.mark.parametrize( |
| 166 | + "value", |
| 167 | + [ |
| 168 | + "https://www.instagram.com/apify/", |
| 169 | + "https://instagram.com/apify", |
| 170 | + "http://instagram.com/explore/tags/cooking/", |
| 171 | + "https://www.instagram.com/p/AbCdEfG/", |
| 172 | + ], |
| 173 | +) |
| 174 | +def test_looks_like_instagram_url_true(value): |
| 175 | + """Helper returns True for http(s) URLs hosted on instagram.com, with or without the www subdomain.""" |
| 176 | + assert _looks_like_instagram_url(value) is True |
| 177 | + |
| 178 | + |
| 179 | +@pytest.mark.parametrize( |
| 180 | + "value", |
| 181 | + [ |
| 182 | + "apify", # plain handle |
| 183 | + "#cooking", # hashtag |
| 184 | + "why-instagram.com-matters", # substring trap the old heuristic fell into |
| 185 | + "http", # bare scheme prefix the old heuristic accepted |
| 186 | + "https://example.com/instagram.com", # not actually hosted on instagram.com |
| 187 | + "ftp://instagram.com/apify", # wrong scheme |
| 188 | + "javascript:alert(1)", # javascript: scheme, not http(s) |
| 189 | + "", # empty string |
| 190 | + ], |
| 191 | +) |
| 192 | +def test_looks_like_instagram_url_false(value): |
| 193 | + """Helper returns False for plain queries, lookalike strings, non-http(s) schemes, and the empty string.""" |
| 194 | + assert _looks_like_instagram_url(value) is False |
| 195 | + |
| 196 | + |
| 197 | +def test_instagram_scraper_search_query_with_instagram_substring(mock_apify_env, mock_apify_client): |
| 198 | + """A search_query that merely contains 'instagram.com' is sent in run_input as `search`, with no `directUrls` key.""" |
| 199 | + with patch("strands_apify.utils.ApifyClient", return_value=mock_apify_client): |
| 200 | + result = apify_instagram_scraper(search_query="why-instagram.com-matters") |
| 201 | + |
| 202 | + assert result["status"] == "success" |
| 203 | + run_input = mock_apify_client.actor.return_value.call.call_args.kwargs["run_input"] |
| 204 | + assert run_input["search"] == "why-instagram.com-matters" |
| 205 | + assert "directUrls" not in run_input |
| 206 | + |
| 207 | + |
164 | 208 | def test_instagram_scraper_missing_params(mock_apify_env): |
165 | 209 | """Instagram scraper returns error when neither search_query nor urls provided.""" |
166 | 210 | result = apify_instagram_scraper() |
|
0 commit comments