Skip to content
Closed
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
63bf48d
Add optional timeout option to all requests
ddxv Sep 25, 2022
189b954
Add description of timeout option
ddxv Sep 25, 2022
f3be2bf
Merge pull request #1 from ddxv/add-timeout-option
ddxv Sep 25, 2022
093a722
Since API already returns all apps for a developer, add function to r…
ddxv Oct 14, 2022
ee444c5
Merge pull request #2 from ddxv/add-get-all-developers-info
ddxv Oct 14, 2022
e9f2bad
Update to new charts url
ddxv Oct 13, 2023
7fb488c
Revert to tabs for consistency with old library
ddxv Oct 13, 2023
7302896
Seems the collections need a new name to use charts API
ddxv Oct 13, 2023
12e4f43
Now API only returns IDs
ddxv Oct 13, 2023
98d18cc
Requires two letter country code, upper or lower
ddxv Oct 13, 2023
83bdab1
Requires two letter country code, upper or lower
ddxv Oct 13, 2023
d34293a
Merge pull request #3 from ddxv/new-charts-url
ddxv Oct 13, 2023
0d26c76
Merge branch 'digitalmethodsinitiative:master' into master
ddxv Oct 1, 2024
a0ed96e
Allow setting limit to none to return all search results returned by …
ddxv Oct 1, 2024
3acd660
Merge branch 'master' into unlimit-search-result
ddxv Oct 1, 2024
f4fb76b
Merge pull request #4 from ddxv/unlimit-search-result
ddxv Oct 1, 2024
7c22892
fix tabs/spaces
ddxv Oct 1, 2024
b3888fe
Merge branch 'digitalmethodsinitiative:master' into master
ddxv Dec 13, 2024
1fe6c87
Merge branch 'digitalmethodsinitiative:master' into master
ddxv Aug 18, 2025
36a5530
Merge upstream
ddxv Sep 9, 2025
7ede4a2
Apps are inside of results key
ddxv Sep 9, 2025
c491036
Timeouts still happening, seems due to .json() directly on the reques…
ddxv Oct 21, 2025
ed86afb
Merge pull request #5 from ddxv/fix-developer-results
ddxv Oct 21, 2025
d9d2159
If the user passes an int timeout, send it as (x, x) to catch dropped connections
ddxv Nov 3, 2025
25ab285
Merge branch 'master' of ssh://github.com/ddxv/itunes-app-scraper int…
ddxv Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions itunes_app_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl", t
}

try:
result = requests.get(url, headers=headers, timeout=timeout).json()
with requests.get(url, headers=headers, timeout=(10, 30) if timeout is None else (timeout,timeout), stream=True) as r:
r.raise_for_status()
content = r.content
result = json.loads(content)
except ConnectionError as ce:
raise AppStoreException("Cannot connect to store: {0}".format(str(ce)))
except json.JSONDecodeError:
Expand Down Expand Up @@ -95,7 +98,10 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country


try:
result = requests.get(url, timeout=timeout).json()
with requests.get(url, timeout=(10, 30) if timeout is None else (timeout,timeout), stream=True) as r:
r.raise_for_status()
content = r.content
result = json.loads(content)
except json.JSONDecodeError:
raise AppStoreException("Could not parse app store response")

Expand All @@ -117,7 +123,10 @@ def get_apps_for_developer(self, developer_id, country="nl", lang="", timeout=No
url = "https://itunes.apple.com/lookup?id=%s&country=%s&entity=software" % (developer_id, country)

try:
result = requests.get(url, timeout=timeout).json()
with requests.get(url, timeout=(10, 30) if timeout is None else (timeout,timeout), stream=True) as r:
r.raise_for_status()
content = r.content
result = json.loads(content)
except json.JSONDecodeError:
raise AppStoreException("Could not parse app store response")

Expand All @@ -141,7 +150,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout
"""
apps = self.get_apps_for_developer(developer_id, country=country, lang=lang, timeout=timeout)
if len(apps) > 0:
app_ids =[app["trackId"] for app in apps if app["wrapperType"] == "software"]
app_ids =[app["trackId"] for app in apps["results"] if app["wrapperType"] == "software"]
else:
return []
return app_ids
Expand Down Expand Up @@ -171,7 +180,9 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=N
"Accept-Language": lang
}

result = requests.get(url, headers=headers, timeout=timeout).text
with requests.get(url, headers=headers, timeout=(10, 30) if timeout is None else (timeout,timeout), stream=True) as r:
r.raise_for_status()
result = r.text
if "customersAlsoBoughtApps" not in result:
return []

Expand Down Expand Up @@ -227,13 +238,19 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
try:
if sleep is not None:
time.sleep(sleep)
result = requests.get(url, timeout=timeout).json()
with requests.get(url, timeout=(10, 30) if timeout is None else (timeout, timeout), stream=True) as r:
r.raise_for_status()
content = r.content
result = json.loads(content)
except Exception:
try:
# handle the retry here.
# Take an extra sleep as back off and then retry the URL once.
time.sleep(2)
result = requests.get(url, timeout=timeout).json()
with requests.get(url, timeout=(10, 30) if timeout is None else (timeout,timeout), stream=True) as r:
r.raise_for_status()
content = r.content
result = json.loads(content)
except Exception:
raise AppStoreException("Could not parse app store response for ID %s" % app_id)

Expand Down Expand Up @@ -334,13 +351,17 @@ def get_app_ratings(self, app_id, countries=None, sleep=1, timeout=None):
try:
if sleep is not None:
time.sleep(sleep)
result = requests.get(url, headers=headers, timeout=timeout).text
with requests.get(url, headers=headers, timeout=(10, 30) if timeout is None else (timeout,timeout), stream=True) as r:
r.raise_for_status()
result = r.text
except Exception:
try:
# handle the retry here.
# Take an extra sleep as back off and then retry the URL once.
time.sleep(2)
result = requests.get(url, headers=headers, timeout=timeout).text
with requests.get(url, headers=headers, timeout=(10, 30) if timeout is None else timeout, stream=True) as r:
r.raise_for_status()
result = r.text
except Exception:
raise AppStoreException("Could not parse app store rating response for ID %s" % app_id)

Expand Down
Loading