Skip to content

Commit cb0a15c

Browse files
authored
Add timeout option (#7)
* Add optional timeout option to all requests
* Add description of timeout option
1 parent 4c19676 commit cb0a15c

File tree

1 file changed

+20
-14
lines changed

1 file changed

+20
-14
lines changed

itunes_app_scraper/scraper.py

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ class AppStoreScraper:
2525
can be found at https://github.com/facundoolano/app-store-scraper.
2626
"""
2727

28-
def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
28+
def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl", timeout=None):
2929
"""
3030
Retrieve suggested app IDs for search query
3131
@@ -35,6 +35,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
3535
:param str country: Two-letter country code of store to search in,
3636
default 'nl'
3737
:param str lang: Language code to search with, default 'nl'
38+
:param int timeout: Seconds to wait for response before stopping.
3839
3940
:return list: List of App IDs returned for search query
4041
"""
@@ -56,7 +57,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
5657
}
5758

5859
try:
59-
result = requests.get(url, headers=headers).json()
60+
result = requests.get(url, headers=headers, timeout=timeout).json()
6061
except ConnectionError as ce:
6162
raise AppStoreException("Cannot connect to store: {0}".format(str(ce)))
6263
except json.JSONDecodeError:
@@ -67,7 +68,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
6768

6869
return [app["id"] for app in result["bubbles"][0]["results"][:amount]]
6970

70-
def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang=""):
71+
def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang="", timeout=None):
7172
"""
7273
Retrieve app IDs in given App Store collection
7374
@@ -81,6 +82,7 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country
8182
:param str country: Two-letter country code for the store to search in.
8283
Defaults to 'nl'.
8384
:param str lang: Dummy argument for compatibility. Unused.
85+
:param int timeout: Seconds to wait for response before stopping.
8486
8587
:return: List of App IDs in collection.
8688
"""
@@ -92,27 +94,28 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country
9294
url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params
9395

9496
try:
95-
result = requests.get(url).json()
97+
result = requests.get(url, timeout=timeout).json()
9698
except json.JSONDecodeError:
9799
raise AppStoreException("Could not parse app store response")
98100

99101
return [entry["id"]["attributes"]["im:id"] for entry in result["feed"]["entry"]]
100102

101-
def get_app_ids_for_developer(self, developer_id, country="nl", lang=""):
103+
def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout=None):
102104
"""
103105
Retrieve App IDs linked to given developer
104106
105107
:param int developer_id: Developer ID
106108
:param str country: Two-letter country code for the store to search in.
107109
Defaults to 'nl'.
108110
:param str lang: Dummy argument for compatibility. Unused.
111+
:param int timeout: Seconds to wait for response before stopping.
109112
110113
:return list: List of App IDs linked to developer
111114
"""
112115
url = "https://itunes.apple.com/lookup?id=%s&country=%s&entity=software" % (developer_id, country)
113116

114117
try:
115-
result = requests.get(url).json()
118+
result = requests.get(url, timeout=timeout).json()
116119
except json.JSONDecodeError:
117120
raise AppStoreException("Could not parse app store response")
118121

@@ -122,7 +125,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang=""):
122125
# probably an invalid developer ID
123126
return []
124127

125-
def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
128+
def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=None):
126129
"""
127130
Retrieve list of App IDs of apps similar to given app
128131
@@ -134,6 +137,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
134137
:param str country: Two-letter country code for the store to search in.
135138
Defaults to 'nl'.
136139
:param str lang: Language code to search with, default 'nl'
140+
:param int timeout: Seconds to wait for response before stopping.
137141
138142
:return list: List of similar app IDs
139143
"""
@@ -145,7 +149,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
145149
"Accept-Language": lang
146150
}
147151

148-
result = requests.get(url, headers=headers).text
152+
result = requests.get(url, headers=headers, timeout=timeout).text
149153
if "customersAlsoBoughtApps" not in result:
150154
return []
151155

@@ -160,7 +164,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
160164

161165
return ids
162166

163-
def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False):
167+
def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False, timeout=None):
164168
"""
165169
Get app details for given app ID
166170
@@ -179,6 +183,7 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
179183
short time. Defaults to None.
180184
:param bool force: by-passes the server side caching by adding a timestamp
181185
to the request (default is False)
186+
:param int timeout: Seconds to wait for response before stopping.
182187
183188
:return dict: App details, as returned by the app store. The result is
184189
not processed any further, unless `flatten` is True
@@ -200,13 +205,13 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
200205
try:
201206
if sleep is not None:
202207
time.sleep(sleep)
203-
result = requests.get(url).json()
208+
result = requests.get(url, timeout=timeout).json()
204209
except Exception:
205210
try:
206211
# handle the retry here.
207212
# Take an extra sleep as back off and then retry the URL once.
208213
time.sleep(2)
209-
result = requests.get(url).json()
214+
result = requests.get(url, timeout=timeout).json()
210215
except Exception:
211216
raise AppStoreException("Could not parse app store response for ID %s" % app_id)
212217

@@ -275,7 +280,7 @@ def get_store_id_for_country(self, country):
275280
else:
276281
raise AppStoreException("Country code not found for {0}".format(country))
277282

278-
def get_app_ratings(self, app_id, countries=None, sleep=1):
283+
def get_app_ratings(self, app_id, countries=None, sleep=1, timeout=None):
279284
"""
280285
Get app ratings for given app ID
281286
@@ -287,6 +292,7 @@ def get_app_ratings(self, app_id, countries=None, sleep=1):
287292
:param int sleep: Seconds to sleep before request to prevent being
288293
temporary blocked if there are many requests in a
289294
short time. Defaults to 1.
295+
:param int timeout: Seconds to wait for response before stopping.
290296
291297
:return dict: App ratings, as scraped from the app store.
292298
"""
@@ -306,13 +312,13 @@ def get_app_ratings(self, app_id, countries=None, sleep=1):
306312
try:
307313
if sleep is not None:
308314
time.sleep(sleep)
309-
result = requests.get(url, headers=headers).text
315+
result = requests.get(url, headers=headers, timeout=timeout).text
310316
except Exception:
311317
try:
312318
# handle the retry here.
313319
# Take an extra sleep as back off and then retry the URL once.
314320
time.sleep(2)
315-
result = requests.get(url, headers=headers).text
321+
result = requests.get(url, headers=headers, timeout=timeout).text
316322
except Exception:
317323
raise AppStoreException("Could not parse app store rating response for ID %s" % app_id)
318324

0 commit comments

Comments
 (0)