Skip to content

Commit b1987a0

Browse files
committed
2 parents 3501c26 + cb0a15c commit b1987a0

File tree

2 files changed

+37
-17
lines changed

2 files changed

+37
-17
lines changed

itunes_app_scraper/scraper.py

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -25,16 +25,17 @@ class AppStoreScraper:
2525
can be found at https://github.com/facundoolano/app-store-scraper.
2626
"""
2727

28-
def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
28+
def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl", timeout=None):
2929
"""
3030
Retrieve suggested app IDs for search query
3131
3232
:param str term: Search query
33-
:param int num: Amount of items to return per page, default 50
34-
:param int page: Amount of pages to return
33+
:param int|None num: Amount of items to return per page, default 50. If None, the result list is not truncated.
34+
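:param int|None page: Number of pages of results to return (the first num * page IDs are returned), default 1. If None, the result list is not truncated.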
3535
:param str country: Two-letter country code of store to search in,
3636
default 'nl'
3737
:param str lang: Language code to search with, default 'nl'
38+
:param int|None timeout: Seconds to wait for a response before giving up. Defaults to None (no timeout).
3839
3940
:return list: List of App IDs returned for search query
4041
"""
@@ -44,7 +45,10 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
4445
url = "https://search.itunes.apple.com/WebObjects/MZStore.woa/wa/search?clientApplication=Software&media=software&term="
4546
url += quote_plus(term)
4647

47-
amount = int(num) * int(page)
48+
if num is None or page is None:
49+
amount = None
50+
else:
51+
amount = int(num) * int(page)
4852

4953
country = self.get_store_id_for_country(country)
5054
headers = {
@@ -53,7 +57,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
5357
}
5458

5559
try:
56-
result = requests.get(url, headers=headers).json()
60+
result = requests.get(url, headers=headers, timeout=timeout).json()
5761
except ConnectionError as ce:
5862
raise AppStoreException("Cannot connect to store: {0}".format(str(ce)))
5963
except json.JSONDecodeError:
@@ -64,7 +68,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"):
6468

6569
return [app["id"] for app in result["bubbles"][0]["results"][:amount]]
6670

67-
def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang=""):
71+
def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang="", timeout=None):
6872
"""
6973
Retrieve app IDs in given App Store collection
7074
@@ -78,6 +82,7 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country
7882
:param str country: Two-letter country code for the store to search in.
7983
Defaults to 'nl'.
8084
:param str lang: Dummy argument for compatibility. Unused.
85+
:param int|None timeout: Seconds to wait for a response before giving up. Defaults to None (no timeout).
8186
8287
:return: List of App IDs in collection.
8388
"""
@@ -89,27 +94,28 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country
8994
url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params
9095

9196
try:
92-
result = requests.get(url).json()
97+
result = requests.get(url, timeout=timeout).json()
9398
except json.JSONDecodeError:
9499
raise AppStoreException("Could not parse app store response")
95100

96101
return [entry["id"]["attributes"]["im:id"] for entry in result["feed"]["entry"]]
97102

98-
def get_app_ids_for_developer(self, developer_id, country="nl", lang=""):
103+
def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout=None):
99104
"""
100105
Retrieve App IDs linked to given developer
101106
102107
:param int developer_id: Developer ID
103108
:param str country: Two-letter country code for the store to search in.
104109
Defaults to 'nl'.
105110
:param str lang: Dummy argument for compatibility. Unused.
111+
:param int|None timeout: Seconds to wait for a response before giving up. Defaults to None (no timeout).
106112
107113
:return list: List of App IDs linked to developer
108114
"""
109115
url = "https://itunes.apple.com/lookup?id=%s&country=%s&entity=software" % (developer_id, country)
110116

111117
try:
112-
result = requests.get(url).json()
118+
result = requests.get(url, timeout=timeout).json()
113119
except json.JSONDecodeError:
114120
raise AppStoreException("Could not parse app store response")
115121

@@ -119,7 +125,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang=""):
119125
# probably an invalid developer ID
120126
return []
121127

122-
def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
128+
def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=None):
123129
"""
124130
Retrieve list of App IDs of apps similar to given app
125131
@@ -131,6 +137,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
131137
:param str country: Two-letter country code for the store to search in.
132138
Defaults to 'nl'.
133139
:param str lang: Language code to search with, default 'nl'
140+
:param int|None timeout: Seconds to wait for a response before giving up. Defaults to None (no timeout).
134141
135142
:return list: List of similar app IDs
136143
"""
@@ -142,7 +149,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
142149
"Accept-Language": lang
143150
}
144151

145-
result = requests.get(url, headers=headers).text
152+
result = requests.get(url, headers=headers, timeout=timeout).text
146153
if "customersAlsoBoughtApps" not in result:
147154
return []
148155

@@ -157,7 +164,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"):
157164

158165
return ids
159166

160-
def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False):
167+
def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False, timeout=None):
161168
"""
162169
Get app details for given app ID
163170
@@ -176,6 +183,7 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
176183
short time. Defaults to None.
177184
:param bool force: bypasses the server-side caching by adding a timestamp
178185
to the request (default is False)
186+
:param int|None timeout: Seconds to wait for a response before giving up. Defaults to None (no timeout).
179187
180188
:return dict: App details, as returned by the app store. The result is
181189
not processed any further, unless `flatten` is True
@@ -197,13 +205,13 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat
197205
try:
198206
if sleep is not None:
199207
time.sleep(sleep)
200-
result = requests.get(url).json()
208+
result = requests.get(url, timeout=timeout).json()
201209
except Exception:
202210
try:
203211
# handle the retry here.
204212
# Take an extra sleep as back off and then retry the URL once.
205213
time.sleep(2)
206-
result = requests.get(url).json()
214+
result = requests.get(url, timeout=timeout).json()
207215
except Exception:
208216
raise AppStoreException("Could not parse app store response for ID %s" % app_id)
209217

@@ -272,7 +280,7 @@ def get_store_id_for_country(self, country):
272280
else:
273281
raise AppStoreException("Country code not found for {0}".format(country))
274282

275-
def get_app_ratings(self, app_id, countries=None, sleep=1):
283+
def get_app_ratings(self, app_id, countries=None, sleep=1, timeout=None):
276284
"""
277285
Get app ratings for given app ID
278286
@@ -284,6 +292,7 @@ def get_app_ratings(self, app_id, countries=None, sleep=1):
284292
:param int sleep: Seconds to sleep before request to prevent being
285293
temporarily blocked if there are many requests in a
286294
short time. Defaults to 1.
295+
:param int|None timeout: Seconds to wait for a response before giving up. Defaults to None (no timeout).
287296
288297
:return dict: App ratings, as scraped from the app store.
289298
"""
@@ -303,13 +312,13 @@ def get_app_ratings(self, app_id, countries=None, sleep=1):
303312
try:
304313
if sleep is not None:
305314
time.sleep(sleep)
306-
result = requests.get(url, headers=headers).text
315+
result = requests.get(url, headers=headers, timeout=timeout).text
307316
except Exception:
308317
try:
309318
# handle the retry here.
310319
# Take an extra sleep as back off and then retry the URL once.
311320
time.sleep(2)
312-
result = requests.get(url, headers=headers).text
321+
result = requests.get(url, headers=headers, timeout=timeout).text
313322
except Exception:
314323
raise AppStoreException("Could not parse app store rating response for ID %s" % app_id)
315324

scraper_test.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,3 +48,14 @@ def test_country_code_does_not_exist():
4848
scraper = AppStoreScraper()
4949
with pytest.raises(AppStoreException, match="Country code not found for XZ"):
5050
scraper.get_store_id_for_country('xz')
51+
52+
def test_query_multiple_pages():
53+
query = "game"
54+
scraper = AppStoreScraper()
55+
results = set()
56+
for page in range(1,4):
57+
page_results = scraper.get_app_ids_for_query(query, country="us", lang="en", page=page)
58+
if page_results:
59+
[results.add(x) for x in page_results]
60+
assert len(results) > (page-1)*50
61+
print(f"Total results for query {query}: {len(results)}")

0 commit comments

Comments
 (0)