Skip to content

Commit 4dd3ee6

Browse files
Refactor Instagram scrapers to get rid of the awkward mode parameter
Cf. #328
1 parent: 0336ce1 · commit: 4dd3ee6

File tree

1 file changed

+36 -36 lines changed

1 file changed

+36 -36 lines changed

snscrape/modules/instagram.py

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -45,39 +45,9 @@ def __str__(self):
4545

4646

4747
class InstagramCommonScraper(snscrape.base.Scraper):
48-
def __init__(self, mode, name, **kwargs):
48+
def __init__(self, **kwargs):
4949
super().__init__(**kwargs)
50-
if mode not in ('User', 'Hashtag', 'Location'):
51-
raise ValueError('Invalid mode')
52-
self._mode = mode
53-
self._name = name
54-
5550
self._headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
56-
57-
if self._mode == 'User':
58-
self._initialUrl = f'https://www.instagram.com/{self._name}/'
59-
self._pageName = 'ProfilePage'
60-
self._responseContainer = 'user'
61-
self._edgeXToMedia = 'edge_owner_to_timeline_media'
62-
self._pageIDKey = 'id'
63-
self._queryHash = 'f2405b236d85e8296cf30347c9f08c2a'
64-
self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
65-
elif self._mode == 'Hashtag':
66-
self._initialUrl = f'https://www.instagram.com/explore/tags/{self._name}/'
67-
self._pageName = 'TagPage'
68-
self._responseContainer = 'hashtag'
69-
self._edgeXToMedia = 'edge_hashtag_to_media'
70-
self._pageIDKey = 'name'
71-
self._queryHash = 'f92f56d47dc7a55b606908374b43a314'
72-
self._variablesFormat = '{{"tag_name":"{pageID}","first":50,"after":"{endCursor}"}}'
73-
elif self._mode == 'Location':
74-
self._initialUrl = f'https://www.instagram.com/explore/locations/{self._name}/'
75-
self._pageName = 'LocationsPage'
76-
self._responseContainer = 'location'
77-
self._edgeXToMedia = 'edge_location_to_media'
78-
self._pageIDKey = 'id'
79-
self._queryHash = '1b84447a4d8b6d6d0426fefb34514485'
80-
self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
8151
self._initialPage = None
8252

8353
def _response_to_items(self, response):
@@ -133,12 +103,12 @@ def _check_json_callback(self, r):
133103
def get_items(self):
134104
r = self._initial_page()
135105
if r.status_code == 404:
136-
logger.warning(f'{self._mode} does not exist')
106+
logger.warning(f'Page does not exist')
137107
return
138108
response = r._snscrape_json_obj
139109
rhxGis = response['rhx_gis'] if 'rhx_gis' in response else ''
140110
if response['entry_data'][self._pageName][0]['graphql'][self._responseContainer][self._edgeXToMedia]['count'] == 0:
141-
logger.info(f'{self._mode} has no posts')
111+
logger.info(f'Page has no posts')
142112
return
143113
if not response['entry_data'][self._pageName][0]['graphql'][self._responseContainer][self._edgeXToMedia]['edges']:
144114
logger.warning('Private account')
@@ -172,13 +142,23 @@ def get_items(self):
172142
class InstagramUserScraper(InstagramCommonScraper):
173143
name = 'instagram-user'
174144

145+
def __init__(self, username, **kwargs):
146+
super().__init__(**kwargs)
147+
self._initialUrl = f'https://www.instagram.com/{username}/'
148+
self._pageName = 'ProfilePage'
149+
self._responseContainer = 'user'
150+
self._edgeXToMedia = 'edge_owner_to_timeline_media'
151+
self._pageIDKey = 'id'
152+
self._queryHash = 'f2405b236d85e8296cf30347c9f08c2a'
153+
self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
154+
175155
@classmethod
176156
def setup_parser(cls, subparser):
177157
subparser.add_argument('username', type = snscrape.base.nonempty_string('username'), help = 'An Instagram username (no leading @)')
178158

179159
@classmethod
180160
def from_args(cls, args):
181-
return cls._construct(args, 'User', args.username)
161+
return cls._construct(args, args.username)
182162

183163
def _get_entity(self):
184164
r = self._initial_page()
@@ -217,22 +197,42 @@ def parse_num(s):
217197
class InstagramHashtagScraper(InstagramCommonScraper):
218198
name = 'instagram-hashtag'
219199

200+
def __init__(self, hashtag, **kwargs):
201+
super().__init__(**kwargs)
202+
self._initialUrl = f'https://www.instagram.com/explore/tags/{hashtag}/'
203+
self._pageName = 'TagPage'
204+
self._responseContainer = 'hashtag'
205+
self._edgeXToMedia = 'edge_hashtag_to_media'
206+
self._pageIDKey = 'name'
207+
self._queryHash = 'f92f56d47dc7a55b606908374b43a314'
208+
self._variablesFormat = '{{"tag_name":"{pageID}","first":50,"after":"{endCursor}"}}'
209+
220210
@classmethod
221211
def setup_parser(cls, subparser):
222212
subparser.add_argument('hashtag', type = snscrape.base.nonempty_string('hashtag'), help = 'An Instagram hashtag (no leading #)')
223213

224214
@classmethod
225215
def from_args(cls, args):
226-
return cls._construct(args, 'Hashtag', args.hashtag)
216+
return cls._construct(args, args.hashtag)
227217

228218

229219
class InstagramLocationScraper(InstagramCommonScraper):
230220
name = 'instagram-location'
231221

222+
def __init__(self, locationId, **kwargs):
223+
super().__init__(**kwargs)
224+
self._initialUrl = f'https://www.instagram.com/explore/locations/{locationId}/'
225+
self._pageName = 'LocationsPage'
226+
self._responseContainer = 'location'
227+
self._edgeXToMedia = 'edge_location_to_media'
228+
self._pageIDKey = 'id'
229+
self._queryHash = '1b84447a4d8b6d6d0426fefb34514485'
230+
self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
231+
232232
@classmethod
233233
def setup_parser(cls, subparser):
234234
subparser.add_argument('locationid', help = 'An Instagram location ID', type = int)
235235

236236
@classmethod
237237
def from_args(cls, args):
238-
return cls._construct(args, 'Location', args.locationid)
238+
return cls._construct(args, args.locationid)

0 commit comments

Comments
 (0)