@@ -45,39 +45,9 @@ def __str__(self):
4545
4646
4747class InstagramCommonScraper (snscrape .base .Scraper ):
def __init__(self, **kwargs):
    '''Set up the state shared by all Instagram scrapers.

    Every keyword argument is forwarded unchanged to the parent
    initialiser. The page-specific attributes read by get_items
    (_pageName, _responseContainer, _edgeXToMedia, ...) are not assigned
    here.
    '''
    super().__init__(**kwargs)

    # Fixed desktop browser identification stored for use as a request header.
    browserUserAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    self._headers = {'User-Agent': browserUserAgent}

    # Nothing is stored for the initial page at construction time.
    # NOTE(review): presumably filled in later by _initial_page() -- confirm.
    self._initialPage = None
8252
8353 def _response_to_items (self , response ):
@@ -133,12 +103,12 @@ def _check_json_callback(self, r):
133103 def get_items (self ):
134104 r = self ._initial_page ()
135105 if r .status_code == 404 :
136- logger .warning (f'{ self . _mode } does not exist' )
106+ logger .warning (f'Page does not exist' )
137107 return
138108 response = r ._snscrape_json_obj
139109 rhxGis = response ['rhx_gis' ] if 'rhx_gis' in response else ''
140110 if response ['entry_data' ][self ._pageName ][0 ]['graphql' ][self ._responseContainer ][self ._edgeXToMedia ]['count' ] == 0 :
141- logger .info (f'{ self . _mode } has no posts' )
111+ logger .info (f'Page has no posts' )
142112 return
143113 if not response ['entry_data' ][self ._pageName ][0 ]['graphql' ][self ._responseContainer ][self ._edgeXToMedia ]['edges' ]:
144114 logger .warning ('Private account' )
@@ -172,13 +142,23 @@ def get_items(self):
172142class InstagramUserScraper (InstagramCommonScraper ):
173143 name = 'instagram-user'
174144
def __init__(self, username, **kwargs):
    '''Configure the scraper for one user's profile page.

    username: value interpolated into the profile URL.
    **kwargs: forwarded unchanged to the parent initialiser.
    '''
    super().__init__(**kwargs)

    # The closing slash must follow the name directly; any whitespace
    # between them would become part of the requested URL.
    self._initialUrl = f'https://www.instagram.com/{username}/'

    # get_items walks the initial page's JSON as
    # entry_data -> _pageName -> [0] -> graphql -> _responseContainer -> _edgeXToMedia.
    self._pageName = 'ProfilePage'
    self._responseContainer = 'user'
    self._edgeXToMedia = 'edge_owner_to_timeline_media'

    # NOTE(review): the three values below are presumably consumed when
    # requesting further pages (their use is outside this view) -- confirm.
    self._pageIDKey = 'id'
    self._queryHash = 'f2405b236d85e8296cf30347c9f08c2a'
    # Template with {pageID} and {endCursor} placeholders; the doubled
    # braces are literal braces once str.format is applied.
    self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
154+
@classmethod
def setup_parser(cls, subparser):
    '''Register the positional ``username`` argument on subparser.'''
    usernameType = snscrape.base.nonempty_string('username')
    subparser.add_argument(
        'username',
        type=usernameType,
        help='An Instagram username (no leading @)',
    )
178158
@classmethod
def from_args(cls, args):
    '''Create a scraper from parsed command-line arguments.

    Delegates to cls._construct, handing it args plus the parsed
    ``username`` value as the single extra positional argument.
    '''
    username = args.username
    return cls._construct(args, username)
182162
183163 def _get_entity (self ):
184164 r = self ._initial_page ()
@@ -217,22 +197,42 @@ def parse_num(s):
class InstagramHashtagScraper(InstagramCommonScraper):
    '''Scraper configured for the page of a single hashtag.'''

    name = 'instagram-hashtag'

    def __init__(self, hashtag, **kwargs):
        '''Configure the scraper for one hashtag page.

        hashtag: value interpolated into the tag URL.
        **kwargs: forwarded unchanged to the parent initialiser.
        '''
        super().__init__(**kwargs)

        # The closing slash must follow the tag directly; any whitespace
        # between them would become part of the requested URL.
        self._initialUrl = f'https://www.instagram.com/explore/tags/{hashtag}/'

        # get_items walks the initial page's JSON as
        # entry_data -> _pageName -> [0] -> graphql -> _responseContainer -> _edgeXToMedia.
        self._pageName = 'TagPage'
        self._responseContainer = 'hashtag'
        self._edgeXToMedia = 'edge_hashtag_to_media'

        # NOTE(review): the three values below are presumably consumed when
        # requesting further pages (their use is outside this view) -- confirm.
        self._pageIDKey = 'name'
        self._queryHash = 'f92f56d47dc7a55b606908374b43a314'
        # Template with {pageID} and {endCursor} placeholders; the doubled
        # braces are literal braces once str.format is applied.
        self._variablesFormat = '{{"tag_name":"{pageID}","first":50,"after":"{endCursor}"}}'

    @classmethod
    def setup_parser(cls, subparser):
        '''Register the positional ``hashtag`` argument on subparser.'''
        subparser.add_argument('hashtag', type=snscrape.base.nonempty_string('hashtag'), help='An Instagram hashtag (no leading #)')

    @classmethod
    def from_args(cls, args):
        '''Create a scraper from parsed command-line arguments.

        Delegates to cls._construct, handing it args plus the parsed
        ``hashtag`` value as the single extra positional argument.
        '''
        return cls._construct(args, args.hashtag)
227217
228218
class InstagramLocationScraper(InstagramCommonScraper):
    '''Scraper configured for the page of a single location.'''

    name = 'instagram-location'

    def __init__(self, locationId, **kwargs):
        '''Configure the scraper for one location page.

        locationId: value interpolated into the location URL (the command
            line parser below supplies it as an int).
        **kwargs: forwarded unchanged to the parent initialiser.
        '''
        super().__init__(**kwargs)

        # The closing slash must follow the ID directly; any whitespace
        # between them would become part of the requested URL.
        self._initialUrl = f'https://www.instagram.com/explore/locations/{locationId}/'

        # get_items walks the initial page's JSON as
        # entry_data -> _pageName -> [0] -> graphql -> _responseContainer -> _edgeXToMedia.
        self._pageName = 'LocationsPage'
        self._responseContainer = 'location'
        self._edgeXToMedia = 'edge_location_to_media'

        # NOTE(review): the three values below are presumably consumed when
        # requesting further pages (their use is outside this view) -- confirm.
        self._pageIDKey = 'id'
        self._queryHash = '1b84447a4d8b6d6d0426fefb34514485'
        # Template with {pageID} and {endCursor} placeholders; the doubled
        # braces are literal braces once str.format is applied.
        self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'

    @classmethod
    def setup_parser(cls, subparser):
        '''Register the positional integer ``locationid`` argument on subparser.'''
        subparser.add_argument('locationid', help='An Instagram location ID', type=int)

    @classmethod
    def from_args(cls, args):
        '''Create a scraper from parsed command-line arguments.

        Delegates to cls._construct, handing it args plus the parsed
        ``locationid`` value as the single extra positional argument.
        '''
        return cls._construct(args, args.locationid)
0 commit comments