Skip to content
This repository was archived by the owner on Mar 30, 2023. It is now read-only.

Support for progress bars while scraping and some other fixes #1374

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
aiohttp
aiohttp==3.7.0
aiodns
beautifulsoup4
cchardet
Expand Down
17 changes: 15 additions & 2 deletions twint/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys, os, datetime
from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop
from tqdm import tqdm

from . import datelock, feed, get, output, verbose, storage
from .token import TokenExpiryException
Expand Down Expand Up @@ -29,6 +30,7 @@ def __init__(self, config):
self.count = 0
self.user_agent = ""
self.config = config
self.progress_bar = tqdm(total=int(config.Limit), desc="Tweets")
self.config.Bearer_token = bearer
# TODO might have to make some adjustments for it to work with multi-threading
# USAGE : to get a new guest token simply do `self.token.refresh()`
Expand All @@ -46,6 +48,9 @@ def __init__(self, config):
logme.debug(__name__ + ':Twint:__init__:pandas_clean')
storage.panda.clean()

def __del__(self):
    """Close the tqdm progress bar when this instance is finalized.

    ``__del__`` is also invoked for instances whose ``__init__`` raised
    before ``progress_bar`` was assigned, so the attribute is looked up
    defensively instead of being assumed to exist. Without the guard an
    ``AttributeError`` would be reported from the finalizer on top of the
    original construction error.
    """
    progress_bar = getattr(self, "progress_bar", None)
    if progress_bar is not None:
        progress_bar.close()

def get_resume(self, resumeFile):
if not os.path.exists(resumeFile):
return '-1'
Expand Down Expand Up @@ -149,11 +154,14 @@ async def follow(self):
await self.Feed()
if self.config.User_full:
logme.debug(__name__ + ':Twint:follow:userFull')
self.count += await get.Multi(self.feed, self.config, self.conn)
foo = await get.Multi(self.feed, self.config, self.conn)
self.count += foo
self.progress_bar.update(foo)
else:
logme.debug(__name__ + ':Twint:follow:notUserFull')
for user in self.feed:
self.count += 1
self.progress_bar.update(1)
username = user.find("a")["name"]
await output.Username(username, self.config, self.conn)

Expand All @@ -164,6 +172,7 @@ async def favorite(self):
for tweet in self.feed:
tweet_dict = {}
self.count += 1
self.progress_bar.update(1)
try:
tweet_dict['data-item-id'] = tweet.find("div", {"class": "tweet-text"})['data-id']
t_url = tweet.find("span", {"class": "metadata"}).find("a")["href"]
Expand Down Expand Up @@ -211,18 +220,22 @@ async def profile(self):
logme.debug(__name__ + ':Twint:profile')
for tweet in self.feed:
self.count += 1
self.progress_bar.update(1)
await output.Tweets(tweet, self.config, self.conn)

async def tweets(self):
    """Fetch the next feed page and process every tweet on it.

    Awaits ``self.Feed()`` first. When ``self.config.Location`` is set the
    whole feed is handed to ``get.Multi`` in one call; the value it returns
    is added to ``self.count`` and used to advance the progress bar
    (presumably the number of tweets handled -- confirm against
    ``get.Multi``). Otherwise each tweet is counted, the progress bar is
    advanced by one, and the tweet is passed to ``output.Tweets``.
    """
    await self.Feed()
    # TODO : need to take care of this later
    if self.config.Location:
        logme.debug(__name__ + ':Twint:tweets:location')
        # Await get.Multi exactly once and reuse its result for both the
        # running total and the progress bar, so nothing is fetched or
        # counted twice.
        processed = await get.Multi(self.feed, self.config, self.conn)
        self.count += processed
        self.progress_bar.update(processed)
    else:
        logme.debug(__name__ + ':Twint:tweets:notLocation')
        for tweet in self.feed:
            self.count += 1
            self.progress_bar.update(1)
            await output.Tweets(tweet, self.config, self.conn)

async def main(self, callback=None):
Expand Down
4 changes: 2 additions & 2 deletions twint/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ async def MobileProfile(username, init):
async def Search(config, init):
logme.debug(__name__ + ':Search')
url = base
tweet_count = 100
tweet_count = 100 if not config.Limit else config.Limit
q = ""
params = [
# ('include_blocking', '1'),
Expand All @@ -89,7 +89,7 @@ async def Search(config, init):
('send_error_codes', 'true'),
('simple_quoted_tweet', 'true'),
('count', tweet_count),
# ('query_source', 'typed_query'),
('query_source', 'typed_query'),
# ('pc', '1'),
('cursor', str(init)),
('spelling_corrections', '1'),
Expand Down