-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenrich_authors.py
40 lines (33 loc) · 1018 Bytes
/
enrich_authors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from tweepy import Client
from tqdm import tqdm
from db import tweet_storage
from env import env
def enrich_authors():
    """Backfill the ``author_id`` field on stored tweets that lack it.

    Iterates over every document returned by ``find()`` on the ``'tweets'``
    collection of the storage object from ``tweet_storage()``. For each
    document without an ``'author_id'`` key, the tweet is looked up through
    the API client (requesting only the ``author_id`` field) and the result
    is written back with a ``$set`` update keyed on the tweet's ``'id'``.

    The run is best-effort: a tweet whose author cannot be obtained is
    counted and skipped, never fatal. A one-line summary is printed at the
    end. Returns ``None``.
    """
    strg = tweet_storage()
    client = Client(env('API_TOKEN'), wait_on_rate_limit=True)

    already_dumped = 0
    unretrievable = 0
    total = 0

    for tweet in tqdm(strg['tweets'].find()):
        total += 1

        if 'author_id' in tweet:
            already_dumped += 1
            continue

        # Remembered outside the ``try`` so the failure report can name the
        # tweet even when the error occurs after the id was read.
        tweet_id = None
        try:
            tweet_id = tweet['id']
            response = client.get_tweet(
                tweet_id,
                tweet_fields=['author_id']
            )
            if response.data is None:
                # The API answered but returned no tweet object (presumably
                # deleted or otherwise inaccessible). This is the expected
                # failure mode, so count it without reporting it.
                unretrievable += 1
                continue
            strg['tweets'].update_one(
                {'id': tweet_id},
                {'$set': {'author_id': response.data['author_id']}}
            )
        except Exception as exc:
            # Keep going on any per-tweet failure, but surface the cause
            # instead of swallowing it. tqdm.write is used so the message
            # does not break the progress bar.
            unretrievable += 1
            tqdm.write(f"Could not enrich tweet {tweet_id!r}: {exc!r}")

    print(
        f"Processed {total} tweets, "
        f"{already_dumped} already have author, "
        f"{unretrievable} failed to retrieve"
    )
# Run the backfill only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    enrich_authors()