-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbufo_downloader.py
More file actions
executable file
·40 lines (35 loc) · 1.32 KB
/
bufo_downloader.py
File metadata and controls
executable file
·40 lines (35 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
from zipfile import ZipFile
import boto3
import pathlib
import os
from dotenv import load_dotenv, find_dotenv
# Load settings from a .env file when one can be found; otherwise the script
# relies on whatever is already present in the process environment.
env_file = find_dotenv()
if env_file:
    load_dotenv(env_file)

# Zip archive of the main branch of the all-the-bufo repository.
REPO_URL = 'https://github.com/knobiknows/all-the-bufo/archive/refs/heads/main.zip'

# Stream the archive to disk:
#   * stream=True defers reading the body until iter_content() pulls it, so
#     the whole zip is never held in memory at once;
#   * raise_for_status() stops an HTTP error page from being saved as if it
#     were the archive (which would only fail later as a bad zip file);
#   * the timeout keeps a stalled connection from hanging the script forever;
#   * the context manager releases the connection once the download is done.
with requests.get(REPO_URL, stream=True, timeout=30) as r:
    r.raise_for_status()
    with open('all_the_bufo.zip', 'wb') as bf:
        for chunk in r.iter_content(chunk_size=64 * 1024):
            bf.write(chunk)

# S3 resource used for the upload step. endpoint_url is None when S3_URL is
# not set in the environment.
s3 = boto3.resource('s3', endpoint_url=os.environ.get('S3_URL'))
# Extract the downloaded archive and upload every file that lives under an
# "all-the-bufo/" folder inside it to the bucket named by S3_BUFO_BUCKET.
# Objects whose key (the bare file name) already exists in the bucket are
# skipped and counted; members that are not bufos are skipped without counting.
with ZipFile('all_the_bufo.zip') as many_bufo:
    bufo_bucket = s3.Bucket(os.environ.get('S3_BUFO_BUCKET'))
    # Keys already present in the bucket, collected once for O(1) lookups.
    existing_bufos = {obj.key for obj in bufo_bucket.objects.all()}
    skipped = 0
    uploaded = 0
    for compressed_bufo in many_bufo.infolist():
        # extract() writes the member below the current working directory
        # and returns the path it created.
        bufo = many_bufo.extract(compressed_bufo)
        print(f'extracted bufo to {bufo}')

        # Classify by the archive member name rather than the extracted path:
        # the member name always uses "/" separators and does not include the
        # working directory, so the result does not depend on the OS or on
        # where the script is run from. Directory entries are never bufos and
        # must not be handed to upload_file().
        is_bufo = (
            not compressed_bufo.is_dir()
            and '/all-the-bufo/' in compressed_bufo.filename
        )
        if not is_bufo:
            print(f'skipping {bufo} since it is not a valid bufo')
            continue

        # The object key is the file name without any folder components.
        name = pathlib.Path(bufo).name
        if name in existing_bufos:
            print(f'skipping existing bufo {name}')
            skipped += 1
            continue

        print(f'uploading bufo {name}')
        bufo_bucket.upload_file(bufo, name)
        uploaded += 1

print(f'uploaded {uploaded} bufos and skipped {skipped} bufos')