-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscraper_template.py
More file actions
50 lines (34 loc) · 941 Bytes
/
scraper_template.py
File metadata and controls
50 lines (34 loc) · 941 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import re
from datetime import datetime, time
from functools import lru_cache
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
import dateparser
import pytz
import sentry_sdk
from data.inhabitants import inhabitants
from data.studios import studios
from utils.storage import upload_dataframe
# Address of the page that get_data() downloads.
url = 'https://example.com/'
@lru_cache
def get_data():
    """Download the source page and build the raw data frame.

    The result is memoized with ``lru_cache``: repeated calls within one
    process reuse the first download and return the very same objects, so
    callers should not mutate the returned data frame in place.

    Returns:
        tuple: ``(df, response)`` where ``df`` is a ``pandas.DataFrame``
        (still empty in this template) and ``response`` is the object
        returned by ``requests.get``.

    Raises:
        AssertionError: If the response is falsy (for ``requests`` this
            means an HTTP error status).
        requests.exceptions.RequestException: If the request itself fails,
            e.g. on a connection error or when the timeout expires.
    """
    # Download website. requests applies no timeout by default, so set one
    # to keep the scraper from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=30)

    # Raise explicitly instead of using `assert`: assert statements are
    # stripped under `python -O`, which would silently let failed downloads
    # through. The exception type and message stay the same as before.
    if not response:
        raise AssertionError('Laden der Beispiel-Seite fehlgeschlagen')

    # Parse into data frame
    df = pd.DataFrame()

    return df, response
def clear_data():
    """Return the data frame from ``get_data()`` after cleaning.

    This template does not apply any cleaning steps yet; the data frame is
    passed through unchanged.

    NOTE: ``get_data`` is wrapped in ``lru_cache``, so the frame received
    here is the cached object itself, not a fresh copy.
    """
    raw_frame, _response = get_data()

    # Clean up data here

    return raw_frame
def write_data_example():
    """Upload the cleaned data frame under the name ``example.csv``.

    The data comes from ``clear_data()`` and is handed to
    ``upload_dataframe`` together with the target filename.
    """
    cleaned = clear_data()
    upload_dataframe(cleaned, 'example.csv')
# Script entry point: when run directly, print the cleaned data to stdout
# as CSV without the index column.
if __name__ == '__main__':
    csv_text = clear_data().to_csv(index=False)
    print(csv_text)