-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdomain_list.py
More file actions
executable file
·49 lines (40 loc) · 1.35 KB
/
domain_list.py
File metadata and controls
executable file
·49 lines (40 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/env python
import utils
# Root path for the domain_list dataset: the per-source TSV files are read
# from its 'intermediate' child, and it is also the path handed to
# utils.write_json_with_path for the combined output.
location = utils.data_location / 'domain_list'
def default_mapping():
    """Build the baseline column/label mapping shared by most sources.

    A new dict (with new inner lists) is created on every call, so callers
    may mutate the result without affecting other sources.

    Returns:
        dict with four entries:
            'domain_col': row key that holds the domain,
            'label_col':  row key that holds the label,
            'true_vals':  label values treated as 'true',
            'fake_vals':  label values treated as 'fake'.
    """
    mapping = {}
    mapping['domain_col'] = 'domain'
    mapping['label_col'] = 'label'
    mapping['true_vals'] = ['true']
    mapping['fake_vals'] = ['fake']
    return mapping
def fakenewswatch_mapping():
    """Return the mapping for the 'fakenewswatch' source.

    Starts from default_mapping() and overrides the label sets: the labels
    'fake_hoax' and 'clickbait' count as fake, and no label counts as true.
    """
    mapping = default_mapping()
    mapping.update({
        'fake_vals': ['fake_hoax', 'clickbait'],
        'true_vals': [],
    })
    return mapping
def usnews_mapping():
    """Return the mapping for the 'usnews' source.

    Identical to default_mapping() except that the only label counted as
    fake is 'hoax'.
    """
    overrides = {'fake_vals': ['hoax']}
    mapping = default_mapping()
    mapping.update(overrides)
    return mapping
# Per-source mapping, keyed by source name (also the stem of the source's
# intermediate TSV file). Each factory is called once so that every source
# owns an independent mapping dict; the pair order fixes the processing order.
filecolumns = {
    source_name: build_mapping()
    for source_name, build_mapping in (
        ('cbsnews', default_mapping),
        ('dailydot', default_mapping),
        ('fakenewswatch', fakenewswatch_mapping),
        ('newrepublic', default_mapping),
        ('npr', default_mapping),
        ('snopes', default_mapping),
        ('thoughtco', default_mapping),
        ('usnews', usnews_mapping),
    )
}
# Collect one {'domain', 'label', 'source'} record per accepted row from
# every configured source, then write the combined list out once.
all_domains = []
for source, mappings in filecolumns.items():
    data = utils.read_tsv(location / 'intermediate' / '{}.tsv'.format(source))
    print(source)
    domains = []
    for el in data:
        raw_label = el[mappings['label_col']]
        if raw_label in mappings['true_vals']:
            label = 'true'
        elif raw_label in mappings['fake_vals']:
            label = 'fake'
        else:
            # Labels listed in neither set are not part of the output.
            continue
        domains.append({
            'domain': el[mappings['domain_col']],
            'label': label,
            'source': 'domain_list_{}'.format(source),
        })
    all_domains.extend(domains)
utils.write_json_with_path(all_domains, location, 'domains.json')