forked from texas/texplorer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassify.py
More file actions
34 lines (25 loc) · 776 Bytes
/
classify.py
File metadata and controls
34 lines (25 loc) · 776 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import re
import json
import csv
import pandas as pd
from lib.classificator import Classifier
# Exactly four digits that are not embedded in a longer run of digits, so
# "1850" and "1850s" match while "123456" does not produce a bogus 1234.
_YEAR_PATTERN = re.compile(r"(?<!\d)\d{4}(?!\d)")


def find_years(text):
    """Return every standalone four-digit number in *text* as a list of ints.

    Numbers are returned in the order they appear; duplicates are kept.
    No range check is applied, so any four-digit number counts as a
    candidate year.

    If *text* is not a string (for example ``None``, or the float NaN that
    pandas uses for an empty CSV cell), an empty list is returned instead
    of raising.
    """
    try:
        matches = _YEAR_PATTERN.findall(text)
    except TypeError:
        # The regex engine rejects non-string input; treat it as "no years".
        return []
    return [int(m) for m in matches]
# Load the historical-marker CSV export; rows are accessed positionally below.
dataset = pd.read_csv('data/Historical Marker_20150521_145030_254.csv')

# Maps each class returned by the classifier to the list of row[0] values
# assigned to it (row[0] is presumably the marker identifier -- confirm
# against the CSV header).
output = {}

for row in dataset.values:
    # Column 15 is what gets classified -- presumably the marker's
    # descriptive text; confirm against the CSV header.
    text = row[15]
    years = find_years(text)
    classifier = Classifier(years, text=text)
    # Single-argument print(...) yields the same output on Python 2 and 3.
    print('years %s' % (years,))
    # Classify once and reuse the result, so the classes printed are
    # exactly the classes recorded and the work is not done twice.
    classes = classifier.classify()
    print('classes: %s' % (classes,))
    for klass in classes:
        output.setdefault(klass, []).append(row[0])

# Persist the class -> row[0] list mapping as a single JSON document.
with open('data/classification.json', 'w') as jsonfile:
    jsonfile.write(json.dumps(output))