-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtweet2csv.py
More file actions
68 lines (61 loc) · 1.99 KB
/
tweet2csv.py
File metadata and controls
68 lines (61 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from Dictionary import Encoder
import numpy
# Destination files for the encoded rows and for their labels.
FILE_DATA = "resources/data.csv"
FILE_LABEL = "resources/labels.csv"

# Project-local encoder; its encode() is applied to every input line.
encoder = Encoder()

# Parallel accumulators filled while reading the dataset:
# arr[i] holds the encoding of one line, labels[i] the label of that line.
arr, labels = [], []
# Category name -> integer class id, numbered in declaration order from 0.
label_types = {
    category: class_id
    for class_id, category in enumerate(
        (
            "unrelated",
            "savings",
            "investment",
            "insurance",
            "health",
            "mortgages",
            "loan",
            "retirement",
        )
    )
}

# (dataset file stem, category name) pairs.  The order is significant: the
# script walks ``files`` in insertion order, which fixes the order of the
# rows it produces.
_keyword_categories = (
    ("unrelated", "unrelated"),
    ("damage", "insurance"),
    ("Allianz", "investment"),
    ("AXA", "insurance"),
    ("bank", "investment"),
    ("claim", "insurance"),
    ("cost", "savings"),
    ("credit", "loan"),
    ("dental", "health"),
    ("dentist", "health"),
    ("deposit", "mortgages"),
    ("finance", "loan"),
    ("financial", "loan"),
    ("health", "health"),
    ("hospital", "health"),
    ("hsbc", "investment"),
    ("icbc", "investment"),
    ("insurance", "insurance"),
    ("interest", "loan"),
    ("invest", "investment"),
    ("loan", "loan"),
    ("mortgage", "mortgages"),
    ("pension", "retirement"),
    ("retirement", "retirement"),
    ("saving", "savings"),
    ("sick", "health"),
    ("stock", "investment"),
    ("tax", "investment"),
)

# Dataset file stem -> integer class id assigned to every line of that file.
files = {
    keyword: label_types[category]
    for keyword, category in _keyword_categories
}
# Read every dataset file listed in ``files``; encode each non-empty line and
# record the file's class id alongside it, so ``arr`` and ``labels`` stay
# index-aligned.
for filename, label in files.items():
    print("Working on filename", filename)
    # NOTE(review): no explicit encoding is passed, so the platform default
    # is used -- confirm the dataset files are stored in that encoding.
    with open("resources/dataset/" + filename + ".csv") as file:
        # Iterate the file object so reading stops only at real end-of-file.
        # The previous readline() loop stopped at the first empty line, which
        # silently discarded everything after a blank line inside a file.
        for raw_line in file:
            line = raw_line.strip('\n')
            if not line:
                # Blank line: nothing to encode, but keep reading.
                continue
            arr.append(encoder.encode(line))
            labels.append(label)
def _announce_and_save(message, path, rows):
    """Print *message* followed by *path*, then write *rows* to *path*.

    Rows are written with ``numpy.savetxt`` using a comma delimiter and the
    ``%d`` format.
    """
    print(message, path)
    numpy.savetxt(path, rows, delimiter=',', fmt='%d')


# Turn the accumulated encodings into a single array before writing.
print("Converting to numpy matrix")
nparray = numpy.array(arr)

# Data first, labels second.
_announce_and_save("Saving data to", FILE_DATA, nparray)
_announce_and_save("Saving labels to", FILE_LABEL, labels)