-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexperiment.py
103 lines (85 loc) · 3.55 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import argparse
import os
from tqdm import tqdm
import subprocess
from sklearn.neighbors import KNeighborsClassifier
import pandas
import numpy
import matplotlib.pyplot as plt
def eval(path_train, path_test):
    """Score a 1-nearest-neighbour classifier fitted on one file against another.

    Both files are parsed as whitespace-separated tables in which the first
    column is the integer class label and every remaining column is a
    feature value.

    Note: the name shadows the ``eval`` builtin; it is kept unchanged because
    the rest of the script calls it under this name.

    Args:
        path_train: Path of the table the classifier is fitted on.
        path_test: Path of the table the classifier is scored on.

    Returns:
        The classifier's score on the test table multiplied by 100
        (accuracy as a percentage).
    """
    # header=None is essential: by default pandas consumes the first row as
    # column names, silently dropping one sample from each file. In the
    # few-shot setting that can remove a class's only training example.
    # NOTE(review): assumes files produced by the converter carry no header
    # row either -- confirm against the converter's output format.
    data_train = pandas.read_csv(path_train, sep=r'\s+', header=None).to_numpy()
    data_test = pandas.read_csv(path_test, sep=r'\s+', header=None).to_numpy()

    # Column 0 holds the label, the rest of each row is the feature vector.
    labels_learn = data_train[:, 0].astype(int)
    labels_test = data_test[:, 0].astype(int)
    series_learn = data_train[:, 1:]
    series_test = data_test[:, 1:]

    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(series_learn, labels_learn)
    return knn.score(series_test, labels_test) * 100
# Executables run for every data set: feature extraction and conversion.
EXTRACT_PATH = "./extract"
CONVERT_PATH = "./convert"

# Directory-name fragments of archive entries that are left out of the
# experiment, each with the reason it is skipped.
_EXCLUDED_NAME_PARTS = (
    "Missing",              # Not a dataset
    "Gesture",              # Contains NAN
    "Wiimote",              # Contains NAN
    "Dodger",               # Contains NAN
    "PLAID",                # Contains NAN
    "MelbournePedestrian",  # Contains NAN
    "Pig",                  # Too many classes
    "Phoneme",              # Too many classes
    "Thorax",               # Too many classes
    "ShapesAll",            # Too many classes
)

parser = argparse.ArgumentParser(description='Experiment runner for shapeon')
parser.add_argument(
    'shots', help='How many training examples should be used', type=int)
parser.add_argument('path', help='Path to UCR archieve')
args = parser.parse_args()

shot_count = args.shots
print(f"Shots: {shot_count}")

# One sub-directory per data set. The exclusion fragments are matched against
# the directory NAME only; matching the full path would drop every data set
# whenever the archive's own location happens to contain a fragment.
dirs = [
    entry.path
    for entry in os.scandir(args.path)
    if entry.is_dir()
    and not any(part in entry.name for part in _EXCLUDED_NAME_PARTS)
]
print(f"Data sets: {len(dirs)}")

# Per-data-set accuracies, filled in the same order by the experiment loop.
before = []
after = []
# Scratch files shared by every iteration (the paths do not depend on the
# data set, so they are defined once).
path_shots = "/tmp/utrain.tsv"
path_features = "/tmp/features.tsv"
path_processed_train = "/tmp/train.tsv"
path_processed_test = "/tmp/test.tsv"

for path in (bar := tqdm(dirs)):
    name = os.path.basename(path)
    bar.set_description('{0:32}'.format(name))
    path_learn = os.path.join(path, name + '_TRAIN.tsv')
    path_test = os.path.join(path, name + '_TEST.tsv')

    # Create shotified training file: keep, in file order, at most
    # `shot_count` lines per class. The class is the text before the first
    # tab of a line.
    taken = {}
    lines = []
    with open(path_learn) as in_file:
        for line in in_file:
            c = line.partition('\t')[0]
            if taken.get(c, 0) < shot_count:
                taken[c] = taken.get(c, 0) + 1
                lines.append(line)
    with open(path_shots, "w") as out_file:
        out_file.writelines(lines)

    # Extract features from the few-shot training file, then convert both
    # the training and the test data with those features.
    commands = (
        [EXTRACT_PATH, path_features, path_shots],
        [CONVERT_PATH, path_processed_train, path_features, path_shots],
        [CONVERT_PATH, path_processed_test, path_features, path_test],
    )
    try:
        for command in commands:
            subprocess.run(command, capture_output=True, check=True)
    except subprocess.CalledProcessError as error:
        # The scratch files are reused between iterations, so after a failed
        # step they may still hold the previous data set's output. Scoring
        # them would attribute stale results to this data set; skip instead.
        details = error.stderr.decode(errors='replace').strip()
        tqdm.write(
            f"Skipping {name}: {error.cmd[0]} exited with status "
            f"{error.returncode}: {details}")
        continue

    # Eval unprocessed (raw few-shot training data) and processed data.
    score_unprocessed = eval(path_shots, path_test)
    score_processed = eval(path_processed_train, path_processed_test)
    before.append(score_unprocessed)
    after.append(score_processed)
# Scatter plot of accuracy before vs. after processing, one point per data
# set, on a fixed 0-100 scale. Points above the black diagonal scored higher
# after processing than before.
axes = plt.gca()
bounds = (0, 100)
axes.set_xlabel("Before")
axes.set_ylabel("After")
axes.set_xlim(*bounds)
axes.set_ylim(*bounds)
axes.plot(bounds, bounds, color="black")
axes.scatter(before, after)
plt.show()