-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetrics.py
More file actions
183 lines (147 loc) · 8.32 KB
/
metrics.py
File metadata and controls
183 lines (147 loc) · 8.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import os
import warnings
from sklearn.metrics import precision_recall_fscore_support, classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Suppress sklearn metric warnings (e.g. undefined precision/recall when a
# label has no predicted samples).
# NOTE(review): this blanket filter silences ALL warnings process-wide, not
# just sklearn's metric warnings — consider narrowing to specific categories.
warnings.filterwarnings('ignore')
def precision_recall_f1(true_labels, predicted_labels, labels):
    """Generates classification metrics precision, recall and F1 for the given predictions.

    Args:
        true_labels (np.array): The ground truth labels for the data
        predicted_labels (np.array): The predicted labels for the data
        labels (list): List of label names, indexed by class id

    Returns:
        metrics_dict (dict): Dictionary of micro, macro and weighted precision, recall and F1 scores
        cls_report_str (str): Formatted string with metric data for all classes and totals
        cls_report_dct (dict): Dictionary containing all metric data of metrics string
    """
    assert len(true_labels) == len(predicted_labels), "True labels " + str(len(true_labels)) + \
        " doesn't match predicted labels " + str(len(predicted_labels))
    metrics_dict = {}
    # 'micro': global counts of TP/FP/FN.
    # 'macro': unweighted per-label mean (ignores label imbalance).
    # 'weighted': per-label mean weighted by support (true instances per label).
    for average in ('micro', 'macro', 'weighted'):
        precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predicted_labels,
                                                                   average=average)
        metrics_dict['precision_' + average] = precision
        metrics_dict['recall_' + average] = recall
        # BUG FIX: 'f1_macro' previously stored the macro *precision* value.
        metrics_dict['f1_' + average] = f1
    # Remove label names whose class index appears in neither the test set nor
    # the predictions, otherwise classification_report rejects target_names.
    # Build the membership sets once (previously rebuilt on every iteration)
    # and create a new list instead of mutating the caller's `labels` in place.
    present = set(true_labels) | set(predicted_labels)
    target_names = [name for index, name in enumerate(labels) if index in present]
    # Generate classification report for each label/totals
    cls_report_str = classification_report(true_labels, predicted_labels, target_names=target_names)
    cls_report_dct = classification_report(true_labels, predicted_labels, target_names=target_names,
                                           output_dict=True)
    return metrics_dict, cls_report_str, cls_report_dct
def plot_confusion_matrix(true_labels, predicted_labels, labels, matrix_dim=15, normalise=False,
                          title=None, fig_size=(10, 10), font_size=15):
    """Generates a confusion matrix for the given predictions.

    Uses sklearn to generate the confusion matrix.
    Uses matplotlib and seaborn to generate the figure.

    Args:
        true_labels (np.array): The ground truth labels for the data
        predicted_labels (np.array): The predicted labels for the data
        labels (list): List of label names
        matrix_dim (int): The number of classes to show on the matrix, if None or -1 creates full matrix
        normalise (bool): Whether to normalise the matrix or use original values
        title (str): The title for the figure
        fig_size (tuple): Tuple for the horizontal and vertical size the figure
        font_size (int): Font size for figure labels.

    Returns:
        fig (matplotlib figure): The confusion matrix figure
        matrix (numpy array): The 2d confusion matrix array
    """
    # Generate the confusion matrix array
    matrix = confusion_matrix(true_labels, predicted_labels)
    # Normalise each row by its class support
    if normalise:
        # Ignore divide by zero for no predictions for a certain label
        with np.errstate(divide='ignore', invalid='ignore'):
            matrix = np.true_divide(matrix.astype('float'), matrix.sum(axis=1)[:, np.newaxis])
            matrix[~ np.isfinite(matrix)] = 0  # -inf inf NaN
        fmt = '.2f'
    else:
        fmt = 'd'
    # Truncate matrix to desired dimensions.
    # BUG FIX: previously `if matrix_dim:` was truthy for -1, so matrix[:-1, :-1]
    # silently dropped the last row/column instead of keeping the full matrix.
    if matrix_dim is not None and matrix_dim > 0:
        matrix = matrix[:matrix_dim, :matrix_dim]
    # Always align the label list with the (possibly smaller) matrix
    labels = labels[:matrix.shape[0]]
    # Create pandas dataframe
    df_cm = pd.DataFrame(matrix, index=labels, columns=labels)
    # Create figure and heatmap
    fig = plt.figure(figsize=fig_size)
    heatmap = sns.heatmap(df_cm, cmap="YlGnBu", annot=True, fmt=fmt, linewidths=0.5, cbar=False)
    # Set labels
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=font_size)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=font_size)
    plt.ylabel('True label', fontsize=font_size)
    plt.xlabel('Predicted label', fontsize=font_size)
    if title:
        plt.title(title, fontsize=font_size)
    plt.tight_layout()
    return fig, matrix
def save_history(file_name, history):
    """Saves training history dictionary as numpy arrays in a compressed .npz file.

    Args:
        file_name (str): Path of the .npz file to write.
        history (dict): Must contain the keys 'step', 'train_loss',
            'train_accuracy', 'val_loss' and 'val_accuracy', each array-like.
    """
    # BUG FIX: 'val_accuracy' was previously saved from history['val_loss'],
    # so the stored validation accuracy duplicated the validation loss.
    np.savez_compressed(file_name, step=np.asarray(history['step']),
                        train_loss=np.asarray(history['train_loss']),
                        train_accuracy=np.asarray(history['train_accuracy']),
                        val_loss=np.asarray(history['val_loss']),
                        val_accuracy=np.asarray(history['val_accuracy']))
def save_predictions(file_name, true_labels, predicted_labels, predictions):
    """Saves predictions to .csv file.

    Each row holds the true label, the predicted label, and then every raw
    prediction score for that sample, comma separated.
    """
    with open(file_name, 'w') as csv_file:
        # Header row
        csv_file.write("'true','predicted','predictions'\n")
        # One row per sample: true label, predicted label, then all scores
        for i, scores in enumerate(predictions):
            row = [str(true_labels[i]), str(predicted_labels[i])]
            row.extend(str(score) for score in scores)
            csv_file.write(','.join(row) + '\n')
def save_results(file_name, test_loss, test_accuracy, metrics):
    """Saves test metrics to a .txt file.

    Writes the test loss and accuracy first, then one 'key: value' line per
    entry of the metrics dictionary (F1, precision and recall variants).
    """
    lines = ['test_loss: ' + str(test_loss),
             'test_accuracy: ' + str(test_accuracy)]
    lines.extend(key + ': ' + str(value) for key, value in metrics.items())
    with open(file_name, 'w') as txt_file:
        txt_file.write('\n'.join(lines) + '\n')
def save_experiment(file_name, params, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc, metrics):
    """Saves/appends all experiment parameters and results to .csv file.

    On first use a header row is written (the fixed parameter/result columns
    followed by one column per metrics key); every call then appends a single
    data row in the same column order.
    """
    # Write the header row once, when the file does not exist yet
    if not os.path.exists(file_name):
        header = ["'experiment_name'", "'model_name'",
                  "'vocab_size'", "'max_seq_length'", "'use_punct'",
                  "'embedding_dim'", "'embedding_type'", "'embedding_source'",
                  "'train_loss'", "'train_acc'", "'val_loss'", "'val_acc'",
                  "'test_loss'", "'test_acc'"]
        header.extend("'" + key + "'" for key in metrics)
        with open(file_name, 'w+') as csv_file:
            csv_file.write(','.join(header) + '\n')
    # Append one row of values matching the header column order
    row = [params['experiment_name'], params['model_name'],
           str(params['vocab_size']), str(params['max_seq_length']), str(params['use_punct']),
           str(params['embedding_dim']), params['embedding_type'], params['embedding_source'],
           str(train_loss), str(train_acc), str(val_loss), str(val_acc),
           str(test_loss), str(test_acc)]
    row.extend(str(value) for value in metrics.values())
    with open(file_name, 'a') as csv_file:
        csv_file.write(','.join(row) + '\n')