-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetrics.py
More file actions
183 lines (147 loc) · 8.32 KB
/
metrics.py
File metadata and controls
183 lines (147 loc) · 8.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import os
import warnings
from sklearn.metrics import precision_recall_fscore_support, classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Suppress sklearn metric warnings (e.g. undefined precision/recall when a
# label has no predicted samples).
# NOTE(review): this blanket filter silences ALL warnings process-wide, not
# just sklearn's metric warnings — consider narrowing to specific categories.
warnings.filterwarnings('ignore')
def precision_recall_f1(true_labels, predicted_labels, labels):
    """Generates classification metrics precision, recall and F1 for the given predictions.

    Args:
        true_labels (np.array): The ground truth labels for the data
        predicted_labels (np.array): The predicted labels for the data
        labels (list): List of label names, indexed by class id

    Returns:
        metrics_dict (dict): Dictionary of micro, macro and weighted precision, recall and F1 scores
        cls_report_str (str): Formatted string with metric data for all classes and totals
        cls_report_dct (dict): Dictionary containing all metric data of metrics string
    """
    assert len(true_labels) == len(predicted_labels), "True labels " + str(len(true_labels)) + \
        " doesn't match predicted labels " + str(len(predicted_labels))
    metrics_dict = {}
    # 'micro': global counts of TP/FP/FN.
    # 'macro': unweighted per-label mean (ignores label imbalance).
    # 'weighted': per-label mean weighted by support (true instances per label).
    for average in ('micro', 'macro', 'weighted'):
        precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predicted_labels,
                                                                   average=average)
        metrics_dict['precision_' + average] = precision
        metrics_dict['recall_' + average] = recall
        # BUG FIX: 'f1_macro' previously stored the macro *precision* value.
        metrics_dict['f1_' + average] = f1
    # Remove label names whose class index appears in neither the test set nor
    # the predictions, otherwise classification_report rejects target_names.
    # Build the membership sets once (previously rebuilt on every iteration)
    # and create a new list instead of mutating the caller's `labels` in place.
    present = set(true_labels) | set(predicted_labels)
    target_names = [name for index, name in enumerate(labels) if index in present]
    # Generate classification report for each label/totals
    cls_report_str = classification_report(true_labels, predicted_labels, target_names=target_names)
    cls_report_dct = classification_report(true_labels, predicted_labels, target_names=target_names,
                                           output_dict=True)
    return metrics_dict, cls_report_str, cls_report_dct
def plot_confusion_matrix(true_labels, predicted_labels, labels, matrix_dim=15, normalise=False,
                          title=None, fig_size=(10, 10), font_size=15):
    """Generates a confusion matrix for the given predictions.

    Uses sklearn to generate the confusion matrix.
    Uses matplotlib and seaborn to generate the figure.

    Args:
        true_labels (np.array): The ground truth labels for the data
        predicted_labels (np.array): The predicted labels for the data
        labels (list): List of label names
        matrix_dim (int): The number of classes to show on the matrix, if None or -1 creates full matrix
        normalise (bool): Whether to normalise the matrix or use original values
        title (str): The title for the figure
        fig_size (tuple): Tuple for the horizontal and vertical size the figure
        font_size (int): Font size for figure labels.

    Returns:
        fig (matplotlib figure): The confusion matrix figure
        matrix (numpy array): The 2d confusion matrix array
    """
    # Generate the confusion matrix array
    matrix = confusion_matrix(true_labels, predicted_labels)
    # Normalise each row by its class support
    if normalise:
        # Ignore divide by zero for no predictions for a certain label
        with np.errstate(divide='ignore', invalid='ignore'):
            matrix = np.true_divide(matrix.astype('float'), matrix.sum(axis=1)[:, np.newaxis])
            matrix[~ np.isfinite(matrix)] = 0  # -inf inf NaN
        fmt = '.2f'
    else:
        fmt = 'd'
    # Truncate matrix to desired dimensions.
    # BUG FIX: previously `if matrix_dim:` was truthy for -1, so matrix[:-1, :-1]
    # silently dropped the last row/column instead of keeping the full matrix.
    if matrix_dim is not None and matrix_dim > 0:
        matrix = matrix[:matrix_dim, :matrix_dim]
    # Always align the label list with the (possibly smaller) matrix
    labels = labels[:matrix.shape[0]]
    # Create pandas dataframe
    df_cm = pd.DataFrame(matrix, index=labels, columns=labels)
    # Create figure and heatmap
    fig = plt.figure(figsize=fig_size)
    heatmap = sns.heatmap(df_cm, cmap="YlGnBu", annot=True, fmt=fmt, linewidths=0.5, cbar=False)
    # Set labels
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=font_size)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=font_size)
    plt.ylabel('True label', fontsize=font_size)
    plt.xlabel('Predicted label', fontsize=font_size)
    if title:
        plt.title(title, fontsize=font_size)
    plt.tight_layout()
    return fig, matrix
def save_history(file_name, history):
    """Saves training history dictionary as numpy arrays in a compressed .npz file.

    Args:
        file_name (str): Path of the .npz file to write.
        history (dict): Must contain the keys 'step', 'train_loss',
            'train_accuracy', 'val_loss' and 'val_accuracy', each array-like.
    """
    # BUG FIX: 'val_accuracy' was previously saved from history['val_loss'],
    # so the stored validation accuracy duplicated the validation loss.
    np.savez_compressed(file_name, step=np.asarray(history['step']),
                        train_loss=np.asarray(history['train_loss']),
                        train_accuracy=np.asarray(history['train_accuracy']),
                        val_loss=np.asarray(history['val_loss']),
                        val_accuracy=np.asarray(history['val_accuracy']))
def save_predictions(file_name, true_labels, predicted_labels, predictions):
    """Saves predictions to .csv file.

    Each row holds the true label, the predicted label, and then every raw
    prediction score for that sample, comma separated.
    """
    with open(file_name, 'w') as csv_file:
        # Header row
        csv_file.write("'true','predicted','predictions'\n")
        # One row per sample: true label, predicted label, then all scores
        for i, scores in enumerate(predictions):
            row = [str(true_labels[i]), str(predicted_labels[i])]
            row.extend(str(score) for score in scores)
            csv_file.write(','.join(row) + '\n')
def save_results(file_name, test_loss, test_accuracy, metrics):
    """Saves test metrics to a .txt file.

    Writes the test loss and accuracy first, then one 'key: value' line per
    entry of the metrics dictionary (F1, precision and recall variants).
    """
    lines = ['test_loss: ' + str(test_loss),
             'test_accuracy: ' + str(test_accuracy)]
    lines.extend(key + ': ' + str(value) for key, value in metrics.items())
    with open(file_name, 'w') as txt_file:
        txt_file.write('\n'.join(lines) + '\n')
def save_experiment(file_name, params, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc, metrics):
    """Saves/appends all experiment parameters and results to .csv file.

    On first use a header row is written (the fixed parameter/result columns
    followed by one column per metrics key); every call then appends a single
    data row in the same column order.
    """
    # Write the header row once, when the file does not exist yet
    if not os.path.exists(file_name):
        header = ["'experiment_name'", "'model_name'",
                  "'vocab_size'", "'max_seq_length'", "'use_punct'",
                  "'embedding_dim'", "'embedding_type'", "'embedding_source'",
                  "'train_loss'", "'train_acc'", "'val_loss'", "'val_acc'",
                  "'test_loss'", "'test_acc'"]
        header.extend("'" + key + "'" for key in metrics)
        with open(file_name, 'w+') as csv_file:
            csv_file.write(','.join(header) + '\n')
    # Append one row of values matching the header column order
    row = [params['experiment_name'], params['model_name'],
           str(params['vocab_size']), str(params['max_seq_length']), str(params['use_punct']),
           str(params['embedding_dim']), params['embedding_type'], params['embedding_source'],
           str(train_loss), str(train_acc), str(val_loss), str(val_acc),
           str(test_loss), str(test_acc)]
    row.extend(str(value) for value in metrics.values())
    with open(file_name, 'a') as csv_file:
        csv_file.write(','.join(row) + '\n')