-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCI_GeorgiaRefinementLastLayer.py
More file actions
116 lines (86 loc) · 4.87 KB
/
CI_GeorgiaRefinementLastLayer.py
File metadata and controls
116 lines (86 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
'''
Deep learning project (100Hz)
CI_GeorgiaRefinementLastLayer.py
Compute the averages and the 95% CIs for the AUC over the Georgia dataset test set after having fine-tuned
the classification layer of the original network using the Georgia dataset train set.
Authors: Daniele Baccega, Andrea Saglietto
Topic: Deep Learning applied to ECGs
Dataset: https://physionet.org/content/ptb-xl/1.0.1/
Description: The PTB-XL ECG dataset is a large dataset of 21837 clinical 12-lead ECGs from 18885 patients of 10 second length
where 52% are male and 48% are female with ages covering the whole range from 0 to 95 years (median 62 and interquartile range of 22).
The raw waveform data was annotated by up to two cardiologists, who assigned potentially multiple ECG statements to each record.
The in total 71 different ECG statements conform to the SCP-ECG standard and cover diagnostic, form, and rhythm statements.
To ensure comparability of machine learning algorithms trained on the dataset, we provide recommended splits into training and test sets.
'''
## Import the libraries
import numpy as np
import pandas as pd
import pickle
import math
import os
import fnmatch
import physionet_challenge_utility_script as pc
from sklearn.metrics import auc, roc_curve
## Experiment configuration
# Result folders to evaluate (each is later scanned for '20Classes_*' run
# sub-folders) and the folder passed to the Georgia data loader.
paths = [
    "GeorgiaRefinementLastLayer/D1",
    "GeorgiaRefinementLastLayer/D1-D2",
    "GeorgiaRefinementLastLayer/12leads",
]
path = 'Georgia/'
first_iteration = True

## Load the Georgia labels and the SNOMED CT mapping tables
_, _, labels, ecg_filenames = pc.import_key_data_Georgia(path)
SNOMED_scored = pd.read_csv("SNOMED_mappings_scored_Georgia.csv", sep=",")
SNOMED_unscored = pd.read_csv("SNOMED_mappings_unscored_Georgia.csv", sep=",")
df_labels = pc.make_undefined_class(labels, SNOMED_unscored)

## Build the class lookup tables
# SNOMED CT code (as string) -> abbreviation, taken from the scored mapping.
SNOMED_dic = {
    str(row["SNOMED CT Code"]): row["Abbreviation"]
    for _, row in SNOMED_scored.iterrows()
}

# abbreviation -> consecutive id, in order of first appearance; the extra
# "Average" entry takes the id right after the last real class.
classes_dic_name_id = {
    name: idx for idx, name in enumerate(dict.fromkeys(SNOMED_dic.values()))
}
classes_dic_name_id["Average"] = len(classes_dic_name_id)

# id -> abbreviation (inverse of the table above).
classes_dic = {idx: name for name, idx in classes_dic_name_id.items()}

# Number of real classes, i.e. without the synthetic "Average" entry.
num_classes = len(classes_dic) - 1
## Compute, for every folder in `paths`, the mean AUC over all runs and its 95% CI
# Result tables: one row per class plus a final "Average" row, one column per
# entry of `paths`. Values are stored as percentages.
global_roc_auc_mean = np.zeros((num_classes + 1, len(paths)))
global_roc_auc_left = np.zeros((num_classes + 1, len(paths)))
global_roc_auc_right = np.zeros((num_classes + 1, len(paths)))

for j, path in enumerate(paths):
    # Run folders are addressed below as 20Classes_0 ... 20Classes_<n_runs-1>,
    # so the number of matching entries is taken as the number of runs.
    n_runs = len(fnmatch.filter(os.listdir(path + "/"), '20Classes_*'))
    if n_runs == 0:
        # Without this guard the statistics below would silently reuse the
        # run count of the previous folder (or fail with a NameError).
        raise FileNotFoundError("No '20Classes_*' run folder found in " + path)

    # Running mean and running sum of squared deviations (Welford's M2),
    # both per class plus the "Average" slot at index num_classes.
    roc_auc_mean = np.zeros(num_classes + 1)
    roc_auc_variance = np.zeros(num_classes + 1)

    for count in range(1, n_runs + 1):
        run_dir = path + '/20Classes_' + str(count - 1)

        # Load the predictions and the ground truth of this run.
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # it on result files produced by this project.
        with open(run_dir + '/y_pred_Georgia', 'rb') as y_pred_file:
            y_pred = pickle.load(y_pred_file)
        with open(run_dir + '/y_test_Georgia', 'rb') as y_test_file:
            y_test = pickle.load(y_test_file)

        # Per-class AUC (column i of both arrays belongs to class i), followed
        # by the unweighted average over all classes in the last slot.
        roc_auc_local = np.zeros(num_classes + 1)
        for i in range(num_classes):
            fpr, tpr, _ = roc_curve(y_test[:, i], y_pred[:, i])
            roc_auc_local[i] = auc(fpr, tpr)
        roc_auc_local[num_classes] = np.mean(roc_auc_local[0:num_classes])

        # Welford update. The deviation from the OLD mean must be taken
        # before the mean is updated; using the new mean for both factors
        # underestimates the variance.
        delta = roc_auc_local - roc_auc_mean
        roc_auc_mean = roc_auc_mean + delta / count
        roc_auc_variance = roc_auc_variance + delta * (roc_auc_local - roc_auc_mean)

    if n_runs > 1:
        # Sample standard deviation (n - 1 in the denominator).
        roc_auc_std = np.sqrt(roc_auc_variance / (n_runs - 1))
    else:
        # A single run carries no spread information: report NaN explicitly
        # instead of relying on a 0/0 division warning.
        roc_auc_std = np.full(num_classes + 1, np.nan)

    # Normal-approximation 95% confidence interval of the mean.
    half_width = 1.96 * (roc_auc_std / math.sqrt(n_runs))
    roc_auc_left = roc_auc_mean - half_width
    roc_auc_right = roc_auc_mean + half_width

    print("\n" + path + ":")
    print("AUC:")
    for i in range(num_classes + 1):
        print("{0}: {1:0.5f} {2:0.5f} {3:0.5f} (±{4:0.5f})".format(classes_dic.get(i), roc_auc_left[i]*100, roc_auc_mean[i]*100, roc_auc_right[i]*100, half_width[i]*100))

    global_roc_auc_mean[:, j] = roc_auc_mean * 100
    global_roc_auc_left[:, j] = roc_auc_left * 100
    global_roc_auc_right[:, j] = roc_auc_right * 100

## Export the three tables (rows: classes + "Average", columns: folders in `paths`)
pd.DataFrame(global_roc_auc_mean, index=classes_dic_name_id.keys(), columns=paths).to_csv("mean_AUC_GeorgiaRefinementLastLayer.csv", float_format='%.2f')
pd.DataFrame(global_roc_auc_left, index=classes_dic_name_id.keys(), columns=paths).to_csv("left_AUC_GeorgiaRefinementLastLayer.csv", float_format='%.2f')
pd.DataFrame(global_roc_auc_right, index=classes_dic_name_id.keys(), columns=paths).to_csv("right_AUC_GeorgiaRefinementLastLayer.csv", float_format='%.2f')