# LoadDataset.py: CIFAR-10/100 loading, normalization, and data-augmentation utilities.
import pickle
import random

import numpy as np

img_size = 32
img_channels = 3

# How to load data:
#   train_X, train_lab, test_X, test_lab = get_data(10)   # or get_data(100)
#   train_X, test_X = normalize(train_X, test_X)

def unpickle10(file):
    """Load one CIFAR-10 batch file; return raw image rows and class labels."""
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    data_X = batch[b'data']
    labels = batch[b'labels']
    print("Loading %s : %d." % (file, len(data_X)))
    return data_X, labels


def unpickle100(file):
    """Load the CIFAR-100 train/test file; return raw image rows and fine labels."""
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    data_X = batch[b'data']
    labels = batch[b'fine_labels']
    print("Loading %s : %d." % (file, len(data_X)))
    return data_X, labels

def load_data10(files, data_dir, label_count):
    data, labels = unpickle10(data_dir + '/' + files[0])
    for f in files[1:]:
        data_n, labels_n = unpickle10(data_dir + '/' + f)
        data = np.append(data, data_n, axis=0)
        labels = np.append(labels, labels_n, axis=0)
    # One-hot encoding (unused, kept for reference):
    # labels = np.array([[float(i == label) for i in range(label_count)] for label in labels])
    # Each row stores a 32x32x3 image channel-first; reshape to NCHW, then
    # transpose to channels_last (NHWC).
    data = data.reshape([-1, img_channels, img_size, img_size])
    data = data.transpose([0, 2, 3, 1])
    return data, labels


def load_data100(files, data_dir, label_count):
    # CIFAR-100 ships as a single train file and a single test file.
    data, labels = unpickle100(data_dir + '/' + files[0])
    # labels = np.array([[float(i == label) for i in range(label_count)] for label in labels])
    # channels_last, as above
    data = data.reshape([-1, img_channels, img_size, img_size])
    data = data.transpose([0, 2, 3, 1])
    return data, labels

def get_data(dataset):
    """Return (train_data, train_labels, test_data, test_labels) for CIFAR-10 or CIFAR-100."""
    if dataset == 100:
        n_labels = 100
        # data_dir = './Datasets/cifar-100-python'
        data_dir = '../cifar-100-python'
        train_data, train_labels = load_data100(['train'], data_dir, n_labels)
        test_data, test_labels = load_data100(['test'], data_dir, n_labels)
    else:
        n_labels = 10
        data_dir = './Datasets/cifar-10-batches-py'
        train_files = ['data_batch_%d' % d for d in range(1, 6)]
        train_data, train_labels = load_data10(train_files, data_dir, n_labels)
        test_data, test_labels = load_data10(['test_batch'], data_dir, n_labels)
    print("Train data:", np.shape(train_data), np.shape(train_labels))
    print("Test data :", np.shape(test_data), np.shape(test_labels))
    # Shuffle the training set with a single random permutation.
    ind = np.random.permutation(len(train_data))
    train_data = train_data[ind]
    train_labels = np.array(train_labels)[ind]
    return train_data, train_labels, test_data, test_labels

def normalize(train_X, test_X):
    """Scale to [0, 1], then standardize both splits with training-set statistics."""
    train_X = train_X.astype('float32')
    test_X = test_X.astype('float32')
    max_X = np.max(train_X)  # 255 for raw CIFAR images
    mean_X = np.mean(train_X / max_X)
    std_X = np.std(train_X / max_X)
    train_norm = (train_X / max_X - mean_X) / std_X
    # Use the training-set max/mean/std for the test split as well.
    test_norm = (test_X / max_X - mean_X) / std_X
    return train_norm, test_norm
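
# normalize() above uses a single global mean/std over all pixels and channels.
# A per-channel variant, common in CIFAR pipelines, is sketched below under the
# same NHWC layout assumption; it is an alternative and is not used elsewhere
# in this file.
def normalize_per_channel(train_X, test_X):
    train_X = train_X.astype('float32') / 255.0
    test_X = test_X.astype('float32') / 255.0
    mean = np.mean(train_X, axis=(0, 1, 2))  # per-channel mean, shape (3,)
    std = np.std(train_X, axis=(0, 1, 2))    # per-channel std, shape (3,)
    return (train_X - mean) / std, (test_X - mean) / std
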
# =============================================================================== #
# Functions for data augmentation (not really needed for a comparison study)
# =============================================================================== #
def _random_crop(batch, crop_shape, padding=None):
    oshape = np.shape(batch[0])
    if padding:
        oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding)
    new_batch = []
    npad = ((padding, padding), (padding, padding), (0, 0))
    for i in range(len(batch)):
        new_batch.append(batch[i])
        if padding:
            # Zero-pad height and width, then crop back to crop_shape below.
            new_batch[i] = np.pad(batch[i], pad_width=npad,
                                  mode='constant', constant_values=0)
        # Pick a random top-left corner for the crop.
        nh = random.randint(0, oshape[0] - crop_shape[0])
        nw = random.randint(0, oshape[1] - crop_shape[1])
        new_batch[i] = new_batch[i][nh:nh + crop_shape[0],
                                    nw:nw + crop_shape[1]]
    return new_batch

def _random_flip_leftright(batch):
    # Flip each image horizontally with probability 1/2.
    for i in range(len(batch)):
        if bool(random.getrandbits(1)):
            batch[i] = np.fliplr(batch[i])
    return batch


def data_augmentation(batch):
    # Standard CIFAR augmentation: random horizontal flip, then a random
    # 32x32 crop from a 4-pixel zero-padded image.
    batch = _random_flip_leftright(batch)
    batch = _random_crop(batch, [32, 32], 4)
    return batch
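
# Minimal usage sketch, assuming the CIFAR archives have been extracted to the
# data_dir paths hard-coded in get_data(); pass 10 or 100 to select the dataset.
if __name__ == '__main__':
    train_X, train_lab, test_X, test_lab = get_data(10)
    train_X, test_X = normalize(train_X, test_X)
    # Augment a small training batch; data_augmentation works on a list of
    # 32x32x3 arrays and returns a list of the same length.
    aug = data_augmentation(list(train_X[:8]))
    print("Augmented batch:", np.shape(aug))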