Skip to content

Commit ca87806

Browse files
committed
updates
1 parent 107f18f commit ca87806

File tree

11 files changed

+1047
-297
lines changed

11 files changed

+1047
-297
lines changed

notes/face.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#one of the two classes (ArcFace) and one function are used by tps2020.py
2+
import os
3+
import cv2 #opencv (Open Source Computer Vision); pip install opencv-python; https://docs.opencv.org/4.x/d1/dfb/intro.html
4+
#for difference between opencv-contrib-python and opencv-python, see this link:
5+
#https://stackoverflow.com/questions/64902852/the-difference-between-opencv-python-and-opencv-contrib-python
6+
#To test whether cv2 module is available:
7+
#python -c "import cv2"
8+
#python -c "import cv2; print(cv2.__version__)" #4.5.5
9+
import numpy as np
10+
from argparse import ArgumentParser
11+
from utils import walk, progress_bar
12+
from face_models.face_model import ArcFaceModel, FaceNetModel
13+
14+
15+
np.random.seed(42)
16+
17+
18+
class FaceNet:
    """Thin wrapper around FaceNetModel: alignment plus feature extraction."""

    def __init__(self, gpu=-1):
        # gpu is forwarded unchanged to the FaceNetModel constructor.
        self.__model = FaceNetModel(gpu)

    def preprocess(self, image):
        """Return the model's get_input() result for image."""
        return self.__model.get_input(image)

    def extract(self, image, align=True):
        """Return the model's feature for image.

        When align is true the image is first passed through preprocess().
        Anything whose shape is not (160, 160, 3) is resized to 160x160
        before being handed to the model. Per the original notes the result
        is a vector of 512 floats (not verifiable from this file).
        """
        face = self.preprocess(image) if align else image

        needs_resize = face.shape != (160, 160, 3)
        if needs_resize:
            face = cv2.resize(face, (160, 160))

        return self.__model.get_feature(face)
33+
34+
35+
class ArcFace:
    """Thin wrapper around ArcFaceModel: alignment plus feature extraction."""

    def __init__(self, gpu=-1):
        # gpu is forwarded unchanged to the ArcFaceModel constructor.
        self.__model = ArcFaceModel(gpu)

    def preprocess(self, image):
        """Return the model's get_input() result for image (a numpy.ndarray in and out, per the original notes)."""
        return self.__model.get_input(image)

    def extract(self, image, align=True):
        """Return the model's feature for image.

        When align is true the image is first passed through preprocess().
        Anything whose shape is not channel-first (3, 112, 112) is resized to
        112x112, converted with COLOR_RGB2BGR and moved to channel-first
        layout before being handed to the model. Per the original notes the
        result is a vector of 512 floats (not verifiable from this file).
        """
        face = self.preprocess(image) if align else image

        if face.shape != (3, 112, 112):
            resized = cv2.resize(face, (112, 112))
            swapped = cv2.cvtColor(resized, cv2.COLOR_RGB2BGR)
            # (H, W, C) -> (C, H, W); same axis order np.rollaxis(x, 2, 0) yields.
            face = np.transpose(swapped, (2, 0, 1))

        return self.__model.get_feature(face)
51+
52+
#return a 2D array of the shape:
53+
#number of rows is the image count;
54+
#number of columns is 513 (last column is 1-based subject_id)
55+
#this function will display a progress bar
56+
def extract_dataset(dataset, extractor="arcface", gpu=-1):
    """Extract one feature row per image found under ./images/<dataset>.

    The dataset directory is expected to contain one sub-directory per
    subject, each holding that subject's images. A progress bar is updated
    once per subject.

    Args:
        dataset: name of the sub-directory of "images" to process
            (the notes mention "lfw" and "gtdb").
        extractor: "arcface" selects ArcFace; any other value selects FaceNet.
        gpu: forwarded unchanged to the extractor constructor.

    Returns:
        2D float array with len(walk(dataset_path)) rows and 513 columns.
        Each filled row is the extracted feature followed by the 1-based
        subject id; subjects are numbered in case-insensitive name order.

    Raises:
        OSError: if cv2.imread cannot decode one of the image files.
    """
    face = ArcFace(gpu) if extractor == "arcface" else FaceNet(gpu)

    dataset_path = os.path.join(os.path.abspath(""), "images", dataset)

    # presumably walk() lists every file below dataset_path - confirm in utils.
    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))

    # Case-insensitive ordering, ties broken by the original spelling.
    subjects = sorted(os.listdir(dataset_path), key=lambda name: (name.lower(), name))

    image_cnt = 0
    for subject_id, subject in enumerate(subjects, start=1):
        # Guard the fraction: walk() may report zero files while (empty)
        # subject directories still exist.
        if file_cnt:
            progress_bar(dataset + " " + extractor, float(image_cnt + 1) / file_cnt)

        subject_path = os.path.join(dataset_path, subject)
        for image_name in os.listdir(subject_path):
            image_path = os.path.join(subject_path, image_name)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable/undecodable file with None;
                # fail here with the path instead of deep inside the model.
                raise OSError("could not read image: {}".format(image_path))

            feature = face.extract(image)
            # np.append flattens the feature and adds the subject id as the last column.
            features[image_cnt, :] = np.append(feature, subject_id)
            image_cnt += 1

    return features
88+
89+
90+
if __name__ == "__main__":
    # The string literal below is a disabled manual demo (a bare string
    # expression is a no-op); it is kept for reference only.
    '''
    facenet = FaceNet()

    img_1 = cv2.imread(os.path.join(
        os.path.abspath(""), "src", "face_models", "tom1.jpg"))
    img_2 = cv2.imread(os.path.join(
        os.path.abspath(""), "src", "face_models", "adrien.jpg"))

    feat_1 = facenet.extract(img_1)
    feat_2 = facenet.extract(img_2)
    print(np.sum(np.square(feat_1 - feat_2)))
    print(np.dot(feat_1, feat_2.T))

    cv2.imshow("before", img_1)
    img = facenet.preprocess(img_1)
    cv2.imshow("after", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    arcface = ArcFace()

    feat_1 = arcface.extract(img_1)
    feat_2 = arcface.extract(img_2)
    print(np.sum(np.square(feat_1 - feat_2)))
    print(np.dot(feat_1, feat_2.T))

    cv2.imshow("before", img_2)
    img = arcface.preprocess(img_2)
    cv2.imshow("after", cv2.cvtColor(
        np.rollaxis(img, 0, 3), cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    '''
    cli = ArgumentParser()
    cli.add_argument("-d", "--dataset", required=True, help="dataset to use in feature extraction")
    cli.add_argument("-m", "--method", required=True, choices=["arcface", "facenet"], help="method to use in feature extraction")
    cli.add_argument("-gpu", "--gpu", required=False, type=int, default=-1, help="gpu to use in feature extraction")
    options = cli.parse_args()

    # extract_dataset is defined in this module; the flipped-image features
    # that used to be returned as a second value were dropped by Kai.
    features = extract_dataset(options.dataset, options.method, options.gpu)

    # Open question from the notes: are these the files (e.g.
    # lfw_arcface_feat.npz, lfw_facenet_feat.npz) shipped in the downloaded
    # features.tar.gz?
    out_name = "{}_{}_feat.npz".format(options.dataset, options.method)
    np.savez_compressed(os.path.join(os.path.abspath(""), "data", out_name), features)
    # The matching "{}_{}_feat_flip.npz" archive is no longer written.

notes/tps2020.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
#this module is alone and not used by others.
2+
#Uses two datasets: lfw and gtdb
3+
import datetime
4+
import os
5+
import shutil
6+
#from sklearn.neighbors import KNeighborsClassifier
7+
from argparse import ArgumentParser
8+
9+
import cv2
10+
#from queue import Queue #FIFO
11+
#from threading import Thread
12+
import numpy as np
13+
#from sklearn.metrics import confusion_matrix
14+
from sklearn.linear_model import LogisticRegression
15+
from sklearn.model_selection import StratifiedKFold
16+
17+
#from utils import progress_bar
18+
from biocapsule import BioCapsuleGenerator
19+
from face import ArcFace, extract_dataset
20+
21+
np.random.seed(42)
22+
23+
def filter_lfw(features, min_images=5):
    """Drop under-represented subjects and renumber the remaining ones.

    Only used in this module; the former second input/output features_flip
    was removed by Kai.

    Args:
        features: 2D array whose last column holds the subject id of each row.
        min_images: subjects with fewer rows than this are removed
            (default 5, the threshold used for LFW).

    Returns:
        A new 2D array containing only the rows of retained subjects, with
        the last column rewritten to dense 1-based ids that follow the sorted
        order of the original ids. The input array is not modified.
    """
    labels = features[:, -1]

    # One pass gives, for every row, how many rows share its label.
    _, inverse, counts = np.unique(labels, return_inverse=True, return_counts=True)
    keep = counts[inverse.reshape(-1)] >= min_images

    # Boolean indexing copies, so the relabelling below cannot touch the input.
    kept = features[keep]

    # Position of each surviving label among the sorted survivors -> 1..K.
    _, dense = np.unique(kept[:, -1], return_inverse=True)
    kept[:, -1] = dense.reshape(-1) + 1

    return kept
44+
45+
#returns a 2D array of 6 by 512
46+
def get_rs_features():
    """Compute ArcFace features for the reference subjects under ./images/rs.

    Only used in this module. The sub-directories of images/rs are taken in
    sorted name order and the first four are skipped; per the original notes
    the directory holds rs_00 .. rs_09, so rs_04 .. rs_09 are used.

    Returns:
        2D array of shape (6, 512). Row s holds the feature of the s-th
        retained reference subject; if a subject directory holds several
        images, the last one in sorted order wins. Rows without a matching
        subject stay zero.

    Raises:
        OSError: if cv2.imread cannot decode one of the image files.
    """
    arcface = ArcFace()  # can be swapped for a different feature extraction method

    rs_root = os.path.join(os.path.abspath(""), "images", "rs")
    rs_features = np.zeros((6, 512))

    # os.listdir returns entries in arbitrary order, so sort before slicing;
    # otherwise "[4:]" would pick a platform-dependent set of subjects.
    subjects = sorted(os.listdir(rs_root))[4:]

    for s_id, subject in enumerate(subjects):
        subject_dir = os.path.join(rs_root, subject)
        for image in sorted(os.listdir(subject_dir)):
            image_path = os.path.join(subject_dir, image)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals an unreadable/undecodable file with None.
                raise OSError("could not read image: {}".format(image_path))

            # Align once explicitly, then extract without re-aligning.
            img_aligned = arcface.preprocess(img)
            rs_features[s_id] = arcface.extract(img_aligned, align=False)
            # The former resize/rollaxis of img_aligned after extraction was
            # dropped: its result was only consumed by a disabled
            # cv2.imwrite of the aligned image into images/rs_aligned.

    return rs_features
68+
69+
#yLen is the number of subjects (LFW: 423; GTDB: 50)
70+
#return a vector of random values (0~5) of length yLen
71+
def rs_rbac(yLen, dist):
    """Randomly assign one of six roles (0..5) to each of yLen subjects.

    Only used in this module; revised by Kai.

    Args:
        yLen: number of subjects (per the notes, LFW: 423; GTDB: 50).
        dist: "unbal" draws roles with fixed unequal probabilities; any
            other value draws them uniformly.

    Returns:
        Array of length yLen with values in 0..5, drawn from the global
        numpy random state.
    """
    # p=None is numpy's default and means a uniform draw.
    weights = [0.05, 0.1, 0.15, 0.2, 0.25, 0.25] if dist == "unbal" else None
    return np.random.choice(6, yLen, p=weights)
77+
78+
#return biocapsules
79+
#input features is a 2D array: number of rows is the image count; number of columns is 513
80+
#input rs_features is of shape 6 by 512
81+
#original input rs_map is also removed by Kai
82+
def get_bcs(features, rs_features):
    """Build the biocapsules of every image against every reference subject.

    Only used in this module; the former inputs features_flip and rs_map and
    the second return value bcs_flip were removed by Kai.

    Args:
        features: 2D array, one row per image; all columns but the last are
            the feature vector, the last column is the subject id.
        rs_features: 2D array, one row per reference subject (6 by 512 in
            this experiment).

    Returns:
        3D array of shape (number of reference subjects, image count,
        features.shape[1]). bcs[i] holds biocapsule_batch(...) for reference
        subject i, with the subject id column of features appended last.
    """
    role_cnt = rs_features.shape[0]
    bcs = np.zeros((role_cnt, features.shape[0], features.shape[1]))

    bc_gen = BioCapsuleGenerator()
    subject_ids = features[:, -1][:, np.newaxis]  # column vector of subject ids

    for i in range(role_cnt):
        # NOTE(review): the original notes state that biocapsule_batch updates
        # the array it is given. Passing a fresh copy keeps every role's
        # capsules derived from the untouched features and leaves the caller's
        # array alone - confirm against BioCapsuleGenerator.
        user_features = features[:, :-1].copy()
        capsules = bc_gen.biocapsule_batch(user_features, rs_features[i])
        bcs[i, :, :] = np.hstack([capsules, subject_ids])

    return bcs
93+
94+
if __name__ == "__main__":
    # Experiment driver: 5-fold stratified classification of face features,
    # either on the raw ("under") features or on biocapsules ("bc") built
    # against six reference subjects that act as roles.
    parser = ArgumentParser()
    parser.add_argument("-d", "--dataset", required=True, choices=["lfw", "gtdb"], help="dataset to use in experiment")
    parser.add_argument("-m", "--mode", required=True, choices=["under", "bc"], help="feature mode to use in experiment")
    parser.add_argument("-r", "--role_dist", required=False, choices=["bal", "unbal"], default="unbal", help="role distribution to use in experiment")
    # NOTE: thread_cnt is parsed but not used anywhere below.
    parser.add_argument("-t", "--thread_cnt", required=False, type=int, default=1, help="thread count to use in classifier training")
    parser.add_argument("-gpu", "--gpu", required=False, type=int, default=-1, help="gpu to use in feature extraction")
    args = vars(parser.parse_args())

    if args["mode"] == "under":
        result_name = "tps2020_{}_under.txt".format(args["dataset"])
    else:
        result_name = "tps2020_{}_bc_{}.txt".format(args["dataset"], args["role_dist"])
    result_path = os.path.join(os.path.abspath(""), "results", result_name)

    # "with" guarantees the results file is closed even if a later step raises.
    with open(result_path, "w") as fi:
        print("computing features:", datetime.datetime.now())
        # Features for both supported datasets are loaded from precomputed
        # archives; extract_dataset (face.py) is only a fallback for any
        # other dataset name.
        if args["dataset"] in ("lfw", "gtdb"):
            feat_path = os.path.join(os.path.abspath(""), "data", "{}_arcface_feat.npz".format(args["dataset"]))
            with np.load(feat_path) as archive:  # close the .npz handle after reading
                features = archive["arr_0"]
        else:
            features = extract_dataset(args["dataset"], "arcface", args["gpu"])
        # features: one row per image, 513 columns; the last column is the
        # 1-based subject id, the rest is the feature vector.
        print("done computing features", datetime.datetime.now())

        # Remove all subjects with fewer than 5 images from the LFW dataset.
        if args["dataset"] == "lfw":
            print("filtering lfw features.")
            features = filter_lfw(features)

        # With biocapsules, authentication and authorization are evaluated
        # together using the reference subjects as roles.
        if args["mode"] == "bc":
            print("computing bcs:", datetime.datetime.now())
            rs_features = get_rs_features()  # 6 by 512

            # rs_map[s - 1] is the role (0..5) assigned to subject id s.
            rs_map = rs_rbac(len(np.unique(features[:, -1])), args["role_dist"])

            # Histogram of subjects per role. bincount with minlength reports
            # a 0 for a role nobody received; np.unique(return_counts=True)
            # would drop it and shift the numbering of every later role.
            cnts = np.bincount(rs_map, minlength=len(rs_features))
            for i, cnt in enumerate(cnts):
                fi.write("Role {} -- {} Subjects\n".format(i + 1, cnt))

            # All biocapsules: bcs[i] pairs every image with reference subject i.
            # NOTE(review): the original notes say this call may modify
            # features; in bc mode only its label column and row count are
            # consumed below.
            bcs = get_bcs(features, rs_features)

        # Running totals over all folds (and, in bc mode, all roles).
        ctp = 0   # correct identifications
        ctn = 0   # never incremented in this script; printed as 0
        cfp = 0   # false positives
        cfn = 0   # false negatives
        cfp1 = 0  # bc mode: misclassified samples whose true subject is not in the tested role

        print("begin skf...", datetime.datetime.now())
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        for k, (train_index, test_index) in enumerate(skf.split(features[:, :-1], features[:, -1])):
            print("fold", k)
            if args["mode"] == "under":
                X_train = features[:, :-1][train_index]
                y_train = features[:, -1][train_index]
                X_test = features[:, :-1][test_index]
                y_test = features[:, -1][test_index]

                clf = LogisticRegression(class_weight="balanced", random_state=42).fit(X_train, y_train)
                y_pred = clf.predict(X_test)
                for predicted, actual in zip(y_pred, y_test):
                    if predicted == actual:
                        ctp += 1
                    else:
                        # A wrong identity is counted both as a miss for the
                        # true subject and as a false accept for the predicted one.
                        cfn += 1
                        cfp += 1
            else:  # args["mode"] == "bc"
                for i in range(len(rs_features)):
                    X_train = bcs[i, :, :-1][train_index]
                    y_train = bcs[i, :, -1][train_index]  # same labels as features[:, -1][train_index]
                    X_test = bcs[i, :, :-1][test_index]
                    y_test = bcs[i, :, -1][test_index]    # same labels as features[:, -1][test_index]

                    clf = LogisticRegression(class_weight="balanced", random_state=42).fit(X_train, y_train)
                    y_pred = clf.predict(X_test)
                    for predicted, actual in zip(y_pred, y_test):
                        if rs_map[int(actual - 1)] == i:
                            # True subject holds role i.
                            if predicted == actual:
                                ctp += 1
                            else:
                                cfn += 1
                        elif predicted != actual:
                            # True subject does not hold role i and was misclassified ...
                            cfp1 += 1
                            # ... as a subject that does hold role i.
                            if rs_map[int(predicted - 1)] == i:
                                cfp += 1

        print("ctp =", ctp)
        print("ctn =", ctn)
        print("cfp =", cfp)
        print("cfn =", cfn)
        print("cfp1 =", cfp1)

        # NOTE: accuracy / FAR / FRR and the confusion-matrix entries are not
        # written to the results file; only the run configuration is.
        fi.write("Dataset -- {}\n".format(args["dataset"]))
        fi.write("BC -- {}\n".format(args["mode"]))
        fi.write("RS -- {}\n".format(args["role_dist"]))
215+
216+
#on lfw, I got 5,5(under);27,29(bc bal);31,50(bc unbal) for fp,fn
217+
#https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
218+
# confusion_matrix is a function that computes confusion matrix to evaluate the accuracy of a classification.
219+
# By definition a confusion matrix C is such that C_i,j is equal to the number of observations known to be in group i and predicted to be in group j.
220+
# Thus in binary classification, the count of true negatives is C_0,0, false negatives is C_1,0, true positives is C_1,1 and false positives is C_0,1.

0 commit comments

Comments
 (0)