参考以下博客:

1.https://segmentfault.com/a/1190000015917420

2.https://blog.csdn.net/chenzhenyu123456/article/details/80751477(主要参考)

3.https://zhuanlan.zhihu.com/p/37705980(MTCNN和FaceNet讲解)

论文链接:

MTCNN:https://arxiv.org/ftp/arxiv/papers/1604/1604.02878.pdf

FaceNet:https://arxiv.org/pdf/1503.03832.pdf

论文项目地址:

https://github.com/AITTSMD/MTCNN-Tensorflow

https://github.com/davidsandberg/facenet

 

结合博客1和博客2,实现了对图片的验证、识别、查找,以及对实时视频的验证、识别、查找。

本来想把2020年全明星的成员都训练一遍再进行识别,后来发现识别率较低,可能是数据量不够,每个文件夹下的图片大概有20-25张。因此最后提交作业时仅训练了三个类别。

此次项目仅仅是融合了前两篇博客的内容,并对主函数做了一些调整,由于是课程项目作业,所以并未深究一些论文细节。

【深度学习】使用MTCNN+FaceNet进行人脸识别

 

【深度学习】使用MTCNN+FaceNet进行人脸识别

 

完整项目可直接参考博客2里的源码,以下代码是对/face_recognition/test_image.py所做的修改:

  1 # -*- coding: utf-8 -*-
  2 import os
  3 import sys
  4 
  5 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  6 sys.path.append(BASE_DIR)
  7 sys.path.append(os.path.join(BASE_DIR, '../ui_with_tkinter'))
  8 sys.path.append(os.path.join(BASE_DIR, '../face_recognition'))
  9 import classifier as clf
 10 import numpy as np
 11 import cv2
 12 import tensorflow as tf
 13 import facenet
 14 import detect_face
 15 import align_dlib
 16 import dlib
 17 from PIL import Image, ImageDraw, ImageFont
 18 
 19 os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # expose only the GPU with ID 0 to this process
 20 config = tf.ConfigProto()  # NOTE(review): not passed to any tf.Session created in this listing
 21 config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap this process at 50% of GPU memory
 22 config.gpu_options.allow_growth = True  # grow the GPU allocation on demand
 23 
 24 
 25 def predict_labels(best_model_list,
 26                    test_features,
 27                    all_data=False):
 28     """Predict with the best three models.
 29     Inputs:
 30       - best_model_list: A list contains the best three models.
 31       - test_features: A numpy array contains the features of the test images.
 32       - face_number: An integer, the number of faces that we found in the test image.
 33       - all_data: True or False, whether the model is trained with all data.
 34 
 35     Return:
 36       - labels_array: A numpy array contains the labels which predicted by
 37             the best three models .
 38     """
 39     labels_array = []
 40     for index, model in enumerate(best_model_list):
 41         if all_data:
 42             predict = model[0].predict(test_features)
 43         else:
 44             predict = model[0].predict(test_features)
 45         labels_array.append(predict)
 46 
 47     labels_array = np.array(labels_array)
 48     return labels_array
 49 
 50 
 51 def predict_probability(best_probability_model,
 52                         test_features,
 53                         face_number,
 54                         all_data=False):
 55     """Predict with softmax probability model.
 56     Inputs:
 57       - best_probability_model: The best pre_trained softmax model.
 58       - test_features: A numpy array contains the features of the test images.
 59       - face_number: An integer, the number of faces that we found in the test image.
 60       - all_data: True or False, whether the model is trained with all data.
 61 
 62     Returns:
 63       - labels: A numpy array contains the predicted labels.
 64       - probability: A numpy array contains the probability of the label.
 65     """
 66     labels, probability = None, None
 67     if face_number is not 0:
 68         if all_data:
 69             labels = best_probability_model[0].predict(test_features)
 70             all_probability = best_probability_model[0].predict_proba(test_features)
 71         else:
 72             labels = best_probability_model[0].predict(test_features)
 73             all_probability = best_probability_model[0].predict_proba(test_features)
 74         print('all_probability', all_probability)
 75         print('labels', labels)
 76         print('all_probability.shape', all_probability.shape)
 77 
 78         # print(probability.shape) -> (face_number, class_number)
 79         # print(all_probability)
 80         probability = all_probability[np.arange(face_number), np.argmax(all_probability, axis=1)]
 81         # np.argmax(all_probability, axis=1)]
 82         print('probability', probability)
 83         print('probability.shape', probability.shape)
 84 
 85     return labels, probability
 86 
 87 
 88 def find_people_from_image(best_probability_model, test_features, face_number, all_data=False):
 89     """
 90     Inputs:
 91       - best_probability_model: The best pre_trained softmax model.
 92       - test_features: A numpy array contains the features of the test images.
 93       - face_number: An integer, the number of faces that we found in the test image.
 94       - all_data: If all_data is True, using the models which trained by all
 95           training data. Otherwise using the models which trained by partial data.
 96 
 97     e.g.
 98     test_features: [feature_1, feature_2, feature_3, feature_4, feature_5]
 99 
100     first we get all predictive labels and their corresponding probability
101     [(A, 0.3), (A, 0.2), (B, 0.1), (B, 0.3), (C, 0.1)]
102     second we choose the maximum probability for each unique label
103     [(A, 0.3), (B, 0.3), (C, 0.1)]
104     finally, get the indices for each unique label.
105     [0, 3, 4]
106 
107     then return
108     label:
109     probability:
110     unique_labels: [A, B, C]
111     unique_probability: [0.3, 0.3, 0.1]
112     unique_indices: [0, 3, 4]
113     """
114     labels, probability = predict_probability(
115         best_probability_model, test_features, face_number, all_data=all_data)
116 
117     unique_labels = np.unique(labels)
118     print('unique_labels', unique_labels)
119     unique_probability, unique_indices = [], []
120 
121     for label in unique_labels:
122         indices = np.argwhere(labels == label)[:, 0]
123         unique_probability.append(np.max(probability[indices]))
124         unique_indices.append(indices[np.argmax(probability[indices])])
125 
126     unique_probability = np.array(unique_probability)
127     unique_indices = np.array(unique_indices)
128     print('unique_probability', unique_probability)
129     print('unique_indices', unique_indices)
130 
131     return labels, probability, unique_labels, unique_probability, unique_indices
132 
133 
def check_sf_features(feature, label):
    """Verify a feature against the stored reference features of a label.

    Inputs:
       - feature: The feature to be verified.
       - label: The label used for verification; its reference features are
             read from 'features/<label>.npy' when a 'features' path exists
             in the working directory, otherwise from
             '../features/<label>.npy'.

    Returns:
       - True or False, verification result.
       - sum_dis, the sum of the Euclidean distances between ``feature`` and
             every stored reference feature.
    """
    if os.path.exists('features'):
        feature_path = 'features/{}.npy'.format(label)
    else:
        feature_path = '../features/{}.npy'.format(label)
    # Use a context manager so the file handle is closed; the original passed
    # a bare open() to np.load and never closed it.
    with open(feature_path, 'rb') as feature_file:
        sf_features = np.load(feature_file)

    sf_dis = np.sqrt(np.sum((sf_features - feature) ** 2, axis=1))
    sum_dis = np.sum(sf_dis)

    # Reject when at least 4 references are farther than 1.0, or at least 3
    # references are farther than 1.1.
    if np.sum(sf_dis > 1.0) >= 4:
        return False, sum_dis
    if np.sum(sf_dis > 1.1) >= 3:
        return False, sum_dis

    return True, sum_dis
161 
162 
def _names_message(prefix, names, language):
    """Build the summary sentence '<prefix><name>,<name>[ in this image!]'."""
    # The original appended '<name>,' per match and then always cut the last
    # character; with no match that cut a real character off the prefix
    # (harmless trailing space in English, a truncated word in Chinese).
    if names:
        message = prefix + ','.join(names)
    else:
        message = prefix.rstrip()
    if language == 'english':
        message = message + ' in this image!'
    return message


def _show_result(image_data, result_name):
    """Show the image in the 'camera' window; outside video mode also save it and wait for a key."""
    cv2.imshow('camera', image_data)
    if mode != 'video':
        cv2.imwrite('../result/{}.jpg'.format(result_name), image_data)
        cv2.waitKey()


def recognition(image, state, fr=None, all_data=False, language='english'):
    """Implement face verification, face recognition and face search functions.

    Besides its arguments this function reads the module-level names
    ``used_labels``, ``mode``, ``best_classifier_model_list`` and
    ``best_probability_model``, which are set in the ``__main__`` section.

    Inputs:
      - image: The image array to analyse; annotations are drawn onto it.
      - state: An iterable of tasks, each one of
               'verification', face verification (against used_labels[0])
               'recognition', face recognition
               'search', face search (among used_labels).
      - fr: The object of the UI class, or None. When given, its
            ``show_information`` method receives the messages produced so far.
      - all_data: Forwarded to the prediction helpers.
      - language: 'chinese' for Chinese messages; any other value produces
            the English prefixes, and only 'english' gets the
            ' in this image!' suffix.

    Returns:
      - answer: The concatenated messages of all requested tasks, or 0 when
            no face was detected.

    Raises:
      - ValueError: if ``state`` contains an unknown task.
    """
    answer = ''
    predict_info = []
    for task in state:
        if task not in {'verification', 'recognition', 'search'}:
            # Report the offending entry, not the whole list.
            raise ValueError('{} is not a valid argument!'.format(task))
    test_features, face_number, face_boxes, flag = process('test',
                                                           image,
                                                           image_size=144)
    if not flag:
        return 0

    for task in state:
        if task == 'verification':
            print('Start verification')

            labels_array = predict_labels(best_classifier_model_list,
                                          test_features,
                                          all_data=all_data)
            # Majority vote over the models for every face (one column per face).
            labels = []
            for votes in labels_array.T:
                unique, counts = np.unique(votes, return_counts=True)
                labels.append(unique[np.argmax(counts)])

            target = used_labels[0]
            if target in labels:
                if language == 'chinese':
                    info = '验证成功!这张图像被认定为{}!'.format(target)
                else:
                    info = 'Successful Verification! This image ' \
                           'was identified as {}!'.format(target)
            else:
                if language == 'chinese':
                    info = '验证失败!这张图像不被认定为{}!'.format(target)
                else:
                    info = 'Verification failed! This image is not ' \
                           'recognized as {}!'.format(target)
            answer = answer + info

            for box in face_boxes:
                face_position = box.astype(int)
                cv2.rectangle(image, (face_position[0], face_position[1]), (
                    face_position[2], face_position[3]), (0, 255, 0), 2)

            if fr is not None:
                predict_info.append(info)
                fr.show_information(predict_info, predict=True)
            _show_result(image, 'verification')

        elif task == 'recognition':
            print('Start recognition')
            labels, _, unique_labels, unique_probability, unique_indices = \
                find_people_from_image(best_probability_model, test_features, face_number, all_data=all_data)
            prefix = '从图像中检测到' if language == 'chinese' else 'Detected '

            names = []
            for index, label in enumerate(labels):
                # Keep a label only for the most confident face of that label
                # and only when it passes the stored-feature check; every
                # other face is drawn without a name.
                if index in unique_indices and \
                        check_sf_features(test_features[index], label)[0]:
                    names.append('{}'.format(label))
                else:
                    labels[index] = ''

            info = _names_message(prefix, names, language)
            answer = answer + info

            if fr is not None:
                predict_info.append(info)
                fr.show_information(predict_info, predict=True)
            image_data = image
            for index, label in enumerate(labels):
                face_position = face_boxes[index].astype(int)
                image_data = puttext(image, face_position, label, face_number, language='english')
            _show_result(image_data, 'recognition')

        elif task == 'search':
            print('Start search')
            _, _, unique_labels, unique_probability, unique_indices = \
                find_people_from_image(best_probability_model, test_features, face_number, all_data=all_data)
            prefix = '从图像中找到' if language == 'chinese' else 'Found '

            # Positions into unique_labels of the wanted people that were found.
            # (The original reused the outer loop variable ``i`` here.)
            found_positions = []
            for position, label in enumerate(unique_labels):
                if label not in used_labels:
                    continue
                face_index = unique_indices[position]
                if not check_sf_features(test_features[face_index], label)[0]:
                    continue
                found_positions.append(position)

            info = _names_message(
                prefix,
                ['{}'.format(unique_labels[position]) for position in found_positions],
                language)
            answer = answer + info

            if fr is not None:
                predict_info.append(info)
                fr.show_information(predict_info, predict=True)
            image_data = image
            for position in found_positions:
                face_position = face_boxes[unique_indices[position]].astype(int)
                image_data = puttext(image, face_position, unique_labels[position],
                                     face_number, language='english')
            # Show/save once after all matches are drawn, as the recognition
            # branch does, instead of once (and one blocking key wait) per match.
            _show_result(image_data, 'search')

    # Return only after every requested task ran; the original returned from
    # inside the loop, so only the first entry of ``state`` was processed.
    return answer
347 
348 
def process(state, image, image_size=144):
    """Detect the faces of an image and compute one embedding per face.

    Relies on the module-level names ``minsize``, ``p_net``, ``r_net``,
    ``o_net``, ``threshold``, ``factor``, ``face_aligner``, ``sess``,
    ``embeddings``, ``images_placeholder`` and ``phase_train_placeholder``
    that are set in the ``__main__`` section.

    Inputs:
      - state: Only 'test' is handled; any other value returns None.
      - image: The image array to analyse. It is copied, not modified.
      - image_size: Edge length of the aligned face crop fed to the
            embedding network.

    Returns (for state == 'test'):
      - test_features: A numpy array with one embedding per face, or None
            when no face was found.
      - face_number: The number of detected faces.
      - face_boxes: The boxes returned by the detector, or None when no face
            was found.
      - flag: 1 when at least one face was found, otherwise 0.
    """
    if state != 'test':
        # Same result as the original's implicit fall-through.
        return None

    test_image_data = image.copy()
    face_boxes, _ = detect_face.detect_face(
        test_image_data, minsize, p_net, r_net, o_net, threshold, factor)
    face_number = face_boxes.shape[0]

    # Compare by value; ``face_number is 0`` tested object identity.
    if face_number == 0:
        print('face number is 0')
        return None, face_number, None, 0

    test_features = []
    for box in face_boxes:
        corners = box.astype(int)
        # Convert to plain Python ints before handing them to dlib.
        face_rect = dlib.rectangle(int(corners[0]), int(corners[1]),
                                   int(corners[2]), int(corners[3]))

        aligned_data = face_aligner.align(
            image_size, test_image_data, face_rect,
            landmarkIndices=align_dlib.AlignDlib.OUTER_EYES_AND_NOSE)

        aligned_data = facenet.prewhiten(aligned_data)
        # The network is fed a batch of one image.
        last_data = aligned_data.reshape((1, image_size, image_size, 3))
        test_features.append(sess.run(embeddings, feed_dict={
            images_placeholder: last_data, phase_train_placeholder: False})[0])

    return np.array(test_features), face_number, face_boxes, 1
386 
387 
def puttext(image_data, face_position, label, face_number, language='english'):
    """Draw a label above a face and a green rectangle around it.

    Inputs:
      - image_data: The BGR image to annotate.
      - face_position: Integer box corners (left, top, right, bottom).
      - label: The text to draw; it is centred horizontally on the box using
            an estimated per-character width.
      - face_number: The number of faces in the image; with 4 or more faces
            the non-Chinese label is estimated as narrower.
      - language: 'chinese' renders the label through PIL with a TrueType
            font; any other value uses cv2.putText.

    Returns:
      - image_data: The annotated image.
    """
    use_chinese = language == 'chinese'
    rect_size = 2
    line_size = 2
    if use_chinese:
        font_size, up_offset, char_width = 30, 40, 30
    else:
        font_size, up_offset = 1, 20
        char_width = 10 if face_number >= 4 else 20

    # Half of the difference between the estimated label width and the box
    # width, used to shift the text so that it is centred on the box.
    box_width = face_position[2] - face_position[0]
    dis = (char_width * len(label) - box_width) // 2
    text_origin = (face_position[0] - dis, face_position[1] - up_offset)

    if use_chinese:
        # cv2 stores colours as BGR while PIL expects RGB.
        rgb_image = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_image)
        # PIL is used because it can render Chinese characters.
        font = ImageFont.truetype('../resources/STKAITI.TTF',
                                  font_size, encoding="utf-8")
        ImageDraw.Draw(pil_image).text(text_origin, label, (0, 0, 255), font=font)
        # Back to the cv2 colour order.
        image_data = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    else:
        cv2.putText(image_data, label, text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, font_size,
                    (0, 0, 255), line_size)

    top_left = (face_position[0], face_position[1])
    bottom_right = (face_position[2], face_position[3])
    image_data = cv2.rectangle(image_data, top_left, bottom_right,
                               (0, 255, 0), rect_size)
    return image_data
452 
453 
if __name__ == '__main__':
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three MTCNN stages
    factor = 0.709  # scale factor
    gpu_memory_fraction = 0.6

    # Build the MTCNN detector in its own graph and session.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        mtcnn_sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                      log_device_placement=False))
        with mtcnn_sess.as_default():
            p_net, r_net, o_net = detect_face.create_mtcnn(mtcnn_sess, '../models/mtcnn/')

    predictor_model = '../models/shape_predictor_68_face_landmarks.dat'
    face_aligner = align_dlib.AlignDlib(predictor_model)

    # Load the FaceNet model into the default graph. The original also called
    # ``tf.Graph().as_default()`` here without ``with``, which has no effect,
    # so it is dropped.
    model_dir = '../models/20170512-110547/20170512-110547.pb'  # model file
    sess = tf.Session()
    facenet.load_model(model_dir)
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

    all_data = True
    best_classifier_model_list = clf.load_best_classifier_model(all_data=all_data)
    best_probability_model = clf.load_best_probability_model(all_data=True)
    # All models are loaded at this point.

    fit_all_data = True
    mode = 'video'  # 'video' or 'picture'
    state = ['recognition']  # any of 'verification', 'recognition', 'search'
    # verification only checks used_labels[0]; search looks for every label listed here
    used_labels = ['LeBron']
    language = 'english'

    if mode == 'video':
        # Address of the IP camera; change it to your own (or use 0 for a local camera).
        video = "http://admin:admin@192.168.0.13:8081"
        capture = cv2.VideoCapture(video)
        cv2.namedWindow("camera", 1)
        num = 0  # number of frames saved so far
        try:
            while True:
                ret, frame = capture.read()
                if not ret:
                    # No frame was delivered; stop instead of passing None on.
                    print('failed to read a frame from the video source')
                    break
                cv2.imshow("camera", frame)
                answer = recognition(frame, state, fr=None, all_data=fit_all_data,
                                     language=language)
                print(answer)
                key = cv2.waitKey(3)
                if key == 27:
                    # Esc quits.
                    print("esc break...")
                    break
                if key == ord(' '):
                    # Space saves the current frame.
                    num = num + 1
                    filename = "frames_%s.jpg" % num
                    cv2.imwrite('../result/' + filename, frame)
        finally:
            # Release the capture even when recognition raises.
            capture.release()
            cv2.destroyWindow("camera")
    else:
        image_path = '../test/42.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise IOError('cannot read image {}'.format(image_path))
        answer = recognition(image, fr=None, state=state, all_data=fit_all_data,
                             language=language)
        print(answer)