参考以下博客:
1. https://segmentfault.com/a/1190000015917420
2. https://blog.csdn.net/chenzhenyu123456/article/details/80751477 (主要参考)
3. https://zhuanlan.zhihu.com/p/37705980 (MTCNN和FaceNet讲解)
论文链接:
MTCNN:https://arxiv.org/ftp/arxiv/papers/1604/1604.02878.pdf
FaceNet:https://arxiv.org/pdf/1503.03832.pdf
论文项目地址:
https://github.com/AITTSMD/MTCNN-Tensorflow
https://github.com/davidsandberg/facenet
结合博客1和博客2实现了图片的验证、识别、查找和实时的验证、识别、查找。
本来想把2020年全明星的成员都训练一遍再进行识别,但识别率较低,原因可能是数据量不够(每个文件夹下大概只有20-25张图片),因此最后提交作业时仅训练了三个类别。
此次项目仅仅是融合了前两篇博客的内容,并对主函数做了一些调整,由于是课程项目作业,所以并未深究一些论文细节。
完整项目可直接参考博客2里的源码,以下代码是对/face_recognition/test_image.py所做的修改:
# -*- coding: utf-8 -*-
"""Face verification, recognition and search on a picture or a video stream.

Faces are detected with MTCNN (``detect_face``), aligned with dlib
(``align_dlib``), embedded with FaceNet (``facenet``) and classified with the
pre-trained models loaded through ``classifier``.

NOTE: the functions below read several module-level names (``sess``,
``embeddings``, ``images_placeholder``, ``phase_train_placeholder``,
``p_net``/``r_net``/``o_net``, ``minsize``, ``threshold``, ``factor``,
``face_aligner``, ``best_classifier_model_list``, ``best_probability_model``,
``used_labels`` and ``mode``) that are only assigned in the ``__main__``
section at the bottom of this file.
"""
import os
import sys

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../ui_with_tkinter'))
sys.path.append(os.path.join(BASE_DIR, '../face_recognition'))
import classifier as clf
import numpy as np
import cv2
import tensorflow as tf
import facenet
import detect_face
import align_dlib
import dlib
from PIL import Image, ImageDraw, ImageFont

os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # use GPU with ID=0
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5  # use at most 50% of GPU memory
config.gpu_options.allow_growth = True  # allocate dynamically


def predict_labels(best_model_list,
                   test_features,
                   all_data=False):
    """Predict labels with every model in ``best_model_list``.

    Inputs:
    - best_model_list: A list of model entries; element ``[0]`` of each entry
      is the object whose ``predict`` method is called.
    - test_features: A numpy array contains the features of the test images.
    - all_data: Accepted for backward compatibility. The original code ran
      exactly the same statement for ``True`` and ``False``, so the flag has
      no effect here.

    Return:
    - labels_array: A numpy array with one row of predictions per model.
    """
    return np.array([model[0].predict(test_features)
                     for model in best_model_list])


def predict_probability(best_probability_model,
                        test_features,
                        face_number,
                        all_data=False):
    """Predict with the probability model.

    Inputs:
    - best_probability_model: A model entry; element ``[0]`` must provide
      ``predict`` and ``predict_proba``.
    - test_features: A numpy array contains the features of the test images.
    - face_number: An integer, the number of faces found in the test image.
    - all_data: Accepted for backward compatibility; both branches of the
      original code were identical, so the flag has no effect here.

    Returns:
    - labels: The predicted labels, or None when ``face_number`` is 0.
    - probability: For each face, the highest class probability, or None when
      ``face_number`` is 0.
    """
    labels, probability = None, None
    # ``!=`` instead of ``is not``: identity comparison with an int literal
    # is implementation-dependent and a SyntaxWarning since Python 3.8.
    if face_number != 0:
        labels = best_probability_model[0].predict(test_features)
        all_probability = best_probability_model[0].predict_proba(test_features)
        print('all_probability', all_probability)
        print('labels', labels)
        print('all_probability.shape', all_probability.shape)

        # Pick, for every face (row), the probability of its best class.
        probability = all_probability[np.arange(face_number),
                                      np.argmax(all_probability, axis=1)]
        print('probability', probability)
        print('probability.shape', probability.shape)

    return labels, probability


def find_people_from_image(best_probability_model, test_features, face_number, all_data=False):
    """Keep, for every predicted label, only the most confident face.

    Inputs:
    - best_probability_model: The pre-trained probability model entry.
    - test_features: A numpy array contains the features of the test images.
    - face_number: An integer, the number of faces found in the test image.
    - all_data: Forwarded to ``predict_probability``.

    e.g.
    test_features: [feature_1, feature_2, feature_3, feature_4, feature_5]

    first we get all predictive labels and their corresponding probability
    [(A, 0.3), (A, 0.2), (B, 0.1), (B, 0.3), (C, 0.1)]
    second we choose the maximum probability for each unique label
    [(A, 0.3), (B, 0.3), (C, 0.1)]
    finally, get the indices for each unique label.
    [0, 3, 4]

    Returns:
    - labels: The label predicted for every face.
    - probability: The best-class probability of every face.
    - unique_labels: [A, B, C]
    - unique_probability: [0.3, 0.3, 0.1]
    - unique_indices: [0, 3, 4]
    """
    labels, probability = predict_probability(
        best_probability_model, test_features, face_number, all_data=all_data)

    unique_labels = np.unique(labels)
    print('unique_labels', unique_labels)
    unique_probability, unique_indices = [], []

    for label in unique_labels:
        indices = np.argwhere(labels == label)[:, 0]
        candidates = probability[indices]
        unique_probability.append(np.max(candidates))
        unique_indices.append(indices[np.argmax(candidates)])

    unique_probability = np.array(unique_probability)
    unique_indices = np.array(unique_indices)
    print('unique_probability', unique_probability)
    print('unique_indices', unique_indices)

    return labels, probability, unique_labels, unique_probability, unique_indices


def check_sf_features(feature, label):
    """Verify a feature against the stored reference features of ``label``.

    The references are loaded from ``features/<label>.npy`` (or
    ``../features/<label>.npy`` when no ``features`` directory exists in the
    current working directory).

    Inputs:
    - feature: The feature to be verified.
    - label: The label used for verification.

    Returns:
    - True or False, verification result. It is False when at least 4
      reference distances exceed 1.0 or at least 3 exceed 1.1.
    - sum_dis, the sum of the Euclidean distances to all references.
    """
    feature_dir = 'features' if os.path.exists('features') else '../features'
    # Passing the path lets numpy open *and close* the file itself; the
    # original handed over an open file object that was never closed.
    sf_features = np.load('{}/{}.npy'.format(feature_dir, label))
    sf_dis = np.sqrt(np.sum((sf_features - feature) ** 2, axis=1))
    sum_dis = np.sum(sf_dis)
    if np.sum(sf_dis > 1.0) >= 4 or np.sum(sf_dis > 1.1) >= 3:
        return False, sum_dis
    return True, sum_dis


def _report(fr, predict_info, info):
    """Append ``info`` to the running log and show it in the UI, if any."""
    if fr is not None:
        predict_info.append(info)
        fr.show_information(predict_info, predict=True)


def _show_result(image_data, result_path):
    """Display the annotated image; outside video mode also save it and wait for a key."""
    cv2.imshow('camera', image_data)
    if mode != 'video':
        cv2.imwrite(result_path, image_data)
        cv2.waitKey()


def _build_sentence(prefix, names, language):
    """Return ``prefix`` + comma separated ``names`` + the closing phrase.

    The original code unconditionally stripped the last character to remove
    the trailing comma, which ate part of the prefix when no name had been
    appended. With an empty ``names`` only the prefix's trailing blank (if
    any) is dropped.
    """
    if names:
        body = prefix + ','.join('{}'.format(name) for name in names)
    else:
        body = prefix.rstrip()
    return body + (' in this image!' if language == 'english' else '!')


def _verify(image, test_features, face_boxes, fr, predict_info, all_data, language):
    """Run face verification against ``used_labels[0]`` and return the message."""
    print('Start verification')

    labels_array = predict_labels(best_classifier_model_list,
                                  test_features,
                                  all_data=all_data)
    # Transposed so that each row holds the votes of all models for one
    # face; the most frequent vote wins.
    labels = []
    for votes in labels_array.T:
        unique, counts = np.unique(votes, return_counts=True)
        labels.append(unique[np.argmax(counts)])

    if used_labels[0] in labels:
        if language == 'chinese':
            template = '验证成功!这张图像被认定为{}!'
        else:
            template = 'Successful Verification! This image was identified as {}!'
    else:
        if language == 'chinese':
            template = '验证失败!这张图像不被认定为{}!'
        else:
            template = 'Verification failed! This image is not recognized as {}!'
    info = template.format(used_labels[0])

    # Draws directly on the caller's image.
    for box in face_boxes:
        face_position = box.astype(int)
        cv2.rectangle(image, (face_position[0], face_position[1]), (
            face_position[2], face_position[3]), (0, 255, 0), 2)

    _report(fr, predict_info, info)
    _show_result(image, '../result/verification.jpg')
    return info


def _recognize(image, test_features, face_number, face_boxes, fr, predict_info,
               all_data, language):
    """Label every detected face and return the message listing the names."""
    print('Start recognition')
    labels, _, _, _, unique_indices = find_people_from_image(
        best_probability_model, test_features, face_number, all_data=all_data)
    prefix = '从图像中检测到' if language == 'chinese' else 'Detected '

    names = []
    for index, label in enumerate(labels):
        # A face keeps its label only if it is the most confident face for
        # that label AND passes the distance check; otherwise it is blanked.
        if index in unique_indices and check_sf_features(test_features[index], label)[0]:
            names.append(label)
        else:
            labels[index] = ''
    info = _build_sentence(prefix, names, language)

    _report(fr, predict_info, info)
    image_data = image
    for index, label in enumerate(labels):
        face_position = face_boxes[index].astype(int)
        # The label is always rendered with language='english', exactly as
        # in the original code, regardless of ``language``.
        image_data = puttext(image, face_position, label, face_number, language='english')
    _show_result(image_data, '../result/recognition.jpg')
    return info


def _search(image, test_features, face_number, face_boxes, fr, predict_info,
            all_data, language):
    """Look for the people listed in ``used_labels`` and return the message."""
    print('Start search')
    _, _, unique_labels, _, unique_indices = find_people_from_image(
        best_probability_model, test_features, face_number, all_data=all_data)
    prefix = '从图像中找到' if language == 'chinese' else 'Found '

    found = []  # (label, index of the face in face_boxes)
    for label, face_index in zip(unique_labels, unique_indices):
        if label not in used_labels:
            continue
        if not check_sf_features(test_features[face_index], label)[0]:
            continue
        found.append((label, face_index))
    info = _build_sentence(prefix, [label for label, _ in found], language)

    _report(fr, predict_info, info)
    # Start from the untouched image so that something can still be shown
    # when nobody was found (the original raised NameError in that case).
    image_data = image
    for label, face_index in found:
        face_position = face_boxes[face_index].astype(int)
        image_data = puttext(image, face_position, label, face_number, language='english')
    _show_result(image_data, '../result/search.jpg')
    return info


def recognition(image, state, fr=None, all_data=False, language='english'):
    """Implement face verification, face recognition and face search functions.

    Inputs:
    - image: The image array to analyse (it is passed to cv2 drawing
      functions and may be annotated in place).
    - state: An iterable whose items are 'verification' (face verification),
      'recognition' (face recognition) or 'search' (face search). The tasks
      are run in the given order.
    - fr: The object of the UI class, or None. When given, its
      ``show_information`` method receives the messages produced so far.
    - all_data: Forwarded to the prediction helpers.
    - language: 'chinese' selects the Chinese messages; any other value
      selects the English ones.

    Besides its arguments this function reads the module-level names
    ``used_labels``, ``mode``, ``best_classifier_model_list`` and
    ``best_probability_model``.

    Returns:
    - answer: The concatenated messages of all requested tasks, or the
      integer 0 when no face was found in the image.

    Raises:
    - ValueError: if ``state`` contains an unknown task name.
    """
    answer = ''
    predict_info = []
    for task in state:
        if task not in {'verification', 'recognition', 'search'}:
            raise ValueError('{} is not a valid argument!'.format(state))
    test_features, face_number, face_boxes, flag = process('test',
                                                           image,
                                                           image_size=144, )
    if not flag:
        return 0

    for task in state:
        if task == 'verification':
            answer += _verify(image, test_features, face_boxes, fr,
                              predict_info, all_data, language)
        elif task == 'recognition':
            answer += _recognize(image, test_features, face_number, face_boxes,
                                 fr, predict_info, all_data, language)
        elif task == 'search':
            answer += _search(image, test_features, face_number, face_boxes,
                              fr, predict_info, all_data, language)
    return answer


def process(state, image, image_size=144):
    """Detect, align and embed every face of ``image``.

    Inputs:
    - state: Only 'test' is supported.
    - image: The image array to analyse; a copy is processed.
    - image_size: Side length used for the aligned face crops.

    Returns a 4-tuple:
    - test_features: A numpy array with one embedding per face, or None when
      no face was found.
    - face_number: The number of detected faces.
    - face_boxes: The boxes returned by ``detect_face.detect_face``, or None
      when no face was found.
    - flag: 1 when at least one face was found, otherwise 0.

    Raises:
    - ValueError: if ``state`` is not 'test'. The original fell through and
      returned None, which only failed later when the caller unpacked it.
    """
    if state != 'test':
        raise ValueError('{} is not a valid argument!'.format(state))

    test_image_data = image.copy()
    face_boxes, _ = detect_face.detect_face(
        test_image_data, minsize, p_net, r_net, o_net, threshold, factor)
    face_number = face_boxes.shape[0]

    # ``==`` instead of ``is``: identity comparison with an int literal is
    # implementation-dependent and a SyntaxWarning since Python 3.8.
    if face_number == 0:
        print('face number is 0')
        return None, face_number, None, 0

    test_features = []
    for face_position in face_boxes:
        face_position = face_position.astype(int)
        face_rect = dlib.rectangle(int(face_position[0]), int(
            face_position[1]), int(face_position[2]), int(face_position[3]))

        aligned_data = face_aligner.align(
            image_size, test_image_data, face_rect,
            landmarkIndices=align_dlib.AlignDlib.OUTER_EYES_AND_NOSE)

        aligned_data = facenet.prewhiten(aligned_data)
        last_data = aligned_data.reshape((1, image_size, image_size, 3))
        test_features.append(sess.run(embeddings, feed_dict={
            images_placeholder: last_data, phase_train_placeholder: False})[0])
    test_features = np.array(test_features)

    return test_features, face_number, face_boxes, 1


def puttext(image_data, face_position, label, face_number, language='english'):
    """Draw ``label`` above a face and a green rectangle around it.

    Inputs:
    - image_data: The image to annotate. With the cv2 renderer (any language
      other than 'chinese') the text is drawn on this array in place.
    - face_position: Integer box coordinates, read as
      [left, top, right, bottom].
    - label: The text to draw.
    - face_number: The number of faces in the image; with 4 or more faces the
      estimated width of non-Chinese text is halved.
    - language: 'chinese' renders the text with PIL and the font
      '../resources/STKAITI.TTF'; any other value uses ``cv2.putText``.

    Returns:
    - image_data: The annotated image.
    """
    rect_size = 2
    line_size = None
    if language == 'chinese':
        label_pixels = 30 * len(label)
        font_size = 30
        up_offset = 40
    else:
        label_pixels = (20 if face_number < 4 else 10) * len(label)
        up_offset = 20
        font_size = 1
        line_size = 2

    # Horizontal shift that centres the label over the face box.
    dis = (label_pixels - (face_position[2] - face_position[0])) // 2
    text_origin = (face_position[0] - dis, face_position[1] - up_offset)

    if language == 'chinese':
        # The color coded storage order in cv2(BGR) and PIL(RGB) is different
        cv2img = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
        pilimg = Image.fromarray(cv2img)
        # use PIL to display Chinese characters in images
        draw = ImageDraw.Draw(pilimg)
        font = ImageFont.truetype('../resources/STKAITI.TTF',
                                  font_size, encoding="utf-8")
        draw.text(text_origin, label, (0, 0, 255), font=font)
        # convert to cv2
        image_data = cv2.cvtColor(np.array(pilimg), cv2.COLOR_RGB2BGR)
    else:
        cv2.putText(image_data, label, text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, font_size,
                    (0, 0, 255), line_size)
    image_data = cv2.rectangle(image_data, (face_position[0], face_position[1]),
                               (face_position[2], face_position[3]),
                               (0, 255, 0), rect_size)
    return image_data


if __name__ == '__main__':
    # The names assigned in this section are read as module globals by the
    # functions above, so they must stay at module level.
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    gpu_memory_fraction = 0.6

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            p_net, r_net, o_net = detect_face.create_mtcnn(sess, '../models/mtcnn/')
    predictor_model = '../models/shape_predictor_68_face_landmarks.dat'
    face_aligner = align_dlib.AlignDlib(predictor_model)
    model_dir = '../models/20170512-110547/20170512-110547.pb'  # model directory
    # NOTE(review): the original had a bare ``tf.Graph().as_default()``
    # statement here; without ``with`` the context manager is never entered,
    # so it had no effect and was dropped. ``sess`` is rebound to a new
    # session that is used for the FaceNet embeddings.
    sess = tf.Session()
    facenet.load_model(model_dir)
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
    all_data = True
    best_classifier_model_list = clf.load_best_classifier_model(all_data=all_data)
    best_probability_model = clf.load_best_probability_model(all_data=True)
    # Everything above loads the models.

    fit_all_data = True
    mode = 'video'  # 'video' or 'picture'
    state = ['recognition']  # state = ['verification', 'recognition', 'search']
    # Verification only checks used_labels[0]; search looks for every label
    # in the list.
    used_labels = ['LeBron']

    if mode == 'video':
        # Address of the IP camera; change it to your own address.
        video = "http://admin:admin@192.168.0.13:8081"
        # video = 0
        capture = cv2.VideoCapture(video)
        cv2.namedWindow("camera", 1)
        language = 'english'
        num = 0
        try:
            while True:
                ret, frame = capture.read()
                if not ret:
                    # Stream ended or the camera is unreachable; ``frame``
                    # is not usable in that case.
                    print('failed to read a frame from the video source')
                    break
                cv2.imshow("camera", frame)
                answer = recognition(frame, state, fr=None, all_data=fit_all_data,
                                     language=language)
                print(answer)
                key = cv2.waitKey(3)
                if key == 27:
                    # Esc quits.
                    print("esc break...")
                    break
                if key == ord(' '):
                    # Space saves the current frame.
                    num = num + 1
                    filename = "frames_%s.jpg" % num
                    cv2.imwrite('../result/' + filename, frame)
        finally:
            # When everything is done, release the capture
            capture.release()
            cv2.destroyWindow("camera")
    else:
        image_path = '../test/42.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError('cannot read image {}'.format(image_path))
        answer = recognition(image, fr=None, state=state, all_data=fit_all_data,
                             language='english', )
        print(answer)
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:【深度学习】使用MTCNN+FaceNet进行人脸识别 - Python技术站