一.代码实现

 1 # -*- coding: utf-8 -*-
 2 """
 3 Created on Sat Feb  9 15:33:39 2019
 4 
 5 @author: zhen
 6 """
 7 
 8 from keras.applications.vgg16 import VGG16
 9 
10 from keras.layers import Flatten
11 from keras.layers import Dense
12 from keras.layers import Dropout
13 from keras.models import Model
14 from keras.optimizers import SGD
15 
16 from keras.datasets import mnist
17 
18 import cv2
19 import numpy as np
20 # 因初始设置需大量内存(至少24G),现设置为最小分辨率以降低内存的要求
21 model_vgg = VGG16(include_top=False, weights='imagenet', input_shape=(48, 48, 3))
22 
23 for layer in model_vgg.layers:
24     layer.trainable = False
25 model = Flatten(name='flatten')(model_vgg.output) # 扁平化
26 model = Dense(4096, activation='relu', name='fc1')(model)
27 model = Dense(4096, activation='relu', name='fc2')(model)
28 model = Dropout(0.5)(model)
29 model = Dense(10, activation='softmax')(model)
30 model_vgg_mnist = Model(inputs=model_vgg.input, outputs=model, name='vgg16')
31 
32 model_vgg_mnist.summary()
33 
34 # VGGNet初始推荐
35 model_vgg = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
36 for layer in model_vgg.layers:
37     layer.trainable = False
38     
39 model = Flatten()(model_vgg.output)
40 model = Dense(4096, activation='relu', name='fc1')(model)
41 model = Dense(4096, activation='relu', name='fc2')(model)
42 model = Dropout(0.5)(model)
43 model = Dense(10, activation='softmax', name='prediction')(model)
44 model_vgg_mnist_pretrain = Model(model_vgg.input, model, name='vgg16_pretrain')
45 
46 model_vgg_mnist_pretrain.summary()
47 
48 sgd = SGD(lr=0.05, decay=1e-5) # 随机梯度下降
49 model_vgg_mnist.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
50 
51 (x_train, y_train), (x_test, y_test) = mnist.load_data("../test_data_home")
52 x_train, y_train = x_train[:1000], y_train[:1000]
53 x_test, y_test = x_test[:1000], y_test[:1000]
54 # GRAY两通道转换为RGB三通道
55 x_train = [cv2.cvtColor(cv2.resize(i, (48, 48)), cv2.COLOR_GRAY2RGB) for i in x_train]
56 x_train = np.concatenate([arr[np.newaxis] for arr in x_train]).astype('float32')
57 
58 x_test = [cv2.cvtColor(cv2.resize(i, (48, 48)), cv2.COLOR_GRAY2RGB) for i in x_test]
59 x_test = np.concatenate([arr[np.newaxis] for arr in x_test]).astype('float32')
60 
61 print(x_train.shape)
62 print(x_test.shape)
63 
64 x_train = x_train / 255
65 x_test = x_test / 255
66 
67 def tran_y(y):
68     y_ohe = np.zeros(10)
69     y_ohe[y] = 1
70     return y_ohe
71 
72 y_train_ohe = np.array([tran_y(y_train[i]) for i in range(len(y_train))])
73 y_test_ohe = np.array([tran_y(y_test[i]) for i in range(len(y_test))])
74 
75 model_vgg_mnist.fit(x_train, y_train_ohe, validation_data=(x_test, y_test_ohe), epochs=20, batch_size=100)

二.结果

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_9 (InputLayer)         (None, 48, 48, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 12, 12, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 12, 12, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 12, 12, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 6, 6, 256)         0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 6, 6, 512)         1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 6, 6, 512)         2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 6, 6, 512)         2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 3, 3, 512)         0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 3, 3, 512)         2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 3, 3, 512)         2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 3, 3, 512)         2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 1, 1, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              2101248   
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
dropout_9 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                40970     
=================================================================
Total params: 33,638,218
Trainable params: 18,923,530
Non-trainable params: 14,714,688
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_10 (InputLayer)        (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
dropout_10 (Dropout)         (None, 4096)              0         
_________________________________________________________________
prediction (Dense)           (None, 10)                40970     
=================================================================
Total params: 134,301,514
Trainable params: 119,586,826
Non-trainable params: 14,714,688
_________________________________________________________________
(1000, 48, 48, 3)
(1000, 48, 48, 3)
Train on 1000 samples, validate on 1000 samples
Epoch 1/20
1000/1000 [==============================] - 175s 175ms/step - loss: 2.1289 - acc: 0.2350 - val_loss: 1.9100 - val_acc: 0.4230
Epoch 2/20
1000/1000 [==============================] - 190s 190ms/step - loss: 1.7685 - acc: 0.4420 - val_loss: 1.6503 - val_acc: 0.4930
Epoch 3/20
1000/1000 [==============================] - 265s 265ms/step - loss: 1.5582 - acc: 0.5140 - val_loss: 1.5005 - val_acc: 0.5440
Epoch 4/20
1000/1000 [==============================] - 373s 373ms/step - loss: 1.4210 - acc: 0.5710 - val_loss: 1.3019 - val_acc: 0.6160
Epoch 5/20
1000/1000 [==============================] - 295s 295ms/step - loss: 1.1946 - acc: 0.6490 - val_loss: 1.1182 - val_acc: 0.7280
Epoch 6/20
1000/1000 [==============================] - 277s 277ms/step - loss: 1.0291 - acc: 0.7330 - val_loss: 1.0279 - val_acc: 0.7430
Epoch 7/20
1000/1000 [==============================] - 177s 177ms/step - loss: 1.0065 - acc: 0.7060 - val_loss: 0.9229 - val_acc: 0.7690
Epoch 8/20
1000/1000 [==============================] - 169s 169ms/step - loss: 0.8438 - acc: 0.7810 - val_loss: 0.9716 - val_acc: 0.6670
Epoch 9/20
1000/1000 [==============================] - 169s 169ms/step - loss: 0.8898 - acc: 0.7230 - val_loss: 0.9710 - val_acc: 0.6660
Epoch 10/20
1000/1000 [==============================] - 166s 166ms/step - loss: 0.8258 - acc: 0.7460 - val_loss: 0.9026 - val_acc: 0.7130
Epoch 11/20
1000/1000 [==============================] - 169s 169ms/step - loss: 0.7592 - acc: 0.7640 - val_loss: 0.9691 - val_acc: 0.6730
Epoch 12/20
1000/1000 [==============================] - 165s 165ms/step - loss: 0.7793 - acc: 0.7520 - val_loss: 0.8350 - val_acc: 0.6800
Epoch 13/20
1000/1000 [==============================] - 164s 164ms/step - loss: 0.6677 - acc: 0.7780 - val_loss: 0.7203 - val_acc: 0.7730
Epoch 14/20
1000/1000 [==============================] - 164s 164ms/step - loss: 0.7018 - acc: 0.7630 - val_loss: 0.6947 - val_acc: 0.7760
Epoch 15/20
1000/1000 [==============================] - 163s 163ms/step - loss: 0.6129 - acc: 0.8100 - val_loss: 0.7025 - val_acc: 0.7610
Epoch 16/20
1000/1000 [==============================] - 163s 163ms/step - loss: 0.6104 - acc: 0.8190 - val_loss: 0.6385 - val_acc: 0.8220
Epoch 17/20
1000/1000 [==============================] - 163s 163ms/step - loss: 0.5507 - acc: 0.8320 - val_loss: 0.6273 - val_acc: 0.8290
Epoch 18/20
1000/1000 [==============================] - 164s 164ms/step - loss: 0.5205 - acc: 0.8360 - val_loss: 0.8740 - val_acc: 0.6750
Epoch 19/20
1000/1000 [==============================] - 163s 163ms/step - loss: 0.5852 - acc: 0.8150 - val_loss: 0.6614 - val_acc: 0.7890
Epoch 20/20
1000/1000 [==============================] - 166s 166ms/step - loss: 0.5310 - acc: 0.8340 - val_loss: 0.5718 - val_acc: 0.8250

 三.解析

  VGGNet是牛津大学计算机视觉组(Visual Geometry Group)和Google DeepMind公司的研究员一起研发的深度卷积神经网络。VGG探索了卷积神经网络的深度与其性能之间的关系,通过反复堆叠3*3的小型卷积核和2*2的最大池化层,VGG成功构筑了16-19层深的卷积神经网络。
  VGG取得了2014年比赛分类项目第二名和定位项目第一名。同时,VGG拓展性很强,迁移到其他图片数据上的泛化性非常好。VGG的结构简洁,整个网络都是使用了同样大小的卷积核尺寸3*3和池化层2*2。VGG现在也还经常被用来提取图像特征,可用来在图像分类任务上进行再训练,相当于提供了非常好的初始化权重。
  VGG通过加深层次来提升性能,拥有5段卷积,每一段内有2-3个卷积层,同时每段尾部都会连接一个最大池化层来缩小图片尺寸。每段内的卷积核数量一样,越靠后段的卷积核数量越多,64-128-256-512-512。