# -*- coding: utf-8 -*-
  2 
  3 import collections
  4 import tensorflow as tf
  5 from datetime import datetime
  6 import math
  7 import time
  8 
# Short module alias used throughout this file for the TF-Slim API.
slim = tf.contrib.slim
 10 
 11 
 12 class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
 13     '''A named tuple describing a ResNet block.'''
 14 
 15 def subsample(inputs, factor, scope=None):
 16     '''降采样方法:
 17     factor:采样因子 1:不做修改直接返回 不为1:使用slim.max_pool2d降采样'''
 18     if factor ==1:
 19         return inputs
 20     else:
 21         return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
 22 
 23 
 24 def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
 25     '''创建卷积层'''
 26     if stride == 1:
 27         '''stride为1,使用slim.conv2d,padding为SAME'''
 28         return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
 29                            padding='SAME', scope=scope)
 30 
 31     else:
 32         '''显示地pad zero:
 33         pad zero总数为kernel size-1,pad_beg:pad//2, pad_end:余下部分'''
 34         pad_total = kernel_size-1
 35         pad_beg = pad_total//2
 36         pad_end = pad_total - pad_beg
 37         '''tf.pad对inputs进行补零操作'''
 38         inputs = tf.pad(inputs, [[0,0], [pad_beg, pad_end],
 39                                  [pad_beg, pad_end], [0, 0]])
 40 
 41         return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
 42                             padding='VALID', scope=scope)
 43 
 44 @slim.add_arg_scope
 45 def stack_blocks_dense(net, blocks, outputs_collections=None):
 46     '''net:input
 47        blocks:Block的class的列表
 48        outputs_collections:收集各个end_points的collections'''
 49     for block in blocks:
 50         '''双层for循环,逐个Block,逐个Residual Unit堆叠'''
 51         with tf.variable_scope(block.scope, 'block', [net]) as sc:
 52             '''两个tf.variable将残差学习单元命名为block_1/unit_1形式'''
 53 
 54             for i, unit in enumerate(block.args):
 55                 with tf.variable_scope('unit_%d' %(i+1), values=[net]):
 56 
 57                     '''利用第二层for循环拿到前面定义Blocks Residual Unit中args,
 58                     将其展开为depth、depth_bottleneck、stride'''
 59                     unit_depth, unit_depth_bottleneck, unit_stride = unit
 60 
 61                     '''使用unit_fn函数(残差学习单元的生成函数)
 62                     顺序地创建并连接所有的残差学习单元'''
 63                     net = block.unit_fn(net,
 64                                         depth=unit_depth,
 65                                         depth_bottleneck=unit_depth_bottleneck,
 66                                         stride=unit_stride)
 67 
 68             '''slim.utils.collect_named_outputs将输出net添加到collection中'''
 69             net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
 70 
 71         '''所有的Residual Unit都堆叠完后,最后返回net作为stack_blocks_dense的结果'''
 72         return net
 73 
 74 
 75 def resnet_arg_scope(is_training=True,
 76                      weight_decay=0.0001,
 77                      batch_norm_decay=0.097,
 78                      batch_norm_epsilon=1e-5,
 79                      batch_norm_scale=True):
 80     '''创建ResNet通用的arg_scope(作用:定义某些函数的参数默认值)'''
 81 
 82     batch_norm_params = {
 83         'is_training': is_training,
 84         'decay': batch_norm_decay,#默认为0.0001,BN的衰减速率默认为:0.997
 85         'epsilon': batch_norm_epsilon,#默认为1e-5
 86         'scale': batch_norm_scale,#BN的scale默认为True
 87         'updates_collections': tf.GraphKeys.UPDATE_OPS,
 88     }
 89 
 90     with slim.arg_scope(
 91         [slim.conv2d],
 92         weights_regularizer=slim.l2_regularizer(weight_decay),
 93         weights_initializer=slim.variance_scaling_initializer(),
 94         activation_fn=tf.nn.relu,
 95         normalizer_fn=slim.batch_norm,
 96         normalizer_params=batch_norm_params):
 97 
 98         with slim.arg_scope([slim.batch_norm], **batch_norm_params):
 99             with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
100 
101                 return arg_sc
102 
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    '''Bottleneck residual unit with pre-activation.

    Args:
        inputs: input tensor; must have rank of at least 4.
        depth: number of output channels of the unit.
        depth_bottleneck: number of channels of the first two convolutions.
        stride: stride of the unit, applied by the 3x3 convolution and by
            the shortcut.
        outputs_collections: collection that receives the unit's output.
        scope: optional variable-scope name for the unit.

    Returns:
        The unit's output tensor (shortcut + residual).
    '''
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        # Size of the last dimension of the input, i.e. its channel count.
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Pre-activation: batch norm followed by ReLU.
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')

        if depth == depth_in:
            # Channel counts already match: only subsample spatially.
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            # Channel counts differ: a strided 1x1 convolution (no norm, no
            # activation) projects the shortcut to `depth` channels.
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        # Residual branch: 1x1 (stride 1) -> 3x3 (stride `stride`) -> 1x1.
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        # Use conv2d_same so a strided 3x3 convolution is padded explicitly
        # rather than relying on SAME padding of a strided convolution.
        residual = conv2d_same(residual, depth_bottleneck, 3, stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        # Register the output under the unit's scope name and return it.
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name, output)
148 
149 
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              include_root_block=True,
              reuse=None,
              scope=None):
    '''Build a ResNet v2 network.

    Args:
        inputs: input tensor.
        blocks: list of Block tuples describing the residual blocks.
        num_classes: number of output classes; when None no logits layer
            and no softmax are added.
        global_pool: whether to average over the spatial axes before the
            logits layer.
        include_root_block: whether to prepend the initial 7x7 stride-2
            convolution and the 3x3 stride-2 max pool.
        reuse: passed to tf.variable_scope as `reuse`.
        scope: variable-scope name of the network; defaults to 'resnet_v2'.

    Returns:
        A (net, end_points) tuple: the final tensor and a dict built from
        the end-points collection. When num_classes is given the dict also
        holds the softmax output under the key 'prediction'.
    '''
    # The default scope name used to be misspelled 'resent_v2'.
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'

        # Make slim.conv2d, bottleneck and stack_blocks_dense record their
        # outputs in end_points_collection by default.
        with slim.arg_scope([slim.conv2d, bottleneck,
                             stack_blocks_dense],
                            outputs_collections=end_points_collection):

            net = inputs

            if include_root_block:
                # Root convolution: 64 channels, 7x7, stride 2, created
                # without activation and without normalization.
                with slim.arg_scope([slim.conv2d], activation_fn=None,
                                    normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')

                # 3x3 max pool with stride 2; together with the stride-2
                # convolution above this reduces the spatial size to 1/4.
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')

            # Stack all residual blocks, then apply the final BN + ReLU.
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu,
                                  scope='postnorm')

            if global_pool:
                # Global average pooling expressed as a mean over axes 1, 2.
                # NOTE(review): `keep_dims` is the TF 1.x spelling of this
                # argument - confirm against the targeted TF version.
                net = tf.reduce_mean(net, [1, 2], name='pool5',
                                     keep_dims=True)

            if num_classes is not None:
                # Logits: 1x1 convolution with num_classes output channels.
                net = slim.conv2d(net, num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None, scope='logits')

            # Turn the collected end points into a dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)

            if num_classes is not None:
                # NOTE(review): the key is singular while the op scope is
                # plural; kept as-is so existing lookups keep working.
                end_points['prediction'] = slim.softmax(net,
                                                        scope='predictions')

            return net, end_points
208 
209 
def resnet_v2_50(inputs,
                 num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    '''Build the 50-layer ResNet v2.

    The four blocks hold 3, 4, 6 and 3 units, giving
    (3 + 4 + 6 + 3) * 3 + 2 = 50 layers. The last unit of each of the first
    three blocks has stride 2; the final block outputs 2048 channels.

    Args:
        inputs: input tensor.
        num_classes: number of output classes, or None.
        global_pool: whether to apply global average pooling.
        reuse: variable-scope reuse flag.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) tuple returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    layout = (
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 4, 2),
        ('block3', 1024, 256, 6, 2),
        ('block4', 2048, 512, 3, 1),
    )
    blocks = [
        Block(name, bottleneck,
              [(depth, inner, 1)] * (count - 1) + [(depth, inner, last_stride)])
        for name, depth, inner, count, last_stride in layout
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
227 
def resnet_v2_101(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    '''Build the 101-layer ResNet v2.

    The four blocks hold 3, 4, 23 and 3 units, giving
    (3 + 4 + 23 + 3) * 3 + 2 = 101 layers. The last unit of each of the
    first three blocks has stride 2; the final block outputs 2048 channels.

    Args:
        inputs: input tensor.
        num_classes: number of output classes, or None.
        global_pool: whether to apply global average pooling.
        reuse: variable-scope reuse flag.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) tuple returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    layout = (
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 4, 2),
        ('block3', 1024, 256, 23, 2),
        ('block4', 2048, 512, 3, 1),
    )
    blocks = [
        Block(name, bottleneck,
              [(depth, inner, 1)] * (count - 1) + [(depth, inner, last_stride)])
        for name, depth, inner, count, last_stride in layout
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
245 
def resnet_v2_152(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    '''Build the 152-layer ResNet v2.

    The four blocks hold 3, 8, 36 and 3 units, giving
    (3 + 8 + 36 + 3) * 3 + 2 = 152 layers. The last unit of each of the
    first three blocks has stride 2; the final block outputs 2048 channels.

    Args:
        inputs: input tensor.
        num_classes: number of output classes, or None.
        global_pool: whether to apply global average pooling.
        reuse: variable-scope reuse flag.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) tuple returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    layout = (
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 8, 2),
        ('block3', 1024, 256, 36, 2),
        ('block4', 2048, 512, 3, 1),
    )
    blocks = [
        Block(name, bottleneck,
              [(depth, inner, 1)] * (count - 1) + [(depth, inner, last_stride)])
        for name, depth, inner, count, last_stride in layout
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
263 
def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    '''Build the 200-layer ResNet v2.

    The four blocks hold 3, 24, 36 and 3 units, giving
    (3 + 24 + 36 + 3) * 3 + 2 = 200 layers. The last unit of each of the
    first three blocks has stride 2; the final block outputs 2048 channels.

    Args:
        inputs: input tensor.
        num_classes: number of output classes, or None.
        global_pool: whether to apply global average pooling.
        reuse: variable-scope reuse flag.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) tuple returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    layout = (
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 24, 2),
        ('block3', 1024, 256, 36, 2),
        ('block4', 2048, 512, 3, 1),
    )
    blocks = [
        Block(name, bottleneck,
              [(depth, inner, 1)] * (count - 1) + [(depth, inner, last_stride)])
        for name, depth, inner, count, last_stride in layout
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)
281 
def time_tensorflow_run(session, target, info_string):
    '''Time repeated runs of `target` and print the mean and std deviation.

    Runs `session.run(target)` for 10 warm-up steps plus `num_batches`
    measured steps. `num_batches` is read from the module-level variable of
    that name, which must be set before this function is called.

    Args:
        session: object exposing run(target), e.g. a tf.Session.
        target: the value passed to session.run on every step.
        info_string: label included in the summary line.

    Returns:
        None. Progress (every 10th step) and a summary are printed.
    '''
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0

    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        session.run(target)
        duration = time.time() - start_time

        if i < num_steps_burn_in:
            # Warm-up steps are executed but excluded from the statistics.
            continue

        if i % 10 == 0:
            print('%s: step %d, duration = %.3f' %(datetime.now(), i-num_steps_burn_in, duration))

        # Accumulate every measured step, not only the ones that are
        # printed; otherwise the mean is understated by a factor of ten.
        total_duration += duration
        total_duration_squared += duration * duration

    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    # Rounding can push a near-zero variance slightly negative; clamp it so
    # sqrt never raises a math domain error.
    sd = math.sqrt(max(vr, 0.0))

    print('%s: %s across %d steps, %.3f +/- %3.3f sec/batch' %(datetime.now(), info_string, num_batches, mn, sd))
303 
# Benchmark settings: batch of 32 random 224x224 RGB images, 100 timed runs.
batch_size = 32
height = width = 224
num_batches = 100  # read as a module-level name by time_tensorflow_run

# Build the 152-layer network with 1000 classes, with is_training=False.
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, num_classes=1000)

# Initialize all variables, then time the forward pass.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
time_tensorflow_run(sess, net, 'Forward')
# Console output recorded from one run of this script:
# 2017-12-23 23:51:01.359100: step 0, duration = 0.099
# 2017-12-23 23:51:02.359100: step 10, duration = 0.100
# 2017-12-23 23:51:03.358100: step 20, duration = 0.099
# 2017-12-23 23:51:04.359100: step 30, duration = 0.100
# 2017-12-23 23:51:05.361100: step 40, duration = 0.100
# 2017-12-23 23:51:06.363100: step 50, duration = 0.100
# 2017-12-23 23:51:07.366100: step 60, duration = 0.100
# 2017-12-23 23:51:08.372100: step 70, duration = 0.100
# 2017-12-23 23:51:09.388100: step 80, duration = 0.102
# 2017-12-23 23:51:10.394100: step 90, duration = 0.100
# 2017-12-23 23:51:11.298100: Forward across 100 steps, 0.010 +/- 0.030 sec/batch