本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记

---------------个人学习笔记---------------

----------------本文作者疆--------------

------点击此处链接至博客园原文------

 

_DEBUG默认为False

1.SolverWrapper类

class SolverWrapper(object):
    # Background (Caffe): a solver drives model optimization by coordinating the
    # network's forward inference and backward gradient propagation, and it
    # improves the loss by updating the weight parameters.
    # The solver's work is split into supervising the optimization / updating
    # parameters, and producing the loss / computing the gradients.
    # It defines how the whole model runs: whether training or testing is started
    # from the command line or through the pycaffe interface, a solver
    # configuration file is required.
    """A simple wrapper around Caffe's solver.
    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

类中定义如下函数:

------------------------------------------------------__init__(...)---------------------------------------------------------

def __init__(self,sess,network,imdb,roidb,output_dir,logdir,pretrained_model=None) 构造函数

    def __init__(self, sess, network, imdb, roidb, output_dir, logdir, pretrained_model=None):
        """Keep the training inputs and create the checkpoint and summary writers.

        Args:
            sess: Accepted for interface compatibility; not used in this method.
            network: Network object, stored as ``self.net``.
            imdb: Image database, stored as ``self.imdb``.
            roidb: ROI database, stored as ``self.roidb`` and passed to
                ``rdl_roidb.add_bbox_regression_targets``.
            output_dir: Stored as ``self.output_dir``.
            logdir: Directory handed to ``tf.summary.FileWriter``.
            pretrained_model: Optional pretrained-model path, stored as
                ``self.pretrained_model``.
        """
        self.pretrained_model = pretrained_model
        self.output_dir = output_dir
        self.roidb = roidb
        self.imdb = imdb
        self.net = network

        print('Computing bounding-box regression targets...')
        # Per the original note, cfg.TRAIN.BBOX_REG defaults to True. The helper
        # returns the mean and std of the regression targets; snapshot() uses
        # them to rescale the bbox_pred parameters before saving.
        if cfg.TRAIN.BBOX_REG:
            target_stats = rdl_roidb.add_bbox_regression_targets(roidb)
            self.bbox_means, self.bbox_stds = target_stats
        print('done')

        # Checkpointing: saver used to store (and later restore) the model.
        self.saver = tf.train.Saver(max_to_keep=100,
                                    write_version=saver_pb2.SaverDef.V1)

        # Event-file writer; the graph recorded is TensorFlow's default graph.
        default_graph = tf.get_default_graph()
        self.writer = tf.summary.FileWriter(logdir=logdir,
                                            graph=default_graph,
                                            flush_secs=5)

其中,调用add_bbox_regression_targets(roidb)(roi_data_layer/roidb.py中)返回bbox回归目标的均值bbox_means和标准差bbox_stds,二者在snapshot(...)中用于对bbox_pred层的weights和biases做反规范化

def add_bbox_regression_targets(roidb):
    """
    Add information needed to train bounding-box regressors.
    For each roi find the corresponding gt box, and compute the distance.
    then normalize the distance into Gaussian by minus mean and divided by std
    """

tf.train.Saver(...)与模型保存与恢复有关、tf.summary.FileWriter(...)与保存计算图有关

------------------------------------------------------snapshot(...)---------------------------------------------------------

def snapshot(self,sess,iter)

    def snapshot(self, sess, iter):
        """Save a checkpoint whose bbox_pred layer outputs unnormalized deltas.

        The regression targets are normalized for training, so the learned
        ``bbox_pred`` parameters predict normalized values. Before saving, the
        de-normalization is folded into the layer (``weights * stds`` and
        ``biases * stds + means``), which makes the checkpoint directly usable
        at test time. The in-memory parameters are then put back so that
        training continues in the normalized space.

        Args:
            sess: TensorFlow session that owns the variables being saved.
            iter: Iteration index; the checkpoint file name uses ``iter + 1``.
        """
        net = self.net

        # Evaluate the condition once and reuse it for the restore step. The
        # original restore check omitted BBOX_NORMALIZE_TARGETS, so it hit
        # undefined names whenever normalization was switched off.
        # net.layers is the dict of per-layer outputs (per the original note).
        unnormalize = (cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers
                       and cfg.TRAIN.BBOX_NORMALIZE_TARGETS)

        if unnormalize:
            # Fetch the existing bbox_pred variables rather than creating new ones.
            with tf.variable_scope('bbox_pred', reuse=True):
                weights = tf.get_variable("weights")
                biases = tf.get_variable("biases")

            # Keep the current numeric values so they can be restored afterwards.
            # The session is passed explicitly, so no default session is needed.
            orig_0 = weights.eval(session=sess)
            orig_1 = biases.eval(session=sess)

            # Scale and shift with the bbox statistics. np.tile repeats the stds
            # for every one of the weights_shape[0] rows so the product is
            # elementwise.
            # assumes bbox_stds / bbox_means hold one value per bbox_pred
            # output -- TODO confirm against add_bbox_regression_targets.
            weights_shape = weights.get_shape().as_list()
            sess.run(weights.assign(orig_0 * np.tile(self.bbox_stds, (weights_shape[0], 1))))
            sess.run(biases.assign(orig_1 * self.bbox_stds + self.bbox_means))

        try:
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)

            # Per the original notes: TRAIN.SNAPSHOT_INFIX defaults to '' and
            # TRAIN.SNAPSHOT_PREFIX to 'VGGnet_fast_rcnn', giving names such as
            # <output_dir>/VGGnet_fast_rcnn_iter_100.ckpt
            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix +
                        '_iter_{:d}'.format(iter + 1) + '.ckpt')
            filename = os.path.join(self.output_dir, filename)

            # Write the .ckpt model file.
            self.saver.save(sess, filename)
            print('Wrote snapshot to: {:s}'.format(filename))
        finally:
            # Restore the net to its training-time state, even if saving failed,
            # so later iterations keep optimizing the normalized parameters.
            if unnormalize:
                sess.run(weights.assign(orig_0))
                sess.run(biases.assign(orig_1))

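与存储.ckpt训练模型有关,被train_model(...)调用。训练时bbox回归目标已按均值和标准差做了规范化(见上文add_bbox_regression_targets的说明:减均值、除以标准差),网络bbox_pred层学到的是规范化后的回归量;因此保存前先把weights乘以bbox_stds、biases乘以bbox_stds再加bbox_means(即"反规范化"),使保存下来的模型在测试时可以直接输出未规范化的回归量;保存之后再恢复原始weights和biases,是为了让后续训练继续在规范化空间中进行。cfg.TRAIN.BBOX_NORMALIZE_TARGETS应为是否对回归目标做规范化的开关(推测,待对照config确认);weights.eval()的作用是在会话中取出该变量当前的数值(numpy数组)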

------------------------------------------------------build_image_summary(...)---------------------------------------------------------

def build_image_summary(self)

    def build_image_summary(self):
        """Create the small graph used to write a single image summary.

        Returns:
            A tuple ``(summary_op, image_placeholder, name_placeholder)``: the
            image-summary op, the ``uint8`` placeholder of shape
            ``[None, None, 3]`` that feeds it, and the string placeholder that
            supplies the summary tag.
        """
        from tensorflow.python.framework import ops as _ops
        from tensorflow.python.ops import gen_logging_ops

        image_ph = tf.placeholder(tf.uint8, [None, None, 3])
        name_ph = tf.placeholder(tf.string)

        # The summary op receives a batch, so a leading axis of size one is added.
        batched_image = tf.expand_dims(image_ph, 0)

        # The low-level op is called directly and the result is registered in
        # the SUMMARIES collection by hand.
        # NOTE(review): tf.summary.image looks like the public counterpart (the
        # original kept it as a commented-out alternative) -- confirm before
        # switching.
        summary_op = gen_logging_ops._image_summary(name_ph, batched_image, max_images=1)
        _ops.add_to_collection(_ops.GraphKeys.SUMMARIES, summary_op)

        return summary_op, image_ph, name_ph

生成与图像、图像数据、图像名相关的摘要日志文件,tensorflow相关机制不懂,如gen_logging_ops._image_summary(...)与_ops.add_to_collection(...),被train_model(...)调用

2.SolverWrapper类中train_model(...)函数(训练过程主要代码)代码逻辑

def train_model(self,sess,max_iters,restore=False)

调用get_data_layer(self.roidb, self.imdb.num_classes)实例化RoIDataLayer类(roi_data_layer/layer.py中)对象并返回layer---->

调用build_loss定义各种训练loss(network.py中)--->

利用tf.summary中各类方法保存训练过程,可供tensorboard可视化,如tf.summary.scalar记录loss可用于绘制loss曲线--->

# The tf.summary helpers used during training record the training process and
# parameter distributions so they can be displayed in TensorBoard.
# Scalar summaries produce scalar plots, which is how each loss curve is drawn.
_loss_summaries = (
    ('rpn_rgs_loss', rpn_loss_box),
    ('rpn_cls_loss', rpn_cross_entropy),
    ('cls_loss', cross_entropy),
    ('rgs_loss', loss_box),
    ('loss', loss),
)
for _tag, _tensor in _loss_summaries:
    tf.summary.scalar(_tag, _tensor)
# Merge every summary registered so far into a single op.
summary_op = tf.summary.merge_all()

调用类内函数build_image_summary()生成与图像、图像数据、图像名相关的摘要日志文件log_image, log_image_data, log_image_name--->

定义默认优化方式为opt=tf.train.MomentumOptimizer(lr,momentum)--->

# Choose the optimizer.
# Defaults (per the original note): TRAIN.SOLVER = 'Momentum',
# TRAIN.LEARNING_RATE = 0.001, TRAIN.MOMENTUM = 0.9
if cfg.TRAIN.SOLVER not in ('Adam', 'RMS'):
    # Momentum is the fallback for any other solver name. The learning rate
    # lives in a non-trainable variable, presumably so it can be reassigned
    # later during training.
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    momentum = cfg.TRAIN.MOMENTUM
    opt = tf.train.MomentumOptimizer(lr, momentum)
elif cfg.TRAIN.SOLVER == 'Adam':
    opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
else:
    opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)

定义记录全局训练步骤的单值global_step = tf.Variable(0, trainable=False)--->

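训练核心代码,tf.clip_by_global_norm(tf.gradients(loss, tvars), 10.0)、opt.apply_gradients(zip(grads, tvars), global_step=global_step),但是反向传播体现在哪?答:体现在tf.gradients(loss, tvars)中,它在计算图中构建loss对各可训练变量的梯度节点(即反向传播),apply_gradients再利用这些梯度更新参数;每次sess.run(train_op)时才真正执行--->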

with_clip = True
if with_clip:
    # Collect every trainable variable.
    tvars = tf.trainable_variables()
    # tf.gradients builds the gradients of the loss with respect to each
    # trainable variable. tf.clip_by_global_norm then rescales the whole list
    # so that its global norm does not exceed 10.0. It returns the clipped
    # gradients and the global norm, i.e. the square root of the sum of the
    # squared L2 norms of all gradients (not the plain sum of squares).
    grads, norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), 10.0)
    # Optimizer.minimize is two steps: compute_gradients followed by
    # apply_gradients. The gradients are already computed (and clipped) above,
    # so only the apply step is needed here.
    train_op = opt.apply_gradients(zip(grads, tvars), global_step=global_step)
else:
    train_op = opt.minimize(loss, global_step=global_step)

创建会话权值初始化--->定义restore_iter = 0--->

调用self.net.load(self.pretrained_model, sess, True)函数(network.py中)加载预训练模型--->

        # Load the pretrained backbone weights (e.g. VGG16) unless an earlier
        # run is being resumed from a checkpoint.
        if self.pretrained_model is not None and not restore:
            try:
                print('Loading pretrained model '
                      'weights from {:s}'.format(self.pretrained_model))
                self.net.load(self.pretrained_model, sess, True)
            except Exception as err:
                # A plain string cannot be raised; raise a real exception and
                # keep the underlying cause in the message.
                raise RuntimeError('Check your pretrained model {:s} ({})'.format(
                    self.pretrained_model, err))

        # Resume a previous training run from the latest checkpoint.
        if restore:
            ckpt = tf.train.get_checkpoint_state(self.output_dir)
            # get_checkpoint_state returns None when no checkpoint state file is
            # found; report that directly instead of failing on attribute access.
            if ckpt is None or not ckpt.model_checkpoint_path:
                raise RuntimeError('Check your pretrained {:s}'.format(
                    'checkpoint: none found in ' + str(self.output_dir)))
            try:
                # The trailing comma keeps Python 2 from printing a newline, so
                # 'done' below ends up on the same line.
                print('Restoring from {}...'.format(ckpt.model_checkpoint_path)),
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                # basename strips the directory part; splitext splits off the
                # extension, leaving e.g. 'VGGnet_fast_rcnn_iter_100'.
                stem = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0]
                # The iteration count is the last '_'-separated field of the name.
                restore_iter = int(stem.split('_')[-1])
                # Keep global_step in sync with the restored iteration.
                sess.run(global_step.assign(restore_iter))
                print('done')
            except Exception as err:
                raise RuntimeError('Check your pretrained {:s} ({})'.format(
                    ckpt.model_checkpoint_path, err))

View Code