Network architecture diagram

[Figure: network architecture of the SSD model built below]

The base network block is customizable: VGG, ResNet, or DenseNet can all be used. Here a simple base of three convolutional down-sampling blocks plus the SSD prediction layers is used (a sketch of swapping in a pretrained base follows below).

Multi-scale feature blocks: two 3×3 convolution layers keep the feature-map size unchanged, then a 2×2 max pooling halves the height and width, so each block extracts features at a coarser scale than the previous one.

Class and bounding-box prediction: multi-channel convolutional outputs are used to produce the predictions, one group of channels per anchor.
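
As a rough illustration of swapping in a different base network, the sketch below uses the feature layers of a pretrained ResNet-18 from gluon.model_zoo instead of the simple convolutional body defined in SSD_model.py below. The helper name resnet_body_blk and the cut-off index are illustrative assumptions, not part of the code that follows; only the early, higher-resolution layers are kept, because the first anchor scale needs a reasonably large feature map.

from mxnet.gluon import nn
from mxnet.gluon.model_zoo import vision

def resnet_body_blk():
    # Illustrative sketch: a truncated pretrained ResNet-18 as the base block.
    pretrained = vision.resnet18_v2(pretrained=True).features
    blk = nn.Sequential()
    # The cut-off index (7) is an illustrative choice: keep only the early
    # layers so the output stride matches the first anchor scale
    # (stride 8, i.e. a 32x32 map for a 256x256 input).
    for i in range(7):
        blk.add(pretrained[i])
    return blk

To use it, get_blk below would return resnet_body_blk() instead of body_blk() for i == 0, and the pretrained layers should not be re-initialized.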

File: SSD_model.py

import sys
sys.path.insert(0, '..')

import gluonbook as gb
from mxnet import autograd, contrib, gluon, image, init, nd
from mxnet.gluon import loss as gloss, nn
import time

def cls_predictor(num_anchors, num_classes):
    # For every spatial position, predict (num_classes + 1) scores
    # (including background) for each of the num_anchors anchors.
    return nn.Conv2D(num_anchors * (num_classes + 1), kernel_size=3,
                     padding=1)

def bbox_predictor(num_anchors):
    # For every spatial position, predict 4 offsets for each anchor.
    return nn.Conv2D(num_anchors * 4, kernel_size=3, padding=1)

def forward(x, block):
    # Initialize a freshly built block and run one forward pass
    # (used for quick shape checks).
    block.initialize()
    return block(x)

def flatten_pred(pred):
    # (batch, channels, h, w) -> (batch, h * w * channels)
    return pred.transpose((0, 2, 3, 1)).flatten()

def concat_preds(preds):
    # Concatenate the flattened predictions from different scales along dim 1.
    return nd.concat(*[flatten_pred(p) for p in preds], dim=1)
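
# Illustrative shape check (an addition, kept as comments so that importing
# SSD_model has no side effects): with 5 anchors and 10 classes the class
# predictor outputs 5 * (10 + 1) = 55 channels per position, and flattening
# plus concatenating two scales yields one long prediction vector:
#   Y1 = forward(nd.zeros((2, 8, 20, 20)), cls_predictor(5, 10))   # (2, 55, 20, 20)
#   Y2 = forward(nd.zeros((2, 16, 10, 10)), cls_predictor(3, 10))  # (2, 33, 10, 10)
#   concat_preds([Y1, Y2]).shape                                    # (2, 25300)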

def down_sample_blk(num_channels):
    # Two 3x3 conv layers that keep the spatial size, followed by a 2x2 max
    # pooling that halves the height and width.
    blk = nn.Sequential()
    for _ in range(2):
        blk.add(nn.Conv2D(num_channels, kernel_size=3, padding=1),
                nn.BatchNorm(in_channels=num_channels),
                nn.Activation('relu'))
    blk.add(nn.MaxPool2D(2))
    return blk
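
# Illustrative shape check (an addition, again left as a comment): a
# down-sampling block keeps the requested channel count and halves the
# spatial size:
#   forward(nd.zeros((2, 3, 20, 20)), down_sample_blk(10)).shape  # (2, 10, 10, 10)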

def body_blk():
    # Base network: three down-sampling blocks, reducing a 256x256 input to a
    # 32x32 feature map with 64 channels.
    blk = nn.Sequential()
    for num_filters in [16, 32, 64]:
        blk.add(down_sample_blk(num_filters))
    return blk

def get_blk(i):
    if i == 0:
        blk = body_blk()            # base network
    elif i == 4:
        blk = nn.GlobalMaxPool2D()  # reduce the final feature map to 1x1
    else:
        blk = down_sample_blk(128)  # halve the height and width
    return blk

def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    # One SSD stage: compute the feature map, generate anchors on it, and
    # predict classes and box offsets for every anchor.
    Y = blk(X)
    anchor = contrib.ndarray.MultiBoxPrior(Y, sizes=size, ratios=ratio)
    cls_pred = cls_predictor(Y)
    bbox_pred = bbox_predictor(Y)
    return (Y, anchor, cls_pred, bbox_pred)

# Anchor sizes and aspect ratios for the five scales; with 2 sizes and 3
# ratios there are 2 + 3 - 1 = 4 anchors per spatial position.
sizes = [[0.2, 0.272], [0.37, 0.447], [0.54, 0.619], [0.71, 0.79],
         [0.88, 0.961]]
ratios = [[1, 2, 0.5]] * 5
num_anchors = len(sizes[0]) + len(ratios[0]) - 1

class TinySSD(nn.Block):
    def __init__(self, num_classes, **kwargs):
        super(TinySSD, self).__init__(**kwargs)
        self.num_classes = num_classes
        for i in range(5):
            setattr(self, 'blk_%d' % i, get_blk(i))
            setattr(self, 'cls_%d' % i, cls_predictor(num_anchors,
                                                      num_classes))
            setattr(self, 'bbox_%d' % i, bbox_predictor(num_anchors))

    def forward(self, X):
        anchors, cls_preds, bbox_preds = [None] * 5, [None] * 5, [None] * 5
        for i in range(5):
            X, anchors[i], cls_preds[i], bbox_preds[i] = blk_forward(
                X, getattr(self, 'blk_%d' % i), sizes[i], ratios[i],
                getattr(self, 'cls_%d' % i), getattr(self, 'bbox_%d' % i))
        # The anchors from every block need to be concatenated.
        return (nd.concat(*anchors, dim=1),
                concat_preds(cls_preds).reshape(
                    (0, -1, self.num_classes + 1)), concat_preds(bbox_preds))
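
Before training, a quick sanity check of the output shapes is helpful. The snippet below is an addition (run it separately, e.g. in an interpreter after from SSD_model import *, so that it is not executed every time the module is imported); num_classes=1 and the batch size of 32 are arbitrary illustrative choices. On a 256×256 input the five feature maps have sizes 32, 16, 8, 4 and 1, so there are (32*32 + 16*16 + 8*8 + 4*4 + 1) * 4 = 5444 anchors in total.

net = TinySSD(num_classes=1)
net.initialize()
X = nd.zeros((32, 3, 256, 256))
anchors, cls_preds, bbox_preds = net(X)
print('output anchors:', anchors.shape)        # (1, 5444, 4)
print('output class preds:', cls_preds.shape)  # (32, 5444, 2)
print('output bbox preds:', bbox_preds.shape)  # (32, 21776)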

Training

File: SSD_run.py

from SSD_model import *
from load_my_data import load_my_data
import time


# Load the custom dataset (see load_my_data.py); images are resized to
# edge_size x edge_size.
path = './VOCtemplate/VOC2012/Annotations/'
batch_size, edge_size = 4, 256
train_data = load_my_data(batch_size, edge_size)
# batch = train_data.next()
# print(batch.data[0])

# label_shape=(3, 5): at most 3 objects per image, each labelled with
# (class, xmin, ymin, xmax, ymax).
train_data.reshape(label_shape=(3, 5))

net = TinySSD(num_classes=5)
# On the first run, initialize from scratch with the line below; on later
# runs, resume from the saved parameters instead.
#net.initialize(init=init.Xavier())
net.load_parameters('my_model.params')
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'wd': 5e-4})

cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()

def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    cls = cls_loss(cls_preds, cls_labels)
    # bbox_masks is 0 for background anchors, so their offsets do not
    # contribute to the L1 loss.
    bbox = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
    return cls + bbox

def cls_eval(cls_preds, cls_labels):
    # The class predictions are on the last dimension, so argmax must be
    # taken over axis=-1.
    return (cls_preds.argmax(axis=-1) == cls_labels).mean().asscalar()

def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    return ((bbox_labels - bbox_preds) * bbox_masks).abs().mean().asscalar()


acc, mae = 0, 0
train_data.reset()  # Read the data from the start.
start = time.time()
for i, batch in enumerate(train_data):
    X = batch.data[0]
    Y = batch.label[0]
    with autograd.record():
        # Generate multi-scale anchors and predict a class and offsets for
        # each anchor.
        anchors, cls_preds, bbox_preds = net(X)
        # Label the class and offset targets for each anchor.
        bbox_labels, bbox_masks, cls_labels = contrib.nd.MultiBoxTarget(
            anchors, Y, cls_preds.transpose((0, 2, 1)))
        # Compute the loss from the predicted and labelled classes and offsets.
        l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                      bbox_masks)
    l.backward()
    trainer.step(batch_size)
    acc += cls_eval(cls_preds, cls_labels)
    mae += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
    if (i + 1) % 10 == 0:
        print('step %2d, class err %.2e, bbox mae %.2e, time %.1f sec' % (
            i + 1, 1 - acc / 10, mae / 10, time.time() - start))
        acc, mae = 0, 0
        start = time.time()
    if (i + 1) % 100 == 0:
        print('writing params......')
        net.save_parameters('my_model.params')
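
The script above makes a single pass over the training data. For a real training run the loop would normally be wrapped in several epochs; a sketch of that variant follows, where the epoch count of 20 is an arbitrary illustrative choice and everything else reuses the objects defined above.

for epoch in range(20):
    acc, mae = 0, 0
    train_data.reset()  # Read the data from the start each epoch.
    for i, batch in enumerate(train_data):
        X, Y = batch.data[0], batch.label[0]
        with autograd.record():
            anchors, cls_preds, bbox_preds = net(X)
            bbox_labels, bbox_masks, cls_labels = contrib.nd.MultiBoxTarget(
                anchors, Y, cls_preds.transpose((0, 2, 1)))
            l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                          bbox_masks)
        l.backward()
        trainer.step(batch_size)
        acc += cls_eval(cls_preds, cls_labels)
        mae += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
    print('epoch %2d, class err %.2e, bbox mae %.2e' % (
        epoch + 1, 1 - acc / (i + 1), mae / (i + 1)))
    net.save_parameters('my_model.params')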


Start training:

[Figure: training output]