网络结构图
基础网络块可自定义,可使用VGG、ResNet、DenseNet等;这里使用由三个下采样块(通道数依次为16、32、64)串联而成的简单基础网络+SSD的网络配置。
多尺度特征块:先使用两层3*3的卷积(padding=1)保持特征图大小不变,再使用2*2的最大池化将特征图的高和宽减半,来提取不同尺度的特征。
类别、边界框预测:使用多通道的输出来预测结果。
文件名:SSD_model.py
import sys
sys.path.insert(0, '..')
import gluonbook as gb
from mxnet import autograd, contrib, gluon, image, init, nd
from mxnet.gluon import loss as gloss, nn
import time
def cls_predictor(num_anchors, num_classes):
    """Build the class-prediction layer applied to one feature map.

    Args:
        num_anchors: Number of anchors per feature-map position.
        num_classes: Number of object classes.

    Returns:
        A 3x3 ``nn.Conv2D`` (padding 1) with
        ``num_anchors * (num_classes + 1)`` output channels.
    """
    # One extra score per anchor beyond the object classes
    # (presumably the background class -- confirm against the label format).
    out_channels = num_anchors * (num_classes + 1)
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)
def bbox_predictor(num_anchors):
    """Build the bounding-box prediction layer applied to one feature map.

    Args:
        num_anchors: Number of anchors per feature-map position.

    Returns:
        A 3x3 ``nn.Conv2D`` (padding 1) with ``num_anchors * 4`` output
        channels, i.e. four predicted values per anchor.
    """
    out_channels = num_anchors * 4
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)
def forward(x, block):
    """Initialize ``block`` and return the result of calling it on ``x``.

    Note that ``block.initialize()`` is invoked on every call, so this helper
    is meant for quick experiments with freshly created blocks.
    """
    block.initialize()
    output = block(x)
    return output
def flatten_pred(pred):
    """Move axis 1 of a 4-axis prediction to the end, then flatten it.

    The axes are reordered with ``transpose((0, 2, 3, 1))`` before
    ``flatten()`` is called on the result.
    """
    channels_last = pred.transpose((0, 2, 3, 1))
    return channels_last.flatten()
def concat_preds(preds):
    """Flatten each prediction with ``flatten_pred`` and join them on axis 1.

    Args:
        preds: Iterable of per-block prediction arrays.

    Returns:
        A single array produced by ``nd.concat(..., dim=1)``.
    """
    flattened = [flatten_pred(single_pred) for single_pred in preds]
    return nd.concat(*flattened, dim=1)
def down_sample_blk(num_channels):
    """Build a down-sampling block.

    The block is two rounds of 3x3 convolution (padding 1) + batch
    normalization + ReLU, followed by a 2x2 max pooling layer.

    Args:
        num_channels: Output channels of both convolutions.

    Returns:
        An ``nn.Sequential`` containing the seven layers described above.
    """
    blk = nn.Sequential()
    conv_rounds = 2
    for _ in range(conv_rounds):
        # Layers are created in the same order as they are added.
        blk.add(nn.Conv2D(num_channels, kernel_size=3, padding=1))
        blk.add(nn.BatchNorm(in_channels=num_channels))
        blk.add(nn.Activation('relu'))
    blk.add(nn.MaxPool2D(2))
    return blk
def body_blk():
    """Build the base network: three down-sampling blocks in sequence.

    The blocks use 16, 32 and 64 channels respectively.
    """
    blk = nn.Sequential()
    stages = [down_sample_blk(channels) for channels in (16, 32, 64)]
    blk.add(*stages)
    return blk
def get_blk(i):
    """Return the feature block for stage ``i``.

    Stage 0 is the base network, stage 4 is a global max pooling layer, and
    every other stage is a 128-channel down-sampling block.
    """
    if i == 0:
        return body_blk()
    if i == 4:
        return nn.GlobalMaxPool2D()
    return down_sample_blk(128)
def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    """Run one stage: features, anchors, class and box predictions.

    Args:
        X: Input to the stage.
        blk: Feature block applied to ``X``.
        size: Anchor sizes passed to ``MultiBoxPrior`` as ``sizes``.
        ratio: Anchor aspect ratios passed to ``MultiBoxPrior`` as ``ratios``.
        cls_predictor: Callable producing class predictions from the features.
        bbox_predictor: Callable producing box predictions from the features.

    Returns:
        Tuple ``(features, anchors, cls_pred, bbox_pred)``.
    """
    features = blk(X)
    anchors = contrib.ndarray.MultiBoxPrior(features, sizes=size, ratios=ratio)
    return (features, anchors, cls_predictor(features),
            bbox_predictor(features))
# Anchor sizes for each of the five stages; entry i is passed to
# MultiBoxPrior as `sizes` when stage i runs.
sizes = [
    [0.2, 0.272],
    [0.37, 0.447],
    [0.54, 0.619],
    [0.71, 0.79],
    [0.88, 0.961],
]
# The same three aspect ratios are used at every stage (one entry per stage).
ratios = [[1, 2, 0.5]] * len(sizes)
# Anchors per feature-map position: 2 sizes + 3 ratios - 1 = 4. This value
# determines the channel counts of the class and box predictors.
num_anchors = len(sizes[0]) + len(ratios[0]) - 1
class TinySSD(nn.Block):
    """Small SSD model made of five stages.

    Each stage owns a feature block (``blk_i``), a class predictor
    (``cls_i``) and a bounding-box predictor (``bbox_i``). Anchor settings
    come from the module-level ``sizes``, ``ratios`` and ``num_anchors``.
    """

    def __init__(self, num_classes, **kwargs):
        """Create the five stages for ``num_classes`` object classes."""
        super(TinySSD, self).__init__(**kwargs)
        self.num_classes = num_classes
        for stage in range(5):
            # Build the three layers first, then register them as attributes
            # in the order blk / cls / bbox.
            feature_blk = get_blk(stage)
            cls_head = cls_predictor(num_anchors, num_classes)
            bbox_head = bbox_predictor(num_anchors)
            setattr(self, 'blk_%d' % stage, feature_blk)
            setattr(self, 'cls_%d' % stage, cls_head)
            setattr(self, 'bbox_%d' % stage, bbox_head)

    def forward(self, X):
        """Run all stages and merge their outputs.

        Returns:
            Tuple ``(anchors, cls_preds, bbox_preds)`` where the anchors of
            all stages are concatenated on axis 1, the class predictions are
            reshaped so the last axis has ``num_classes + 1`` entries, and
            the box predictions are flattened and concatenated.
        """
        anchors, cls_preds, bbox_preds = [], [], []
        for stage in range(5):
            # The features of one stage are the input of the next one.
            X, stage_anchors, stage_cls, stage_bbox = blk_forward(
                X,
                getattr(self, 'blk_%d' % stage),
                sizes[stage],
                ratios[stage],
                getattr(self, 'cls_%d' % stage),
                getattr(self, 'bbox_%d' % stage))
            anchors.append(stage_anchors)
            cls_preds.append(stage_cls)
            bbox_preds.append(stage_bbox)
        # Anchors from every stage have to be concatenated.
        all_anchors = nd.concat(*anchors, dim=1)
        all_cls = concat_preds(cls_preds).reshape(
            (0, -1, self.num_classes + 1))
        all_bbox = concat_preds(bbox_preds)
        return (all_anchors, all_cls, all_bbox)
训练
文件名:SSD_run.py
from SSD_model import *
from load_my_data import load_my_data
import time
# Script setup: data iterator, model, optimizer and loss functions.
import os  # local to the run script; used for the checkpoint existence check

# NOTE(review): `path` is not referenced anywhere else in the visible script;
# it is kept unchanged in case other code relies on it -- confirm.
path = './VOCtemplate/VOC2012/Annotations/'
batch_size, edge_size = 4, 256
train_data = load_my_data(batch_size, edge_size)
train_data.reshape(label_shape=(3, 5))

net = TinySSD(num_classes=5)
# Resume from the checkpoint that the training loop writes. On a first run
# the file does not exist yet, so fall back to fresh Xavier initialization
# instead of failing inside load_parameters.
params_file = 'my_model.params'
if os.path.isfile(params_file):
    net.load_parameters(params_file)
else:
    net.initialize(init=init.Xavier())

trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'wd': 5e-4})
cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()
def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Return the sum of the class loss and the masked bounding-box loss.

    The class term is ``cls_loss(cls_preds, cls_labels)``. For the box term
    both predictions and labels are multiplied by ``bbox_masks`` before
    being passed to ``bbox_loss``.
    """
    class_term = cls_loss(cls_preds, cls_labels)
    masked_preds = bbox_preds * bbox_masks
    masked_labels = bbox_labels * bbox_masks
    box_term = bbox_loss(masked_preds, masked_labels)
    return class_term + box_term
def cls_eval(cls_preds, cls_labels):
    """Return the mean agreement between predicted and labelled classes.

    Class scores are on the last axis of ``cls_preds``, so the argmax is
    taken over that axis. The result is converted to a Python scalar.
    """
    predicted = cls_preds.argmax(axis=-1)
    matches = predicted == cls_labels
    return matches.mean().asscalar()
def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    """Return the mean absolute masked difference between labels and predictions.

    The difference ``bbox_labels - bbox_preds`` is multiplied by
    ``bbox_masks`` before the absolute value and mean are taken. The result
    is converted to a Python scalar.
    """
    masked_diff = (bbox_labels - bbox_preds) * bbox_masks
    return masked_diff.abs().mean().asscalar()
# One pass over the training data with periodic logging and checkpointing.
log_interval = 10    # print averaged metrics every this many steps
save_interval = 100  # write parameters every this many steps

acc, mae = 0, 0
train_data.reset()  # Read the data from the beginning.
start = time.time()
steps_done = 0
for i, batch in enumerate(train_data):
    X = batch.data[0]
    Y = batch.label[0]
    with autograd.record():
        # Generate multi-scale anchors and predict a class and an offset
        # for every anchor.
        anchors, cls_preds, bbox_preds = net(X)
        # Label every anchor with a class and an offset.
        bbox_labels, bbox_masks, cls_labels = contrib.nd.MultiBoxTarget(
            anchors, Y, cls_preds.transpose((0, 2, 1)))
        # Compute the loss from the predicted and labelled classes/offsets.
        l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                      bbox_masks)
    l.backward()
    trainer.step(batch_size)
    acc += cls_eval(cls_preds, cls_labels)
    mae += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
    steps_done = i + 1
    if steps_done % log_interval == 0:
        # Metrics are averaged over the last `log_interval` steps and then
        # reset together with the timer.
        print('step %2d, class err %.2e, bbox mae %.2e, time %.1f sec' % (
            steps_done, 1 - acc / log_interval, mae / log_interval,
            time.time() - start))
        acc, mae = 0, 0
        start = time.time()
    if steps_done % save_interval == 0:
        print('writing params......')
        net.save_parameters('my_model.params')

# Steps taken after the last periodic checkpoint (or all of them, when the
# pass has fewer than `save_interval` batches) would otherwise be lost.
if steps_done % save_interval != 0:
    print('writing params......')
    net.save_parameters('my_model.params')
开始训练:
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:实践目标检测–构造SSD网络与训练 - Python技术站