源码地址 https://github.com/qqwweee/keras-yolo3
├── coco_annotation.py
├── convert.py
├── darknet53.cfg
├── font
│ ├── FiraMono-Medium.otf
│ └── SIL Open Font License.txt
├── .gitignore
├── kmeans.py
├── model_data
│ ├── coco_classes.txt
│ ├── tiny_yolo_anchors.txt
│ ├── voc_classes.txt
│ └── yolo_anchors.txt
├── README.md
├── train_bottleneck.py
├── train.py
├── voc_annotation.py
├── yolo3
│ ├── __init__.py
│ ├── model.py
│ └── utils.py
├── yolo.py
├── yolov3.cfg
├── yolov3-tiny.cfg
└── yolo_video.py
- font是字体目录
- model_data:
coco_classes文件: 就是coco文件的类别文件
如下: -
- yolo3:
- model.py 就是构建yolo3的主要模块文件,这里一共有14个函数。
- utils.py 是在模型训练时进行数据处理的工具文件,一共有3个函数:
- *_annotation.py 对数据进行转换的文件,把原始的标注文件转换为txt文件。
- coco_annotation.py 把json文件转换为txt文件
- voc_annotation.py 把xml文件转换为txt文件
- convert.py 把原始权重转换为keras能够读取的h5文件
- kmeans.py 输入上面得到的txt文件,通过聚类得到数据最佳anchors。
- train.py 进行yolov3训练的文件
- yolo.py 构建以yolov3为底层构件的yolo检测模型,因为上面的yolov3还是分开的单个函数,功能并没有融合在一起,即使在训练的时候所有的yolov3组件还是分开的功能,并没有统一接口,供在模型训练完成之后,直接使用。通过yolo.py融合所有的组件。
- yolo_video.py 使用yolo.py文件中的yolo检测模型,并且对视频中的物体进行检测。
- yolov3.cfg 构建yolov3检测模型的整个超参文件。
- 探索 YOLO v3 源码 - 第1篇 训练---在train.py中
- 探索 YOLO v3 源码 - 第2篇 模型---在train.py中
- 探索 YOLO v3 源码 - 第3篇 网络---在yolo3/model.py中
- 探索 YOLO v3 源码 - 第4篇 真值---在yolo3/utils.py和yolo3/model.py中
- 探索 YOLO v3 源码 - 第5篇 Loss---在yolo3/model.py中
- 探索 YOLO v3 源码 - 完结篇 预测---在yolo.py中
import numpy as np
class YOLO_Kmeans:
    """Estimate anchor box sizes by k-means clustering of labelled box sizes.

    Boxes are reduced to ``[width, height]`` pairs and compared by IoU as if
    they shared one corner; ``1 - IoU`` is the clustering distance.
    """

    def __init__(self, cluster_number, filename):
        """Store the clustering configuration.

        Args:
            cluster_number: Number of cluster centres (anchors) to compute.
            filename: Path of the annotation text file that boxes are read from.
        """
        self.cluster_number = cluster_number
        # Honour the path given by the caller instead of a hard-coded file name.
        self.filename = filename

    def iou(self, boxes, clusters):
        """Return the IoU of every box against every cluster centre.

        Args:
            boxes: Array of shape (n, 2) holding ``[width, height]`` rows.
            clusters: Array of shape (k, 2) holding ``[width, height]`` rows.

        Returns:
            Array of shape (n, k); entry ``[i, j]`` is the IoU of box ``i``
            and cluster ``j``.
        """
        boxes = np.asarray(boxes)
        clusters = np.asarray(clusters)

        # Column vectors for the boxes and row vectors for the clusters, so
        # that broadcasting produces the full (n, k) pairing. k is taken from
        # ``clusters`` itself rather than from ``self.cluster_number``.
        box_w = boxes[:, 0][:, np.newaxis]
        box_h = boxes[:, 1][:, np.newaxis]
        cluster_w = clusters[:, 0][np.newaxis, :]
        cluster_h = clusters[:, 1][np.newaxis, :]

        # The overlap of two corner-aligned rectangles is min width * min height.
        inter_area = np.minimum(box_w, cluster_w) * np.minimum(box_h, cluster_h)
        union_area = box_w * box_h + cluster_w * cluster_h - inter_area
        return inter_area / union_area

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of each box's best IoU with a cluster."""
        return np.mean(np.max(self.iou(boxes, clusters), axis=1))

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster ``boxes`` into ``k`` centres using ``1 - IoU`` as distance.

        Args:
            boxes: Array of shape (n, 2) holding ``[width, height]`` rows.
            k: Number of cluster centres.
            dist: Reduction used to recompute a centre from its member boxes;
                called as ``dist(members, axis=0)``. Defaults to the median.

        Returns:
            Array of shape (k, 2) with the final centres, in the dtype of
            ``boxes``.
        """
        box_number = boxes.shape[0]
        last_nearest = np.zeros((box_number,))
        # Initialise the centres with k distinct boxes picked at random.
        clusters = boxes[np.random.choice(box_number, k, replace=False)]

        while True:
            # (n, k) distance of every box to every centre.
            distances = 1 - self.iou(boxes, clusters)
            # Index of the closest centre for each box, shape (n,).
            current_nearest = np.argmin(distances, axis=1)
            # Stop once the assignment no longer changes.
            if (last_nearest == current_nearest).all():
                break
            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                # An empty cluster keeps its previous centre; reducing an
                # empty selection would yield NaN.
                if len(members):
                    clusters[cluster] = dist(members, axis=0)
            last_nearest = current_nearest

        return clusters

    def result2txt(self, data, output_path="yolo_anchors.txt"):
        """Write the centres to a text file as ``w,h, w,h, ...`` on one line.

        Args:
            data: Array of shape (k, 2) holding ``[width, height]`` rows.
            output_path: File to write; defaults to ``yolo_anchors.txt``.
        """
        pairs = ["%d,%d" % (data[i][0], data[i][1])
                 for i in range(np.shape(data)[0])]
        with open(output_path, 'w') as f:
            f.write(", ".join(pairs))

    def txt2boxes(self):
        """Read the annotation file and return every box as ``[width, height]``.

        Each line is split on whitespace; the first field is the image name
        and every following field is a comma-separated box.
        NOTE(review): the subtrahends were missing from the extracted code;
        width and height are rebuilt as field[2] - field[0] and
        field[3] - field[1], assuming the keras-yolo3 box layout
        ``x_min,y_min,x_max,y_max,class_id`` -- confirm against the data.

        Returns:
            Integer array with one ``[width, height]`` row per box.
        """
        data_set = []
        with open(self.filename, 'r') as f:
            for line in f:
                # split() also drops the trailing newline and repeated spaces.
                infos = line.split()
                for info in infos[1:]:
                    fields = info.split(",")
                    width = int(fields[2]) - int(fields[0])
                    height = int(fields[3]) - int(fields[1])
                    data_set.append([width, height])
        return np.array(data_set)

    def txt2clusters(self):
        """Run the whole pipeline: read boxes, cluster, save and report."""
        all_boxes = self.txt2boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # Order the centres by their first column (width).
        result = result[np.lexsort(result.T[0, None])]
        # Persist the centres to the anchors text file.
        self.result2txt(result)
        print("K anchors:\n {}".format(result))
        # Mean best-IoU of all boxes against the final centres, as a percentage.
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))
if __name__ == "__main__":
    # Cluster the boxes listed in the annotation file into 9 anchors.
    cluster_number = 9
    filename = "2012_train.txt"
    kmeans = YOLO_Kmeans(cluster_number, filename)
    # Run the pipeline; constructing the object alone performs no work.
    kmeans.txt2clusters()
#!/usr/bin/env python
# -- coding: utf-8 --
# Copyright (c) 2018. All rights reserved.
# Created by C. L. Wang on 2018/7/4
import os
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.backend import mean
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.utils import plot_model
from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
from yolo3.utils import get_random_data
def _main():
    """Train the YOLOv3 model on the WIDER annotation list.

    All paths and hyper-parameters are hard-coded below. Training is laid out
    as two stages: a first stage that is currently disabled (``if False``) and
    a second stage that marks every layer trainable and fits again. Weights
    are written to ``log_dir``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # NOTE(review): this session is created but never registered with Keras,
    # and create_model() calls K.clear_session() -- confirm the intent.
    sess = tf.Session(config=config)

    annotation_path = 'dataset/WIDER_train.txt'  # training annotations
    classes_path = 'configs/wider_classes.txt'  # class names
    log_dir = 'logs/004/'  # output directory for logs and weights

    # pretrained_path = 'model_data/yolo_weights.h5'  # pretrained weights
    pretrained_path = 'logs/003/ep074-loss26.535-val_loss27.370.h5'  # weights to resume from
    anchors_path = 'configs/yolo_anchors.txt'  # anchors

    class_names = get_classes(classes_path)  # list of class names
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)  # array of anchor pairs

    input_shape = (416, 416)  # network input size, a multiple of 32

    # Build the model to be trained.
    model = create_model(input_shape, anchors, num_classes,
                         weights_path=pretrained_path)  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    # Weights only, best val_loss only, checked every 3 epochs.
    checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                                 monitor='val_loss', save_weights_only=True,
                                 save_best_only=True, period=3)
    # Lower the learning rate when val_loss stops improving.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    # Stop training when val_loss has not improved for 10 epochs.
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    val_split = 0.1  # fraction of the lines held out for validation
    with open(annotation_path) as f:
        lines = f.readlines()
    num_val = int(len(lines) * val_split)  # number of validation samples
    num_train = len(lines) - num_val  # number of training samples

    # NOTE(review): neither fit_generator call below passes ``epochs``, so the
    # Keras default applies -- confirm the intended number of epochs.
    if False:
        model.compile(optimizer=Adam(lr=1e-3), loss={
            # The yolo_loss Lambda layer already outputs the loss value,
            # so the Keras loss just passes y_pred through.
            'yolo_loss': lambda y_true, y_pred: y_pred})

        batch_size = 32
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=data_generator_wrapper(
                                lines[num_train:], batch_size, input_shape, anchors, num_classes),
                            validation_steps=max(1, num_val // batch_size),
                            callbacks=[logging, checkpoint])
        # Save the final weights of this stage; intermediate ones are saved by the callbacks.
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    if True:  # train every layer
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        # NOTE(review): the optimizer line was missing from the extracted code;
        # Adam(lr=1e-4) follows the upstream keras-yolo3 train.py -- confirm.
        model.compile(optimizer=Adam(lr=1e-4),
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 16  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors,
                                                                   num_classes),
                            validation_steps=max(1, num_val // batch_size),
                            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')
def get_classes(classes_path):
    """Load the class names listed one per line in ``classes_path``.

    Args:
        classes_path: Path of the text file holding the class names.

    Returns:
        List of class names with surrounding whitespace removed. Blank lines
        are skipped so they cannot add empty names to the class count.
    """
    with open(classes_path) as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]
def get_anchors(anchors_path):
    """Load the anchors stored on the first line of ``anchors_path``.

    The line is a comma-separated list of numbers; consecutive values are
    paired up.

    Args:
        anchors_path: Path of the anchors text file.

    Returns:
        Float array of shape (number_of_anchors, 2).
    """
    with open(anchors_path) as f:
        first_line = f.readline()
    values = [float(x) for x in first_line.split(',')]
    return np.array(values).reshape(-1, 2)
def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
                 weights_path='model_data/yolo_weights.h5'):
    """Build the trainable model: YOLOv3 body plus a Lambda layer computing the loss.

    Args:
        input_shape: ``(height, width)`` of the input images.
        anchors: Array of anchor pairs; its length is the total anchor count.
        num_classes: Number of object classes.
        load_pretrained: When true, load weights from ``weights_path`` by
            layer name, skipping layers whose shapes do not match.
        freeze_body: 1 freezes the first 185 layers, 2 freezes every layer
            except the last 3; any other value freezes nothing. Only applied
            when ``load_pretrained`` is true.
        weights_path: Weights file to load.
            NOTE(review): the default was missing from the extracted
            signature; it is taken from the path commented out in _main()
            -- confirm.

    Returns:
        A Keras ``Model`` whose inputs are the image followed by the three
        ``y_true`` tensors and whose output is the ``yolo_loss`` layer.
    """
    K.clear_session()  # start from a fresh session/graph
    h, w = input_shape
    # Keras image tensors are (height, width, channels); this matches the
    # h-then-w order used for y_true below.
    image_input = Input(shape=(h, w, 3))
    num_anchors = len(anchors)

    # One ground-truth input per output scale (strides 32, 16 and 8). Each has
    # num_anchors // 3 anchors per cell and num_classes + 5 values per anchor.
    y_true = [Input(shape=(h // {0: 32, 1: 16, 2: 8}[l], w // {0: 32, 1: 16, 2: 8}[l],
                           num_anchors // 3, num_classes + 5)) for l in range(3)]

    model_body = yolo_body(image_input, num_anchors // 3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        # Load by layer name and skip layers whose weights do not fit.
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            num = (185, len(model_body.layers) - 3)[freeze_body - 1]
            for i in range(num):
                model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # The loss is computed inside the graph by a Lambda layer that receives
    # the three body outputs followed by the three y_true inputs.
    model_loss = Lambda(yolo_loss,
                        output_shape=(1,), name='yolo_loss',
                        arguments={'anchors': anchors,
                                   'num_classes': num_classes,
                                   'ignore_thresh': 0.5}
                        )(model_body.output + y_true)
    model = Model(inputs=[model_body.input] + y_true, outputs=model_loss)
    # Write a diagram of the model to model_data/model.png.
    plot_model(model, to_file=os.path.join('model_data', 'model.png'), show_shapes=True, show_layer_names=True)

    return model
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Yield training batches forever, in the form consumed by ``fit_generator``.

    Args:
        annotation_lines: List of annotation lines, one per image. It is
            shuffled in place at the start of every pass.
        batch_size: Number of images per batch.
        input_shape: Input size passed to the image loader and to the
            ground-truth encoder.
        anchors: Anchor array passed to the ground-truth encoder.
        num_classes: Number of object classes.

    Yields:
        ``([image_data] + y_true, np.zeros(batch_size))``. The second element
        is a dummy target.
    """
    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        box_data = []
        for _ in range(batch_size):
            if i == 0:
                # Reshuffle the samples at the start of every pass.
                np.random.shuffle(annotation_lines)
            # Load one image and its boxes with random augmentation enabled.
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            i = (i + 1) % n  # wrap around at the end of the list
        # Per the original author's notes these are (batch, 416, 416, 3) and
        # (batch, 20, 5), at most 20 boxes per image -- TODO confirm against
        # get_random_data.
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        # Encode the boxes into a list of ground-truth tensors (presumably one
        # per output scale -- verify against preprocess_true_boxes).
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data] + y_true, np.zeros(batch_size)
def data_generator_wrapper
