本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者疆--------------
------点击此处链接至博客园原文------
定义了pascal_voc类,继承自imdb类,类中定义了18个函数
1.__init__(self,image_set,year,devkit_path=None)构造函数,初始化部分变量
这里面部分变量未在本脚本中被更新,如self._num_classes和self._roidb
# The pascal_voc dataset class derives from imdb.
class pascal_voc(imdb):
    def __init__(self, image_set, year, devkit_path=None):
        """
        Set up a PASCAL VOC image database.

        image_set   -- name of the split, e.g. 'trainval'
        year        -- dataset year as a string, e.g. '2007'
        devkit_path -- root directory of the VOC devkit; when None the
                       value returned by self._get_default_path() is used

        Raises AssertionError when the devkit directory or the per-year
        data directory does not exist.
        """
        # The base class is initialised with the dataset name,
        # e.g. 'voc_2007_trainval'.
        imdb.__init__(self, 'voc_' + year + '_' + image_set)
        self._year = year
        self._image_set = image_set

        # Devkit root, e.g. <repo>/data/VOCdevkit2007
        if devkit_path is None:
            devkit_path = self._get_default_path()
        self._devkit_path = devkit_path
        # Per-year data directory, e.g. <repo>/data/VOCdevkit2007/VOC2007
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)

        self._classes = (
            '__background__',  # always index 0
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor',
        )
        # NOTE (2018.1.30): for a custom single-class dataset the tuple
        # above was once swapped for ('__background__', 'craft').

        # Map each class name to its integer index,
        # e.g. {'__background__': 0, 'aeroplane': 1, ...}.
        class_names = self.classes
        class_ids = xrange(self.num_classes)
        self._class_to_ind = dict(zip(class_names, class_ids))

        self._image_ext = '.jpg'
        # Image names (without extension) listed in the split file.
        self._image_index = self._load_image_set_index()

        # Default roidb handler: the bound method itself is stored here,
        # it is not called at this point.
        # (alternative: self.selective_search_roidb)
        self._roidb_handler = self.gt_roidb

        # Random unique suffix; per the original notes it is consumed by
        # _get_comp_id(...).
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {
            'cleanup': True,
            'use_salt': True,
            'use_diff': False,
            'matlab_eval': False,
            'rpn_file': None,
            'min_size': 2,
        }

        assert os.path.exists(self._devkit_path), \
            'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
            'Path does not exist: {}'.format(self._data_path)
2.image_path_at(self,i)获取数据集第i张图像的绝对路径,未见调用
# Absolute path of the i-th image of the dataset.
def image_path_at(self, i):
    """
    Return the absolute path to image i in the image sequence.
    """
    # self._image_index holds the image names without file extension.
    image_name = self._image_index[i]
    return self.image_path_from_index(image_name)
3.image_path_from_index(self,index)根据图像不含后缀的名称(如000001)获取该图像绝对路径,被image_path_at(...)调用
# Absolute image path for an image name given without extension.
def image_path_from_index(self, index):
    """
    Construct an image path from the image's "index" identifier.

    Raises AssertionError when the resulting file does not exist.
    """
    # self._image_ext is set to '.jpg' by the constructor.
    file_name = index + self._image_ext
    image_path = os.path.join(self._data_path, 'JPEGImages', file_name)
    assert os.path.exists(image_path), \
        'Path does not exist: {}'.format(image_path)
    return image_path
4._load_image_set_index(self)
获得数据集图像名称构成的列表,如trainval数据集从trainval.txt中取出[000001, 000002, ...]图像名称列表,被__init__(...)调用
# Image names of the split as a list, e.g. ['000001', '000003', ...].
def _load_image_set_index(self):
    """
    Load the indexes listed in this dataset's image set file.

    Returns a list with one whitespace-stripped image name per non-empty
    line of <data_path>/ImageSets/Main/<image_set>.txt.

    Raises AssertionError when that file does not exist.
    """
    # Example: <repo>/data/VOCdevkit2007/VOC2007/ImageSets/Main/trainval.txt
    image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                  self._image_set + '.txt')
    assert os.path.exists(image_set_file), \
        'Path does not exist: {}'.format(image_set_file)
    with open(image_set_file) as f:
        stripped = (line.strip() for line in f)
        # Blank lines (typically a trailing newline at the end of the file)
        # are skipped; otherwise they would become '' image names whose
        # image/annotation paths cannot exist.
        image_index = [name for name in stripped if name]
    return image_index
5._get_default_path(self)
获取数据集文件夹默认路径,如E:\TFFRCNN\data\VOCdevkit2007,被__init__(...)调用
# Default location of the PASCAL VOC devkit directory.
def _get_default_path(self):
    """
    Return the default path where PASCAL VOC is expected to be installed.
    """
    # Per the original notes, cfg.DATA_DIR defaults to <ROOT_DIR>/data,
    # so the result looks like <ROOT_DIR>/data/VOCdevkit2007.
    devkit_dir_name = 'VOCdevkit' + self._year
    return os.path.join(cfg.DATA_DIR, devkit_dir_name)
6.gt_roidb(self)
从/向cache文件夹中(如E:\TFFRCNN\data\cache\voc_2007_trainval_gt_roidb.pkl)用cPickle序列化读/写gt roi相关信息:若cache文件已存在,表明此前已经生成过,则直接读取;否则通过调用_load_pascal_annotation(...)得到由各图像gt roi信息字典组成的列表gt_roidb,并写入cache文件供下次读取。在__init__(...)中该函数以不加()的方式被引用(self._roidb_handler = self.gt_roidb),即只是把方法对象本身赋给self._roidb_handler,并没有立即执行(该属性在本脚本中未见调用)。此外该函数还被selective_search_roidb(...)、rpn_roidb(...)调用,但本项目未使用SS产生roi。
# Ground-truth roidb, backed by a cPickle cache file:
# read the cache when it exists, otherwise build it and write the cache.
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    # e.g. <repo>/data/cache/voc_2007_trainval_gt_roidb.pkl
    # (cache_path is provided by the imdb base class).
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')

    if not os.path.exists(cache_file):
        # First call: parse the XML annotation of every image, one dict
        # per image, then persist the list for later runs.
        records = []
        for index in self.image_index:
            records.append(self._load_pascal_annotation(index))
        with open(cache_file, 'wb') as fid:
            cPickle.dump(records, fid, cPickle.HIGHEST_PROTOCOL)
        # Single-argument print(...) prints the same text under the
        # Python 2 print statement.
        print('wrote gt roidb to {}'.format(cache_file))
        return records

    # Cache hit: deserialize the previously written list.
    with open(cache_file, 'rb') as fid:
        roidb = cPickle.load(fid)
    print('{} gt roidb loaded from {}'.format(self.name, cache_file))
    return roidb
7.selective_search_roidb(self)
与SS算法相关,未使用。类似于gt_roidb(...),从/向cache文件夹中(如E:\TFFRCNN\data\cache\voc_2007_trainval_selective_search_roidb.pkl)用cPickle序列化读/写roidb相关信息(内容为由SS产生的roi,满足下述if条件时还合并了gt roi),未见调用。
该函数表明(VOC2007数据集上,可见if判断语句)self.roidb中既包含了gt roi也包含了(由SS)产生的roi
# Selective-search (SS) roidb; not used by this project.
def selective_search_roidb(self):
    """
    Return the database of selective search regions of interest.
    Ground-truth ROIs are also included.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path,
                              self.name + '_selective_search_roidb.pkl')

    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            cached = cPickle.load(fid)
        print('{} ss roidb loaded from {}'.format(self.name, cache_file))
        return cached

    # Ground truth is merged in for every split except a non-2007 'test'.
    without_gt = int(self._year) != 2007 and self._image_set == 'test'
    if without_gt:
        roidb = self._load_selective_search_roidb(None)
    else:
        gt_roidb = self.gt_roidb()
        ss_roidb = self._load_selective_search_roidb(gt_roidb)
        # Combine the ground-truth entries with the SS proposals.
        roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote ss roidb to {}'.format(cache_file))
    return roidb
8.rpn_roidb(self)
返回rpn_roidb,从该函数同样看出(int(self._year) == 2007 or self._image_set != 'test')时self.roidb来源包括gt_roidb和rpn_roidb,未见调用(应该在某个地方被调用了!)
def rpn_roidb(self):
    """
    Return the roidb built from RPN proposals.

    For every split except a non-2007 'test' split, the ground-truth roidb
    is merged with the RPN roidb; otherwise only RPN proposals are returned.
    """
    without_gt = int(self._year) != 2007 and self._image_set == 'test'
    if without_gt:
        return self._load_rpn_roidb(None)

    gt_roidb = self.gt_roidb()
    proposals = self._load_rpn_roidb(gt_roidb)
    # Combine the ground-truth entries with the RPN proposals.
    return imdb.merge_roidbs(gt_roidb, proposals)
9._load_rpn_roidb(self,gt_roidb)
调用(imdb.py中)create_roidb_from_box_list(...)函数更新由RPN产生的rpn_roidb列表中(各图像rpn_roi信息构成的)字典内容(传入gt_roidb是为了得到‘gt_overlaps’,其他字段‘gt_classes’全0且不更新、‘flipped’为False、‘seg_areas’全0且不更新,此处0表明该roi非gt roi而是由RPN产生的roi,可见create_roidb_from_box_list(...)函数),被rpn_roidb(...)调用,应注意这里self.config['rpn_file']表示rpn_roidb序列化内容的存储路径,在__init__()构造函数中初值为None,在调用该函数之前self.config应在某处更新!
注意:rpn_roidb为各张图像产生roi相关信息构成的字典组成的列表,字典内容见如下create_roidb_from_box_list(...)函数
def _load_rpn_roidb(self, gt_roidb):
    """
    Load pickled RPN proposals and convert them into a roidb.

    gt_roidb -- ground-truth roidb passed through to
                create_roidb_from_box_list (may be None)

    Raises AssertionError when the proposal file does not exist.
    """
    # Path of the pickled proposals. The constructor initialises this
    # entry to None, so self.config['rpn_file'] has to be set elsewhere
    # before this method is called.
    rpn_file = self.config['rpn_file']
    print('loading {}'.format(rpn_file))
    assert os.path.exists(rpn_file), \
        'rpn data not found at: {}'.format(rpn_file)
    with open(rpn_file, 'rb') as fid:
        box_list = cPickle.load(fid)
    return self.create_roidb_from_box_list(box_list, gt_roidb)
----------------注意以下函数中gt_classes为全0表明:对应的roi不是gt roi,这也解释了test.py中的遗留的问题----------------------
对于各张图像中由RPN产生的roi,与gt_roi计算IoU值,最大值对应的gt_roi作为gt,因此overlaps仅对应类别位置有>0的IoU值,其余位置全0,但是这里并没有更新‘gt_classes’字段为gt_roi对应的类别,而是设置为全0,同时‘seg_areas’也被设置为全0
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """
    Build a roidb (list of per-image dicts) from per-image proposal boxes.

    box_list -- sequence with one box array per image; its length must
                equal self.num_images
    gt_roidb -- ground-truth roidb aligned with box_list by image position,
                or None when no ground truth is available

    Each returned dict has the keys 'boxes', 'gt_classes', 'gt_overlaps'
    (scipy CSR matrix with one row per box and one column per class),
    'flipped' and 'seg_areas'. 'gt_classes' and 'seg_areas' are always
    all-zero here: the boxes are proposals, not ground-truth boxes.

    Raises AssertionError when len(box_list) != self.num_images.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i, boxes in enumerate(box_list):
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 instead of the np.float alias, which was removed
            # from NumPy (it denoted the same 64-bit float type).
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            # For every proposal pick the ground-truth box with the highest
            # overlap and store that overlap in the column of its class;
            # every other column of the row stays 0.
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            I = np.where(maxes > 0)[0]
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            # Deliberately all zeros (not the matched class): marks the
            # rois as non-ground-truth.
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
10._load_selective_search_roidb(self,gt_roidb)
类似于_load_rpn_roidb(self,gt_roidb),返回由SS算法得到的roidb数据,由于算法中未使用,不过多解释,被selective_search_roidb(...)调用
def _load_selective_search_roidb(self, gt_roidb):
    """
    Load precomputed selective-search boxes from a .mat file and convert
    them into a roidb via create_roidb_from_box_list.

    gt_roidb -- ground-truth roidb passed through unchanged (may be None)

    Raises AssertionError when the .mat file does not exist.
    """
    # Precomputed SS result: <DATA_DIR>/selective_search_data/<name>.mat
    mat_name = self.name + '.mat'
    filename = os.path.abspath(
        os.path.join(cfg.DATA_DIR, 'selective_search_data', mat_name))
    assert os.path.exists(filename), \
        'Selective search data not found at: {}'.format(filename)

    # sio is scipy.io; ravel() flattens the loaded 'boxes' cell array so
    # that it can be iterated one image at a time.
    raw_data = sio.loadmat(filename)['boxes'].ravel()

    box_list = []
    for raw_boxes in raw_data:
        # Swap the column pairs and shift by one; presumably this converts
        # 1-based (y1, x1, y2, x2) into 0-based (x1, y1, x2, y2) -- confirm
        # against the .mat layout.
        candidates = raw_boxes[:, (1, 0, 3, 2)] - 1
        # Helpers from ds_utils.py: drop duplicate boxes, then boxes
        # smaller than the configured minimum size.
        candidates = candidates[ds_utils.unique_boxes(candidates), :]
        big_enough = ds_utils.filter_small_boxes(candidates,
                                                 self.config['min_size'])
        box_list.append(candidates[big_enough, :])

    return self.create_roidb_from_box_list(box_list, gt_roidb)
11._load_pascal_annotation(self, index)
根据不含后缀的图像名称(如index为000001)读取相应xml文件,获得该图像gt roi相关信息构成的字典。字典包含以下字段:'boxes'(shape为(None,4),存储该图像所有gt roi坐标信息)、'gt_classes'(shape为(None,),存储该图像所有gt roi类别索引信息)、'gt_ishard'(shape为(None,),存储该图像所有gt roi是否为难例)、'gt_overlaps'(稀疏矩阵,未压缩前shape为(None,21),存储该图像所有gt roi IOU值,对应gt类别位置其值为1.0,其他全0)、'flipped'(为False)、'seg_areas'(shape为(None,),存储该图像所有gt roi面积)。数据集全部图像的gt roi信息字典组成的列表即为gt_roidb,该函数被gt_roidb(...)调用,可以看到gt_roidb与rpn_roidb在结构上是一致的。关于overlaps = scipy.sparse.csr_matrix(overlaps)这一步稀疏矩阵压缩,未查到相关资料,其输出形式可参考文末的测试脚本。
# Read the XML annotation of one image (name given without extension)
# and collect its ground-truth roi information.
def _load_pascal_annotation(self, index):
    """
    Load image and bounding boxes info from XML file in the PASCAL VOC
    format.

    index -- image name without extension, e.g. '000001'

    Returns a dict with one entry per annotated object in each array:
      'boxes'       -- (num_objs, 4) uint16, 0-based x1, y1, x2, y2
      'gt_classes'  -- (num_objs,) int32 class indices
      'gt_ishard'   -- (num_objs,) int32, 1 for objects marked difficult
      'gt_overlaps' -- scipy CSR matrix, (num_objs, num_classes), 1.0 in
                       the column of the object's class and 0 elsewhere
      'flipped'     -- always False
      'seg_areas'   -- (num_objs,) float32 box areas

    Raises KeyError when an object name is not in self._class_to_ind.
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
    tree = ET.parse(filename)
    objs = tree.findall('object')
    # Objects marked as difficult are kept (no filtering on
    # config['use_diff'] is performed); the flag is recorded in 'gt_ishard'.
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    # "Seg" area for pascal is just the box area
    seg_areas = np.zeros((num_objs), dtype=np.float32)
    ishards = np.zeros((num_objs), dtype=np.int32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        # Make pixel indexes 0-based. The result is clamped at 0 because
        # an annotation that already contains 0 would become -1, which
        # wraps around to 65535 when stored in the uint16 'boxes' array.
        x1, y1, x2, y2 = [
            max(float(bbox.find(tag).text) - 1, 0.0)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]

        # A missing <difficult> tag counts as "not difficult".
        diffc = obj.find('difficult')
        difficult = 0 if diffc is None else int(diffc.text)
        ishards[ix] = difficult

        # Class name -> class index via self._class_to_ind.
        cls = self._class_to_ind[obj.find('name').text.lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        # One-hot style row: 1.0 at the ground-truth class.
        overlaps[ix, cls] = 1.0
        seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Mostly zeros, so keep it in compressed sparse row form (only the
    # non-zero entries such as (0, 0) 1.0 and (1, 2) 1.0 are stored).
    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_ishard': ishards,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas}
xml文件示例
<annotation> <folder>VOC2007</folder> <filename>000001.jpg</filename> <source> <database>My Database</database> <annotation>VOC2007</annotation> <image>flickr</image> <flickrid>NULL</flickrid> </source> <owner> <flickrid>NULL</flickrid> <name>sunyifeng</name> </owner> <size> <width>1920</width> <height>1080</height> <depth>3</depth> </size> <segmented>0</segmented> <object> <name>craft</name> <pose>Unspecified</pose> <truncated>0</truncated> <difficult>0</difficult> <bndbox> <xmin>963</xmin> <ymin>696</ymin> <xmax>1038</xmax> <ymax>739</ymax> </bndbox> </object> </annotation>
# -*- coding:utf-8 -*-
# Author: WUJiang
# Small experiment: what does scipy.sparse.csr_matrix(...) produce for a
# one-hot style overlaps array?
import numpy as np
# Public package import; the scipy.sparse.csr submodule is a deprecated
# private namespace and csr_matrix is exposed by scipy.sparse itself.
import scipy.sparse

# For PASCAL VOC the real array has one row per object and 21 columns.
overlaps = np.array([
    [1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
])

# Prints only the stored (non-zero) entries, i.e.
#   (0, 0)    1.0
#   (1, 2)    1.0
# (the exact layout of the printout depends on the SciPy version).
print(scipy.sparse.csr_matrix(overlaps))
View Code
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:Tensorflow版Faster RCNN源码解析(TFFRCNN) (20) datasets/pascal_voc.py - Python技术站