caffe 中大多数层用 C++ 写成。但是对于自己的数据,往往需要编写对应的输入层:比如你要取图像中的一部分(此时不能用 LMDB),或者你的 label 需要特殊的标记。这时候就需要用 Python 写一个输入层。
如在fcn 的voc_layers.py 中 有两个类:
VOCSegDataLayer
SBDDSegDataLayer
两个类都包含:setup、reshape、forward、backward、load_image、load_label。数据层没有需要更新的参数,因此 backward 不需要做任何事情(直接 pass)。
import caffe

import numpy as np
from PIL import Image

import random


class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises Exception when the layer is not wired with exactly two tops
        and zero bottoms.
        """
        # config
        # NOTE(security): param_str is evaluated as arbitrary Python so that
        # the documented ``dict(...)`` form keeps working. Only load prototxt
        # files from trusted sources.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        # use a context manager so the split file is closed promptly
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and resize both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then choose the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around after the last entry of the split
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: nothing is propagated back through this layer."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.

        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with
        SBDD. Find it here:
        https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises Exception when the layer is not wired with exactly two tops
        and zero bottoms.
        """
        # config
        # NOTE(security): param_str is evaluated as arbitrary Python so that
        # the documented ``dict(...)`` form keeps working. Only load prototxt
        # files from trusted sources.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
        # use a context manager so the split file is closed promptly
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and resize both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then choose the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around after the last entry of the split
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: nothing is propagated back through this layer."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.

        The leading singleton dimension is required by the loss.
        """
        # imported here so scipy is only needed when SBDD labels are read
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label
对于 最终的loss 层:
在prototxt 中定义的layer:
layer {
  type: 'Python'  # Python 层
  name: 'loss'    # loss 层
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    module: 'pyloss'             # 写在 pyloss 文件中
    layer: 'EuclideanLossLayer'  # 对应此类的名字
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}
loss 层的实现 :
import caffe
import numpy as np


class EuclideanLossLayer(caffe.Layer):
    """
    Python re-statement of the Euclidean loss, written to show the
    interface a Python layer has to provide (setup / reshape / forward /
    backward), in the same manner as the C++ EuclideanLossLayer.
    """

    def setup(self, bottom, top):
        """Verify that exactly two bottoms are wired to the layer."""
        # top is the final loss; bottom carries the two inputs to compare
        # (per the original author's note: the network output and the label).
        n_inputs = len(bottom)
        if n_inputs != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        """Validate input sizes and allocate the difference buffer."""
        lhs = bottom[0]
        rhs = bottom[1]
        # both inputs must hold the same number of elements
        if lhs.count != rhs.count:
            raise Exception("Inputs must have the same dimension.")
        # the buffer for the element-wise difference mirrors the first input
        self.diff = np.zeros_like(lhs.data, dtype=np.float32)
        # the loss itself is a single value
        top[0].reshape(1)

    def forward(self, bottom, top):
        """Store the element-wise difference and write the loss to top[0]."""
        self.diff[...] = bottom[0].data - bottom[1].data
        squared_error = np.sum(self.diff**2)
        top[0].data[...] = squared_error / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        """Write the gradient into each bottom that requests it."""
        # the first input gets +diff, the second gets -diff
        for i, sign in enumerate((1, -1)):
            if propagate_down[i]:
                bottom[i].diff[...] = sign * self.diff / bottom[i].num
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:caffe 中 python 数据层 - Python技术站