今天心血来潮,想把传统的卷积算法实现一份不采用各种加速方式,仅优化算法逻辑的纯净版本。

写完发现性能还可以,特发出来分享之,若有博友在此基础上,进行了再次优化,那就更赞了。

算法很简单:

inline unsigned char Clamp2Byte(int n) {
	return (((255 - n) >> 31) | (n & ~(n >> 31)));
}
void Convolution2D(unsigned char * data, unsigned int width, unsigned int height, unsigned int channels, int * filter, unsigned char filterW, unsigned char cfactor, unsigned char bias) {
		unsigned char * tmpData = (unsigned char * ) malloc(width * height * channels);
		int factor = 256 / cfactor;
		int halfW = filterW / 2;
		if (channels == 3 || channels == 4) {
			for (int y = 0; y < height; y++) {
				int y1 = y - halfW + height;
				for (int x = 0; x < width; x++) {
					int x1 = x - halfW + width;
					int r = 0;
					int g = 0;
					int b = 0;
					unsigned int p = (y * width + x) * channels;
					for (unsigned int fx = 0; fx < filterW; fx++) {
						int dx = (x1 + fx) % width;
						int fidx = fx * (filterW);
						for (unsigned int fy = 0; fy < filterW; fy++) {
							int pos = (((y1 + fy) % height) * width + dx) * channels;
							int * pfilter = & filter[fidx + (fy)];
							r += data[pos] * ( * pfilter);
							g += data[pos + 1] * ( * pfilter);
							b += data[pos + 2] * ( * pfilter);
						}
					}
					tmpData[p] = Clamp2Byte(((factor * r) >> 8) + bias);
					tmpData[p + 1] = Clamp2Byte(((factor * g) >> 8) + bias);
					tmpData[p + 2] = Clamp2Byte(((factor * b) >> 8) + bias);
				}
			}
		} else
		if (channels == 1) {
			for (int y = 0; y < height; y++) {
				int y1 = y - halfW + height;
				for (int x = 0; x < width; x++) {
					int r = 0;
					unsigned int p = (y * width + x);
					int x1 = x - halfW + width;
					for (unsigned int fx = 0; fx < filterW; fx++) {
						int dx = (x1 + fx) % width;
						int fidx = fx * (filterW);
						for (unsigned int fy = 0; fy < filterW; fy++) {
							int pos = (((y1 + fy) % height) * width + dx);
							int szfilter = filter[fidx + (fy)];
							r += data[pos] * szfilter;
						}
					}
					tmpData[p] = Clamp2Byte(((factor * r) >> 8) + bias);
				}
			}
		}
		memcpy(data, tmpData, width * height * channels);

		free(tmpData);
	}

  调用例子:

例子
//模糊
int Blurfilter[25] = {
	0, 0, 1, 0, 0,
	0, 1, 1, 1, 0,
	1, 1, 1, 1, 1,
	0, 1, 1, 1, 0,
	0, 0, 1, 0, 0,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Blurfilter, 5, 13, 0);

// 运动模糊
int MotionBlurfilter[81] = {
	1, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 1, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 1, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 1, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 1, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 1, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, MotionBlurfilter, 9, 9, 0);


//边缘探测1
int edges1filter[25] = {
	-1, 0, 0, 0, 0,
	0, -2, 0, 0, 0,
	0, 0, 6, 0, 0,
	0, 0, 0, -2, 0,
	0, 0, 0, 0, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);


//边缘探测2
int edges2filter[9] = {
	-1, -1, -1, -1, 8, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges2filter, 3, 1, 0);

//锐化1
int sharpen1filter[9] = {
	-1, -1, -1, -1, 9, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen1filter, 3, 1, 0);
//锐化2
int sharpen2filter[25] = {
	-1, -1, -1, -1, -1, -1, 2, 2, 2, -1, -1, 2, 8, 2, -1, -1, 2, 2, 2, -1, -1, -1, -1, -1, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen2filter, 5, 8, 0);
//锐化3
int sharpen3filter[9] = {
	1, 1, 1,
	1, -7, 1,
	1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen3filter, 3, 1, 0);

// 浮雕1 
int Embossfilter[9] = {
	-1, -1, 0, -1, 0, 1,
	0, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Embossfilter, 3, 1, 128);
// 浮雕2
int emboss2filter[25] = {
	-1, -1, -1, -1, 0, -1, -1, -1, 0, 1, -1, -1, 0, 1, 1, -1, 0, 1, 1, 1,
	0, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, emboss2filter, 5, 1, 128);
// 均值模糊1
int meanfilter[9] = {
	1, 1, 1,
	1, 1, 1,
	1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, meanfilter, 3, 9, 0);
// 均值模糊2
int mean2filter[81] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, mean2filter, 9, 81, 0);

  博主在一张大小为960x1280的图片,进行了边缘探测卷积核的处理,在博主机子上耗时是100毫秒。 

//边缘探测1
int edges1filter[25] = {
	-1, 0, 0, 0, 0,
	0, -2, 0, 0, 0,
	0, 0, 6, 0, 0,
	0, 0, 0, -2, 0,
	0, 0, 0, 0, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);

  效果图:

图像处理卷积算法实现

其他相关资料,见各种百科网站。

关键词:卷积(英语:Convolution)