今天心血來潮,想把傳統的卷積算法實現成一份不採用各種加速方式、僅優化算法邏輯的純淨版本。
寫完發現性能還可以,特此發出來分享。若有博友在此基礎上再次進行優化,那就更讚了。
算法很簡單:
/*
 * Saturates an integer to the range of an 8-bit sample.
 *
 * Returns 0 for negative n, 255 for n greater than 255, and n itself
 * otherwise.
 *
 * Declared `static inline` so that the translation unit always has a
 * definition to call: a plain `inline` definition in C99/C11 does not emit
 * an external symbol, which leads to a link error whenever the compiler
 * chooses not to inline the call. Plain comparisons are used instead of
 * sign-bit shifting so the result does not depend on the width of int or on
 * how the implementation right-shifts negative values.
 */
static inline unsigned char Clamp2Byte(int n) {
    if (n < 0) {
        return 0;
    }
    if (n > 255) {
        return 255;
    }
    return (unsigned char) n;
}
/* Divides an accumulated kernel sum by divisor, adds bias and saturates the
 * result to the 8-bit range [0, 255]. divisor must be non-zero. */
static unsigned char ConvolutionSumToByte(int sum, int divisor, int bias) {
    int value = sum / divisor + bias;
    if (value < 0) {
        return 0;
    }
    if (value > 255) {
        return 255;
    }
    return (unsigned char) value;
}

/*
 * Applies a square filterW x filterW integer kernel to an interleaved 8-bit
 * image, in place.
 *
 * data     - width * height * channels bytes, row-major, channels interleaved;
 *            overwritten with the filtered image.
 * width    - image width in pixels.
 * height   - image height in pixels.
 * channels - 1, 3 or 4. With 3 or 4 channels the first three channels of each
 *            pixel are filtered; with 4 channels the fourth byte is copied
 *            through unchanged. Any other value leaves data untouched.
 * filter   - filterW * filterW weights. The weight used for horizontal tap fx
 *            and vertical tap fy is filter[fx * filterW + fy].
 * filterW  - kernel side length; the kernel is centred using filterW / 2.
 * cfactor  - divisor applied to each accumulated sum (must be non-zero).
 * bias     - value added after the division, before saturation.
 *
 * Samples that fall outside the image wrap around to the opposite edge.
 * Each output sample is sum / cfactor + bias, saturated to [0, 255].
 *
 * The call is a no-op when data or filter is NULL, when width, height,
 * filterW or cfactor is zero, when the image size does not fit in size_t,
 * or when the temporary buffer cannot be allocated.
 */
void Convolution2D(unsigned char * data, unsigned int width, unsigned int height, unsigned int channels, int * filter, unsigned char filterW, unsigned char cfactor, unsigned char bias) {
    if (data == NULL || filter == NULL) {
        return;
    }
    if (width == 0 || height == 0 || filterW == 0 || cfactor == 0) {
        return;
    }
    if (channels != 1 && channels != 3 && channels != 4) {
        return;
    }

    /* Compute the buffer size in size_t and reject sizes that overflow. */
    size_t rowBytes = (size_t) width * channels;
    if (rowBytes / channels != width) {
        return;
    }
    size_t totalBytes = rowBytes * height;
    if (totalBytes / height != rowBytes) {
        return;
    }

    /* The result is built in a scratch buffer because every output pixel
     * reads neighbouring input pixels that must still be unmodified. */
    unsigned char * tmpData = (unsigned char *) malloc(totalBytes);
    if (tmpData == NULL) {
        return;
    }

    unsigned int filtered = (channels == 1) ? 1u : 3u;
    unsigned int halfW = filterW / 2u;
    /* Offsets that move the first tap halfW samples back. Reducing halfW
     * modulo the dimension keeps the wrap-around correct even when the
     * kernel is larger than the image. */
    size_t startX = (size_t) width - (halfW % width);
    size_t startY = (size_t) height - (halfW % height);

    for (unsigned int y = 0; y < height; y++) {
        for (unsigned int x = 0; x < width; x++) {
            int sum[3] = {0, 0, 0};
            for (unsigned int fx = 0; fx < filterW; fx++) {
                size_t srcX = ((size_t) x + startX + fx) % width;
                const int * weights = filter + (size_t) fx * filterW;
                for (unsigned int fy = 0; fy < filterW; fy++) {
                    size_t srcY = ((size_t) y + startY + fy) % height;
                    const unsigned char * src = data + (srcY * width + srcX) * channels;
                    int weight = weights[fy];
                    for (unsigned int c = 0; c < filtered; c++) {
                        sum[c] += src[c] * weight;
                    }
                }
            }

            size_t p = ((size_t) y * width + x) * channels;
            for (unsigned int c = 0; c < filtered; c++) {
                tmpData[p + c] = ConvolutionSumToByte(sum[c], cfactor, bias);
            }
            if (channels == 4) {
                /* Keep the fourth channel; otherwise uninitialised scratch
                 * bytes would be copied back over it. */
                tmpData[p + 3] = data[p + 3];
            }
        }
    }

    memcpy(data, tmpData, totalBytes);
    free(tmpData);
}
調用例子:
例子
// Blur: 5x5 diamond of ones (13 weights of 1), divided by 13.
int Blurfilter[25] = {
0, 0, 1, 0, 0,
0, 1, 1, 1, 0,
1, 1, 1, 1, 1,
0, 1, 1, 1, 0,
0, 0, 1, 0, 0,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Blurfilter, 5, 13, 0);
// Motion blur: 9x9 kernel with ones on the main diagonal, divided by 9.
int MotionBlurfilter[81] = {
1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, MotionBlurfilter, 9, 9, 0);
// Edge detection 1: 5x5 diagonal kernel whose weights sum to 0.
int edges1filter[25] = {
-1, 0, 0, 0, 0,
0, -2, 0, 0, 0,
0, 0, 6, 0, 0,
0, 0, 0, -2, 0,
0, 0, 0, 0, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);
// Edge detection 2: 3x3 kernel, centre 8 surrounded by -1 (weights sum to 0).
int edges2filter[9] = {
-1, -1, -1, -1, 8, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges2filter, 3, 1, 0);
// Sharpen 1: 3x3 kernel, centre 9 surrounded by -1 (weights sum to 1).
int sharpen1filter[9] = {
-1, -1, -1, -1, 9, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen1filter, 3, 1, 0);
// Sharpen 2: 5x5 kernel (outer ring -1, inner ring 2, centre 8; weights sum to 8), divided by 8.
int sharpen2filter[25] = {
-1, -1, -1, -1, -1, -1, 2, 2, 2, -1, -1, 2, 8, 2, -1, -1, 2, 2, 2, -1, -1, -1, -1, -1, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen2filter, 5, 8, 0);
// Sharpen 3: 3x3 kernel, centre -7 surrounded by 1 (weights sum to 1).
int sharpen3filter[9] = {
1, 1, 1,
1, -7, 1,
1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen3filter, 3, 1, 0);
// Emboss 1: 3x3 kernel whose weights sum to 0; a bias of 128 is added to the result.
int Embossfilter[9] = {
-1, -1, 0, -1, 0, 1,
0, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Embossfilter, 3, 1, 128);
// Emboss 2: 5x5 kernel whose weights sum to 0; a bias of 128 is added to the result.
int emboss2filter[25] = {
-1, -1, -1, -1, 0, -1, -1, -1, 0, 1, -1, -1, 0, 1, 1, -1, 0, 1, 1, 1,
0, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, emboss2filter, 5, 1, 128);
// Mean blur 1: 3x3 kernel of ones, divided by 9.
int meanfilter[9] = {
1, 1, 1,
1, 1, 1,
1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, meanfilter, 3, 9, 0);
// Mean blur 2: 9x9 kernel of ones, divided by 81.
int mean2filter[81] = {
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, mean2filter, 9, 81, 0);
博主對一張大小為 960x1280 的圖片進行了邊緣探測卷積核的處理,在博主的機器上耗時為 100 毫秒。
// Edge detection 1: 5x5 diagonal kernel whose weights sum to 0 (the kernel used for the timing above).
int edges1filter[25] = {
-1, 0, 0, 0, 0,
0, -2, 0, 0, 0,
0, 0, 6, 0, 0,
0, 0, 0, -2, 0,
0, 0, 0, 0, -1,
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);
效果圖:

其他相關資料,見各種百科網站。
關鍵詞:卷積(英語:Convolution)