OpenCL双边滤波实现美颜功能
Posted 啊基米舍的博客
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了OpenCL双边滤波实现美颜功能相关的知识,希望对你有一定的参考价值。
OpenCL是一个并行异构计算的框架,包括intel,AMD,英伟达等等许多厂家都有对它的支持,不过英伟达只到1.2版本,主要发展自己的CUDA去了。虽然没有用过CUDA,但个人感觉CUDA比OpenCL更好一点,但OpenCL支持面更广,CPU,GPU,DSP,FPGA等多种芯片都能支持OpenCL。OpenCL与D3D中的像素着色器非常相似。
1.双边滤波原理
双边滤波器的原理参考女神Rachel-Zhang的博客 双边滤波器的原理及实现. 引自Rachel-Zhang的博客,原理如下:
双边滤波(Bilateral filter)是一种可以保边去噪的滤波器。之所以可以达到此去噪效果,是因为滤波器是由两个函数构成。一个函数是由几何空间距离决定滤波器系数。另一个由像素差值决定滤波器系数。可以与其相比较的两个filter:高斯低通滤波器(http://en.wikipedia.org/wiki/Gaussian_filter)和α-截尾均值滤波器(去掉百分率为α的最小值和最大值之后剩下像素的均值作为滤波器)。
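双边滤波器中,输出像素的值依赖于邻域像素的值的加权组合:
$$g(i,j)=\frac{\sum_{k,l} f(k,l)\,w(i,j,k,l)}{\sum_{k,l} w(i,j,k,l)}$$
权重系数w(i,j,k,l)取决于定义域核和值域核的乘积:
$$w(i,j,k,l)=\exp\left(-\frac{(i-k)^2+(j-l)^2}{2\sigma_d^2}-\frac{\lVert f(i,j)-f(k,l)\rVert^2}{2\sigma_r^2}\right)$$
同时考虑了空间域与值域的差别,而Gaussian Filter和α均值滤波分别只考虑了空间域和值域差别。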
本文基于这个公式用OpenCL实现双边滤波来做美颜。
2.核函数
磨皮算法原理参考自http://www.zealfilter.com/portal.php?mod=view&aid=138,其中的肤色检测算法不好,我给去掉了,本来还要做个锐化处理的,但发现不做锐化效果也蛮好,所以就先没做,学下一步的OpenCL时再做锐化。
// Sampler used for every read below: integer (unnormalized) pixel coordinates,
// out-of-range taps clamped to the nearest edge pixel, no interpolation.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

// Bilateral filter followed by a blend with the original pixel.
//
// One work-item produces one output pixel:
//   g(i,j) = sum(f(k,l) * w(i,j,k,l)) / sum(w(i,j,k,l))
//   w      = exp(-((i-k)^2 + (j-l)^2) / (2*sigma_d^2)
//                - (f(i,j) - f(k,l))^2 / (2*sigma_r^2))
// evaluated independently per colour channel over an 11x11 window.
//
// src: input image, read through read_imagef.
// dst: output image; x/y/z receive the filtered colour, w is written as 1.0.
kernel void bilateralBlur(read_only image2d_t src, write_only image2d_t dst)
{
    const int x = (int)get_global_id(0);
    const int y = (int)get_global_id(1);

    // The global work size may be padded past the image; ignore extra items.
    if (x >= get_image_width(src) || y >= get_image_height(src))
        return;

    // All constants carry the 'f' suffix so the arithmetic stays in single
    // precision and the kernel does not depend on double support.
    const int   ksize   = 11;    // window width/height in pixels
    const float sigma_d = 3.0f;  // spatial (domain) sigma
    const float sigma_r = 0.1f;  // value (range) sigma
    const float alpha   = 0.2f;  // share of the original pixel kept in the blend

    const float two_sigma_d_sq = 2.0f * sigma_d * sigma_d;
    const float two_sigma_r_sq = 2.0f * sigma_r * sigma_r;

    const float4 fij = read_imagef(src, sampler, (int2)(x, y));

    // Both accumulators start at zero so the quotient is a true weighted mean.
    // The centre tap always contributes a weight of exp(0) = 1, so for finite
    // input the denominator cannot be zero.
    float4 numerator   = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    float4 denominator = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    for (int K = -ksize / 2; K <= ksize / 2; K++)
    {
        for (int L = -ksize / 2; L <= ksize / 2; L++)
        {
            const float4 fkl  = read_imagef(src, sampler, (int2)(x + K, y + L));
            const float  dkl  = -(float)(K * K + L * L) / two_sigma_d_sq;
            const float4 diff = fij - fkl;
            const float4 wkl  = exp(dkl - diff * diff / two_sigma_r_sq);

            numerator   += fkl * wkl;
            denominator += wkl;
        }
    }

    // Fallback colour (opaque black) is written only if the guard below fails,
    // which can happen only for non-finite input (NaN compares false).
    float4 gij = (float4)(0.0f, 0.0f, 0.0f, 1.0f);
    if (denominator.x > 0.0f && denominator.y > 0.0f && denominator.z > 0.0f)
    {
        const float4 filtered = numerator / denominator;

        // Blend the bilateral result with the original pixel.
        gij = fij * alpha + filtered * (1.0f - alpha);
        gij.w = 1.0f;
    }

    write_imagef(dst, (int2)(x, y), gij);
}
kernel函数里面基本就是把数学公式写出来,可以说是非常简单的。
3.host端代码
OpenCL代码分为host端的代码和device端的代码,kernel是跑在并行设备device上的,host一般适合跑串行的逻辑性强的代码,device则比较适合用来做计算,如卷积运算。计算机中,通常把CPU当host,把GPU当device。不过实际上CPU也可以作为device,因为intel也是支持OpenCL的。本文以CPU为host,GPU为device。
#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <malloc.h>
#include <string.h>
#include <opencv2/opencv.hpp>
#include <CL/cl.h>

//---------- Query and print OpenCL platform / device information ---------

///
// Print one string-valued platform attribute as "\t<str>:\t<value>".
//
// id   - platform to query
// name - attribute to fetch with clGetPlatformInfo
// str  - label printed in front of the value (and used in error messages)
//
// On a failed query an error is written to std::cerr and nothing is printed.
//
void DisplayPlatformInfo(
    cl_platform_id id,
    cl_platform_info name,
    std::string str)
{
    // First call only asks how many bytes the value needs.
    std::size_t paramValueSize = 0;
    cl_int errNum = clGetPlatformInfo(id, name, 0, NULL, &paramValueSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to find OpenCL platform " << str << "." << std::endl;
        return;
    }

    // One spare zero byte keeps the buffer printable as a C string even if
    // the reported size were 0.
    std::vector<char> info(paramValueSize + 1, '\0');
    errNum = clGetPlatformInfo(id, name, paramValueSize, &info[0], NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to find OpenCL platform " << str << "." << std::endl;
        return;
    }

    std::cout << "\t" << str << ":\t" << &info[0] << std::endl;
}

///
// Append 'name' to 'str' when any bit of 'value' is set in 'info'.
// Entries are separated by " | ".
//
template<typename T>
void appendBitfield(T info, T value, std::string name, std::string & str)
{
    if (info & value)
    {
        if (str.length() > 0)
        {
            str.append(" | ");
        }
        str.append(name);
    }
}

///
// Display information for a particular device.
// As different calls to clGetDeviceInfo may return
// values of different types a template is used.
// As some values returned are arrays of values, a templated class is
// used so it can be specialized for this case, see below.
//
template <typename T>
class InfoDevice
{
public:
    ///
    // Query attribute 'name' of device 'id' and print it as
    // "\t\t<str>:\t<value>". Bitfield and enum attributes are printed
    // symbolically; everything else is streamed as a single T.
    //
    static void display(
        cl_device_id id,
        cl_device_info name,
        std::string str)
    {
        // First call only asks how many bytes the value needs.
        std::size_t paramValueSize = 0;
        cl_int errNum = clGetDeviceInfo(id, name, 0, NULL, &paramValueSize);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed to find OpenCL device info " << str << "." << std::endl;
            return;
        }

        // paramValueSize is a byte count, so round it up to whole T elements
        // (at least one, so that *info below is always readable).
        std::size_t count = (paramValueSize + sizeof(T) - 1) / sizeof(T);
        if (count == 0)
        {
            count = 1;
        }
        std::vector<T> storage(count);
        T * info = &storage[0];

        errNum = clGetDeviceInfo(id, name, paramValueSize, info, NULL);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed to find OpenCL device info " << str << "." << std::endl;
            return;
        }

        // Handle a few special cases
        switch (name)
        {
        case CL_DEVICE_TYPE:
        {
            const cl_device_type type = *(reinterpret_cast<cl_device_type*>(info));
            std::string deviceType;
            appendBitfield<cl_device_type>(type, CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", deviceType);
            appendBitfield<cl_device_type>(type, CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", deviceType);
            appendBitfield<cl_device_type>(type, CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", deviceType);
            appendBitfield<cl_device_type>(type, CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", deviceType);
            std::cout << "\t\t" << str << ":\t" << deviceType << std::endl;
        }
        break;
        case CL_DEVICE_SINGLE_FP_CONFIG:
        {
            const cl_device_fp_config config = *(reinterpret_cast<cl_device_fp_config*>(info));
            std::string fpType;
            appendBitfield<cl_device_fp_config>(config, CL_FP_DENORM, "CL_FP_DENORM", fpType);
            appendBitfield<cl_device_fp_config>(config, CL_FP_INF_NAN, "CL_FP_INF_NAN", fpType);
            appendBitfield<cl_device_fp_config>(config, CL_FP_ROUND_TO_NEAREST, "CL_FP_ROUND_TO_NEAREST", fpType);
            appendBitfield<cl_device_fp_config>(config, CL_FP_ROUND_TO_ZERO, "CL_FP_ROUND_TO_ZERO", fpType);
            appendBitfield<cl_device_fp_config>(config, CL_FP_ROUND_TO_INF, "CL_FP_ROUND_TO_INF", fpType);
            appendBitfield<cl_device_fp_config>(config, CL_FP_FMA, "CL_FP_FMA", fpType);
#ifdef CL_FP_SOFT_FLOAT
            appendBitfield<cl_device_fp_config>(config, CL_FP_SOFT_FLOAT, "CL_FP_SOFT_FLOAT", fpType);
#endif
            std::cout << "\t\t" << str << ":\t" << fpType << std::endl;
        }
        break; // without this the cache-type case below would also run
        case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
        {
            // The cache type is an enumeration, not a bitfield: CL_NONE is 0
            // and can never be detected with a bit test, so compare instead.
            const cl_device_mem_cache_type cacheType =
                *(reinterpret_cast<cl_device_mem_cache_type*>(info));
            std::string memType;
            if (cacheType == CL_NONE)
            {
                memType = "CL_NONE";
            }
            else if (cacheType == CL_READ_ONLY_CACHE)
            {
                memType = "CL_READ_ONLY_CACHE";
            }
            else if (cacheType == CL_READ_WRITE_CACHE)
            {
                memType = "CL_READ_WRITE_CACHE";
            }
            std::cout << "\t\t" << str << ":\t" << memType << std::endl;
        }
        break;
        case CL_DEVICE_LOCAL_MEM_TYPE:
        {
            // Also an enumeration; each label is matched against its own constant.
            const cl_device_local_mem_type localType =
                *(reinterpret_cast<cl_device_local_mem_type*>(info));
            std::string memType;
            if (localType == CL_LOCAL)
            {
                memType = "CL_LOCAL";
            }
            else if (localType == CL_GLOBAL)
            {
                memType = "CL_GLOBAL";
            }
            std::cout << "\t\t" << str << ":\t" << memType << std::endl;
        }
        break;
        case CL_DEVICE_EXECUTION_CAPABILITIES:
        {
            const cl_device_exec_capabilities caps =
                *(reinterpret_cast<cl_device_exec_capabilities*>(info));
            std::string memType;
            appendBitfield<cl_device_exec_capabilities>(caps, CL_EXEC_KERNEL, "CL_EXEC_KERNEL", memType);
            appendBitfield<cl_device_exec_capabilities>(caps, CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", memType);
            std::cout << "\t\t" << str << ":\t" << memType << std::endl;
        }
        break;
        case CL_DEVICE_QUEUE_PROPERTIES:
        {
            const cl_command_queue_properties props =
                *(reinterpret_cast<cl_command_queue_properties*>(info));
            std::string memType;
            appendBitfield<cl_command_queue_properties>(
                props, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", memType);
            appendBitfield<cl_command_queue_properties>(
                props, CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", memType);
            std::cout << "\t\t" << str << ":\t" << memType << std::endl;
        }
        break;
        default:
            std::cout << "\t\t" << str << ":\t" << *info << std::endl;
            break;
        }
    }
};

///
// Simple trait class used to wrap base types.
//
template <typename T>
class ArrayType
{
public:
    static bool isChar() { return false; }
};

///
// Specialized for the char (i.e. null terminated string case).
// template<> class ArrayType<char> { public: static bool isChar() { return true; } }; /// // Specialized instance of class InfoDevice for array types. // template <typename T> class InfoDevice<ArrayType<T> > { public: static void display( cl_device_id id, cl_device_info name, std::string str) { cl_int errNum; std::size_t paramValueSize; errNum = clGetDeviceInfo( id, name, 0, NULL, ¶mValueSize); if (errNum != CL_SUCCESS) { std::cerr << "Failed to find OpenCL device info " << str << "." << std::endl; return; } T * info = (T *)alloca(sizeof(T) * paramValueSize); errNum = clGetDeviceInfo( id, name, paramValueSize, info, NULL); if (errNum != CL_SUCCESS) { std::cerr << "Failed to find OpenCL device info " << str << "." << std::endl; return; } if (ArrayType<T>::isChar()) { std::cout << "\\t" << str << ":\\t" << info << std::endl; } else if (name == CL_DEVICE_MAX_WORK_ITEM_SIZES) { cl_uint maxWorkItemDimensions; errNum = clGetDeviceInfo( id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, NULL); if (errNum != CL_SUCCESS) { std::cerr << "Failed to find OpenCL device info " << "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS." << std::endl; return; } std::cout << "\\t" << str << ":\\t"; for (cl_uint i = 0; i < maxWorkItemDimensions; i++) { std::cout << info[i] << " "; } std::cout << std::endl; } } }; /// // Enumerate platforms and display information about them // and their associated devices. // void displayInfo(void) { cl_int errNum; cl_uint numPlatforms; cl_platform_id * platformIds; cl_context context = NULL; // First, query the total number of platforms errNum = clGetPlatformIDs(0, NULL, &numPlatforms); if (errNum != CL_SUCCESS || numPlatforms <= 0) { std::cerr << "Failed to find any OpenCL platform." << std::endl; return; } // Next, allocate memory for the installed plaforms, and qeury // to get the list. 
platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id) * numPlatforms); // First, query the total number of platforms errNum = clGetPlatformIDs(numPlatforms, platformIds, NULL); if (errNum != CL_SUCCESS) { std::cerr << "Failed to find any OpenCL platforms." << std::endl; return; } std::cout << "Number of platforms: \\t" << numPlatforms << std::endl; // Iterate through the list of platforms displaying associated information for (cl_uint i = 0; i < numPlatforms; i++) { // First we display information associated with the platform DisplayPlatformInfo( platformIds[i], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE"); DisplayPlatformInfo( platformIds[i], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION"); DisplayPlatformInfo( platformIds[i], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR"); DisplayPlatformInfo( platformIds[i], CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS"); // Now query the set of devices associated with the platform cl_uint numDevices; errNum = clGetDeviceIDs( platformIds[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); if (errNum != CL_SUCCESS) { std::cerr << "Failed to find OpenCL devices." << std::endl; return; } cl_device_id * devices = (cl_device_id *)alloca(sizeof(cl_device_id) * numDevices); errNum = clGetDeviceIDs( platformIds[i], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL); if (errNum != CL_SUCCESS) { std::cerr << "Failed to find OpenCL devices." 
<< std::endl; return; } std::cout << "\\tNumber of devices: \\t" << numDevices << std::endl; // Iterate through each device, displaying associated information for (cl_uint j = 0; j < numDevices; j++) { InfoDevice<cl_device_type>::display( devices[j], CL_DEVICE_TYPE, "CL_DEVICE_TYPE"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_VENDOR_ID, "CL_DEVICE_VENDOR_ID"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, "CL_DEVICE_MAX_COMPUTE_UNITS"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); InfoDevice<ArrayType<size_t> >::display( devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, "CL_DEVICE_MAX_WORK_ITEM_SIZES"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, "CL_DEVICE_MAX_WORK_GROUP_SIZE"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE"); #ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, "CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, "CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT"); InfoDevice<cl_uint>::display( 
devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, "CL_DEVICE_NATIVE_VECTOR_WIDTH_INT"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, "CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, "CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, "CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, "CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF"); #endif InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, "CL_DEVICE_MAX_CLOCK_FREQUENCY"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_ADDRESS_BITS, "CL_DEVICE_ADDRESS_BITS"); InfoDevice<cl_ulong>::display( devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, "CL_DEVICE_MAX_MEM_ALLOC_SIZE"); InfoDevice<cl_bool>::display( devices[j], CL_DEVICE_IMAGE_SUPPORT, "CL_DEVICE_IMAGE_SUPPORT"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MAX_READ_IMAGE_ARGS, "CL_DEVICE_MAX_READ_IMAGE_ARGS"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MAX_WRITE_IMAGE_ARGS, "CL_DEVICE_MAX_WRITE_IMAGE_ARGS"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH, "CL_DEVICE_IMAGE2D_MAX_WIDTH"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH, "CL_DEVICE_IMAGE2D_MAX_WIDTH"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT, "CL_DEVICE_IMAGE2D_MAX_HEIGHT"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_IMAGE3D_MAX_WIDTH, "CL_DEVICE_IMAGE3D_MAX_WIDTH"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_IMAGE3D_MAX_HEIGHT, "CL_DEVICE_IMAGE3D_MAX_HEIGHT"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_IMAGE3D_MAX_DEPTH, "CL_DEVICE_IMAGE3D_MAX_DEPTH"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MAX_SAMPLERS, "CL_DEVICE_MAX_SAMPLERS"); InfoDevice<std::size_t>::display( devices[j], CL_DEVICE_MAX_PARAMETER_SIZE, 
"CL_DEVICE_MAX_PARAMETER_SIZE"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MEM_BASE_ADDR_ALIGN, "CL_DEVICE_MEM_BASE_ADDR_ALIGN"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE"); InfoDevice<cl_device_fp_config>::display( devices[j], CL_DEVICE_SINGLE_FP_CONFIG, "CL_DEVICE_SINGLE_FP_CONFIG"); InfoDevice<cl_device_mem_cache_type>::display( devices[j], CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "CL_DEVICE_GLOBAL_MEM_CACHE_TYPE"); InfoDevice<cl_uint>::display( devices[j], CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE"); InfoDevice<cl_ulong>::display( devices[j], CL_DEVICE以上是关于OpenCL双边滤波实现美颜功能的主要内容,如果未能解决你的问题,请参考以下文章