准备 OCR OpenCV
Posted
技术标签:
【中文标题】准备 OCR OpenCV【英文标题】:Preparing for OCR OpenCV 【发布时间】:2014-04-02 08:36:24 【问题描述】:我正在制作一个使用 OCR 的应用程序,并且我正在使用 OpenCV 对图像进行阈值处理以改进 OCR 结果,我得到了很好的结果,但我想知道是否有人有任何改进建议。
这是我到目前为止所做的:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
如果您有任何改进结果的建议,请告诉我,谢谢。
示例图片:
之后:
【问题讨论】:
请至少发布一张示例图片。作为一般规则,最好在发布示例图像时解决图像处理应用程序。出现文本的场景非常多,仅仅查看库中的几行代码并判断下一步应该做什么是不够的。 添加了示例图片。我计划在 ios 上添加一个摄像头覆盖,以消除需要的边缘检测。 【参考方案1】:在 OCR 领域中解决阈值问题的最佳算法之一是 sauvola 方法。您可以使用以下代码。
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
enum class BhThresholdMethodOTSU,NIBLACK,SAUVOLA,WOLFJOLION;
class BhThresholder
public :
void doThreshold(InputArray src ,OutputArray dst,const BhThresholdMethod &method);
private:
;
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y)
double m,s,max_s, sum, sum_sq, foo;
int wxh = win_x / 2;
int wyh = win_y / 2;
int x_firstth = wxh;
int y_lastth = im.rows-wyh-1;
int y_firstth= wyh;
double winarea = win_x*win_y;
max_s = 0;
for (int j = y_firstth ; j<=y_lastth; j++)
// Calculate the initial window at the beginning of the line
sum = sum_sq = 0;
for (int wy=0 ; wy<win_y; wy++)
for (int wx=0 ; wx<win_x; wx++)
foo = im.uget(wx,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(x_firstth, j, m);
map_s.fset(x_firstth, j, s);
// Shift the window, add and remove new/old values to the histogram
for (int i=1 ; i <= im.cols -win_x; i++)
// Remove the left old column and add the right new column
for (int wy=0; wy<win_y; ++wy)
foo = im.uget(i-1,j-wyh+wy);
sum -= foo;
sum_sq -= foo*foo;
foo = im.uget(i+win_x-1,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(i+wxh, j, m);
map_s.fset(i+wxh, j, s);
return max_s;
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR)
Mat src = _src.getMat();
Mat dst = _dst.getMat();
double m, s, max_s;
double th=0;
double min_I, max_I;
int wxh = winx/2;
int wyh = winy/2;
int x_firstth= wxh;
int x_lastth = src.cols-wxh-1;
int y_lastth = src.rows-wyh-1;
int y_firstth= wyh;
int mx, my;
// Create local statistics and store them in a double matrices
Mat map_m = Mat::zeros (src.size(), CV_32FC1);
Mat map_s = Mat::zeros (src.size(), CV_32FC1);
max_s = calcLocalStats (src, map_m, map_s, winx, winy);
minMaxLoc(src, &min_I, &max_I);
Mat thsurf (src.size(), CV_32FC1);
// Create the threshold surface, including border processing
// ----------------------------------------------------
for (int j = y_firstth ; j<=y_lastth; j++)
// NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
for (int i=0 ; i <= src.cols-winx; i++)
m = map_m.fget(i+wxh, j);
s = map_s.fget(i+wxh, j);
// Calculate the threshold
switch (version)
case BhThresholdMethod::NIBLACK:
th = m + k*s;
break;
case BhThresholdMethod::SAUVOLA:
th = m * (1 + k*(s/dR-1));
break;
case BhThresholdMethod::WOLFJOLION:
th = m + k * (s/max_s-1) * (m-min_I);
break;
default:
cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
exit (1);
thsurf.fset(i+wxh,j,th);
if (i==0)
// LEFT BORDER
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,j,th);
// LEFT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// LEFT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// UPPER BORDER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
thsurf.fset(i+wxh,u,th);
// LOWER BORDER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
thsurf.fset(i+wxh,u,th);
// RIGHT BORDER
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,j,th);
// RIGHT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
// RIGHT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
cerr << "surface created" << endl;
for (int y=0; y<src.rows; ++y)
for (int x=0; x<src.cols; ++x)
if (src.uget(x,y) >= thsurf.fget(x,y))
dst.uset(x,y,255);
else
dst.uset(x,y,0);
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
Mat src = _src.getMat();
int winx = 0;
int winy = 0;
float optK=0.5;
if (winx==0 || winy==0)
winy = (int) (2.0 * src.rows - 1)/3;
winx = (int) src.cols-1 < winy ? src.cols-1 : winy;
// if the window is too big, than we asume that the image
// is not a single text box, but a document page: set
// the window size to a fixed constant.
if (winx > 100)
winx = winy = 40;
// Threshold
_dst.create(src.size(), CV_8UC1);
Mat dst = _dst.getMat();
//medianBlur(src,dst,5);
GaussianBlur(src,dst,Size(5,5),0);
//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif
//medianBlur(src,dst,7);
switch (method)
case BhThresholdMethod::OTSU :
threshold(dst,dst,128,255,CV_THRESH_OTSU);
break;
case BhThresholdMethod::SAUVOLA :
case BhThresholdMethod::WOLFJOLION :
NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
bitwise_not(dst,dst);
#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
【讨论】:
【参考方案2】:这里是阈值方法的比较表:http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
【讨论】:
【参考方案3】:一些想法:
由于您是从一个可能以非正常角度观看的矩形对象开始的,因此请使用仿射变换来扭曲图像,使其显示为带有直角的矩形。 在仿射变换之前,您可能应该消除桶形失真(卡片边缘的弯曲度)。 考虑使用自适应阈值而不是简单的全局二值化阈值。 如果您能找到不需要二进制图像的合适 OCR 算法,请使用它。虽然二值化适用于白色背景上的黑色文本,但一般而言,如果您想获得高精度(即任意字符串的字符识别率接近 98% 以上),二值化会带来很多问题 尝试以更好的分辨率进行采样。【讨论】:
感谢您的输入,我相信我正在执行上述自适应阈值,但我会尝试其他建议并回复您。以上是关于准备 OCR OpenCV的主要内容,如果未能解决你的问题,请参考以下文章
OpenCVopencv3.0中的SVM训练 mnist 手写字体识别