准备 OCR OpenCV

Posted 2023-04-17

技术标签:

【中文标题】准备 OCR OpenCV【英文标题】：Preparing for OCR OpenCV 【发布时间】：2014-04-02 08:36:24 【问题描述】：

我正在制作一个使用 OCR 的应用程序，并且我正在使用 OpenCV 对图像进行阈值处理以改进 OCR 结果，我得到了很好的结果，但我想知道是否有人有任何改进建议。

这是我到目前为止所做的：

// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image (unsharp mask): result = 1.5 * image - 0.5 * blurred.
// The blur has to go into a separate buffer: blurring cvMat in place and then
// combining it with itself (1.5 * x - 0.5 * x == x) only returns the blurred image.
cv::Mat blurred;
cv::GaussianBlur(cvMat, blurred, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, blurred, -0.5, 0, cvMat);

如果您有任何改进结果的建议，请告诉我，谢谢。

示例图片：

之后：

【问题讨论】：

请至少发布一张示例图片。一般来说，图像处理方面的问题最好附上示例图像才便于解答。文本可能出现的场景非常多，仅凭几行库调用代码很难判断下一步应该怎么做。——（提问者）已添加示例图片。我计划在 iOS 上添加一个相机叠加层（overlay），这样就不再需要边缘检测了。 【参考方案1】：

在 OCR 领域中，解决阈值（二值化）问题的最佳算法之一是 Sauvola 方法。您可以使用以下代码。

#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;

/// Binarization algorithms that can be requested from BhThresholder::doThreshold.
enum class BhThresholdMethod { OTSU, NIBLACK, SAUVOLA, WOLFJOLION };


class BhThresholder

public :
    void doThreshold(InputArray src ,OutputArray dst,const BhThresholdMethod &method);
private:
;

#endif //_THRESHOLDER
thresholder.cpp

#include "stdafx.h"

// Pixel accessors for cv::Mat, addressed as (x, y) = (column, row).
// cv::Mat::at takes (row, column), hence the swapped argument order.
// Arguments are parenthesized and the setters carry no trailing ';', so that
// `m.uset(x, y, v);` expands to exactly one statement (safe in un-braced if/else).
#define uget(x,y)    at<unsigned char>((y),(x))
#define uset(x,y,v)  at<unsigned char>((y),(x))=(v)
#define fget(x,y)    at<float>((y),(x))
#define fset(x,y,v)  at<float>((y),(x))=(v)

// *************************************************************
// Glide a win_x-by-win_y window across the image and fill two maps:
// the local mean (map_m) and the local standard deviation (map_s).
//
// im     : input image, read as unsigned char pixels.
// map_m  : receives the window mean, written as float at the window centre.
// map_s  : receives the window standard deviation, written as float.
// win_x, win_y : window width and height in pixels.
//
// Only positions whose window fits entirely inside the image are written;
// the border cells of both maps are left untouched.
// Returns the largest standard deviation found, or 0 when the window is
// empty or larger than the image (in which case nothing is written).
// *************************************************************
//#define BINARIZEWOLF_VERSION  "2.3 (February 26th, 2013)"


double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y)
{
    // A non-positive window would divide by zero, and a window larger than
    // the image would read outside of it.
    if (win_x <= 0 || win_y <= 0 || im.cols < win_x || im.rows < win_y)
        return 0;

    const int wxh = win_x / 2;
    const int wyh = win_y / 2;
    const int x_firstth = wxh;
    const int y_firstth = wyh;
    const int y_lastth  = im.rows - wyh - 1;
    const double winarea = static_cast<double>(win_x) * win_y;

    double max_s = 0;
    for (int j = y_firstth; j <= y_lastth; j++)
    {
        // Calculate the initial window at the beginning of the line
        double sum = 0;
        double sum_sq = 0;
        for (int wy = 0; wy < win_y; wy++)
        {
            for (int wx = 0; wx < win_x; wx++)
            {
                const double px = im.uget(wx, j-wyh+wy);
                sum    += px;
                sum_sq += px*px;
            }
        }
        double m = sum / winarea;
        double s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
        if (s > max_s)
            max_s = s;
        map_m.fset(x_firstth, j, m);
        map_s.fset(x_firstth, j, s);

        // Shift the window one column at a time, updating the running sums
        // instead of re-scanning the whole window.
        for (int i = 1; i <= im.cols - win_x; i++)
        {
            // Remove the left old column and add the right new column
            for (int wy = 0; wy < win_y; ++wy)
            {
                const double old_px = im.uget(i-1, j-wyh+wy);
                sum    -= old_px;
                sum_sq -= old_px*old_px;
                const double new_px = im.uget(i+win_x-1, j-wyh+wy);
                sum    += new_px;
                sum_sq += new_px*new_px;
            }
            m = sum / winarea;
            s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
            if (s > max_s)
                max_s = s;
            map_m.fset(i+wxh, j, m);
            map_s.fset(i+wxh, j, s);
        }
    }

    return max_s;
}




// Binarizes an image with a locally adaptive threshold surface
// (Niblack, Sauvola or Wolf-Jolion).
//
// _src    : input image; pixels are read as unsigned char.
// _dst    : output image; it is fetched with getMat() and never created here,
//           so the caller must have allocated it with the size of _src.
//           Receives 255 where src >= threshold and 0 elsewhere.
// version : NIBLACK, SAUVOLA or WOLFJOLION. Any other value prints an error
//           to cerr and terminates the process with exit(1).
// winx, winy : size of the sliding window used for the local statistics.
// k       : weight of the standard-deviation term in the threshold formulas.
// dR      : divisor applied to the standard deviation in the Sauvola formula.
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR)
{
    Mat src = _src.getMat();
    Mat dst = _dst.getMat();
    double m, s, max_s;
    double th=0;
    double min_I, max_I;
    const int wxh = winx/2;
    const int wyh = winy/2;
    const int x_firstth= wxh;
    const int x_lastth = src.cols-wxh-1;
    const int y_lastth = src.rows-wyh-1;
    const int y_firstth= wyh;

    // Create local statistics and store them in float matrices
    Mat map_m = Mat::zeros (src.size(), CV_32FC1);
    Mat map_s = Mat::zeros (src.size(), CV_32FC1);
    max_s = calcLocalStats (src, map_m, map_s, winx, winy);

    minMaxLoc(src, &min_I, &max_I);

    Mat thsurf (src.size(), CV_32FC1);

    // Create the threshold surface, including border processing
    // ----------------------------------------------------

    for (int j = y_firstth ; j<=y_lastth; j++)
    {
        // NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
        for (int i=0 ; i <= src.cols-winx; i++)
        {
            m  = map_m.fget(i+wxh, j);
            s  = map_s.fget(i+wxh, j);

            // Calculate the threshold
            switch (version)
            {
                case BhThresholdMethod::NIBLACK:
                    th = m + k*s;
                    break;

                case BhThresholdMethod::SAUVOLA:
                    th = m * (1 + k*(s/dR-1));
                    break;

                case BhThresholdMethod::WOLFJOLION:
                {
                    // max_s == 0 means no window has any contrast; treat the
                    // relative deviation as 0 instead of computing 0/0 (NaN),
                    // which would make every comparison below fail.
                    const double rel_s = (max_s > 0) ? s/max_s : 0.0;
                    th = m + k * (rel_s-1) * (m-min_I);
                    break;
                }

                default:
                    cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
                    exit (1);
            }

            thsurf.fset(i+wxh,j,th);

            if (i==0)
            {
                // LEFT BORDER: replicate the first threshold of the row
                for (int bx=0; bx<=x_firstth; ++bx)
                    thsurf.fset(bx,j,th);

                // LEFT-UPPER CORNER
                if (j==y_firstth)
                    for (int u=0; u<y_firstth; ++u)
                        for (int bx=0; bx<=x_firstth; ++bx)
                            thsurf.fset(bx,u,th);

                // LEFT-LOWER CORNER
                if (j==y_lastth)
                    for (int u=y_lastth+1; u<src.rows; ++u)
                        for (int bx=0; bx<=x_firstth; ++bx)
                            thsurf.fset(bx,u,th);
            }

            // UPPER BORDER
            if (j==y_firstth)
                for (int u=0; u<y_firstth; ++u)
                    thsurf.fset(i+wxh,u,th);

            // LOWER BORDER
            if (j==y_lastth)
                for (int u=y_lastth+1; u<src.rows; ++u)
                    thsurf.fset(i+wxh,u,th);
        }

        // From here on `th` holds the last threshold computed for row j.

        // RIGHT BORDER
        for (int bx=x_lastth; bx<src.cols; ++bx)
            thsurf.fset(bx,j,th);

        // RIGHT-UPPER CORNER
        if (j==y_firstth)
            for (int u=0; u<y_firstth; ++u)
                for (int bx=x_lastth; bx<src.cols; ++bx)
                    thsurf.fset(bx,u,th);

        // RIGHT-LOWER CORNER
        if (j==y_lastth)
            for (int u=y_lastth+1; u<src.rows; ++u)
                for (int bx=x_lastth; bx<src.cols; ++bx)
                    thsurf.fset(bx,u,th);
    }
    cerr << "surface created" << endl;

    // Apply the threshold surface pixel by pixel.
    for (int y=0; y<src.rows; ++y)
    {
        for (int x=0; x<src.cols; ++x)
        {
            if (src.uget(x,y) >= thsurf.fget(x,y))
            {
                dst.uset(x,y,255);
            }
            else
            {
                dst.uset(x,y,0);
            }
        }
    }
}

// Binarizes _src into _dst with the requested method and inverts the result.
//
// _src   : input image; must be non-empty and of type CV_8UC1.
// _dst   : created here as CV_8UC1 with the size of _src.
// method : OTSU thresholds a Gaussian-blurred copy of the input globally;
//          NIBLACK, SAUVOLA and WOLFJOLION use NiblackSauvolaWolfJolion with
//          an automatically chosen window, k = 0.5 and dR = 128.
//
// The output is passed through bitwise_not before returning.
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
    Mat src = _src.getMat();

    // The per-pixel accessors used by the local methods read unsigned char,
    // and Otsu thresholding needs 8-bit single-channel data as well.
    CV_Assert(!src.empty() && src.type() == CV_8UC1);

    const float optK = 0.5;

    // Derive the window from the image size: about two thirds of the height,
    // but never wider than the image.
    int winy = static_cast<int>(2.0 * src.rows - 1) / 3;
    int winx = (src.cols-1 < winy) ? src.cols-1 : winy;

    // if the window is too big, then we assume that the image
    // is not a single text box, but a document page: set
    // the window size to a fixed constant.
    if (winx > 100)
        winx = winy = 40;

    // Very small images would otherwise yield an empty window
    // (division by zero in the local statistics).
    if (winx < 1)
        winx = 1;
    if (winy < 1)
        winy = 1;

    // Threshold
    _dst.create(src.size(), CV_8UC1);
    Mat dst = _dst.getMat();

    // NOTE(review): the blurred image in dst is only consumed by the OTSU
    // branch; the local methods read the un-blurred src and overwrite dst.
    // Confirm that this is intended.
    //medianBlur(src,dst,5);
    GaussianBlur(src,dst,Size(5,5),0);
//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
    #define _BH_SHOW_IMAGE
#endif
    //medianBlur(src,dst,7);
    switch (method)
    {
    case BhThresholdMethod::OTSU :
        threshold(dst,dst,128,255,CV_THRESH_OTSU);
        break;
    case BhThresholdMethod::NIBLACK :   // previously unhandled: no threshold was applied
    case BhThresholdMethod::SAUVOLA :
    case BhThresholdMethod::WOLFJOLION :
        NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
        break;
    }

    bitwise_not(dst,dst);


#ifdef _BH_SHOW_IMAGE

#undef _BH_SHOW_IMAGE
#endif
}
【讨论】：

【参考方案2】：

这里是阈值方法的比较表：http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b

【讨论】：

【参考方案3】：

一些想法：

由于您处理的是一个矩形物体，而拍摄角度可能并非正对（即不垂直于物体表面），请使用仿射变换对图像进行校正，使其呈现为四角均为直角的矩形。在进行仿射变换之前，您可能应该先消除桶形失真（卡片边缘的弯曲）。考虑使用自适应阈值，而不是简单的全局阈值来做二值化。如果您能找到不需要二值图像的合适 OCR 算法，请使用它。虽然二值化对白色背景上的黑色文本效果不错，但一般而言，如果您想获得高精度（即任意字符串的字符识别率接近 98% 或更高），二值化会带来很多问题。另外，请尝试以更高的分辨率进行采样。

【讨论】：

感谢您的输入，我相信我正在执行上述自适应阈值，但我会尝试其他建议并回复您。