使用 OpenCV 改进文本二值化/OCR 预处理

Posted 2023-04-17

技术标签:

【中文标题】使用 OpenCV 改进文本二值化/OCR 预处理【英文标题】：Improve Text Binarization / OCR Preprocessing with OpenCV 【发布时间】：2017-09-25 08:05:10 【问题描述】：

我正在为我的应用构建扫描仪功能，并使用 OpenCV 对文档的照片进行二值化：

    // convert to greyscale
    cv::Mat converted, blurred, blackAndWhite;
    converted = cv::Mat(inputMatrix.rows, inputMatrix.cols, CV_8UC1);
    cv::cvtColor(inputMatrix, converted, CV_BGR2GRAY);

    // remove noise with a small Gaussian blur before thresholding
    cv::GaussianBlur(converted, blurred, cv::Size(3, 3), 0);

    // adaptive threshold: read from the blurred greyscale image; blackAndWhite
    // is still empty at this point and is only the destination
    cv::adaptiveThreshold(blurred, blackAndWhite, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 15, 9);

结果还可以，但另一款扫描仪应用的扫描效果要好得多，尤其是非常小的文字：用 opencv 处理

使用 DropBox 扫描

我能做些什么来改进我的结果？

【问题讨论】：

你的结果看起来不像是二值图像（字符有些褪色），还是说这是保存图像时 JPG 压缩造成的？嗯，是的，确实如此；而且下面那张图像（也就是我想要达到的效果）看起来确实包含 2 种以上的颜色。那么，也许应用自适应阈值根本就是错误的方法？应用阈值处理必然会生成二值图像。请检查你的图像是否为二值图像，并将其保存为无损格式，这样才能看到真实的结果。之后你还可以应用一些形态学操作来稍微清理一下结果。可以分享原图吗？我想尝试在我的机器上处理它。当然，我在这里上传了一张类似的图像，其中包含 opencv 的处理版本和扫描仪应用程序的版本：imgur.com/a/e0UZk 【参考方案1】：

可能是应用程序正在使用anti-aliasing 使其二值化输出看起来更好。为了获得类似的效果，我首先尝试对图像进行二值化，但所有锯齿状边缘的结果看起来都不是很好。然后我对结果进行了金字塔上采样再下采样，结果更好了。

但是，我没有使用自适应阈值。我分割了类似文本的区域并仅处理了这些区域，然后将它们粘贴以形成最终图像。它是一种使用 Otsu 方法或 k-means 的局部阈值处理（在代码中使用 thr_roi_otsu、thr_roi_kmeans 和 proc_parts 的组合）。以下是一些结果。

对所有文本区域应用 Otsu 阈值，然后上采样，然后下采样：

一些文字：

全图：

对输入图像进行上采样，对单个文本区域应用 Otsu 阈值，对结果进行下采样：

一些文字：

全图：

/*
Apply an Otsu threshold to the pixels of im that are selected by mask.

mask: accessed as an 8-bit single-channel image; non-zero pixels mark the
      region to threshold.
im:   accessed as an 8-bit single-channel image at the same coordinates as
      mask, so it must be at least as large as mask.

Returns an 8-bit image of im's size that is 255 everywhere outside the mask
and holds the thresholded values inside it. If the mask selects no pixels the
all-white image is returned unchanged.
*/
Mat thr_roi_otsu(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;

    // collect the masked pixels into one contiguous buffer so that the Otsu
    // level is computed from the region only, not from the whole image
    vector<unsigned char> pixels(countNonZero(mask));
    if (pixels.empty())
    {
        // nothing to threshold: do not run Otsu on an empty sample set
        return bw;
    }

    int index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels[index++] = im.at<unsigned char>(r, c);
            }
        }
    }

    // threshold pixels (in place)
    threshold(pixels, pixels, 0, 255, THRESH_BINARY | THRESH_OTSU);

    // paste pixels back, visiting the mask in the same order as above
    index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = pixels[index++];
            }
        }
    }

    return bw;
}

/*
Binarize the pixels of im that are selected by mask using 2-means clustering
of their gray levels.

mask: accessed as an 8-bit single-channel image; non-zero pixels mark the
      region to process.
im:   accessed as an 8-bit single-channel image at the same coordinates as
      mask, so it must be at least as large as mask.

Returns an 8-bit image of im's size that is 255 everywhere outside the mask.
Inside the mask, pixels assigned to the cluster with the lower center become 0
and the others become 255. If the mask selects fewer pixels than there are
clusters, the all-white image is returned unchanged.
*/
Mat thr_roi_kmeans(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;

    const int k = 2;

    vector<float> pixels(countNonZero(mask));
    if (static_cast<int>(pixels.size()) < k)
    {
        // kmeans needs at least k samples, and &pixels[0] below would be
        // invalid for an empty vector
        return bw;
    }

    int index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels[index++] = static_cast<float>(im.at<unsigned char>(r, c));
            }
        }
    }

    // cluster pixels by gray level; data is a header over the pixels buffer
    // (one sample per row, one feature), no copy is made
    Mat data(static_cast<int>(pixels.size()), 1, CV_32FC1, &pixels[0]);
    vector<float> centers;
    vector<int> labels(pixels.size());
    // the fifth argument is the number of attempts; the original passes k (2)
    kmeans(data, k, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0), k, KMEANS_PP_CENTERS, centers);

    // examine cluster centers to see which pixels are dark:
    // label0 is the label of the cluster with the lower gray-level center
    int label0 = centers[0] > centers[1] ? 1 : 0;

    // paste pixels back, visiting the mask in the same order as above
    index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = labels[index++] != label0 ? 255 : 0;
            }
        }
    }

    return bw;
}

/*
Apply procfn to each top-level connected component in the mask,
then paste the results to form the final image.

mask:   image whose non-zero regions are the components to process.
im:     image to be processed; it is cropped with the same rectangles as mask.
procfn: called as procfn(maskCrop, imageCrop) for the bounding rectangle of
        each component; its result is copied into that rectangle of the output.

Returns an 8-bit image of im's size, 255 wherever no component rectangle was
pasted. If the mask contains no contours the all-white image is returned.
*/
Mat proc_parts(Mat& mask, Mat& im, Mat (procfn)(Mat&, Mat&))
{
    // findContours gets a copy; older OpenCV versions modify the image they
    // are given, and mask is still needed for cropping below
    Mat tmp = mask.clone();
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(tmp, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

    Mat byparts = Mat::ones(im.size(), CV_8U) * 255;

    if (contours.empty())
    {
        // no components: hierarchy is empty too, so hierarchy[0] must not be read
        return byparts;
    }

    // hierarchy[idx][0] is the next contour on the same level (negative when
    // there is none), so this walks the top-level contours only
    for (int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        Rect rect = boundingRect(contours[idx]);
        Mat msk = mask(rect);
        Mat img = im(rect);
        // process the rect
        Mat roi = procfn(msk, img);
        // paste it to the final image
        roi.copyTo(byparts(rect));
    }

    return byparts;
}

int _tmain(int argc, _TCHAR* argv[])

    Mat im = imread("1.jpg", 0);
    // detect text regions
    Mat morph;
    Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(im, morph, CV_MOP_GRADIENT, kernel, Point(-1, -1), 1);
    // prepare a mask for text regions
    Mat bw;
    threshold(morph, bw, 0, 255, THRESH_BINARY | THRESH_OTSU);
    morphologyEx(bw, bw, CV_MOP_DILATE, kernel, Point(-1, -1), 10);

    Mat bw2x, im2x;
    pyrUp(bw, bw2x);
    pyrUp(im, im2x);

    // apply Otsu threshold to all text regions, then upsample followed by downsample
    Mat otsu1x = thr_roi_otsu(bw, im);
    pyrUp(otsu1x, otsu1x);
    pyrDown(otsu1x, otsu1x);

    // apply k-means to all text regions, then upsample followed by downsample
    Mat kmeans1x = thr_roi_kmeans(bw, im);
    pyrUp(kmeans1x, kmeans1x);
    pyrDown(kmeans1x, kmeans1x);

    // upsample input image, apply Otsu threshold to all text regions, downsample the result
    Mat otsu2x = thr_roi_otsu(bw2x, im2x);
    pyrDown(otsu2x, otsu2x);

    // upsample input image, apply k-means to all text regions, downsample the result
    Mat kmeans2x = thr_roi_kmeans(bw2x, im2x);
    pyrDown(kmeans2x, kmeans2x);

    // apply Otsu threshold to individual text regions, then upsample followed by downsample
    Mat otsuparts1x = proc_parts(bw, im, thr_roi_otsu);
    pyrUp(otsuparts1x, otsuparts1x);
    pyrDown(otsuparts1x, otsuparts1x);

    // apply k-means to individual text regions, then upsample followed by downsample
    Mat kmeansparts1x = proc_parts(bw, im, thr_roi_kmeans);
    pyrUp(kmeansparts1x, kmeansparts1x);
    pyrDown(kmeansparts1x, kmeansparts1x);

    // upsample input image, apply Otsu threshold to individual text regions, downsample the result
    Mat otsuparts2x = proc_parts(bw2x, im2x, thr_roi_otsu);
    pyrDown(otsuparts2x, otsuparts2x);

    // upsample input image, apply k-means to individual text regions, downsample the result
    Mat kmeansparts2x = proc_parts(bw2x, im2x, thr_roi_kmeans);
    pyrDown(kmeansparts2x, kmeansparts2x);

    return 0;

【讨论】：

以上是关于使用 OpenCV 改进文本二值化/OCR 预处理的主要内容，如果未能解决你的问题，请参考以下文章

Ubuntu 14.04 下使用 OpenCV 图片二值化处理

用opencv如何将一个二值化图像反色

使用opencv，对一个已经二值化的身份证图像，怎么样将身份证号码所在的图像切割出来？

C++ opencv 图片二值化最佳阈值确定（大津法,OTSU算法)

opencv中怎么获取二值化图像的每个像素点的值

为啥用二值化处理图像之后，还会有其他的灰度值，尤其是在一些边缘的位置。比如opencv里的cvThreshold