使用 OpenCV 改进文本二值化/OCR 预处理

Posted

技术标签:

【中文标题】使用 OpenCV 改进文本二值化/OCR 预处理【英文标题】:Improve Text Binarization / OCR Preprocessing with OpenCV 【发布时间】:2017-09-25 08:05:10 【问题描述】:

我正在为我的应用构建扫描仪功能,并使用 OpenCV 对文档的照片进行二值化:

    // Convert the colour input to a single-channel greyscale image.
    // cvtColor allocates its destination, so `converted` needs no manual pre-allocation.
    cv::Mat converted, blurred, blackAndWhite;
    cv::cvtColor(inputMatrix, converted, CV_BGR2GRAY);

    // Remove noise with a 3x3 Gaussian kernel (sigma 0 = derived from the kernel size).
    cv::GaussianBlur(converted, blurred, cv::Size(3, 3), 0);

    // Adaptive threshold. The source must be the blurred greyscale image,
    // not the (still empty) output matrix.
    cv::adaptiveThreshold(blurred, blackAndWhite, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 15, 9);

结果还可以,但来自不同扫描仪应用的扫描效果要好得多。尤其是非常小的文本要好得多: 用opencv处理

使用 DropBox 扫描

我能做些什么来改进我的结果?

【问题讨论】:

你的结果看起来不像是二值图像(字符有一些褪色),还是说这是保存图像时 jpg 压缩造成的? 嗯,是的,确实如此,下面那张图像(也就是我想要实现的结果)看起来确实包含不止 2 种颜色。那么,也许应用自适应阈值是完全错误的方法? 应用阈值必然会生成二值图像。请检查您的图像是否为二值图像,并将其保存为无损格式,这样您才能看到真实的结果。之后您还可以应用一些形态学操作来稍微清理一下结果。 可以分享原图吗?我想尝试在我的机器上处理它。 当然,我在这里上传了一张类似的图像,其中包含 opencv 的处理版本和扫描仪应用程序的版本:imgur.com/a/e0UZk 【参考方案1】:

可能是应用程序正在使用anti-aliasing 使其二值化输出看起来更好。为了获得类似的效果,我首先尝试对图像进行二值化,但所有锯齿状边缘的结果看起来都不是很好。然后我对结果进行了金字塔上采样再下采样,结果更好了。

但是,我没有使用自适应阈值。我分割出类似文本的区域并仅处理这些区域,然后将它们粘贴起来形成最终图像。这是一种使用 Otsu 方法或 k-means 的局部阈值处理(在代码中使用 thr_roi_otsu、thr_roi_kmeans 和 proc_parts 的组合)。以下是一些结果。

对所有文本区域应用 Otsu 阈值,然后上采样,然后下采样:

一些文字:

全图:

对输入图像进行上采样,对单个文本区域应用 Otsu 阈值,对结果进行下采样:

一些文字:

全图:

/*
Apply an Otsu threshold to the pixels of `im` selected by `mask`.

mask: read with at<unsigned char>; non-zero entries select pixels.
im:   read with at<unsigned char> at the same coordinates as mask
      (assumed to be at least as large as mask).
Returns an 8-bit image of im's size: selected pixels carry the thresholded
value (0 or 255), every other pixel is 255.
*/
Mat thr_roi_otsu(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;

    vector<unsigned char> pixels(countNonZero(mask));
    // nothing selected: skip thresholding an empty array, return the all-white image
    if (pixels.empty())
    {
        return bw;
    }

    // gather the selected pixels in row-major order
    int index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels[index++] = im.at<unsigned char>(r, c);
            }
        }
    }
    // threshold pixels
    threshold(pixels, pixels, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
    // paste pixels, walking the mask in the same order used for gathering
    index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = pixels[index++];
            }
        }
    }

    return bw;
}

/*
Binarize the pixels of `im` selected by `mask` with 2-cluster k-means on gray level.

mask: read with at<unsigned char>; non-zero entries select pixels.
im:   read with at<unsigned char> at the same coordinates as mask
      (assumed to be at least as large as mask).
Returns an 8-bit image of im's size: selected pixels in the darker cluster are 0,
selected pixels in the brighter cluster are 255, every other pixel is 255.
If fewer than two pixels are selected, the all-white image is returned.
*/
Mat thr_roi_kmeans(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;

    const int k = 2;
    vector<float> pixels(countNonZero(mask));
    // kmeans needs at least k samples; with fewer there is nothing to split
    if (static_cast<int>(pixels.size()) < k)
    {
        return bw;
    }

    // gather the selected pixels in row-major order
    int index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels[index++] = static_cast<float>(im.at<unsigned char>(r, c));
            }
        }
    }
    // cluster pixels by gray level; `data` is a header over `pixels`, no copy
    Mat data(static_cast<int>(pixels.size()), 1, CV_32FC1, pixels.data());
    vector<float> centers;
    vector<int> labels(pixels.size());
    kmeans(data, k, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0), k, KMEANS_PP_CENTERS, centers);
    // examine cluster centers to see which pixels are dark:
    // label0 is the label of the cluster with the lower (darker) center
    int label0 = centers[0] > centers[1] ? 1 : 0;
    // paste pixels, walking the mask in the same order used for gathering
    index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = labels[index++] != label0 ? 255 : 0;
            }
        }
    }

    return bw;
}

/*
Apply procfn to each connected component in the mask,
then paste the results to form the final image.

mask:   mask whose contours delimit the regions to process.
im:     image to process; sub-regions are taken at the same rectangles as in mask.
procfn: called as procfn(maskRoi, imageRoi) for the bounding rectangle of each
        top-level contour; its result is copied into that rectangle of the output.
Returns an 8-bit image of im's size, 255 wherever no region was pasted.
*/
Mat proc_parts(Mat& mask, Mat& im, Mat (procfn)(Mat&, Mat&))
{
    // findContours runs on a copy so the caller's mask is left untouched
    Mat tmp = mask.clone();
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(tmp, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

    Mat byparts = Mat::ones(im.size(), CV_8U) * 255;

    // hierarchy[idx][0] is the next contour on the same level (negative when there is none).
    // Start at -1 when nothing was found so hierarchy[0] is never read out of bounds.
    for (int idx = contours.empty() ? -1 : 0; idx >= 0; idx = hierarchy[idx][0])
    {
        Rect rect = boundingRect(contours[idx]);
        Mat msk = mask(rect);
        Mat img = im(rect);
        // process the rect
        Mat roi = procfn(msk, img);
        // paste it to the final image
        roi.copyTo(byparts(rect));
    }

    return byparts;
}

int _tmain(int argc, _TCHAR* argv[])

    Mat im = imread("1.jpg", 0);
    // detect text regions
    Mat morph;
    Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(im, morph, CV_MOP_GRADIENT, kernel, Point(-1, -1), 1);
    // prepare a mask for text regions
    Mat bw;
    threshold(morph, bw, 0, 255, THRESH_BINARY | THRESH_OTSU);
    morphologyEx(bw, bw, CV_MOP_DILATE, kernel, Point(-1, -1), 10);

    Mat bw2x, im2x;
    pyrUp(bw, bw2x);
    pyrUp(im, im2x);

    // apply Otsu threshold to all text regions, then upsample followed by downsample
    Mat otsu1x = thr_roi_otsu(bw, im);
    pyrUp(otsu1x, otsu1x);
    pyrDown(otsu1x, otsu1x);

    // apply k-means to all text regions, then upsample followed by downsample
    Mat kmeans1x = thr_roi_kmeans(bw, im);
    pyrUp(kmeans1x, kmeans1x);
    pyrDown(kmeans1x, kmeans1x);

    // upsample input image, apply Otsu threshold to all text regions, downsample the result
    Mat otsu2x = thr_roi_otsu(bw2x, im2x);
    pyrDown(otsu2x, otsu2x);

    // upsample input image, apply k-means to all text regions, downsample the result
    Mat kmeans2x = thr_roi_kmeans(bw2x, im2x);
    pyrDown(kmeans2x, kmeans2x);

    // apply Otsu threshold to individual text regions, then upsample followed by downsample
    Mat otsuparts1x = proc_parts(bw, im, thr_roi_otsu);
    pyrUp(otsuparts1x, otsuparts1x);
    pyrDown(otsuparts1x, otsuparts1x);

    // apply k-means to individual text regions, then upsample followed by downsample
    Mat kmeansparts1x = proc_parts(bw, im, thr_roi_kmeans);
    pyrUp(kmeansparts1x, kmeansparts1x);
    pyrDown(kmeansparts1x, kmeansparts1x);

    // upsample input image, apply Otsu threshold to individual text regions, downsample the result
    Mat otsuparts2x = proc_parts(bw2x, im2x, thr_roi_otsu);
    pyrDown(otsuparts2x, otsuparts2x);

    // upsample input image, apply k-means to individual text regions, downsample the result
    Mat kmeansparts2x = proc_parts(bw2x, im2x, thr_roi_kmeans);
    pyrDown(kmeansparts2x, kmeansparts2x);

    return 0;

【讨论】:

以上是关于使用 OpenCV 改进文本二值化/OCR 预处理的主要内容,如果未能解决你的问题,请参考以下文章

Ubuntu 14.04 下使用 OpenCV 图片二值化处理

用opencv如何将一个二值化图像反色

使用opencv,对一个已经二值化的身份证图像,怎么样将身份证号码所在的图像切割出来?

C++ opencv 图片二值化最佳阈值确定(大津法,OTSU算法)

opencv中怎么获取二值化图像的每个像素点的值

为啥用二值化处理图像之后,还会有其他的灰度值,尤其是在一些边缘的位置。比如opencv里的cvThreshold