OpenCV在字符提取中进行的预处理(转)

Posted abella

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了OpenCV在字符提取中进行的预处理(转)相关的知识,希望对你有一定的参考价值。

OCR简介
熟悉OCR的人都了解,OCR大致分为两个部分:

-文字提取text extractor
-文字识别text recognition

其中,第一部分属于图像处理部分,涉及到图像分割的知识,而第二部分则大多利用谷歌的Tesseract来进行字符的识别,涉及到的东西不多,当然也不难,难的是要做到很高的识别准确率和识别速度。

文字提取
这一部分工作是很关键的,因为文字提取的好坏,直接影响到最后的识别结果,相当于预处理部分,是非常重要的,其主要目的是为了分割出文字字符。
主要涉及工作有:

  1. -灰度化
  2. -锐化
  3. -Otsu
  4. -处理0和1边界值
  5. -如果有必要,还需要进行噪声去除,这里要涉及到找连通分量的相关计算;
  1 void TextDetector::segmentText(cv::Mat &spineImage, cv::Mat &segSpine, bool removeNoise){
  2 
  3     cv::Mat spineGray;
  4     cvtColor(spineImage, spineGray, CV_BGR2GRAY);
  5     imshow("gray source" , spineGray);
  6     spineGray = spineGray - 0.5;
  7 //    WriteData("/Users/eternity/Desktop/未命名文件夹/gray1.txt", spineGray);
  8 //    waitKey();
  9     cv::Mat spineAhe;
 10     adaptiveHistEqual(spineGray, spineAhe, 0.01);
 11     imshow("ahe", spineAhe);
 12 //    WriteData("/Users/eternity/Desktop/未命名文件夹/gray2.txt", spineAhe);
 13 
 14     int window_num = 40;
 15 
 16     double window_h = (spineImage.rows / (double)window_num + 1e-3);
 17 
 18     int window_w = spineImage.cols;
 19 
 20     cv::Mat spine_th = cv::Mat::zeros(spineGray.size(), CV_8U);
 21 
 22     for (int i = 0; i < window_num; i ++) {
 23         double cut_from_r = window_h * i;
 24         double cut_to_r = window_h * (i+1);
 25         cv::Mat window_img = cv::Mat::zeros(Size(cut_to_r-cut_from_r + 1, window_w), CV_8U);
 26         cv::Rect rect = cv::Rect(0, cut_from_r, window_w-1, cut_to_r - cut_from_r + 1);
 27         window_img = cv::Mat(spineGray, rect);
 28         imshow("window section", window_img);
 29 
 30         sharpenImage(window_img, window_img);
 31         imshow("sharpen", window_img);
 32 //        waitKey();
 33 //        WriteData("/Users/eternity/Desktop/未命名文件夹/gray4.txt", window_img);
 34         double max_local,min_local;
 35         minMaxLoc(window_img, &min_local, &max_local);
 36         double color_diff = max_local - min_local;
 37         double thresh;
 38         cv::Mat window_tmp;
 39         if (color_diff > 50)
 40             thresh = threshold(window_img, window_tmp, 1, 255, THRESH_OTSU);
 41         else
 42             thresh = 0;
 43 //        cout<<thresh<<endl;
 44         cv::Mat seg_window(window_img.size(), CV_64F);
 45         imgQuantize(window_img, seg_window, thresh);
 46 //        WriteData("/Users/eternity/Desktop/未命名文件夹/quantize2.txt", seg_window);
 47         seg_window = seg_window == 1;
 48 //        seg_window = seg_window / 255;
 49         //处理0边界值
 50         vector<int> cols1,cols2,rows1,rows2;
 51         findKEdgeFirst(seg_window, 0, 5, rows1, cols1);
 52         findKEdgeLast (seg_window, 0, 5, rows2, cols2);
 53         float max_zero_dist, max_one_dist;
 54         if(cols1.empty() || cols2.empty())
 55             max_zero_dist = 0.0;
 56         else{
 57             float avg_right = (rows2[0]+rows2[1]+rows2[2]+rows2[3]+rows2[4]) / (float)sizeof(rows2);
 58             float avg_left  = (rows1[0]+rows1[1]+rows1[2]+rows1[3]+rows1[4]) / (float)sizeof(rows1);
 59             max_zero_dist = avg_right - avg_left;
 60         }
 61         cols1.clear();
 62         cols2.clear();
 63         rows1.clear();
 64         rows2.clear();
 65 
 66         //处理1边界值
 67         findKEdgeFirst(seg_window, 255, 5, rows1, cols1);
 68         findKEdgeLast (seg_window, 255, 5, rows2, cols2);
 69         if(cols1.empty() || cols2.empty())
 70             max_one_dist = 0;
 71         else{
 72             float avg_right = (rows2[0]+rows2[1]+rows2[2]+rows2[3]+rows2[4]) / (float)sizeof(rows2);
 73             float avg_left  = (rows1[0]+rows1[1]+rows1[2]+rows1[3]+rows1[4]) / (float)sizeof(rows1);
 74             max_one_dist = avg_right - avg_left;
 75         }
 76         cols1.clear();
 77         cols2.clear();
 78         rows1.clear();
 79         rows2.clear();
 80 
 81         cv::Mat idx;
 82         findNonZero(seg_window, idx);
 83         int one_count = (int)idx.total();
 84         int zero_count = (int)seg_window.total() - one_count;
 85 
 86         float one_zero_diff = max_one_dist - max_zero_dist;
 87         float  dist_limit = 5;
 88 
 89         if(one_zero_diff > dist_limit)
 90             seg_window = ~ seg_window;
 91         else{
 92             if(one_zero_diff > -dist_limit && one_count > zero_count)
 93                 seg_window = ~ seg_window;
 94         }
 95 
 96         seg_window.copyTo(cv::Mat( spine_th, rect));
 97 //        imshow("spine_th", spine_th);
 98 //        waitKey();
 99 
100 
101     }
102     //去除噪声
103     if (removeNoise) {
104         vector<vector<cv::Point>> contours;
105         imshow("spine_th", spine_th);
106 //        WriteData("/Users/eternity/Desktop/未命名文件夹/quantize1.txt", spine_th);
107 //        waitKey();
108         findContours(spine_th, contours, RETR_EXTERNAL, CHAIN_APPROX_NONE);
109 
110         for (int i = 0; i < contours.size(); i ++) {
111             //compute bounding rect
112             cv::Rect rect = boundingRect(contours[i]);
113             double bbox_aspect = rect.width / (double)rect.height;
114             int bbox_area = rect.width * rect.height;
115             //compute solidity
116             vector<vector<Point>> hull(1);
117             convexHull( contours[i], hull[0] );
118             double convex_area = contourArea(hull[0]);
119             double solidity = bbox_area / convex_area;
120 
121             for (int j = 0; j < contours[i].size(); j ++) {
122                 if ( (rect.width > spineImage.cols / 1.001)
123                     || (rect.width > spineImage.cols / 1.4 && bbox_aspect > 5.0)
124                     || (rect.height > spineImage.cols / 1.1)
125                     || (bbox_area < pow(spineImage.cols/30, 2))
126                     || (bbox_aspect > 0.5 && bbox_aspect < 1.7 && solidity > 0.9) )
127 
128                     spine_th.at<int>(contours[i][j].x, contours[i][j].y) = 0;
129 //                WriteData("/Users/eternity/Desktop/未命名文件夹/quantize2.txt", spine_th);
130             }
131 
132 
133         }
134 
135     }
136     segSpine = spine_th;
137 //    transpose(segSpine, segSpine);
138 //    flip(segSpine, segSpine, 0);
139     imshow("segspine", segSpine);
140 //    waitKey();
141     spine_th.release();
142 
143 
144 
145 }
146 //对图片进行level量化
147 void TextDetector::imgQuantize(cv::Mat &src, cv::Mat &dst, double level){
148     dst = cv::Mat::zeros(src.rows, src.cols, CV_8U);
149     for (int i = 0; i < src.rows; i ++) {
150         uchar *data = src.ptr<uchar>(i);
151         uchar *data2 = dst.ptr<uchar>(i);
152         for (int j = 0; j < src.cols; j ++) {
153             if(data[j] <= level)
154                 data2[j] = 1;
155             else
156                 data2[j] = 2;
157 
158         }
159     }
160 
161 }
162 //找出最左边界处,前edgeValue个值为k的边界值
163 void TextDetector::findKEdgeFirst(cv::Mat &data, int edgeValue,int k,vector<int> &rows,vector<int> &cols){
164     int count = 0;
165     for (int i = 0; i < data.cols; i ++) {
166         uchar *u = data.ptr<uchar>(i);
167         for (int j = 0; j < data.rows; j ++) {
168             if(edgeValue == (int)u[j]){
169                 if(count < k){
170                     count ++;
171                     cols.push_back(i);
172                     rows.push_back(j);
173                 }
174 
175             }
176 
177         }
178     }
179 
180 }
181 //找出最右边界处,倒数edgeValue个值为k的边界值
182 void TextDetector::findKEdgeLast(cv::Mat &data, int edgeValue,int k,vector<int> &rows, vector<int> &cols){
183     int count = 0;
184     for (int i = data.cols - 1; i >= 0; i --) {
185         uchar *u = data.ptr<uchar>(i);
186         for (int j = data.rows - 1; j >= 0; j --) {
187             if(edgeValue == (int)u[j]){
188                 if(count < k){
189                     count ++;
190                     cols.push_back(i);
191                     rows.push_back(j);
192                 }
193 
194             }
195         }
196 
197     }
198 
199 }
200 //直方图均衡
201 void TextDetector::adaptiveHistEqual(cv::Mat &src,cv::Mat &dst,double clipLimit)
202 {
203     Ptr<cv::CLAHE> clahe = createCLAHE();
204     clahe->setClipLimit(clipLimit);
205     clahe->apply(src, dst);
206 }

 

---------------------
作者:eternity1118_
来源:CSDN
原文:https://blog.csdn.net/eternity1118_/article/details/52575374
版权声明:本文为博主原创文章,转载请附上博文链接!









以上是关于OpenCV在字符提取中进行的预处理(转)的主要内容,如果未能解决你的问题,请参考以下文章

OpenCV+Python实现将车牌数字分割为单个的字符图片

OpenCV-C++选择提取感兴趣区域(ROI区域)附用鼠标选取ROI区域的代码

OpenCV Java 实现票据纸张的四边形边缘检测与提取摆正

opencv提取论文中曲线坐标重新拟合绘制

OpenCV C++案例实战十《车牌号识别》

使用Python,OpenCV进行基本的图像处理——提取红色圆圈轮廓并绘制