OpenCV在字符提取中进行的预处理(转)
Posted abella
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了OpenCV在字符提取中进行的预处理(转)相关的知识,希望对你有一定的参考价值。
OCR简介
熟悉OCR的人都了解,OCR大致分为两个部分:
-文字提取text extractor
-文字识别text recognition
其中,第一部分属于图像处理,涉及图像分割的知识;第二部分则大多利用谷歌的Tesseract来进行字符识别,涉及到的东西不多,当然也不难,难的是要做到很高的识别准确率,以及较快的识别速度。
文字提取
这一部分工作是很关键的,因为文字提取的好坏,直接影响到最后的识别结果,相当于预处理部分,是非常重要的,其主要目的是为了分割出文字字符。
主要涉及工作有:
- 灰度化
- 锐化
- Otsu阈值分割
- 处理0和1边界值
- 如果有必要,还需要进行噪声去除,这里要涉及到找连通分量的相关计算。
1 void TextDetector::segmentText(cv::Mat &spineImage, cv::Mat &segSpine, bool removeNoise){ 2 3 cv::Mat spineGray; 4 cvtColor(spineImage, spineGray, CV_BGR2GRAY); 5 imshow("gray source" , spineGray); 6 spineGray = spineGray - 0.5; 7 // WriteData("/Users/eternity/Desktop/未命名文件夹/gray1.txt", spineGray); 8 // waitKey(); 9 cv::Mat spineAhe; 10 adaptiveHistEqual(spineGray, spineAhe, 0.01); 11 imshow("ahe", spineAhe); 12 // WriteData("/Users/eternity/Desktop/未命名文件夹/gray2.txt", spineAhe); 13 14 int window_num = 40; 15 16 double window_h = (spineImage.rows / (double)window_num + 1e-3); 17 18 int window_w = spineImage.cols; 19 20 cv::Mat spine_th = cv::Mat::zeros(spineGray.size(), CV_8U); 21 22 for (int i = 0; i < window_num; i ++) { 23 double cut_from_r = window_h * i; 24 double cut_to_r = window_h * (i+1); 25 cv::Mat window_img = cv::Mat::zeros(Size(cut_to_r-cut_from_r + 1, window_w), CV_8U); 26 cv::Rect rect = cv::Rect(0, cut_from_r, window_w-1, cut_to_r - cut_from_r + 1); 27 window_img = cv::Mat(spineGray, rect); 28 imshow("window section", window_img); 29 30 sharpenImage(window_img, window_img); 31 imshow("sharpen", window_img); 32 // waitKey(); 33 // WriteData("/Users/eternity/Desktop/未命名文件夹/gray4.txt", window_img); 34 double max_local,min_local; 35 minMaxLoc(window_img, &min_local, &max_local); 36 double color_diff = max_local - min_local; 37 double thresh; 38 cv::Mat window_tmp; 39 if (color_diff > 50) 40 thresh = threshold(window_img, window_tmp, 1, 255, THRESH_OTSU); 41 else 42 thresh = 0; 43 // cout<<thresh<<endl; 44 cv::Mat seg_window(window_img.size(), CV_64F); 45 imgQuantize(window_img, seg_window, thresh); 46 // WriteData("/Users/eternity/Desktop/未命名文件夹/quantize2.txt", seg_window); 47 seg_window = seg_window == 1; 48 // seg_window = seg_window / 255; 49 //处理0边界值 50 vector<int> cols1,cols2,rows1,rows2; 51 findKEdgeFirst(seg_window, 0, 5, rows1, cols1); 52 findKEdgeLast (seg_window, 0, 5, rows2, cols2); 53 float max_zero_dist, max_one_dist; 54 if(cols1.empty() 
|| cols2.empty()) 55 max_zero_dist = 0.0; 56 else{ 57 float avg_right = (rows2[0]+rows2[1]+rows2[2]+rows2[3]+rows2[4]) / (float)sizeof(rows2); 58 float avg_left = (rows1[0]+rows1[1]+rows1[2]+rows1[3]+rows1[4]) / (float)sizeof(rows1); 59 max_zero_dist = avg_right - avg_left; 60 } 61 cols1.clear(); 62 cols2.clear(); 63 rows1.clear(); 64 rows2.clear(); 65 66 //处理1边界值 67 findKEdgeFirst(seg_window, 255, 5, rows1, cols1); 68 findKEdgeLast (seg_window, 255, 5, rows2, cols2); 69 if(cols1.empty() || cols2.empty()) 70 max_one_dist = 0; 71 else{ 72 float avg_right = (rows2[0]+rows2[1]+rows2[2]+rows2[3]+rows2[4]) / (float)sizeof(rows2); 73 float avg_left = (rows1[0]+rows1[1]+rows1[2]+rows1[3]+rows1[4]) / (float)sizeof(rows1); 74 max_one_dist = avg_right - avg_left; 75 } 76 cols1.clear(); 77 cols2.clear(); 78 rows1.clear(); 79 rows2.clear(); 80 81 cv::Mat idx; 82 findNonZero(seg_window, idx); 83 int one_count = (int)idx.total(); 84 int zero_count = (int)seg_window.total() - one_count; 85 86 float one_zero_diff = max_one_dist - max_zero_dist; 87 float dist_limit = 5; 88 89 if(one_zero_diff > dist_limit) 90 seg_window = ~ seg_window; 91 else{ 92 if(one_zero_diff > -dist_limit && one_count > zero_count) 93 seg_window = ~ seg_window; 94 } 95 96 seg_window.copyTo(cv::Mat( spine_th, rect)); 97 // imshow("spine_th", spine_th); 98 // waitKey(); 99 100 101 } 102 //去除噪声 103 if (removeNoise) { 104 vector<vector<cv::Point>> contours; 105 imshow("spine_th", spine_th); 106 // WriteData("/Users/eternity/Desktop/未命名文件夹/quantize1.txt", spine_th); 107 // waitKey(); 108 findContours(spine_th, contours, RETR_EXTERNAL, CHAIN_APPROX_NONE); 109 110 for (int i = 0; i < contours.size(); i ++) { 111 //compute bounding rect 112 cv::Rect rect = boundingRect(contours[i]); 113 double bbox_aspect = rect.width / (double)rect.height; 114 int bbox_area = rect.width * rect.height; 115 //compute solidity 116 vector<vector<Point>> hull(1); 117 convexHull( contours[i], hull[0] ); 118 double convex_area = 
contourArea(hull[0]); 119 double solidity = bbox_area / convex_area; 120 121 for (int j = 0; j < contours[i].size(); j ++) { 122 if ( (rect.width > spineImage.cols / 1.001) 123 || (rect.width > spineImage.cols / 1.4 && bbox_aspect > 5.0) 124 || (rect.height > spineImage.cols / 1.1) 125 || (bbox_area < pow(spineImage.cols/30, 2)) 126 || (bbox_aspect > 0.5 && bbox_aspect < 1.7 && solidity > 0.9) ) 127 128 spine_th.at<int>(contours[i][j].x, contours[i][j].y) = 0; 129 // WriteData("/Users/eternity/Desktop/未命名文件夹/quantize2.txt", spine_th); 130 } 131 132 133 } 134 135 } 136 segSpine = spine_th; 137 // transpose(segSpine, segSpine); 138 // flip(segSpine, segSpine, 0); 139 imshow("segspine", segSpine); 140 // waitKey(); 141 spine_th.release(); 142 143 144 145 } 146 //对图片进行level量化 147 void TextDetector::imgQuantize(cv::Mat &src, cv::Mat &dst, double level){ 148 dst = cv::Mat::zeros(src.rows, src.cols, CV_8U); 149 for (int i = 0; i < src.rows; i ++) { 150 uchar *data = src.ptr<uchar>(i); 151 uchar *data2 = dst.ptr<uchar>(i); 152 for (int j = 0; j < src.cols; j ++) { 153 if(data[j] <= level) 154 data2[j] = 1; 155 else 156 data2[j] = 2; 157 158 } 159 } 160 161 } 162 //找出最左边界处,前edgeValue个值为k的边界值 163 void TextDetector::findKEdgeFirst(cv::Mat &data, int edgeValue,int k,vector<int> &rows,vector<int> &cols){ 164 int count = 0; 165 for (int i = 0; i < data.cols; i ++) { 166 uchar *u = data.ptr<uchar>(i); 167 for (int j = 0; j < data.rows; j ++) { 168 if(edgeValue == (int)u[j]){ 169 if(count < k){ 170 count ++; 171 cols.push_back(i); 172 rows.push_back(j); 173 } 174 175 } 176 177 } 178 } 179 180 } 181 //找出最右边界处,倒数edgeValue个值为k的边界值 182 void TextDetector::findKEdgeLast(cv::Mat &data, int edgeValue,int k,vector<int> &rows, vector<int> &cols){ 183 int count = 0; 184 for (int i = data.cols - 1; i >= 0; i --) { 185 uchar *u = data.ptr<uchar>(i); 186 for (int j = data.rows - 1; j >= 0; j --) { 187 if(edgeValue == (int)u[j]){ 188 if(count < k){ 189 count ++; 190 cols.push_back(i); 191 
rows.push_back(j); 192 } 193 194 } 195 } 196 197 } 198 199 } 200 //直方图均衡 201 void TextDetector::adaptiveHistEqual(cv::Mat &src,cv::Mat &dst,double clipLimit) 202 { 203 Ptr<cv::CLAHE> clahe = createCLAHE(); 204 clahe->setClipLimit(clipLimit); 205 clahe->apply(src, dst); 206 }
---------------------
作者:eternity1118_
来源:CSDN
原文:https://blog.csdn.net/eternity1118_/article/details/52575374
版权声明:本文为博主原创文章,转载请附上博文链接!
以上是关于OpenCV在字符提取中进行的预处理(转)的主要内容,如果未能解决你的问题,请参考以下文章
OpenCV+Python实现将车牌数字分割为单个的字符图片
OpenCV-C++选择提取感兴趣区域(ROI区域)附用鼠标选取ROI区域的代码