从单词中提取字符时,为什么同一单词中相同字符的置信度值不同?
Posted
技术标签:
【中文标题】从单词中提取字符时,为什么同一单词中相同字符的置信度值不同?【英文标题】:while i am extract the character from the word at that time why confidence values of the similar character in words are different?(从单词中提取字符时,为什么同一单词中相同字符的置信度值不同?) 【发布时间】:2016-10-29 16:50:20 【问题描述】:我从图像中提取单词,并检查该单词中每个字符的置信度。即使是同一个单词中的相同字符,每次得到的置信度也不一样。
下面的例子。
以单词 01.01.2012 为例,其中字符 0 出现了 3 次,而这 3 次我得到的置信度各不相同。
这是输入图像,输出结果是文本文件,如下所示。我的需求是只保留数字数据:如果图像中的某个单词包含 0 到 9 之间的数字,我就把该单词、对应的字符及其置信度值存入文件。我为 0 到 9 的每个数字各生成了一个文件;作为参考,下面展示的是字符 0 对应的文件。
WORD CHAR confidence
7/11/2014 0 94.3153
01.01.2012 0 91.9117
01.01.2012 0 95.059
01.01.2012 0 95.1877
31.12.2012 0 92.1003
05.07.2013 0 94.4376
05.07.2013 0 97.3389
05.07.2013 0 92.4576
2012 0 94.0608
2012 0 93.1969
31.12.2012 0 93.8993
31.12.2011 0 94.513
606 0 93.5746
405 0 93.6727
109.821 0 96.2786
331.028 0 96.1837
109.506; 0 93.1421
109.506; 0 93.7133
110.427 0 93.7141
946.130 0 96.3438
200.274 0 95.7532
200.274 0 94.8678
10.553.331 0 96.3162
10.186.341 0 94.15
63.401 0 94.6042
346.350 0 96.2305
343.044 0 95.9801
346.350; 0 93.5741
343.044 0 93.8484
284.506 0 94.6761
0 0 95.9303
420 0 94.0067
0 0 93.2645
7.355.042 0 95.9187
9.108 0 96.3331
10. 0 93.9019
12.042 0 97.3823
294.704 0 93.4084
4.350 0 96.0915
0 0 95.4884
20.559.209 0 95.4821
20.559.209 0 94.1849
19.207.660 0 95.9933
19.207.660 0 95.1577
31.12.2012 0 92.7785
31.12.2011 0 94.6773
14.054 0 95.3734
44.706 0 93.6371
58.760 0 97.2008
4.111.720 0 94.4336
2.873.806 0 95.8218
11.083.608 0 96.1708
11.083.608 0 94.3456
10.721.302 0 93.3877
10.721.302 0 94.978
5.045.424 0 95.424
4.242.083 0 95.424
谁能告诉我,这些结果背后的置信度是如何运作的?
#include "./include/header.h"
#include "./include/enum.h"
/// Rectangle of a recognised region together with the text recognised in it.
///
/// Every member has a default so that a freshly declared RECT is in a known
/// state (zero geometry, no text) until the caller fills it in.
/// ocrResult is a plain pointer: this class neither allocates nor releases it.
class RECT
{
public:
    int col = 0;                ///< column of the left edge (written as CORD_X by OCR::dumpIntoFile)
    int row = 0;                ///< row of the top edge (written as CORD_Y)
    int width = 0;              ///< extent in columns (written as CORD_W)
    int height = 0;             ///< extent in rows (written as CORD_H)
    char *ocrResult = nullptr;  ///< NUL-terminated recognised text, or nullptr when unset
};
class OCR
public:
int *g_pixelBuffer;
int *g_pixelBufferForWord;
int *g_histogram;
int g_Id;
int initialize (const int row, const int col)
/// find the size of the image
int size = row * col ;
/// allocate the memory for pixel bufferr
g_pixelBuffer = (int *)calloc(size, sizeof(int));
/// Check for proper allocation
if(g_pixelBuffer == NULL) return MemoryNotAllocated;
g_pixelBufferForWord = (int *)calloc(size, sizeof(int));
/// Check for proper allocation
if(g_pixelBufferForWord == NULL) return MemoryNotAllocated;
/// allocating memory for histogram
g_histogram = (int *)calloc(size ,sizeof(int));
/// check proper allocation
if(g_histogram == NULL) return MemoryNotAllocated;
g_Id = 1;
return Success;
vector<RECT > processImage(Mat &image, int size, int ,int );
void dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile);
void release()
// release pixel_buffer memory
free(g_pixelBuffer);
free(g_pixelBufferForWord);
/// release histogram memory
free(g_histogram);
;
// One output stream per digit: "N.txt" receives a row for every recognised
// character whose first byte is the digit N (see dumpNumberConfidenceIntoFile).
// The streams are opened when these globals are constructed.
ofstream myfile1("1.txt");
ofstream myfile2("2.txt");
ofstream myfile3("3.txt");
ofstream myfile4("4.txt");
ofstream myfile5("5.txt");
ofstream myfile6("6.txt");
ofstream myfile7("7.txt");
ofstream myfile8("8.txt");
ofstream myfile9("9.txt");
ofstream myfile0("0.txt");
// Marks the outline of a box in an int pixel buffer; the last int is the row
// length of that buffer.
void displayBoundingBox(int staCol, int staRow, int edCol, int edRow
, int *PixelBufferForWord, int);
// Appends "word, character, confidence" to the per-digit file when the
// character starts with a digit.
void dumpNumberConfidenceIntoFile(char *word, float confi, char *Char);
// Recognises the words inside one text line and appends their rectangles to
// rectBuffer.
void getWordDataFromLine(const int *PixelBuffer, int *PixelBufferForWord, RECT &rectLine,
int mainImageCol, vector <RECT> &rectBuffer);
// Writes a rectangular part of an int pixel buffer as a plain "P1" bitmap.
void dumpDataIntoFile (int *, int collenth, int strow, int stcol,
int enrow, int encols, char *output);
//void dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile);
// Converts the colour image to grey levels, builds the histogram and then
// binarises via getBinaryImage.
void GetBinaryImage(Mat &image ,OCR *,const int size);
// Picks a threshold from the histogram and turns the pixel buffer into 0/1.
void getBinaryImage(OCR *,int size);
// Recognises the characters inside one word and appends their rectangles to
// rectBuffer.
void getCharDataFromWord(const int *PixelBuffer,int *PixelBufferForChar, int startColWord ,int startRowWord,
int endColWord, int endRowWord, RECT &rectLine,
int mainImageCol,vector<RECT > &rectBuffer, RECT &rectWord);
int main(int argc ,char **argv)
int rs = Success;
Mat image = imread(argv[1]);//read the image
if(!image.data)
cout << "can't able to read the image" << endl;
return 0;
int rows = image.rows;// get the rows
int cols = image.cols;// get the col
int size = rows * cols;// get the size
OCR ocr;
/// Allocate or initialize memory
rs = ocr.initialize (rows, cols);
/// check proper allocation
if(rs == MemoryNotAllocated) return rs;
myfile1 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile2 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile3 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile4 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile5 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile6 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile7 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile8 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile9 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile0 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
vector <RECT> rectBuffer = ocr.processImage(image, size, rows, cols);
ocr.dumpIntoFile(rectBuffer, argv[2] );
//dumpDataIntoFile (ocr.g_pixelBufferForWord, cols, 0, 0, rows - 1, cols - 1, ( char *)"test123456.pbm");
ocr.release();
vector <RECT> OCR ::processImage(Mat &image, int size, int rows, int cols)
GetBinaryImage (image, this, size);// convert the image into the binary
for(int i = 0; i < size; i++)
g_pixelBufferForWord[i] = g_pixelBuffer[i];
// dumpDataIntoFile (this, w, 0, 0, h - 1, w - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tess;
if (tess.Init("/usr/share/tesseract/tessdata", "eng"))
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
tess.SetImage((unsigned char*)g_pixelBuffer, cols, rows, sizeof(int)
,sizeof(int) * cols);
tess.Recognize(0);
tesseract::ResultIterator *riLine = tess.GetIterator();
tesseract::PageIteratorLevel levelLine = tesseract:: RIL_TEXTLINE;
RECT rectLine;
vector <RECT> rectBuffer;
if(riLine!=0)
do
char *Line = riLine->GetUTF8Text(levelLine);
if(Line != NULL)
int startCol, startRow, endCol, endRow;
riLine->BoundingBox(levelLine, &startCol, &startRow, &endCol, &endRow);
int width = endCol - startCol + 1;
int height = endRow - startRow + 1;
rectLine.col = startCol;
rectLine.row = startRow;
rectLine.width = width;
rectLine.height = height;
int length = strlen(Line) + 1;
rectLine.ocrResult = (char *)calloc( length, sizeof(char));
if(rectLine.ocrResult == NULL)
cout << "rectLine.ocrResult is not allocate"<< endl;
exit(1);
strcpy(rectLine.ocrResult, Line);
rectBuffer.push_back(rectLine);
getWordDataFromLine(g_pixelBuffer,g_pixelBufferForWord, rectLine,cols, rectBuffer);
free(Line);
//delete Line;
while (riLine->Next(levelLine));
//dumpIntoFile(rectBuffer, argv[2]);
tess.End();
delete riLine;
return rectBuffer;
void getWordDataFromLine(const int *PixelBuffer, int *PixelBufferForWord, RECT &rectLine,
int mainImageCol, vector <RECT> &rectBuffer)
int index;
int *SubImageBuffer = (int *)calloc(rectLine.width * rectLine.height, sizeof(int));
if(!SubImageBuffer)
cout << "SubImageBuffer not allocate" << endl;
int i = 0;
for(int r = rectLine.row ; r < rectLine.row + rectLine.height; r++)
for(int c = rectLine.col ; c < rectLine.col + rectLine.width; c++)
index = r * mainImageCol + c;
SubImageBuffer[i++] = PixelBuffer[index];
//dumpDataIntoFile (SubImageBuffer, w, 0, 0, h - 1, w - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tessWord;
if (tessWord.Init("/usr/share/tesseract/tessdata", "eng"))
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
tessWord.SetImage((unsigned char*)SubImageBuffer, rectLine.width,
rectLine.height, sizeof(int) ,sizeof(int) * rectLine.width);
tessWord.Recognize(0);
tesseract::ResultIterator *riWord = tessWord.GetIterator();
tesseract::PageIteratorLevel levelWord = tesseract:: RIL_WORD;
RECT rectWord;
if(riWord!=0)
do
char *Word = riWord->GetUTF8Text(levelWord);
if(Word != NULL)
int startCol, startRow, endCol, endRow;
int staCol = 0, staRow = 0, edCol = 0, edRow = 0;
riWord->BoundingBox(levelWord, &startCol, &startRow, &endCol, &endRow);
staCol = startCol;
staRow = startRow;
edCol = endCol;
edRow = endRow;
staCol += rectLine.col;
staRow += rectLine.row;
edRow += rectLine.row;
edCol += rectLine.col;
rectWord.col = staCol;
rectWord.row = staRow;
rectWord.width = edCol - staCol + 1;
rectWord.height = edRow - staRow + 1;
int length = strlen(Word) + 1;
rectWord.ocrResult = (char *)calloc( length, sizeof(char));
if(rectWord.ocrResult == NULL)
cout << "rectWord.ocrResult is not allocate"<< endl;
exit(1);
strcpy(rectWord.ocrResult, Word);
rectBuffer.push_back(rectWord);
//displayBoundingBox(staCol, staRow, edCol, edRow ,PixelBufferForWord, mainImageCol);
getCharDataFromWord(SubImageBuffer, PixelBufferForWord, startCol, startRow ,endCol ,endRow,
rectLine,mainImageCol, rectBuffer, rectWord);
//delete Word;
free(Word);
while (riWord->Next(levelWord));
delete riWord;
tessWord.End();
free(SubImageBuffer);
void getCharDataFromWord(const int *PixelBuffer,int *PixelBufferForChar, int startColWord ,int startRowWord,
int endColWord, int endRowWord, RECT &rectLine,
int mainImageCol,vector<RECT > &rectBuffer, RECT &rectWord)
int index;
int width = endColWord - startColWord + 1;
int height = endRowWord - startRowWord + 1;
int *SubImageBufferForChar = (int *)calloc(width * height, sizeof(int));
if(!SubImageBufferForChar)
cout << "SubImageBuffer not read" << endl;
int i = 0;
for(int r = startRowWord ; r <= endRowWord; r++)
for(int c = startColWord; c <= endColWord; c++)
index = r * rectLine.width + c;
SubImageBufferForChar[i++] = PixelBuffer[index];
//dumpDataIntoFile (SubImageBufferForChar, width, 0, 0, height - 1, width - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tessChar;
if (tessChar.Init("/usr/share/tesseract/tessdata", "eng"))
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
tessChar.SetImage((unsigned char*)SubImageBufferForChar, width,
height, sizeof(int) ,sizeof(int) * width);
tessChar.Recognize(0);
tesseract::ResultIterator *riChar = tessChar.GetIterator();
tesseract::PageIteratorLevel levelChar = tesseract:: RIL_SYMBOL;
RECT rectChar;
if(riChar!=0)
do
char *Char = riChar->GetUTF8Text(levelChar);
if(Char != NULL)
float conf = riChar->Confidence(levelChar);
int startCol, startRow, endCol, endRow;
riChar->BoundingBox(levelChar, &startCol, &startRow, &endCol, &endRow);
startCol += rectWord.col;
startRow += rectWord.row;
endRow += rectWord.row;
endCol += rectWord.col;
rectChar.col = startCol;
rectChar.row = startRow;
rectChar.width = endCol - startCol + 1;
rectChar.height = endRow - startRow + 1;
int length = strlen(Char) + 1;
rectChar.ocrResult = (char *)calloc( length, sizeof(char));
if(rectChar.ocrResult == NULL)
cout << "rectChar.ocrResult is not allocate"<< endl;
exit(1);
strcpy(rectChar.ocrResult, Char);
rectBuffer.push_back(rectChar);
dumpNumberConfidenceIntoFile(rectWord.ocrResult, conf, Char);
//displayBoundingBox(startCol, startRow, endCol, endRow ,PixelBufferForChar, mainImageCol);
//delete Char;
free(Char);
while (riChar->Next(levelChar));
delete riChar;
tessChar.End();
free(SubImageBufferForChar);
void dumpNumberConfidenceIntoFile(char *word, float confi, char *Char)
if(Char[0] >= '0' && Char[0] <= '9')
if(Char[0] == '0')
myfile0 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
else if(Char[0] == '1')
myfile1 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
else if(Char[0] == '2')
myfile2 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
else if(Char[0] == '3')
myfile3 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
else if(Char[0] == '4')
myfile4 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
else if(Char[0] == '5')
myfile5 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
else if(Char[0] == '6')
myfile6 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
else if(Char[0] == '7')
myfile7 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
else if(Char[0] == '8')
myfile8 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
else if(Char[0] == '9')
myfile9 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
void OCR ::dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile)
ofstream myfile(outputFile);
myfile << "ID" << '\t' << "CORD_X" << '\t' << "CORD_Y" << '\t' <<
"CORD_W" << '\t' << "CORD_H" << '\t' << "STRING" << endl;
for(auto it = rectBuffer.begin(); it != rectBuffer.end(); it++)
myfile << g_Id++ << '\t' << it->col << '\t' << it->row << '\t' <<
it->width << '\t' << it->height << '\t';
int length = strlen(it->ocrResult);
//cout << "in the string (" << length << ") ::" << endl;
for(int j = 0; j < length && it->ocrResult[j] != '\n'; j++)
myfile << it->ocrResult[j];
myfile << endl;
/// Binarises ocr->g_pixelBuffer in place using a threshold chosen from
/// ocr->g_histogram (bins 0..255).
///
/// The threshold is the grey level t that maximises
/// q1 * q2 * (u1 - u2)^2, where q1/q2 are the pixel counts at or below / above
/// t and u1/u2 the mean grey levels of those two groups. Pixels above the
/// threshold become 0, all others become 1.
///
/// @param ocr  supplies g_histogram (read) and g_pixelBuffer (rewritten)
/// @param size number of pixels in g_pixelBuffer
void getBinaryImage(OCR *ocr, int size)
{
    const long long total = size;

    // Sum of grey level * count over the whole histogram. Kept in double:
    // a float accumulator loses low-order digits once the sum gets large.
    double weightedTotal = 0;
    for(int i = 0; i < 256 ;i++)
    {
        weightedTotal += static_cast<double>(i) * ocr->g_histogram[i];
    }

    long long countBelow = 0;   // q1: pixels with level <= t
    double weightedBelow = 0;   // sum of level * count for those pixels
    double bestScore = 0;
    int threshold = 0;
    for(int t = 0; t < 256 ; t++)
    {
        countBelow += ocr->g_histogram[t];
        weightedBelow += static_cast<double>(t) * ocr->g_histogram[t];

        // An empty lower group has no mean; skip it explicitly instead of
        // dividing by zero and relying on the comparison rejecting NaN.
        if(countBelow == 0)
        {
            continue;
        }
        const long long countAbove = total - countBelow;  // q2
        // Once the upper group is empty it stays empty for every larger t.
        if(countAbove <= 0)
        {
            break;
        }

        const double meanBelow = weightedBelow / countBelow;
        const double meanAbove = (weightedTotal - weightedBelow) / countAbove;
        const double gap = meanBelow - meanAbove;
        const double score = static_cast<double>(countBelow) * static_cast<double>(countAbove) * gap * gap;
        if(score > bestScore)
        {
            threshold = t;
            bestScore = score;
        }
    }

    for(int i = 0; i < size; i++)
    {
        ocr->g_pixelBuffer[i] = (ocr->g_pixelBuffer[i] > threshold) ? 0 : 1;
    }
}
/// Turns the colour image into grey levels, counts them, then binarises.
///
/// For each of the first `size` pixels the three channel values are averaged
/// (integer division by 3); the result is stored in ocr->g_pixelBuffer and
/// counted in ocr->g_histogram. getBinaryImage() then rewrites the pixel
/// buffer as a 0/1 image.
///
/// @param image colour image; split into its three channel planes
/// @param ocr   receives the grey levels, the histogram counts and finally
///              the binary image
/// @param size  number of pixels to process
void GetBinaryImage(Mat &image ,OCR *ocr ,const int size)
{
    Mat planes[3];
    split(image, planes);

    // Channel order as used by the original code: 0 = blue, 1 = green, 2 = red.
    const uchar *bluePlane = planes[0].data;
    const uchar *greenPlane = planes[1].data;
    const uchar *redPlane = planes[2].data;

    int *grayLevels = ocr->g_pixelBuffer;
    int *levelCounts = ocr->g_histogram;
    for(int px = 0; px < size; ++px)
    {
        const int level = (redPlane[px] + greenPlane[px] + bluePlane[px]) / 3;
        grayLevels[px] = level;
        ++levelCounts[level];  // histogram for the thresholding step
    }

    getBinaryImage (ocr, size);
}
/// Writes a rectangular part of an int pixel buffer as a plain-text "P1"
/// bitmap: a "P1" line, a "<cols> <rows>" line, then one line per row in
/// which every pixel is "1 " (non-zero) or "0 " (zero).
///
/// @param pixelBuffer source pixels, row length collenth
/// @param collenth    number of ints per row in pixelBuffer
/// @param strow,stcol first row / column to write (inclusive)
/// @param enrow,encols last row / column to write (inclusive)
/// @param output      path of the file to create
///
/// If either pointer is null or the file cannot be opened, a message goes to
/// std::cerr and nothing is written.
void dumpDataIntoFile (int *pixelBuffer, int collenth, int strow, int stcol,
                       int enrow, int encols, char *output)
{
    if(pixelBuffer == nullptr || output == nullptr)
    {
        std::cerr << "dumpDataIntoFile: null buffer or file name" << '\n';
        return;
    }

    std::ofstream myfile(output);
    if(!myfile)
    {
        // Previously a failed open went unnoticed and the dump was silently lost.
        std::cerr << "dumpDataIntoFile: cannot open " << output << '\n';
        return;
    }

    const int cols = encols - stcol + 1;
    const int rows = enrow - strow + 1;
    myfile << "P1" << '\n';
    myfile << cols << " " << rows << '\n';

    for(int i = strow; i <= enrow ; i++ )
    {
        for(int j = stcol; j <= encols; j++ )
        {
            const int index = i * collenth + j;
            myfile << (pixelBuffer[index] != 0 ? "1" : "0") << " ";
        }
        myfile << '\n';  // '\n' rather than endl: no need to flush after every row
    }
    myfile.close();
}
/// Marks the outline of a box in an int pixel buffer with the value 255.
///
/// The left and right edges (columns staCol and edCol) are marked for rows
/// staRow up to, but not including, edRow; the top and bottom edges (rows
/// staRow and edRow) are marked for columns staCol up to, but not including,
/// edCol. The pixel at (edRow, edCol) is therefore left untouched.
///
/// @param staCol,staRow      top-left corner of the box
/// @param edCol,edRow        bottom-right corner of the box
/// @param PixelBufferForWord buffer to draw into
/// @param mainImageCol       number of ints per row in the buffer
void displayBoundingBox(int staCol, int staRow, int edCol, int edRow , int *PixelBufferForWord,int mainImageCol)
{
    const int kOutlineValue = 255;

    // Vertical edges: one pixel on the left and one on the right of each row.
    for(int row = staRow; row < edRow; ++row)
    {
        const int rowOffset = row * mainImageCol;
        PixelBufferForWord[rowOffset + staCol] = kOutlineValue;
        PixelBufferForWord[rowOffset + edCol] = kOutlineValue;
    }

    // Horizontal edges: one pixel on the top row and one on the bottom row of
    // each column.
    const int topOffset = staRow * mainImageCol;
    const int bottomOffset = edRow * mainImageCol;
    for(int column = staCol; column < edCol; ++column)
    {
        PixelBufferForWord[topOffset + column] = kOutlineValue;
        PixelBufferForWord[bottomOffset + column] = kOutlineValue;
    }
}
【问题讨论】:
您需要出示您的代码。另外,请在 C 和 C++ 中选择一个,不要在标签中同时使用两者。 因为在 01.01.2012 中,OCR 代码查看的是图像中三个不同位置的 0。所以每个 0 的置信度都很高,但并不完全相同,因为各处的图像并不完全一样。
我们需要查看更多机密数据才能做出判断。
...比如今天金库里有多少现金。
现在你可以使用我的代码了,Barmar。你有更好的主意吗?实际上,我先提取行,然后从该行中提取单词,再从该单词中提取字符。最后我检查字符是否在 0 到 9 之间,如果是,就把它存入文件。最终我生成了一个文件,其中先存储行数据,然后是属于该行的单词数据,再然后是属于该单词的字符数据。
【参考方案1】:
问:- 置信度是如何计算的。
置信度算法基于已识别字符与可用字符之间的距离。 此链接中的“语言分析”部分提供了您正在寻找的答案:- https://github.com/tesseract-ocr/docs/blob/master/tesseracticdar2007.pdf
【讨论】:
以上是关于当我当时从单词中提取字符时,为啥单词中相似字符的置信度值不同?的主要内容,如果未能解决你的问题,请参考以下文章