Azure 计算机视觉:识别印刷文本
Posted
技术标签:
【中文标题】Azure 计算机视觉:识别印刷文本【英文标题】:Azure Computer Vision : Recognize Printed Text 【发布时间】:2021-12-30 21:09:14 【问题描述】:我正在 Node.js 中使用 Azure 计算机视觉,想从图像中提取文本。代码可以按预期运行,但我遇到了一些问题。代码如下:
'use strict';

// Sends an image URL to the Azure Computer Vision OCR endpoint and prints the
// JSON response, pretty-printed, to the console.

// NOTE: `request` is a third-party package and must be installed separately.
const request = require('request');

// Placeholders: replace with a real subscription key and endpoint.
// The endpoint is concatenated directly with the path below, so it has to
// end with a trailing '/'.
const subscriptionKey = 'key';
const endpoint = 'endpoint';

const uriBase = `${endpoint}vision/v3.1/ocr`;

const imageUrl = 'https://livesimply.me/wp-content/uploads/2015/09/foods-to-avoid-real-food-3036-2-1024x683.jpg';

// Request parameters (sent as the query string).
// NOTE(review): 'unk' presumably asks the service to auto-detect the
// language - confirm against the OCR API reference.
const params = {
  'language': 'unk',
  'detectOrientation': 'true',
};

const options = {
  uri: uriBase,
  qs: params,
  // Serialize with JSON.stringify so the body is always valid JSON, even if
  // the image URL contains characters that need escaping.
  body: JSON.stringify({ url: imageUrl }),
  headers: {
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': subscriptionKey,
  },
};

request.post(options, (error, response, body) => {
  if (error) {
    console.log('Error: ', error);
    return;
  }
  // Round-trip through parse/stringify purely to pretty-print the response.
  const jsonResponse = JSON.stringify(JSON.parse(body), null, ' ');
  console.log('JSON Response\n');
  console.log(jsonResponse);
});
输出:
"regions": [
"boundingBox": "0,191,277,281",
"lines": [
"boundingBox": "53,191,23,49",
"words": [
"boundingBox": "53,191,23,49",
"text": "in"
]
,
"boundingBox": "0,285,277,82",
"words": [
"boundingBox": "0,285,150,82",
"text": ")arb.0g"
,
"boundingBox": "214,288,63,63",
"text": "0%"
]
,
"boundingBox": "14,393,45,79",
"words": [
"boundingBox": "14,393,45,79",
"text": "Og"
]
,
"boundingBox": "213,394,63,63",
"words": [
"boundingBox": "213,394,63,63",
"text": "00/0"
]
]
,
"boundingBox": "322,184,352,457",
"lines": [
"boundingBox": "326,184,348,54",
"words": [
"boundingBox": "326,184,239,52",
"text": "INGREDIENTS:"
,
"boundingBox": "588,188,86,50",
"text": "WHITE"
]
,
"boundingBox": "325,248,281,59",
"words": [
"boundingBox": "325,248,83,56",
"text": "TUNA,"
,
"boundingBox": "417,250,127,51",
"text": "SOYBEAN"
,
"boundingBox": "555,252,51,55",
"text": "OIL,"
]
,
"boundingBox": "324,313,341,60",
"words": [
"boundingBox": "324,313,155,52",
"text": "VEGETABLE"
,
"boundingBox": "489,316,101,56",
"text": "BROTH,"
,
"boundingBox": "598,317,67,56",
"text": "SALT,"
]
,
"boundingBox": "324,378,334,53",
"words": [
"boundingBox": "324,378,235,52",
"text": "PYROPHOSPHATE"
,
"boundingBox": "566,381,92,50",
"text": "ADDED"
]
,
"boundingBox": "323,519,248,52",
"words": [
"boundingBox": "323,519,193,51",
"text": "DISTRIBUTED"
,
"boundingBox": "528,521,43,50",
"text": "BY:"
]
,
"boundingBox": "322,584,298,57",
"words": [
"boundingBox": "322,584,124,50",
"text": "BUMBLE"
,
"boundingBox": "457,585,52,50",
"text": "BEE"
,
"boundingBox": "519,585,101,56",
"text": "FOODS,"
]
]
,
"boundingBox": "791,400,198,117",
"lines": [
"boundingBox": "921,400,68,45",
"words": [
"boundingBox": "921,400,68,45",
"text": ",11."
]
,
"boundingBox": "791,464,128,53",
"words": [
"boundingBox": "791,464,75,53",
"text": "PRC:"
,
"boundingBox": "874,467,45,48",
"text": "x"
]
]
]
但我在使用这段代码时面临一些挑战:
-
我希望输出为字符串而不是 JSON 树。
我想只提取成分而不是所有文本。
在某些情况下,图像可能包含成分而未指定成分关键字,在这种情况下如何提取成分?
图片:
感谢各位专家的帮助。
【问题讨论】:
【参考方案1】:我们使用计算机视觉 REST API 从图像中提取带有光学字符识别 (OCR) 的印刷文本。并以 JSON 格式返回成功的响应。您无法从此 Azure 认知服务获得直接的字符串输出。
针对问题-
我希望输出为字符串而不是 JSON 树。
我们不能像图片中看到的那样直接打印成分。要提取内容并以特定格式显示,在获得 JSON 字符串后,将其解析为 JSON 对象并运行循环以从中提取数据。之后使用 split 函数将数据存储到数组中。如下面的代码片段所示。
/**
 * Callback for the OCR request: converts the JSON response body into an
 * array of plain-text lines (one entry per recognized line of text).
 *
 * The traversal follows the response layout: regions -> lines -> words,
 * where each word carries its string in the `text` property.
 *
 * @param {Error|null} error - Transport error reported by the HTTP client, if any.
 * @param {object} response - HTTP response object (unused here).
 * @param {string} body - Raw JSON string returned by the OCR endpoint.
 * @returns {string[]} The recognized text, one array entry per line; empty on error.
 * @throws {SyntaxError} If `body` is not valid JSON.
 */
function handleOcrResponse(error, response, body) {
  if (error) {
    console.log(error);
    return [];
  }

  // Parsing the JSON string.
  const jsonObj = JSON.parse(body);

  // Running loops to extract the text values. The accumulator starts as an
  // empty string so the result is not prefixed with "undefined".
  let str = '';
  for (const region of jsonObj.regions || []) {
    for (const line of region.lines || []) {
      const words = (line.words || []).map((word) => word.text);
      str += words.join(' ') + '\n';
    }
  }

  // One array entry per line; drop the empty entry left by the final '\n'.
  const arr = str.split('\n').filter((entry) => entry !== '');
  return arr;
}
根据你得到的 JSON 结构放置你的逻辑。
关于你的第二个和第三个问题-
我想只提取成分而不是所有文本。
在某些情况下,图像可能包含成分而没有指定成分关键字,在这种情况下如何提取成分?
计算机视觉将从图像中提取所有打印文本并将它们作为 JSON 提供给您,您无法提取特定文本。使用与上述相同的方法,只需提取成分即可达到所需的结果。
我建议阅读此Extract printed text (OCR) using the Computer Vision REST API and Node.js GitHub 文档以获取更多信息。
【讨论】:
是的,我了解您的解决方案。使用计算机视觉后,我们仍然需要自己实现合适的逻辑来提取正确的文本,还有其他更聪明的方法吗?
以上是关于 Azure 计算机视觉:识别印刷文本的主要内容,如果未能解决你的问题,请参考以下文章:
自强队视觉组训练项目-3 | 20180107-印刷体数字识别
Azure 计算机视觉 API - PDF 文件上的 OCR 到文本