PPOCRLabel标注格式和icdar2015格式互转
Posted AI浩
tags:
篇首语：本文由小常识网（cha138.com）小编为大家整理，主要介绍了PPOCRLabel标注格式和icdar2015格式互转的相关知识，希望对你有一定的参考价值。
1、PPOCRLabel标注格式转icdar2015格式
# -*- coding : UTF-8 -*-
# @file : conver_json_icdar.py
# @Time : 2021/4/9 11:24
# @Author : wmz
import os
import json
def json_2_icdar(js_path, ic_path):
    """Convert a PPOCRLabel ``Label.txt`` file into ICDAR2015-style label files.

    Each non-blank line of ``js_path`` must have the form
    ``<image path>\\t<JSON list>``, where every element of the JSON list is a
    dict with a ``"transcription"`` string and a ``"points"`` list of
    ``[x, y]`` pairs.  For every such line one text file is written under
    ``ic_path``; it has the same relative path as the image but a ``.txt``
    extension, and contains one ``x1,y1,...,xn,yn,transcription`` row per
    annotation.

    Args:
        js_path: Path of the PPOCRLabel label file (read as UTF-8).
        ic_path: Directory that receives the generated ``.txt`` files.

    Raises:
        IndexError: If a non-blank line has no TAB separator.
        json.JSONDecodeError: If the annotation part is not valid JSON.
    """
    with open(js_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\r\n')
            if not line.strip():
                # Tolerate blank/trailing lines instead of crashing on them.
                continue
            content = line.split('\t', 1)
            image_name, annotations_json = content[0], content[1]
            # Swap only the real extension; a plain str.replace('jpg', 'txt')
            # would also rewrite "jpg" inside directory names and would leave
            # e.g. ".png" untouched, so the label could overwrite the image.
            txt_file = os.path.splitext(image_name)[0] + '.txt'
            dst_file = os.path.join(ic_path, txt_file)
            dst_dir = os.path.dirname(dst_file)
            if dst_dir:
                # The image path may carry a sub-directory component.
                os.makedirs(dst_dir, exist_ok=True)
            with open(dst_file, 'w', encoding='utf-8') as out_file:
                for annotation in json.loads(annotations_json):
                    # Flatten [[x, y], ...] into x1,y1,x2,y2,... integers.
                    coords = [int(v)
                              for point in annotation['points']
                              for v in point]
                    pts = ','.join(map(str, coords))
                    out_file.write(
                        pts + ',' + annotation['transcription'] + '\n')
# Script entry point: convert one PPOCRLabel Label.txt using example paths.
# Both paths are hard-coded sample locations; adjust them before running.
if __name__ == "__main__":
    src_path = r"C:\\Users\\WT\\Desktop\\hkb-bz\\Label.txt"
    dst_path = r"C:\\Users\\WT\\Desktop\\hkb-bz"
    json_2_icdar(src_path, dst_path)
2、icdar2015数据集转换成PaddleOCR标注数据格式
import os
import argparse
import json
def gen_rec_label(input_path, out_label):
    """Convert a recognition ground-truth file to ``path<TAB>label`` lines.

    Each non-blank input line is expected to look like
    ``<image path>, "<label>"``.  All spaces and double quotes are removed
    and the line is split at the FIRST comma only, so labels that themselves
    contain commas are kept intact.

    Args:
        input_path: Ground-truth file to read (UTF-8, optional BOM).
        out_label: Output file to write (UTF-8); overwritten if it exists.

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    with open(out_label, 'w', encoding='utf-8') as out_file, \
            open(input_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            compact = line.strip('\n').replace(" ", "")
            if not compact:
                # Skip blank lines rather than failing on the missing label.
                continue
            tmp = compact.split(',', 1)
            img_path, label = tmp[0], tmp[1]
            label = label.replace('"', "")
            out_file.write(img_path + '\t' + label + '\n')
def gen_det_label(root_path, input_dir, out_label):
    """Merge per-image ICDAR2015 detection labels into one PaddleOCR file.

    Every file in ``input_dir`` is read as a label file whose non-blank lines
    have the form ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``.  One output line
    is written per label file::

        <root_path><image stem>.jpg<TAB>[{"transcription": ..., "points": [[x, y], ...]}, ...]

    The image stem is the label file name without its extension and without
    a leading ``gt_`` prefix (``gt_img_1.txt`` -> ``img_1``).

    Args:
        root_path: Prefix prepended verbatim to each image file name.
        input_dir: Directory containing the per-image label files.
        out_label: Output file to write (UTF-8); overwritten if it exists.

    Raises:
        IndexError: If a non-blank line has fewer than nine fields.
        ValueError: If a coordinate field is not numeric.
    """
    with open(out_label, 'w', encoding='utf-8') as out_file:
        # Sort so the output order does not depend on the file system.
        for label_file in sorted(os.listdir(input_dir)):
            stem = os.path.splitext(label_file)[0]
            if stem.startswith('gt_'):
                stem = stem[3:]
            img_path = root_path + stem + ".jpg"
            label = []
            # utf-8-sig transparently drops the BOM that ICDAR files carry.
            with open(os.path.join(input_dir, label_file), "r",
                      encoding='UTF-8-sig') as f:
                for line in f:
                    line = line.strip("\n\r")
                    if not line.strip():
                        continue
                    # Split at most 8 times so that commas inside the
                    # transcription stay part of the transcription.
                    tmp = line.split(',', 8)
                    coords = [int(float(t)) for t in tmp[:8]]
                    points = [coords[i:i + 2]
                              for i in range(0, len(coords), 2)]
                    label.append({"transcription": tmp[8], "points": points})
            out_file.write(img_path + '\t' + json.dumps(
                label, ensure_ascii=False) + '\n')
# Command-line entry point: choose recognition ("rec") or detection ("det")
# label generation and forward the parsed paths to the matching converter.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default="rec",
        # Reject unknown modes instead of silently doing nothing.
        choices=["rec", "det"],
        help='Generate rec_label or det_label, can be set rec or det')
    parser.add_argument(
        '--root_path',
        type=str,
        default="text_localization/ch4_test_images/",
        help='The root directory of images.Only takes effect when mode=det ')
    parser.add_argument(
        '--input_path',
        type=str,
        default="icdar2015/text_localization/Challenge4_Test_Task1_GT",
        help='Input_label or input path to be converted')
    parser.add_argument(
        '--output_label',
        type=str,
        default="text_localization/test_icdar2015_label.txt",
        help='Output file name')
    args = parser.parse_args()
    if args.mode == "rec":
        print("Generate rec label")
        gen_rec_label(args.input_path, args.output_label)
    elif args.mode == "det":
        print("Generate det label")
        gen_det_label(args.root_path, args.input_path, args.output_label)
参考:
https://blog.csdn.net/qq_41672428/article/details/110427385
https://blog.csdn.net/juluwangriyue/article/details/115565665
以上是关于PPOCRLabel标注格式和icdar2015格式互转的主要内容，如果未能解决你的问题，请参考以下文章：