PPOCRLabel格式的数据集操作总结。

Posted AI浩

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了PPOCRLabel格式的数据集操作总结相关的知识,希望对你有一定的参考价值。

1、生成识别数据

获取PPOCRLabel格式的数据集中的目标的四个点,然后使用getPerspectiveTransform和warpPerspective获取图片,生成识别数据集。

import json
import os
import numpy as np
import cv2

def get_rotate_crop_image(img, points):
    """Warp the quadrilateral ``points`` of ``img`` into an upright rectangle.

    The four corners are mapped, in order, onto the top-left, top-right,
    bottom-right and bottom-left corners of the output. The output width is
    the longer of the edges 0-1 and 2-3, the height the longer of the edges
    0-3 and 1-2 (both truncated to int). Sampling is bicubic and pixels
    outside the source are filled by border replication.

    If the warped crop is at least 1.5 times taller than wide it is passed
    through ``np.rot90`` before being returned.

    Args:
        img: Source image, handed to ``cv2.warpPerspective``.
        points: Four 2-D corner points. They are subtracted from each other
            and handed to ``cv2.getPerspectiveTransform``; the caller in this
            file passes a float32 array of shape (4, 2).

    Returns:
        The rectified (and possibly rotated) crop.
    """
    assert len(points) == 4, "shape of points must be 4*2"

    # Lengths of the four sides of the quadrilateral.
    edge_01 = np.linalg.norm(points[0] - points[1])
    edge_23 = np.linalg.norm(points[2] - points[3])
    edge_03 = np.linalg.norm(points[0] - points[3])
    edge_12 = np.linalg.norm(points[1] - points[2])

    crop_w = int(max(edge_01, edge_23))
    crop_h = int(max(edge_03, edge_12))

    # Destination rectangle, corners listed in the same order as ``points``.
    target_quad = np.float32(
        [[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])

    transform = cv2.getPerspectiveTransform(points, target_quad)
    warped = cv2.warpPerspective(
        img,
        transform,
        (crop_w, crop_h),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    out_h, out_w = warped.shape[0:2]
    if out_h * 1.0 / out_w >= 1.5:
        return np.rot90(warped)
    return warped
def write_txt_img(src_path, label_txt, file_dir,
                  image_root='./train_data/icdar2015/text_localization/',
                  save_root='./train_data/rec/'):
    """Crop every labelled text box and record it in a recognition label file.

    Each non-blank line of ``src_path`` must be
    ``<relative image path>\\t<JSON list>``, where every list item carries a
    ``'transcription'`` string and a ``'points'`` list of four ``[x, y]``
    corners. For each item the box is rectified with
    ``get_rotate_crop_image``, saved as ``<index>_<image file name>`` under
    ``save_root/file_dir``, and a line ``<file_dir><crop name>\\t<text>`` is
    written to ``label_txt`` (spaces are removed from the text).

    Args:
        src_path: Path of the detection label file to read (UTF-8).
        label_txt: Writable text file object receiving the recognition labels.
        file_dir: Sub-folder prefix such as ``'train/'``; used both for the
            saved crop location and for the path written to ``label_txt``.
        image_root: Folder the image paths in ``src_path`` are relative to.
        save_root: Folder under which ``file_dir`` lives.

    Raises:
        FileNotFoundError: An image listed in ``src_path`` cannot be read.
        OSError: A crop could not be written to disk.
    """
    with open(src_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            print(line)
            # Fields are separated by a real TAB character.
            content = line.rstrip('\r\n').split('\t')
            print(content[0])
            # Last path component, so nested folders do not break the name.
            base_name = content[0].rsplit('/', 1)[-1]
            image_path = os.path.join(image_root, content[0])
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None.
                raise FileNotFoundError('cannot read image: ' + image_path)
            try:
                list_dict = json.loads(content[1])
            except json.JSONDecodeError:
                # Fallback for files that use single-quoted pseudo-JSON.
                # Only applied on failure so that valid JSON containing
                # apostrophes in the transcription is not corrupted.
                list_dict = json.loads(content[1].replace("'", '"'))
            print(len(list_dict))
            for num, lin in enumerate(list_dict):
                print(lin)
                info = lin['transcription'].replace(" ", "")
                points = np.float32(
                    [[float(v) for v in pt] for pt in lin['points']])
                # Build the crop name from the untouched base name each time;
                # reusing the previous crop name would pile up prefixes
                # ("1_0_x.jpg", "2_1_0_x.jpg", ...).
                crop_name = str(num) + "_" + base_name
                save_path = os.path.join(save_root, file_dir, crop_name)
                dst_img = get_rotate_crop_image(img, points)
                if not cv2.imwrite(save_path, dst_img):
                    # Do not emit a label line for a crop that was not saved.
                    raise OSError('failed to write crop: ' + save_path)
                label_txt.write(file_dir + crop_name + '\t' + info + '\n')
# Make sure the output folders for the cropped recognition images exist.
os.makedirs('train_data/rec/train/', exist_ok=True)
os.makedirs('train_data/rec/val/', exist_ok=True)

src_path = r"./train_data/icdar2015/text_localization/train.txt"
label_txt = r"./train_data/rec/train.txt"
src_test_path = r"./train_data/icdar2015/text_localization/val.txt"
label_test_txt = r"./train_data/rec/val.txt"

# The label files hold the transcriptions, which are typically non-ASCII.
# Write them as UTF-8 explicitly (the source label files are read as UTF-8
# too) instead of relying on the platform's default encoding.
with open(label_txt, 'w', encoding='utf-8') as w_label:
    write_txt_img(src_path, w_label, 'train/')
with open(label_test_txt, 'w', encoding='utf-8') as w_label:
    write_txt_img(src_test_path, w_label, 'val/')

2、切分训练集和验证集

按照一定的比例,将数据集切分为训练集和验证集

# 制作数据集,将Label.txt切分为训练集和验证集
import os
import shutil
from sklearn.model_selection import train_test_split

os.makedirs('train', exist_ok=True)
os.makedirs('val', exist_ok=True)
label_txt = 'Label.txt'


def _export_split(entries, split_name):
    """Write ``<split_name>.txt`` and copy its images into ``<split_name>/``.

    Args:
        entries: Label lines of the form ``<image path>\\t<annotation>``
            without a trailing newline.
        split_name: ``'train'`` or ``'val'``; used as folder name, as the
            path prefix inside the label file and as the label file stem.
    """
    label_lines = []
    images = []
    for entry in entries:
        # Fields are separated by a real TAB character.
        fields = entry.split('\t')
        image_path = fields[0]
        image_name = image_path.rsplit('/', 1)[-1]
        # Re-add the newline explicitly: the last line of Label.txt may lack
        # one, and after shuffling it can land in the middle of the output.
        label_lines.append(
            split_name + '/' + image_name + '\t' + fields[1] + '\n')
        images.append((image_path, image_name))

    # Written as UTF-8 so the result matches the readers used by the other
    # scripts in this article.
    with open(split_name + '.txt', 'w', encoding='utf-8') as out:
        out.writelines(label_lines)

    for image_path, image_name in images:
        shutil.copy(image_path, os.path.join(split_name, image_name))
        print(image_path)


# Only the read needs the input file open; blank lines are dropped so they
# cannot end up in a split.
with open(label_txt, 'r', encoding='gbk') as f:
    txt_List = [ln.rstrip('\r\n') for ln in f if ln.strip()]

# 80 % train / 20 % validation, reproducible through the fixed seed.
trainval_files, val_files = train_test_split(
    txt_List, test_size=0.2, random_state=42)

_export_split(trainval_files, 'train')
_export_split(val_files, 'val')

3、将数据集生成LabelImg格式

将PPOCRLabel格式的数据集转为LabelImg标注的xml格式的数据集。

import os
from collections import defaultdict
import cv2
# import misc_utils as utils  # pip3 install utils-misc==0.0.5 -i https://pypi.douban.com/simple/
import json

os.makedirs('./Annotations', exist_ok=True)
print('建立Annotations目录')

# filename -> list of [label, xmin, ymin, xmax, ymax]
mem = defaultdict(list)

with open('Label.txt', 'r', encoding='utf8') as fp:
    for raw_line in fp:
        raw_line = raw_line.rstrip('\r\n')
        if not raw_line.strip():
            continue  # tolerate blank lines
        # Fields are separated by a real TAB character.
        fields = raw_line.split('\t')
        path = fields[0]
        anno = json.loads(fields[1])
        print(anno)

        filename = path.rsplit('/', 1)[-1]
        # splitext keeps dots inside the name ("a.b.jpg" -> "a.b").
        stem = os.path.splitext(filename)[0]

        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None.
            raise FileNotFoundError('cannot read image: ' + path)
        # A JPEG copy of the image is stored next to its annotation.
        cv2.imwrite('Annotations/' + stem + '.jpg', img)
        height, width = img.shape[:2]

        # Axis-aligned bounding box of every annotated polygon.
        for item in anno:
            label = 'No'
            xs = [int(pt[0]) for pt in item['points']]
            ys = [int(pt[1]) for pt in item['points']]
            mem[filename].append([label, min(xs), min(ys), max(xs), max(ys)])

        # Build the XML once per image, after all boxes are collected.
        # An image without boxes still gets an (object-free) annotation.
        xml_lines = [
            '<annotation>',
            '    <folder>JPEGImages</folder>',
            f'    <filename>{stem}.jpg</filename>',
            '    <size>',
            f'        <width>{width}</width>',
            f'        <height>{height}</height>',
            '        <depth>3</depth>',
            '    </size>',
            '    <segmented>0</segmented>',
        ]
        for label, x1, y1, x2, y2 in mem[filename]:
            xml_lines += [
                '    <object>',
                f'        <name>{label}</name>',
                '        <pose>Unspecified</pose>',
                '        <truncated>0</truncated>',
                '        <difficult>0</difficult>',
                '        <bndbox>',
                f'            <xmin>{x1}</xmin>',
                f'            <ymin>{y1}</ymin>',
                f'            <xmax>{x2}</xmax>',
                f'            <ymax>{y2}</ymax>',
                '        </bndbox>',
                '    </object>',
            ]
        xml_lines.append('</annotation>')

        # XML without a declaration is read as UTF-8, so write it as such.
        xml_path = os.path.join('./Annotations', stem + '.xml')
        with open(xml_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(xml_lines))

4、将PPOCRLabel格式的数据集转为DBNet训练用的icdar2015格式的数据集

import os
import json

def json_2_icdar(js_path, ic_path):
    """Convert a PPOCRLabel label file into per-image ICDAR2015 text files.

    Each non-blank line of ``js_path`` must be
    ``<image path>\\t<JSON list>``, where every list item has a
    ``'transcription'`` string and a ``'points'`` list of ``[x, y]`` pairs.
    For every image one text file is written whose lines are
    ``x1,y1,x2,y2,...,transcription`` (coordinates truncated to int).

    Args:
        js_path: Path of the PPOCRLabel label file (UTF-8).
        ic_path: Output folder. The image path with its extension replaced
            by ``.txt`` is joined onto it; an empty string writes relative
            to the current working directory.
    """
    with open(js_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            print(line)
            # Fields are separated by a real TAB character.
            content = line.rstrip('\r\n').split('\t')
            print(content[0])
            # splitext only strips the extension, so "./a/1.jpg" stays valid.
            txt_file = os.path.splitext(content[0])[0] + '.txt'
            # Write to the joined destination so ``ic_path`` is honoured.
            dst_file = os.path.join(ic_path, txt_file)
            parent = os.path.dirname(dst_file)
            if parent:
                os.makedirs(parent, exist_ok=True)
            # Parse before opening so bad JSON leaves no empty output file.
            list_dict = json.loads(content[1])
            print(len(list_dict))
            with open(dst_file, 'w', encoding='utf-8') as file_lineinfo:
                for lin in list_dict:
                    print(lin)
                    info = lin['transcription']
                    # Flatten [[x, y], ...] into x1, y1, x2, y2, ...
                    points = [int(y) for x in lin['points'] for y in x]
                    pts = ','.join(map(str, points))
                    file_lineinfo.write(pts + ',' + info + '\n')


if __name__ == "__main__":
    # Script entry point: convert train/Label.txt, passing an empty string
    # as the destination folder argument.
    src_path, dst_path = r"train/Label.txt", r""
    json_2_icdar(src_path, dst_path)

5、数据增强

对标注的数据集做旋转、高斯模糊、色彩饱和度、亮度等增强。

import json
import os

import cv2
import numpy as np
import torchvision.transforms as transforms
from torchtoolbox.transform import Cutout
from PIL import Image
from random import randint
# Data preprocessing / augmentation settings.
# Stand-alone list holding a colour-jitter and a Gaussian-blur transform.
t = [
    transforms.ColorJitter(brightness=0.3, contrast=0.5, saturation=0.5),
    transforms.GaussianBlur(5, sigma=(0.1, 0.5)),
]

# Composed pipeline: colour jitter, Gaussian blur, then a conversion to a
# tensor and back to a PIL image.
_pipeline_steps = [
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.2),
    transforms.GaussianBlur(5, sigma=(0.1, 3.0)),
    transforms.ToTensor(),
    transforms.ToPILImage(),
]
transform = transforms.Compose(_pipeline_steps)
def dumpRotateImage(img, degree):
    """Return ``img`` rotated about its centre, plus the rotation matrix.

    The rotation uses scale 1 around ``(width // 2, height // 2)``. The
    output canvas keeps the input's width and height, and areas not covered
    by the rotated image are filled with ``(255, 255, 255)``.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) is read here.
        degree: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        Tuple ``(rotated image, affine matrix used for the rotation)``.
    """
    rows, cols = img.shape[:2]
    centre = (cols // 2, rows // 2)
    matRotation = cv2.getRotationMatrix2D(centre, degree, 1)
    imgRotation = cv2.warpAffine(
        img, matRotation, (cols, rows), borderValue=(255, 255, 255))
    return imgRotation, matRotation

os.makedirs('train', exist_ok=True)
src_path = "Label_new.txt"
d_path='dd.txt'
radom_p=[-3,-2,-1,0,1,2,3,4,5]
with open(d_path, 'w') as w_label:
    with open(src_path, 'r', encoding='utf-8') as f

(原文代码在此处被截断。)

以上是关于PPOCRLabel格式的数据集操作总结的主要内容,如果未能解决你的问题,请参考以下文章:

PPOCRLabel标注格式和icdar2015格式互转

PPOCRLabel在windows启动错误处理

PPOCRLabel在windows启动错误处理

Mysql视图使用总结

让多个线程对一个数据集进行操作,而一个线程对其进行总结

深度整理: 超详细 Pandas 合并数据集操作总结