Deep Learning and Object Detection Tutorial Series 18-300: Converting Between YOLO and VOC Label Formats
Posted by 刘润森
@Author:Runsen
PASCAL VOC (The PASCAL Visual Object Classes) is a world-class computer vision challenge. The first competition was held in 2005, and it then ran once a year until the final edition in 2012. PASCAL stands for Pattern Analysis, Statistical Modelling and ComputAtional Learning; the official site is http://host.robots.ox.ac.uk/pascal/VOC/
A typical XML annotation file looks like this:
<annotation>
<folder>images</folder>
<filename>003707.jpg</filename>
<path>G:\sz210628\images\003707.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1920</width>
<height>1080</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>航空器头</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>827</xmin>
<ymin>224</ymin>
<xmax>916</xmax>
<ymax>333</ymax>
</bndbox>
</object>
</annotation>
- folder: the folder the image belongs to
- filename: the image file name
- database: the database name
- annotation: the annotation file format
- size: the image size; width, height, and depth (number of channels)
- segmented: whether the image has segmentation annotations
- object: one annotated target; name is the class label, pose is the shooting angle (front, rear, left, right, or unspecified), and truncated indicates whether the object is cut off, i.e. whether the complete object appears in the image
- difficult: how hard the object is to detect; 1 means difficult, 0 means not
- bndbox: the object's location, given by xmin, ymin, xmax, ymax (see the short parsing sketch after this list)
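As a quick illustration (a minimal sketch, not from the original post), the snippet below reads these fields with the standard xml.etree.ElementTree module; the path Annotations/003707.xml is only an assumed location for the sample file shown above.
import xml.etree.ElementTree as ET

# Hypothetical location of the sample annotation shown above
tree = ET.parse("Annotations/003707.xml")
root = tree.getroot()

size = root.find("size")
w, h = int(size.find("width").text), int(size.find("height").text)

for obj in root.iter("object"):
    name = obj.find("name").text               # class label
    box = obj.find("bndbox")
    xmin, ymin = int(box.find("xmin").text), int(box.find("ymin").text)
    xmax, ymax = int(box.find("xmax").text), int(box.find("ymax").text)
    print(name, (xmin, ymin, xmax, ymax), "image size:", (w, h))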
YOLO expects one txt annotation file per image.
Each line describes one object: class_id x y width height.
class_id: an integer from 0 to (classes - 1).
x y width height: floating-point values normalized by the image width and height, in the range (0.0, 1.0]. For example, x = pixel_x / image_width and height = pixel_height / image_height.
x y: the center point of the bounding box. A sample line:
0 0.750000 0.501111 0.315000 0.993333
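As a worked example (not taken from the original post), consider the box in the sample XML above: a 1920×1080 image with xmin=827, ymin=224, xmax=916, ymax=333. The normalized YOLO values come out as follows; note that the conversion script below additionally subtracts 1 from the center to account for VOC's 1-based pixel coordinates.
img_w, img_h = 1920, 1080
xmin, ymin, xmax, ymax = 827, 224, 916, 333

x_center = (xmin + xmax) / 2 / img_w   # 871.5 / 1920 ≈ 0.453906
y_center = (ymin + ymax) / 2 / img_h   # 278.5 / 1080 ≈ 0.257870
width = (xmax - xmin) / img_w          # 89 / 1920   ≈ 0.046354
height = (ymax - ymin) / img_h         # 109 / 1080  ≈ 0.100926

# If the class were id 0, the YOLO line would read roughly:
# 0 0.453906 0.257870 0.046354 0.100926
print(f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")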
The following script reads PASCAL VOC XML files and converts them to YOLO txt files.
# coding=utf-8
import os
import xml.etree.ElementTree as ET

# Class names used in the VOC XML annotations
classes = ['person', 'rider', 'car', 'bus', 'truck', 'bike', 'motor',
           'tl_green', 'tl_red', 'tl_yellow', 'tl_none', 't_sign', 'train']


def convert(size, box):
    # VOC (xmin, xmax, ymin, ymax) -> YOLO (x_center, y_center, w, h), normalized
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0 - 1  # -1: VOC pixel coordinates are 1-based
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def converting_annotation(ann_dir, ann_file, yolo_out_dir):
    # ann_dir: folder containing the VOC XML files
    # ann_file: list of XML file names
    # yolo_out_dir: output folder for the YOLO txt files
    os.makedirs(yolo_out_dir, exist_ok=True)
    for ann in ann_file:
        try:
            txt_file = ann.split('.')[0] + '.txt'
            in_file = open(os.path.join(ann_dir, ann), encoding="utf-8")
            out_file = open(os.path.join(yolo_out_dir, txt_file), 'w', encoding="utf-8")
            tree = ET.parse(in_file)
            root = tree.getroot()
            size = root.find('size')
            w = int(size.find('width').text)
            h = int(size.find('height').text)
            for obj in root.iter('object'):
                cls = obj.find('name').text
                if cls not in classes:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                     float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
                data = convert((w, h), b)
                out_file.write(str(cls_id) + " " + " ".join(str(a) for a in data) + '\n')
        except Exception as e:
            print(ann, e)
            continue


if __name__ == '__main__':
    ann_dir = "Annotations"
    ann_file = os.listdir(ann_dir)
    ann_file.sort()
    # "label" is the folder where the YOLO txt files are written
    converting_annotation(ann_dir, ann_file, "label")
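A quick sanity check (not part of the original post), assuming convert() from the script above is in scope:
# Sample 1920x1080 image, box (xmin=827, ymin=224, xmax=916, ymax=333)
print(convert((1920, 1080), (827.0, 916.0, 224.0, 333.0)))
# -> approximately (0.4534, 0.2569, 0.0464, 0.1009); the centers are one
#    pixel smaller than in the worked example because of the -1 offset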
The following script reads YOLO txt files and converts them back to PASCAL VOC XML files.
# coding=utf-8
import csv
import os

from lxml import etree
from PIL import Image

IMG_PATH = r"images\trains"          # folder with the images
txt_folder = r"labels\trains"        # folder with the YOLO txt files
save_path = r"labels\xml"            # output folder for the generated XML files

labels = ['person', 'rider', 'car', 'bus', 'truck', 'bike', 'motor',
          'tl_green', 'tl_red', 'tl_yellow', 'tl_none', 't_sign', 'train']

fw = os.listdir(IMG_PATH)
os.makedirs(save_path, exist_ok=True)


def csvread(fn):
    # Read one YOLO txt file: one space-separated row per object
    with open(fn, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        return [row for row in reader]


def convert_label(class_id):
    # Map the class id string from the txt file back to its label name
    return labels[int(class_id)]


def extract_coor(txt_row, img_width, img_height):
    # YOLO (x_center, y_center, w, h), normalized -> VOC pixel box
    x_rect_mid = float(txt_row[1])
    y_rect_mid = float(txt_row[2])
    width_rect = float(txt_row[3])
    height_rect = float(txt_row[4])
    x_min_rect = ((2 * x_rect_mid * img_width) - (width_rect * img_width)) / 2
    x_max_rect = ((2 * x_rect_mid * img_width) + (width_rect * img_width)) / 2
    y_min_rect = ((2 * y_rect_mid * img_height) - (height_rect * img_height)) / 2
    y_max_rect = ((2 * y_rect_mid * img_height) + (height_rect * img_height)) / 2
    return x_min_rect, x_max_rect, y_min_rect, y_max_rect


for img_name in fw:
    root = etree.Element("annotation")

    img_style = os.path.basename(IMG_PATH)
    image_info = os.path.join(IMG_PATH, img_name)
    txt_path = os.path.join(txt_folder, img_name[:-4] + ".txt")
    txt_file = csvread(txt_path)

    # Read the image information
    im = Image.open(image_info)
    img_width, img_height = im.size
    img_depth = len(im.getbands())   # number of channels

    folder = etree.Element("folder")
    folder.text = "%s" % img_style
    filename = etree.Element("filename")
    filename.text = "%s" % img_name
    path = etree.Element("path")
    path.text = "%s" % IMG_PATH
    source = etree.Element("source")
    source_database = etree.SubElement(source, "database")
    source_database.text = "Unknown"

    size = etree.Element("size")
    image_width = etree.SubElement(size, "width")
    image_width.text = "%d" % img_width
    image_height = etree.SubElement(size, "height")
    image_height.text = "%d" % img_height
    image_depth = etree.SubElement(size, "depth")
    image_depth.text = "%d" % img_depth

    segmented = etree.Element("segmented")
    segmented.text = "0"

    root.append(folder)
    root.append(filename)
    root.append(path)
    root.append(source)
    root.append(size)
    root.append(segmented)

    for row in txt_file:
        if not row:
            continue  # skip blank lines
        label = convert_label(row[0])
        x_min_rect, x_max_rect, y_min_rect, y_max_rect = extract_coor(
            row, img_width, img_height)

        obj = etree.Element("object")
        name = etree.SubElement(obj, "name")
        name.text = "%s" % label
        pose = etree.SubElement(obj, "pose")
        pose.text = "Unspecified"
        truncated = etree.SubElement(obj, "truncated")
        truncated.text = "0"
        difficult = etree.SubElement(obj, "difficult")
        difficult.text = "0"
        bndbox = etree.SubElement(obj, "bndbox")
        xmin = etree.SubElement(bndbox, "xmin")
        xmin.text = "%d" % x_min_rect
        ymin = etree.SubElement(bndbox, "ymin")
        ymin.text = "%d" % y_min_rect
        xmax = etree.SubElement(bndbox, "xmax")
        xmax.text = "%d" % x_max_rect
        ymax = etree.SubElement(bndbox, "ymax")
        ymax.text = "%d" % y_max_rect
        root.append(obj)

    file_output = etree.tostring(root, pretty_print=True, encoding='UTF-8')
    xml_dir = os.path.join(save_path, '%s.xml' % img_name[:-4])
    print(xml_dir)
    with open(xml_dir, 'w', encoding="utf-8") as ff:
        ff.write(file_output.decode('utf-8'))
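As a rough round-trip check (not from the original post, and assuming extract_coor from the script above is in scope), feeding the YOLO values from the earlier worked example back through extract_coor should recover roughly the original pixel box:
# YOLO row for the sample 1920x1080 box (827, 224, 916, 333)
row = ['0', '0.453906', '0.257870', '0.046354', '0.100926']
print(extract_coor(row, 1920, 1080))
# -> approximately (827.0, 916.0, 224.0, 333.0); the tiny drift comes from
#    rounding the normalized values to six decimals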
When working with a VOC dataset, you often need to gather all of the class names that appear in the annotations. The script below does exactly that:
# coding=utf-8
import xml.dom.minidom as xmldom
import os

# Collect every class name that appears in the VOC annotations
annotation_path = "Annotations"
annotation_names = [os.path.join(annotation_path, i) for i in os.listdir(annotation_path)]

labels = list()
for names in annotation_names:
    xmlfilepath = names
    domobj = xmldom.parse(xmlfilepath)
    # Document element
    elementobj = domobj.documentElement
    # All <object> sub-elements
    subElementObj = elementobj.getElementsByTagName("object")
    for s in subElementObj:
        label = s.getElementsByTagName("name")[0].firstChild.data
        if label not in labels:
            labels.append(label)
print(labels)
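For comparison, here is a more compact variant (my addition) using xml.etree.ElementTree and a set; the same Annotations folder is assumed:
import os
import xml.etree.ElementTree as ET

annotation_path = "Annotations"
classes = set()
for fname in os.listdir(annotation_path):
    tree = ET.parse(os.path.join(annotation_path, fname))
    for obj in tree.iter("object"):
        classes.add(obj.find("name").text)
print(sorted(classes))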
When training a YOLO model, you also need to count how many instances of each class there are, in order to find the classes with few samples and decide how to handle class imbalance. The code below computes these per-class counts.
import os
import xml.etree.ElementTree as ET
from PIL import Image


def parse_obj(xml_path, filename):
    # Return the list of objects (class names) in one XML file
    tree = ET.parse(xml_path + filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        objects.append(obj_struct)
    return objects


def read_image(image_path, filename):
    # Helper for image-level statistics (not used by the counting below)
    im = Image.open(image_path + filename)
    W = im.size[0]
    H = im.size[1]
    area = W * H
    im_info = [W, H, area]
    return im_info


if __name__ == '__main__':
    xml_path = 'Annotations\\'
    filenamess = os.listdir(xml_path)
    filenames = []
    for name in filenamess:
        name = name.replace('.xml', '')
        filenames.append(name)
    recs = {}
    obs_shape = {}
    classnames = []
    num_objs = {}
    obj_avg = {}
    for i, name in enumerate(filenames):
        recs[name] = parse_obj(xml_path, name + '.xml')
    for name in filenames:
        for object in recs[name]:
            if object['name'] not in num_objs.keys():
                num_objs[object['name']] = 1
            else:
                num_objs[object['name']] += 1
            if object['name'] not in classnames:
                classnames.append(object['name'])
    for name in classnames:
        print('{}: {}'.format(name, num_objs[name]))
    print('Statistics finished.')
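The same per-class counts can also be computed with collections.Counter (a compact sketch I am adding here), which sorts the classes by frequency and makes the under-represented ones easy to spot; the Annotations folder from above is assumed:
import os
import xml.etree.ElementTree as ET
from collections import Counter

xml_dir = "Annotations"
counts = Counter()
for fname in os.listdir(xml_dir):
    tree = ET.parse(os.path.join(xml_dir, fname))
    counts.update(obj.find("name").text for obj in tree.iter("object"))

# Classes listed last have the fewest instances; they are the natural
# candidates for oversampling or extra data collection.
for name, n in counts.most_common():
    print(name, n)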
Related posts in this series:
- Deep Learning and Object Detection Tutorial Series 19-300: An Introduction to AP, IoU, and mAP for Object Detection
- Deep Learning and Object Detection Tutorial Series 22-300: Common Human Pose Estimation Methods