作者:pengfei
邮箱:pengfeidip@qq.com
时间:2019年4月15日08:31:38
---
The following code converts PASCAL VOC dataset labels from XML format to CSV format.
The resulting .csv looks like the following; each row lists all the objects that belong to one image.
```
image_name_1, xmin, ymin, xmax, ymax, class_index, xmin, ymin, xmax, ymax, class_index, .......
image_name_2, xmin, ymin, xmax, ymax, class_index, xmin, ymin, xmax, ymax, class_index, .......
```
import csv
import xmltodict
import os
def xml2csv(xml_path, saved_name):
    """Convert a folder of PASCAL VOC XML annotations into one CSV file.

    Each XML file becomes one CSV row: the image file name followed by five
    values per annotated object::

        image_name, xmin, ymin, xmax, ymax, class_index, xmin, ymin, ...

    Box coordinates are written exactly as parsed from the XML;
    ``class_index`` is the position of the object's name in ``VOC_CLASSES``.
    An annotation without any ``<object>`` element yields a row holding only
    the image name.

    Args:
        xml_path: Folder containing the ``.xml`` annotation files. Entries
            whose name does not end in ``.xml`` are skipped.
        saved_name: Path of the CSV file to write (truncated if it exists).

    Raises:
        ValueError: If an object's class name is not in ``VOC_CLASSES``.
    """
    # Class names in index order: 'aeroplane' -> 0 ... 'tvmonitor' -> 19.
    VOC_CLASSES = (
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')
    class_to_index = {name: index for index, name in enumerate(VOC_CLASSES)}

    # os.listdir() returns entries in arbitrary order; sort them so the CSV is
    # reproducible, and ignore anything that is not an annotation file.
    xml_names = sorted(
        name for name in os.listdir(xml_path)
        if name.lower().endswith(".xml")
    )

    with open(saved_name, "w", newline="") as csvfile:
        csv_writer = csv.writer(csvfile)
        # One CSV row per annotation file.
        for xml_name in xml_names:
            with open(os.path.join(xml_path, xml_name)) as xmlfile:
                annotation = xmltodict.parse(xmlfile.read())["annotation"]

            # xmltodict gives a dict for a single <object>, a list for
            # several, and no key at all when there are none; normalise all
            # three shapes to a list so one loop handles every case.
            objects = annotation.get("object") or []
            if not isinstance(objects, list):
                objects = [objects]

            row = [annotation["filename"]]
            for obj in objects:
                class_name = obj["name"]
                if class_name not in class_to_index:
                    raise ValueError(
                        "unknown class {!r} in {}".format(class_name, xml_name)
                    )
                box = obj["bndbox"]
                row.extend((
                    box["xmin"], box["ymin"], box["xmax"], box["ymax"],
                    class_to_index[class_name],
                ))
            csv_writer.writerow(row)

    print("conversion is finished !!! ")
    return
if __name__ == "__main__":
    # Script entry point. Both arguments are optional and default to the
    # values that used to be hard-coded here, so running the script with no
    # arguments behaves exactly as before.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert PASCAL VOC XML annotations to a single CSV file."
    )
    parser.add_argument(
        "xml_path",
        nargs="?",
        default="I:/PASCAL_VOC/2007/VOC2007_trainval/Annotations",
        help="folder containing the VOC .xml annotation files",
    )
    parser.add_argument(
        "saved_name",
        nargs="?",
        default="VOC2007_trainval.csv",
        help="path of the CSV file to write",
    )
    args = parser.parse_args()
    xml2csv(xml_path=args.xml_path, saved_name=args.saved_name)