python [VOC的xml文件到csv文件]将VOC标签从xml格式转换为csv格式 #python #csv #xml

Posted 2021-05-09

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python [VOC的xml文件到csv文件]将VOC标签从xml格式转换为csv格式 #python #csv #xml 相关的知识，希望对你有一定的参考价值。

作者：pengfei  
邮箱：pengfeidip@qq.com  
时间：2019年4月15日08:31:38  

---

The following code converts VOC dataset labels from XML format to CSV format.  
The resulting .csv looks like the following; each row lists all objects belonging to one image.

```
image_name_1,  xmin, ymin, xmax, ymax, class_index, xmin, ymin, xmax, ymax, class_index, .......
image_name_2,  xmin, ymin, xmax, ymax, class_index, xmin, ymin, xmax, ymax, class_index, .......
```

import csv
import xmltodict
import os


def xml2csv(xml_path, saved_name):
    """Convert a folder of PASCAL VOC XML annotations into one CSV file.

    Each ``.xml`` file found directly inside *xml_path* becomes one CSV row
    holding the image file name followed by five values per annotated
    object, in the order they are written below::

        filename, xmin, ymin, xmax, ymax, class_index, xmin, ymin, ...

    ``class_index`` is the position of the object's name in ``VOC_CLASSES``
    (0-based). Box coordinates are written exactly as they appear in the XML.
    An annotation without any ``<object>`` element yields a row containing
    only the file name.

    Args:
        xml_path: (string) folder that contains the VOC ``.xml`` files.
        saved_name: (string) path of the CSV file to create or overwrite.

    Raises:
        ValueError: if an object's name is not one of ``VOC_CLASSES``.
        KeyError: if an annotation lacks a required field such as
            ``filename`` or ``bndbox``.
    """
    VOC_CLASSES = (
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')

    # Only XML files are annotations; anything else in the folder (thumbnails,
    # notes, OS metadata) would make the parser fail. Sorting makes the row
    # order reproducible, since os.listdir() returns entries in arbitrary order.
    xml_names = sorted(
        name for name in os.listdir(xml_path)
        if name.lower().endswith(".xml"))

    with open(saved_name, "w", newline="") as csvfile:
        csv_writer = csv.writer(csvfile)

        # an image's info is a line in the csv
        for i_xml_name in xml_names:
            # Binary mode lets the XML parser honour the encoding declared in
            # the file itself instead of relying on the platform default.
            with open(os.path.join(xml_path, i_xml_name), "rb") as xmlfile:
                annotation = xmltodict.parse(xmlfile.read())["annotation"]

            # xmltodict yields a list for repeated <object> elements, a single
            # dict for exactly one, and no key at all when there are none;
            # normalise all three shapes to a list.
            objects = annotation.get("object") or []
            if not isinstance(objects, list):
                objects = [objects]

            info = [annotation["filename"]]
            for i_object in objects:
                try:
                    class_index = VOC_CLASSES.index(i_object["name"])
                except ValueError:
                    raise ValueError(
                        "%s: unknown class name %r"
                        % (i_xml_name, i_object["name"])) from None
                bndbox = i_object["bndbox"]
                info.extend((
                    bndbox["xmin"], bndbox["ymin"],
                    bndbox["xmax"], bndbox["ymax"],
                    class_index))

            csv_writer.writerow(info)

    print("conversion is finished !!! ")

if __name__ == "__main__":
	# Script entry point: convert the VOC2007 trainval annotations found at
	# the hard-coded location below into a CSV in the working directory.
	xml_path, saved_name = (
		"I:/PASCAL_VOC/2007/VOC2007_trainval/Annotations",
		"VOC2007_trainval.csv",
	)
	xml2csv(xml_path, saved_name)

以上是关于python [VOC的xml文件到csv文件]将VOC标签从xml格式转换为csv格式 #python #csv #xml 的主要内容，如果未能解决你的问题，请参考以下文章