目标检测:数据增强方法及代码实现
Posted 告白少年
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了目标检测:数据增强方法及代码实现相关的知识,希望对你有一定的参考价值。
数据增强 :
生成数据增强后的图像和对应的xml
- 高斯模糊
- 调整亮度
- 平移
- cutout
- 裁剪
# -*- coding=utf-8 -*-
import os
import time
import random
import cv2
import numpy as np
from skimage import exposure
import xml.etree.ElementTree as ET
import xml.dom.minidom as DOC
# 从xml文件中提取bounding box信息, 格式为[[x_min, y_min, x_max, y_max, name]]
def parse_xml(xml_path):
    """Read the bounding boxes stored in a Pascal-VOC style annotation file.

    Args:
        xml_path: Path of the XML annotation file.

    Returns:
        A list with one ``[x_min, y_min, x_max, y_max, name]`` entry per
        ``<object>`` element, in document order.  The coordinates are ints
        (fractional values are truncated) and ``name`` is the text of the
        object's ``<name>`` element.

    Raises:
        ValueError: If an object's ``<bndbox>`` lacks one of the
            ``xmin``/``ymin``/``xmax``/``ymax`` tags.
    """
    root = ET.parse(xml_path).getroot()
    coords = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        box = obj.find('bndbox')
        values = []
        # Look the coordinates up by tag name instead of by child position,
        # so a file that lists them in another order is still read correctly.
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            text = box.findtext(tag)
            if text is None:
                raise ValueError(
                    'missing <%s> in a <bndbox> of %s' % (tag, xml_path))
            # Going through float() accepts both "12" and "12.0".
            values.append(int(float(text)))
        coords.append(values + [name])
    return coords
#将bounding box信息写入xml文件中, bounding box格式为[[x_min, y_min, x_max, y_max, name]]
def generate_xml(img_name, coords, img_size, out_root_path, cnt):
    """Write a Pascal-VOC style annotation file for one augmented image.

    The file is saved as ``new_<cnt>_<image stem>.xml`` inside
    ``out_root_path``.

    Args:
        img_name: Image file name, e.g. ``a.jpg``.  It is stored in the
            ``<filename>`` element and its stem is used for the output name.
        coords: Iterable of ``[x_min, y_min, x_max, y_max, name]`` entries.
            Each coordinate is written as ``int(float(value))``; ``name`` is
            the object's label text.
        img_size: Image size as ``[h, w, c]``.
        out_root_path: Directory the XML file is written to.
        cnt: Counter that becomes part of the output file name.
    """
    doc = DOC.Document()

    def add_child(parent, tag, text=None):
        # Create <tag>, optionally holding a text node, and attach it to parent.
        node = doc.createElement(tag)
        if text is not None:
            node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)
        return node

    annotation = add_child(doc, 'annotation')
    add_child(annotation, 'folder', 'Tianchi')
    add_child(annotation, 'filename', img_name)

    source = add_child(annotation, 'source')
    add_child(source, 'database', 'The Tianchi Database')
    add_child(source, 'annotation', 'Tianchi')

    size = add_child(annotation, 'size')
    add_child(size, 'width', str(img_size[1]))
    add_child(size, 'height', str(img_size[0]))
    add_child(size, 'depth', str(img_size[2]))

    for coord in coords:
        obj = add_child(annotation, 'object')
        add_child(obj, 'name', coord[4])
        add_child(obj, 'pose', 'Unspecified')
        add_child(obj, 'truncated', '1')
        add_child(obj, 'difficult', '0')
        bndbox = add_child(obj, 'bndbox')
        # zip() stops after the four tags, so the trailing label is ignored.
        for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), coord):
            add_child(bndbox, tag, str(int(float(value))))

    # splitext() copes with extensions of any length (".jpeg", ".png", ...),
    # unlike slicing off the last four characters.
    stem = os.path.splitext(img_name)[0]
    out_path = os.path.join(out_root_path, "new_" + str(cnt) + "_" + stem + '.xml')
    # toprettyxml() emits no encoding declaration, so the file must be UTF-8
    # (the XML default) for non-ASCII labels to round-trip on every platform.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=''))
def show_pic(img, bboxes=None):
    """Show an image in a window with its bounding boxes drawn on top.

    The call blocks until a key is pressed, then closes every OpenCV window.

    Args:
        img: Image array.
        bboxes: Optional list of boxes, each ``[x_min, y_min, x_max, y_max]``
            optionally followed by a label string at index 4.  ``None`` (the
            default) draws no boxes.

    Raises:
        ValueError: If the image cannot be JPEG-encoded.
    """
    # JPEG round trip done in memory: it yields a fresh 3-channel 8-bit copy
    # to draw on, without overwriting and then deleting a "./1.jpg" that may
    # already exist in the working directory.
    ok, encoded = cv2.imencode('.jpg', img)
    if not ok:
        raise ValueError('could not encode the image as JPEG')
    canvas = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

    for bbox in (bboxes if bboxes is not None else []):
        x_min, y_min, x_max, y_max = (int(v) for v in bbox[:4])
        cv2.rectangle(canvas, (x_min, y_min), (x_max, y_max), (0, 255, 0), 3)
        # The label is optional: only draw it when the box carries one.
        if len(bbox) > 4:
            cv2.putText(canvas, bbox[4], (x_min, y_min), cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 255, 0), thickness=2)

    cv2.namedWindow('pic', 0)  # flag 0: the window can be resized
    cv2.moveWindow('pic', 0, 0)
    cv2.resizeWindow('pic', 1200, 800)  # size of the preview window
    cv2.imshow('pic', canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# 图像均为cv2读取
class DataAugmentForObjectDetection():
def __init__(self, crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5, add_noise_rate=0.5,
cutout_rate=0.5, cut_out_length=50, cut_out_holes=1, cut_out_threshold=0.5):
self.crop_rate = crop_rate
self.shift_rate = shift_rate
self.change_light_rate = change_light_rate
self.cutout_rate = cutout_rate
self.add_noise_rate = add_noise_rate
self.cut_out_length = cut_out_length
self.cut_out_holes = cut_out_holes
self.cut_out_threshold = cut_out_threshold
# 高斯模糊
def _addNoise(self, img):
    """Return ``img`` blurred with a Gaussian kernel of random size.

    Despite the method name, no noise is added: the image is passed through
    ``cv2.GaussianBlur`` with a square kernel whose side is drawn from
    5, 9 or 11 and with both sigmas given as 0.
    """
    kernel_side = random.choice((5, 9, 11))
    blurred = cv2.GaussianBlur(img, ksize=(kernel_side, kernel_side), sigmaX=0, sigmaY=0)
    return blurred
# 调整亮度
def _changeLight(self, img):
    """Return ``img`` after a random gamma correction.

    The gamma value is drawn uniformly from [0.6, 1.3] and handed to
    ``exposure.adjust_gamma``.
    """
    # gamma > 1 darkens the image, gamma < 1 brightens it.
    gamma = random.uniform(0.6, 1.3)
    adjusted = exposure.adjust_gamma(img, gamma)
    return adjusted
# cutout
def _cutout(self, img, bboxes, length=100, n_holes=1, threshold=0.5):
def cal_iou(boxA, boxB):
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])
if xB <= xA or yB <= yA:
return 0.0
interArea = (xB - xA + 1) * (yB - yA + 1)
boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
iou = interArea / float(boxBArea)
return iou
if img.ndim == 3:
h,w,c = img.shape
else:
_,h,w,c = img.shape
mask = np.ones((h,w,c), np.float32)
for n in range(n_holes):
chongdie = True
while chongdie:
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - length // 2, 0, h)
y2 = np.clip(y + length // 2, 0, h)
x1 = np.clip(x - length // 2, 0, w)
x2 = np.clip(x + length // 2, 0, w)
chongdie = False
for box in bboxes:
if cal_iou([x1,y1,x2,y2], box) > threshold:
chongdie = True
break
mask[y1: y2, x1: x2, :] = 0.
img = img * mask
return img
# 裁剪
def _crop_img_bboxes(self, img, bboxes):
'''
裁剪后的图片要包含所有的框
输入:
img:图像array
bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
输出:
crop_img:裁剪后的图像array
crop_bboxes:裁剪后的bounding box的坐标list
'''
#---------------------- 裁剪图像 ----------------------
w = img.shape[1]
h = img.shape[0]
x_min = w #裁剪后的包含所有目标框的最小的框
x_max = 0
y_min = h
y_max = 0
for bbox in bboxes:
x_min = min(x_min, bbox[0])
y_min = min(y_min, bbox[1])
x_max = max(x_max, bbox[2])
y_max = max(y_max, bbox[3])
d_to_left = x_min #包含所有目标框的最小框到左边的距离
d_to_right = w - x_max #包含所有目标框的最小框到右边的距离
d_to_top = y_min #包含所有目标框的最小框到顶端的距离
d_to_bottom = h - y_max #包含所有目标框的最小框到底部的距离
#随机扩展这个最小框
crop_x_min = int(x_min - random.uniform(0, d_to_left))
crop_y_min = int(y_min - random.uniform(0, d_to_top))
crop_x_max = int(x_max + random.uniform(0, d_to_right))
crop_y_max = int(y_max + random.uniform(0, d_to_bottom))
#确保不要越界
crop_x_min = max(0, crop_x_min)
crop_y_min = max(0, crop_y_min)
crop_x_max = min(w, crop_x_max)
crop_y_max = min(h, crop_y_max)
crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
#---------------------- 裁剪boundingbox ----------------------
#裁剪后的boundingbox坐标计算
crop_bboxes = list()
for bbox in bboxes:
crop_bboxes.append([bbox[0]-crop_x_min, bbox[1]-crop_y_min, bbox[2]-crop_x_min, bbox[3]-crop_y_min])
return crop_img, crop_bboxes
# 平移
def _shift_pic_bboxes(self, img, bboxes):
    """Randomly translate ``img`` while keeping every bounding box in view.

    The offset along each axis is drawn from about a third of the free
    margin between the boxes' common enclosing rectangle and the image
    border, and is applied with ``cv2.warpAffine``.

    Args:
        img: Image array whose first two axes are height and width.
        bboxes: List of boxes.  The first four items of each box are the
            numeric ``x_min, y_min, x_max, y_max``; any further items (for
            example a label) are carried over unchanged.

    Returns:
        ``(shift_img, shift_bboxes)``: the translated image (same size as the
        input) and the boxes moved by the same offset.  The offset is a
        float, so the returned coordinates are floats.
    """
    h, w = img.shape[:2]

    # Smallest rectangle that encloses every box.
    x_min, y_min = w, h
    x_max, y_max = 0, 0
    for bbox in bboxes:
        x_min = min(x_min, bbox[0])
        y_min = min(y_min, bbox[1])
        x_max = max(x_max, bbox[2])
        y_max = max(y_max, bbox[3])

    # Free margin on each side, i.e. the largest shift in that direction.
    d_to_left = x_min
    d_to_right = w - x_max
    d_to_top = y_min
    d_to_bottom = h - y_max

    # Positive x moves right, negative x left; positive y moves down,
    # negative y up.
    x = random.uniform(-(d_to_left - 1) / 3, (d_to_right - 1) / 3)
    y = random.uniform(-(d_to_top - 1) / 3, (d_to_bottom - 1) / 3)

    M = np.float32([[1, 0, x], [0, 1, y]])  # pure-translation affine matrix
    shift_img = cv2.warpAffine(img, M, (w, h))

    # Move the boxes by the same offset and keep trailing items such as the
    # label, which generate_xml and show_pic read at index 4.
    shift_bboxes = [
        [bbox[0] + x, bbox[1] + y, bbox[2] + x, bbox[3] + y] + list(bbox[4:])
        for bbox in bboxes
    ]
    return shift_img, shift_bboxes
def dataAugment(self, img, bboxes):
'''
图像增强
输入:
img:图像array
bboxes:该图像的所有框坐标
输出:
img:增强后的图像
bboxes:增强后图片对应的box
'''
change_num = 0 #改变的次数
print('------')
while change_num < 1: #默认至少有一种数据增强生效
if random.random() < self.crop_rate: #裁剪
print('裁剪')
change_num += 1
img, bboxes = self
以上是关于目标检测:数据增强方法及代码实现的主要内容,如果未能解决你的问题,请参考以下文章
目标检测(10) Mosaic 数据增强方法,附Python完整代码
目标检测数据增强:YOLO官方数据增强实现/imgaug的简单使用
目标检测:python实现多种图像数据增强的方法(光照,对比度,遮挡,模糊)
目标检测:python实现多种图像数据增强的方法(光照,对比度,遮挡,模糊)