目标检测YOLO+DOTA:小样本检测策略
Posted zstar-_
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了目标检测YOLO+DOTA:小样本检测策略相关的知识,希望对你有一定的参考价值。
前言
之前在使用YOLOv5跑xView数据集时,发现准确率还是非常低的。在网上冲浪时,我发现了一种小样本检测策略:那就是把大分辨率的图片分割成小块进行训练,然后再输入大图进行检测。那么本篇博文就使用DOTA数据集来验证一下这种思路是否可行。
主要参考的项目:https://github.com/postor/DOTA-yolov3
DOTA数据集简介
DOTA数据集全称:Dataset for Object deTection in Aerial images
DOTA数据集v1.0共收录2806张4000 × 4000的图片,总共包含188282个目标。
DOTA数据集论文介绍:https://arxiv.org/pdf/1711.10398.pdf
数据集官网:https://captain-whu.github.io/DOTA/dataset.html
DOTA数据集总共有3个版本
DOTAV1.0
- 类别数目:15
- 类别名称:plane, ship, storage tank, baseball diamond, tennis court, basketball court, ground track field, harbor, bridge, large vehicle, small vehicle, helicopter, roundabout, soccer ball field , swimming pool
DOTAV1.5
- 类别数目:16
- 类别名称:plane, ship, storage tank, baseball diamond, tennis court, basketball court, ground track field, harbor, bridge, large vehicle, small vehicle, helicopter, roundabout, soccer ball field, swimming pool , container crane
DOTAV2.0
- 类别数目:18
- 类别名称:plane, ship, storage tank, baseball diamond, tennis court, basketball court, ground track field, harbor, bridge, large vehicle, small vehicle, helicopter, roundabout, soccer ball field, swimming pool, container crane, airport , helipad
本实验所使用的是DOTAV2.0版本,同样备份在我的GitHub上。
https://github.com/zstar1003/Dataset
图片分割
图片分割就是将大图切成一块块小图,同时需要注意将标签进行转换。
另外,为了防止目标被切断,每两个分割图有部分区域重合,具体的分割策略可以看我下方绘制的示意图。
分割代码使用的是参考项目提供的split.py
这个程序。
这里需指定下列参数:
- 输入图片文件夹路径
- 输出图片文件夹路径
- gap:两个子图的重合宽度
- subsize:子图大小
- num_process:线程数
完整代码:
import os
import codecs
import numpy as np
import math
from dota_utils import GetFileFromThisRootDir
import cv2
import shapely.geometry as shgeo
import dota_utils as util
import copy
from multiprocessing import Pool
from functools import partial
import time
def choose_best_pointorder_fit_another(poly1, poly2):
"""
To make the two polygons best fit with each point
"""
x1 = poly1[0]
y1 = poly1[1]
x2 = poly1[2]
y2 = poly1[3]
x3 = poly1[4]
y3 = poly1[5]
x4 = poly1[6]
y4 = poly1[7]
combinate = [np.array([x1, y1, x2, y2, x3, y3, x4, y4]), np.array([x2, y2, x3, y3, x4, y4, x1, y1]),
np.array([x3, y3, x4, y4, x1, y1, x2, y2]), np.array([x4, y4, x1, y1, x2, y2, x3, y3])]
dst_coordinate = np.array(poly2)
distances = np.array([np.sum((coord - dst_coordinate) ** 2) for coord in combinate])
sorted = distances.argsort()
return combinate[sorted[0]]
def cal_line_length(point1, point2):
return math.sqrt(math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
def split_single_warp(name, split_base, rate, extent):
split_base.SplitSingle(name, rate, extent)
class splitbase():
def __init__(self,
basepath,
outpath,
code='utf-8',
gap=512,
subsize=1024,
thresh=0.7,
choosebestpoint=True,
ext='.png',
padding=True,
num_process=8
):
"""
:param basepath: base path for dota data
:param outpath: output base path for dota data,
the basepath and outputpath have the similar subdirectory, 'images' and 'labelTxt'
:param code: encodeing format of txt file
:param gap: overlap between two patches
:param subsize: subsize of patch
:param thresh: the thresh determine whether to keep the instance if the instance is cut down in the process of split
:param choosebestpoint: used to choose the first point for the
:param ext: ext for the image format
:param padding: if to padding the images so that all the images have the same size
"""
self.basepath = basepath
self.outpath = outpath
self.code = code
self.gap = gap
self.subsize = subsize
self.slide = self.subsize - self.gap
self.thresh = thresh
self.imagepath = os.path.join(self.basepath, 'images')
self.labelpath = os.path.join(self.basepath, 'labelTxt')
self.outimagepath = os.path.join(self.outpath, 'images')
self.outlabelpath = os.path.join(self.outpath, 'labelTxt')
self.choosebestpoint = choosebestpoint
self.ext = ext
self.padding = padding
self.num_process = num_process
self.pool = Pool(num_process)
print('padding:', padding)
# pdb.set_trace()
if not os.path.isdir(self.outpath):
os.mkdir(self.outpath)
if not os.path.isdir(self.outimagepath):
# pdb.set_trace()
os.mkdir(self.outimagepath)
if not os.path.isdir(self.outlabelpath):
os.mkdir(self.outlabelpath)
# pdb.set_trace()
## point: (x, y), rec: (xmin, ymin, xmax, ymax)
# def __del__(self):
# self.f_sub.close()
## grid --> (x, y) position of grids
def polyorig2sub(self, left, up, poly):
polyInsub = np.zeros(len(poly))
for i in range(int(len(poly) / 2)):
polyInsub[i * 2] = int(poly[i * 2] - left)
polyInsub[i * 2 + 1] = int(poly[i * 2 + 1] - up)
return polyInsub
def calchalf_iou(self, poly1, poly2):
"""
It is not the iou on usual, the iou is the value of intersection over poly1
"""
inter_poly = poly1.intersection(poly2)
inter_area = inter_poly.area
poly1_area = poly1.area
half_iou = inter_area / poly1_area
return inter_poly, half_iou
def saveimagepatches(self, img, subimgname, left, up):
subimg = copy.deepcopy(img[up: (up + self.subsize), left: (left + self.subsize)])
outdir = os.path.join(self.outimagepath, subimgname + self.ext)
h, w, c = np.shape(subimg)
if (self.padding):
outimg = np.zeros((self.subsize, self.subsize, 3))
outimg[0:h, 0:w, :] = subimg
cv2.imwrite(outdir, outimg)
else:
cv2.imwrite(outdir, subimg)
def GetPoly4FromPoly5(self, poly):
distances = [cal_line_length((poly[i * 2], poly[i * 2 + 1]), (poly[(i + 1) * 2], poly[(i + 1) * 2 + 1])) for i
in range(int(len(poly) / 2 - 1))]
distances.append(cal_line_length((poly[0], poly[1]), (poly[8], poly[9])))
pos = np.array(distances).argsort()[0]
count = 0
outpoly = []
while count < 5:
# print('count:', count)
if (count == pos):
outpoly.append((poly[count * 2] + poly[(count * 2 + 2) % 10]) / 2)
outpoly.append((poly[(count * 2 + 1) % 10] + poly[(count * 2 + 3) % 10]) / 2)
count = count + 1
elif (count == (pos + 1) % 5):
count = count + 1
continue
else:
outpoly.append(poly[count * 2])
outpoly.append(poly[count * 2 + 1])
count = count + 1
return outpoly
def savepatches(self, resizeimg, objects, subimgname, left, up, right, down):
outdir = os.path.join(self.outlabelpath, subimgname + '.txt')
mask_poly = []
imgpoly = shgeo.Polygon([(left, up), (right, up), (right, down),
(left, down)])
with codecs.open(outdir, 'w', self.code) as f_out:
for obj in objects:
gtpoly = shgeo.Polygon([(obj['poly'][0], obj['poly'][1]),
(obj['poly'][2], obj['poly'][3]),
(obj['poly'][4], obj['poly'][5]),
(obj['poly'][6], obj['poly'][7])])
if (gtpoly.area <= 0):
continue
inter_poly, half_iou = self.calchalf_iou(gtpoly, imgpoly)
# print('writing...')
if (half_iou == 1):
polyInsub = self.polyorig2sub(left, up, obj['poly'])
outline = ' '.join(list(map(str, polyInsub)))
outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult'])
f_out.write(outline + '\\n')
elif (half_iou > 0):
# elif (half_iou > self.thresh):
## print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
inter_poly = shgeo.polygon.orient(inter_poly, sign=1)
out_poly = list(inter_poly.exterior.coords)[0: -1]
if len(out_poly) < 4:
continue
out_poly2 = []
for i in range(len(out_poly)):
out_poly2.append(out_poly[i][0])
out_poly2.append(out_poly[i][1])
if (len(out_poly) == 5):
# print('==========================')
out_poly2 = self.GetPoly4FromPoly5(out_poly2)
elif (len(out_poly) > 5):
"""
if the cut instance is a polygon with points more than 5, we do not handle it currently
"""
continue
if (self.choosebestpoint):
out_poly2 = choose_best_pointorder_fit_another(out_poly2, obj['poly'])
polyInsub = self.polyorig2sub(left, up, out_poly2)
for index, item in enumerate(polyInsub):
if (item <= 1):
polyInsub[index] = 1
elif (item >= self.subsize):
polyInsub[index] = self.subsize
outline = ' '.join(list(map(str, polyInsub)))
if (half_iou > self.thresh):
outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult'])
else:
## if the left part is too small, label as '2'
outline = outline + ' ' + obj['name'] + ' ' + '2'
f_out.write(outline + '\\n')
# else:
# mask_poly.append(inter_poly)
self.saveimagepatches(resizeimg, subimgname, left, up)
def SplitSingle(self, name, rate, extent):
"""
split a single image and ground truth
:param name: image name
:param rate: the resize scale for the image
:param extent: the image format
:return:
"""
img = cv2.imread(os.path.join(self.imagepath, name + extent))
if np.shape(img) == ():
return
fullname = os.path.join(self.labelpath, name + '.txt')
objects = util.parse_dota_poly2(fullname)
for obj in objects:
obj['poly'] = list(map(lambda x: rate * x, obj['poly']))
# obj['poly'] = list(map(lambda x: ([2 * y for y in x]), obj['poly']))
if (rate != 1):
resizeimg = cv2.resize(img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
else:
resizeimg = img
outbasename = name + '__' + str(rate) + '__'
weight = np.shape(resizeimg)[1]
height = np.shape(resizeimg)[0]
left, up = 0, 0
while (left < weight):
if (left + self.subsize >= weight):
left = max(weight - self.subsize, 0)
up = 0
while (up < height):
if (up + self.subsize >= height):
up = max(height - self.subsize, 0)
right = min(left + self.subsize, weight - 1)
down = min(up + self.subsize, height - 1)
subimgname = outbasename + str(left) + '___' + str(up)
# self.f_sub.write(name + ' ' + subimgname + ' ' + str(left) + ' ' + str(up) + '\\n')
self.savepatches(resizeimg, objects, subimgname, left, up, right, down)
if (up + self.subsize >= height):
break
else:
up = up + self.slide
if (left + self.subsize >= weight):
break
else:
left = left + self.slide
def splitdata(self, rate):
"""
:param rate: resize rate before cut
"""
imagelist = GetFileFromThisRootDir(self.imagepath)
imagenames = [util.custombasename(x) fo以上是关于目标检测YOLO+DOTA:小样本检测策略的主要内容,如果未能解决你的问题,请参考以下文章