Training Mask R-CNN on your own data with the PyTorch implementation
Posted by 东东就是我
1. Training on the PennFudan dataset
https://blog.csdn.net/u013685264/article/details/100564660
This post walks through training on the PennFudan dataset. Viewing the masks raises an error along the way; the fix is described here:
https://blog.csdn.net/weixin_42245375/article/details/119950143
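Independent of that fix, the masks are easy to inspect by hand once you know they are label images: the pixel values are instance ids 0, 1, 2, ..., which all look black when the PNG is opened directly. A minimal sketch (the file path is just an example):
import numpy as np
from PIL import Image

# instance ids are 0 (background), 1, 2, ... - far too dark to see directly
mask = np.array(Image.open("PennFudanPed/PedMasks/FudanPed00001_mask.png"))
print("instance ids:", np.unique(mask))

# scale the ids up so each instance becomes clearly visible
vis = (mask * (255 // max(int(mask.max()), 1))).astype(np.uint8)
Image.fromarray(vis).show()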
2. Training on your own dataset
https://blog.csdn.net/qq_40195360/article/details/106101756
The dataset there is annotated with labelme. During training, however, I noticed that each generated mask kept only a single detection box, which traced back to a bug in new_json_to_dataset.py. The fixes are below.
1. Fix new_json_to_dataset.py
import argparse
import json
import os
import os.path as osp
import warnings
import copy

import numpy as np
import PIL.Image
from skimage import io
import yaml

from labelme import utils
import draw  # local helper module from the referenced blog post; not used below

NAME_LABEL_MAP = {
    '_background_': 0,
    'sack': 1,
}

LABEL_NAME_MAP = ['0: _background_',
                  '1: sack']


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--out', default=None)
    args = parser.parse_args()

    json_file = 'E:/custom/visionary_s_3d_dete/zhixi'
    file_list = os.listdir(json_file)
    for i in range(0, len(file_list)):
        if file_list[i].find(".json") < 0:
            continue
        path = os.path.join(json_file, file_list[i])
        filename = file_list[i][:-5]
        if os.path.isfile(path):
            data = json.load(open(path))
            img = utils.image.img_b64_to_arr(data['imageData'])
            lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])

            # modify labels according to NAME_LABEL_MAP
            lbl_tmp = copy.copy(lbl)
            for key_name in lbl_names:
                old_lbl_val = lbl_names[key_name]
                new_lbl_val = NAME_LABEL_MAP[key_name]
                lbl_tmp[lbl == old_lbl_val] = new_lbl_val
            lbl_names_tmp = {}
            for key_name in lbl_names:
                lbl_names_tmp[key_name] = NAME_LABEL_MAP[key_name]

            # assign the new labels to lbl and the lbl_names dict
            lbl = np.array(lbl_tmp, dtype=np.int8)
            lbl_names = lbl_names_tmp
            print('lbl_names:', lbl_names)

            out_dir = osp.basename(file_list[i]).replace('.', '_')
            out_dir = osp.join(json_file, out_dir)
            print(out_dir)
            if not osp.exists(out_dir):
                os.mkdir(out_dir)

            PIL.Image.fromarray(img).save(osp.join(out_dir, '{}.png'.format(filename)))
            PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_gt.png'.format(filename)))
            print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()
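The training code in section 3 reads a PennFudan-style layout: a PNGImages folder with the images and a PedMasks folder with one label image per input. The script above instead writes one folder per json file, so the outputs still need to be collected. A rough sketch, assuming the folder names produced above (adjust the paths and naming to your own data):
import os
import shutil

src_root = 'E:/custom/visionary_s_3d_dete/zhixi'  # output of new_json_to_dataset.py
dst_root = 'PennFudanPed1'                        # layout expected by the training code

os.makedirs(os.path.join(dst_root, 'PNGImages'), exist_ok=True)
os.makedirs(os.path.join(dst_root, 'PedMasks'), exist_ok=True)

for d in os.listdir(src_root):
    sub = os.path.join(src_root, d)
    if not os.path.isdir(sub):
        continue
    name = d.replace('_json', '')  # 'foo.json' was turned into folder 'foo_json'
    for f in os.listdir(sub):
        if f.endswith('_gt.png'):   # instance label image
            shutil.copy(os.path.join(sub, f),
                        os.path.join(dst_root, 'PedMasks', name + '_mask.png'))
        elif f.endswith('.png'):    # original image
            shutil.copy(os.path.join(sub, f),
                        os.path.join(dst_root, 'PNGImages', name + '.png'))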
2. Fix labelme.utils.shape.py
On my machine the file is located at
D:\anaconda3\envs\yolov7\Lib\site-packages\labelme\utils
def labelme_shapes_to_label(img_shape, shapes):
    logger.warn(
        "labelme_shapes_to_label is deprecated, so please use "
        "shapes_to_label."
    )
    label_name_to_value = {"_background_": 0}
    for shape in shapes:
        label_name = shape["label"]
        if label_name in label_name_to_value:
            label_value = label_name_to_value[label_name]
        else:
            label_value = len(label_name_to_value)
            label_name_to_value[label_name] = label_value
    _, lbl = shapes_to_label(img_shape, shapes, label_name_to_value)  # swapped: keep the instance map, not the class map
    return lbl, label_name_to_value
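Why the swap matters: shapes_to_label returns a pair (cls, ins), where cls is a class map (every "sack" polygon gets the same value) and ins is an instance map (every polygon gets its own id). The stock labelme_shapes_to_label keeps cls, which collapses all objects into one region and is why only one box survived; taking ins instead gives Mask R-CNN one id per instance. A quick sanity check on a generated label image (the file name is just an example):
import numpy as np
from PIL import Image

# with the fix, an image with three annotated sacks should yield ids [0 1 2 3]
lbl = np.array(Image.open('E:/custom/visionary_s_3d_dete/zhixi/img1_json/img1_gt.png'))
print(np.unique(lbl))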
3. Training and inference code
# Sample code from the TorchVision 0.3 Object Detection Finetuning Tutorial
# http://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import cv2
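# engine.py, utils.py, and transforms.py below are not pip packages; they are
# the helper modules from torchvision's references/detection folder - copy
# them (plus coco_eval.py and coco_utils.py) next to this script.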
from engine import train_one_epoch, evaluate
import utils
import transforms as T
import random
class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model


def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
def random_color():
    b = random.randint(0, 255)
    g = random.randint(0, 255)
    r = random.randint(0, 255)
    return (b, g, r)


def toTensor(img):
    assert type(img) == np.ndarray, 'the img type is {}, but ndarray expected'.format(type(img))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = torch.from_numpy(img.transpose((2, 0, 1)))
    return img.float().div(255)  # some implementations divide by 256 instead


def findContours(*args, **kwargs):
    """
    Wraps cv2.findContours to maintain compatibility between versions
    3 and 4

    Returns:
        contours, hierarchy
    """
    if cv2.__version__.startswith('4'):
        contours, hierarchy = cv2.findContours(*args, **kwargs)
    elif cv2.__version__.startswith('3'):
        _, contours, hierarchy = cv2.findContours(*args, **kwargs)
    else:
        raise AssertionError(
            'cv2 must be either version 3 or 4 to call this method')
    return contours, hierarchy
def PredictImg(image, device):
    # img, _ = dataset_test[0]
    img = cv2.imread(image)
    result = img.copy()
    dst = img.copy()
    img = toTensor(img)

    model = torch.load('model.pth')
    names = {'0': 'background', '1': 'sack'}

    # put the model in evaluation mode
    model.eval()
    with torch.no_grad():
        prediction = model([img.to(device)])

    boxes = prediction[0]['boxes']
    labels = prediction[0]['labels']
    scores = prediction[0]['scores']
    masks = prediction[0]['masks']

    m_bOK = False
    for idx in range(boxes.shape[0]):
        if scores[idx] >= 0.8:
            m_bOK = True
            color = random_color()
            mask = masks[idx, 0].mul(255).byte().cpu().numpy()
            thresh = mask
            contours, hierarchy = findContours(
                thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
            )
            cv2.drawContours(dst, contours, -1, color, -1)

            x1, y1, x2, y2 = boxes[idx][0], boxes[idx][1], boxes[idx][2], boxes[idx][3]
            name = names.get(str(labels[idx].item()))
            cv2.rectangle(result, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness=2)
            cv2.putText(result, text=name, org=(int(x1), int(y1) + 10), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.5, thickness=1, lineType=cv2.LINE_AA, color=color)
            dst1 = cv2.addWeighted(result, 0.7, dst, 0.3, 0)

    if m_bOK:
        cv2.imshow('result', dst1)
        cv2.waitKey()
        cv2.destroyAllWindows()
def main():
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # our dataset has two classes only - background and the object class (sack)
    num_classes = 2
    # use our dataset and defined transformations
    dataset = PennFudanDataset('PennFudanPed1', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed1', get_transform(train=False))

    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-10])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-10:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    # get the model using our helper function (first run),
    # or resume from a previously saved checkpoint
    # model = get_model_instance_segmentation(num_classes)
    model = torch.load('model.pth')

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=10,
                                                   gamma=0.1)

    # let's train it for 100 epochs
    num_epochs = 100
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    torch.save(model, 'model.pth')
    # utils.save_on_master({'model': model},
    #                      os.path.join('./', 'model.pth'))
    print("That's it!")
    PredictImg("12_08_22_51.png", device)
# def showbbox(model, img):
#     # the input img is a tensor in the 0-1 range
#     model.eval()
#     with torch.no_grad():
#         '''
#         prediction looks like:
#         [{'boxes': tensor([[1492.6672, 238.4670, 1765.5385, 315.0320],
#                            [ 887.1390, 256.8106, 1154.6687, 330.2953]], device='cuda:0'),
#           'labels': tensor([1, 1], device='cuda:0'),
#           'scores': tensor([1.0000, 1.0000], device='cuda:0')}]
#         '''
#         prediction = model([img.to(device)])
#
#     print(prediction)
#
#     img = img.permute(1, 2, 0)  # C,H,W -> H,W,C, for plotting
#     img = (img * 255).byte().data.cpu()  # * 255, float to 0-255
#     img = np.array(img)  # tensor -> ndarray
#
#     for i in range(prediction[0]['boxes'].cpu().shape[0]):
#         xmin = round(prediction[0]['boxes'][i][0].item())
#         ymin = round(prediction[0]['boxes'][i][1].item())
#         xmax = round(prediction[0]['boxes'][i][2].item())
#         ymax = round(prediction[0]['boxes'][i][3].item())


# the guard is required on Windows because the data loaders use num_workers=4
if __name__ == '__main__':
    main()
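One caveat when running this end to end: on the very first run model.pth does not exist yet, so build the network with the commented-out get_model_instance_segmentation(num_classes) line in main() instead of torch.load('model.pth'); once training has saved a checkpoint, later runs can resume by loading it.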