Training Mask R-CNN on your own data with the PyTorch implementation
Posted by 东东就是我
1. Training on the PennFudan dataset
https://blog.csdn.net/u013685264/article/details/100564660
This post walks through training on the PennFudan dataset. Viewing the masks raises an error along the way; the fix is described here:
https://blog.csdn.net/weixin_42245375/article/details/119950143
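Independent of that fix, the masks are easy to inspect by hand once you know they are label images: the pixel values are instance ids 0, 1, 2, ..., which all look black when the PNG is opened directly. A minimal sketch (the file path is just an example):
import numpy as np
from PIL import Image

# instance ids are 0 (background), 1, 2, ... - far too dark to see directly
mask = np.array(Image.open("PennFudanPed/PedMasks/FudanPed00001_mask.png"))
print("instance ids:", np.unique(mask))

# scale the ids up so each instance becomes clearly visible
vis = (mask * (255 // max(int(mask.max()), 1))).astype(np.uint8)
Image.fromarray(vis).show()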
2. Training on your own dataset
https://blog.csdn.net/qq_40195360/article/details/106101756
The dataset there is annotated with labelme. During training, however, I noticed that each generated mask kept only a single detection box, which traced back to a bug in new_json_to_dataset.py. The fixes are below.
1. Fix new_json_to_dataset.py
import argparse
import json
import os
import os.path as osp
import warnings
import copy

import numpy as np
import PIL.Image
from skimage import io
import yaml

from labelme import utils
import draw  # local helper module from the referenced blog post; not used below

NAME_LABEL_MAP = {
    '_background_': 0,
    'sack': 1,
}

LABEL_NAME_MAP = ['0: _background_',
                  '1: sack']


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--out', default=None)
    args = parser.parse_args()

    json_file = 'E:/custom/visionary_s_3d_dete/zhixi'
    file_list = os.listdir(json_file)
    for i in range(0, len(file_list)):
        if file_list[i].find(".json") < 0:
            continue
        path = os.path.join(json_file, file_list[i])
        filename = file_list[i][:-5]
        if os.path.isfile(path):
            data = json.load(open(path))
            img = utils.image.img_b64_to_arr(data['imageData'])
            lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])

            # modify labels according to NAME_LABEL_MAP
            lbl_tmp = copy.copy(lbl)
            for key_name in lbl_names:
                old_lbl_val = lbl_names[key_name]
                new_lbl_val = NAME_LABEL_MAP[key_name]
                lbl_tmp[lbl == old_lbl_val] = new_lbl_val
            lbl_names_tmp = {}
            for key_name in lbl_names:
                lbl_names_tmp[key_name] = NAME_LABEL_MAP[key_name]

            # assign the new labels to lbl and the lbl_names dict
            lbl = np.array(lbl_tmp, dtype=np.int8)
            lbl_names = lbl_names_tmp
            print('lbl_names:', lbl_names)

            out_dir = osp.basename(file_list[i]).replace('.', '_')
            out_dir = osp.join(json_file, out_dir)
            print(out_dir)
            if not osp.exists(out_dir):
                os.mkdir(out_dir)

            PIL.Image.fromarray(img).save(osp.join(out_dir, '{}.png'.format(filename)))
            PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_gt.png'.format(filename)))
            print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()
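The training code in section 3 reads a PennFudan-style layout: a PNGImages folder with the images and a PedMasks folder with one label image per input. The script above instead writes one folder per json file, so the outputs still need to be collected. A rough sketch, assuming the folder names produced above (adjust the paths and naming to your own data):
import os
import shutil

src_root = 'E:/custom/visionary_s_3d_dete/zhixi'  # output of new_json_to_dataset.py
dst_root = 'PennFudanPed1'                        # layout expected by the training code

os.makedirs(os.path.join(dst_root, 'PNGImages'), exist_ok=True)
os.makedirs(os.path.join(dst_root, 'PedMasks'), exist_ok=True)

for d in os.listdir(src_root):
    sub = os.path.join(src_root, d)
    if not os.path.isdir(sub):
        continue
    name = d.replace('_json', '')  # 'foo.json' was turned into folder 'foo_json'
    for f in os.listdir(sub):
        if f.endswith('_gt.png'):   # instance label image
            shutil.copy(os.path.join(sub, f),
                        os.path.join(dst_root, 'PedMasks', name + '_mask.png'))
        elif f.endswith('.png'):    # original image
            shutil.copy(os.path.join(sub, f),
                        os.path.join(dst_root, 'PNGImages', name + '.png'))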
2. Fix labelme.utils.shape.py
On my machine the file is located at
D:\anaconda3\envs\yolov7\Lib\site-packages\labelme\utils
def labelme_shapes_to_label(img_shape, shapes):
    logger.warn(
        "labelme_shapes_to_label is deprecated, so please use "
        "shapes_to_label."
    )
    label_name_to_value = {"_background_": 0}
    for shape in shapes:
        label_name = shape["label"]
        if label_name in label_name_to_value:
            label_value = label_name_to_value[label_name]
        else:
            label_value = len(label_name_to_value)
            label_name_to_value[label_name] = label_value
    _, lbl = shapes_to_label(img_shape, shapes, label_name_to_value)  # swapped: keep the instance map, not the class map
    return lbl, label_name_to_value
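Why the swap matters: shapes_to_label returns a pair (cls, ins), where cls is a class map (every "sack" polygon gets the same value) and ins is an instance map (every polygon gets its own id). The stock labelme_shapes_to_label keeps cls, which collapses all objects into one region and is why only one box survived; taking ins instead gives Mask R-CNN one id per instance. A quick sanity check on a generated label image (the file name is just an example):
import numpy as np
from PIL import Image

# with the fix, an image with three annotated sacks should yield ids [0 1 2 3]
lbl = np.array(Image.open('E:/custom/visionary_s_3d_dete/zhixi/img1_json/img1_gt.png'))
print(np.unique(lbl))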
3. Training and inference code
# Sample code from the TorchVision 0.3 Object Detection Finetuning Tutorial
# http://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import cv2
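# engine.py, utils.py, and transforms.py below are not pip packages; they are
# the helper modules from torchvision's references/detection folder - copy
# them (plus coco_eval.py and coco_utils.py) next to this script.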
from engine import train_one_epoch, evaluate
import utils
import transforms as T
import random
class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model


def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
def random_color():
    b = random.randint(0, 255)
    g = random.randint(0, 255)
    r = random.randint(0, 255)
    return (b, g, r)


def toTensor(img):
    assert type(img) == np.ndarray, 'the img type is {}, but ndarray expected'.format(type(img))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = torch.from_numpy(img.transpose((2, 0, 1)))
    return img.float().div(255)  # some implementations divide by 256 instead


def findContours(*args, **kwargs):
    """
    Wraps cv2.findContours to maintain compatibility between versions
    3 and 4

    Returns:
        contours, hierarchy
    """
    if cv2.__version__.startswith('4'):
        contours, hierarchy = cv2.findContours(*args, **kwargs)
    elif cv2.__version__.startswith('3'):
        _, contours, hierarchy = cv2.findContours(*args, **kwargs)
    else:
        raise AssertionError(
            'cv2 must be either version 3 or 4 to call this method')
    return contours, hierarchy
def PredictImg(image, device):
    # img, _ = dataset_test[0]
    img = cv2.imread(image)
    result = img.copy()
    dst = img.copy()
    img = toTensor(img)

    model = torch.load('model.pth')
    names = {'0': 'background', '1': 'sack'}

    # put the model in evaluation mode
    model.eval()
    with torch.no_grad():
        prediction = model([img.to(device)])

    boxes = prediction[0]['boxes']
    labels = prediction[0]['labels']
    scores = prediction[0]['scores']
    masks = prediction[0]['masks']

    m_bOK = False
    for idx in range(boxes.shape[0]):
        if scores[idx] >= 0.8:
            m_bOK = True
            color = random_color()
            mask = masks[idx, 0].mul(255).byte().cpu().numpy()
            thresh = mask
            contours, hierarchy = findContours(
                thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
            )
            cv2.drawContours(dst, contours, -1, color, -1)

            x1, y1, x2, y2 = boxes[idx][0], boxes[idx][1], boxes[idx][2], boxes[idx][3]
            name = names.get(str(labels[idx].item()))
            cv2.rectangle(result, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness=2)
            cv2.putText(result, text=name, org=(int(x1), int(y1) + 10), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.5, thickness=1, lineType=cv2.LINE_AA, color=color)
            dst1 = cv2.addWeighted(result, 0.7, dst, 0.3, 0)

    if m_bOK:
        cv2.imshow('result', dst1)
        cv2.waitKey()
        cv2.destroyAllWindows()
def main():
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # our dataset has two classes only - background and the object class (sack)
    num_classes = 2
    # use our dataset and defined transformations
    dataset = PennFudanDataset('PennFudanPed1', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed1', get_transform(train=False))

    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-10])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-10:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    # get the model using our helper function (first run),
    # or resume from a previously saved checkpoint
    # model = get_model_instance_segmentation(num_classes)
    model = torch.load('model.pth')

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=10,
                                                   gamma=0.1)

    # let's train it for 100 epochs
    num_epochs = 100
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    torch.save(model, 'model.pth')
    # utils.save_on_master({'model': model},
    #                      os.path.join('./', 'model.pth'))
    print("That's it!")
    PredictImg("12_08_22_51.png", device)
# def showbbox(model, img):
#     # the input img is a tensor in the 0-1 range
#     model.eval()
#     with torch.no_grad():
#         '''
#         prediction looks like:
#         [{'boxes': tensor([[1492.6672, 238.4670, 1765.5385, 315.0320],
#                            [ 887.1390, 256.8106, 1154.6687, 330.2953]], device='cuda:0'),
#           'labels': tensor([1, 1], device='cuda:0'),
#           'scores': tensor([1.0000, 1.0000], device='cuda:0')}]
#         '''
#         prediction = model([img.to(device)])
#
#     print(prediction)
#
#     img = img.permute(1, 2, 0)  # C,H,W -> H,W,C, for plotting
#     img = (img * 255).byte().data.cpu()  # * 255, float to 0-255
#     img = np.array(img)  # tensor -> ndarray
#
#     for i in range(prediction[0]['boxes'].cpu().shape[0]):
#         xmin = round(prediction[0]['boxes'][i][0].item())
#         ymin = round(prediction[0]['boxes'][i][1].item())
#         xmax = round(prediction[0]['boxes'][i][2].item())
#         ymax = round(prediction[0]['boxes'][i][3].item())


# the guard is required on Windows because the data loaders use num_workers=4
if __name__ == '__main__':
    main()
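One caveat when running this end to end: on the very first run model.pth does not exist yet, so build the network with the commented-out get_model_instance_segmentation(num_classes) line in main() instead of torch.load('model.pth'); once training has saved a checkpoint, later runs can resume by loading it.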