深度学习语义分割实验:Unet网络/MSRC2数据集

Posted zstar-_

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了深度学习语义分割实验:Unet网络/MSRC2数据集相关的知识,希望对你有一定的参考价值。

本实验使用Unet网络对MSRC2数据集进行语义分割
源代码文件和MSRC2数据集获取方式见文末

1.数据划分

把文件夹中的图片数据整理成csv文件,每一行记录一张图片及其对应标签图的路径

class image2csv(object):
    """Split an image/label folder pair into train/val/test CSV path lists.

    Each CSV row holds ``image_path,label_path``; the files are written
    without a header row and without an index column.

    Args:
        data_root: Directory that receives ``train.csv``, ``val.csv`` and
            ``test.csv``.
        image_dir: Directory containing the input images.
        label_dir: Directory containing the label images, expected to be
            named ``<image name up to the first dot>_GT.bmp``.
        slice_data: Sequence whose first two items are the train and the
            validation fractions; whatever remains becomes the test set.
        width_input: Width that both image and label must strictly exceed.
        height_input: Height that both image and label must strictly exceed.
    """

    def __init__(self, data_root, image_dir, label_dir, slice_data, width_input, height_input):
        self.data_root = data_root
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.slice_train = slice_data[0]
        self.slice_val = slice_data[1]
        self.width = width_input
        self.height = height_input

    def read_path(self):
        """Collect the image/label path pairs that pass the size filter.

        Side effect: stores the number of accepted pairs in
        ``self.data_length``.

        Returns:
            dict: ``{'image': [...], 'label': [...]}`` with both lists in the
            same order.
        """
        images = []
        labels = []
        # NOTE: entries are visited in whatever order os.listdir returns them,
        # so the resulting split follows that order.
        for im in os.listdir(self.image_dir):
            label_name = im.split('.')[0] + '_GT' + '.bmp'
            image_path = os.path.join(self.image_dir, im)
            label_path = os.path.join(self.label_dir, label_name)
            if not os.path.exists(label_path):
                continue
            # Images differ in size; keep a pair only when both the image and
            # its label are strictly larger than the requested width/height.
            # Context managers make sure the file handles are released.
            with Image.open(image_path) as image_file:
                size_w, size_h = image_file.size
            with Image.open(label_path) as label_file:
                size_lw, size_lh = label_file.size
            if min(size_w, size_lw) > self.width and min(size_h, size_lh) > self.height:
                images.append(image_path)
                labels.append(label_path)
        self.data_length = len(images)  # number of usable image/label pairs
        data_path = {
            'image': images,
            'label': labels,
        }

        return data_path

    def generate_csv(self):
        """Write ``train.csv``, ``val.csv`` and ``test.csv`` into ``data_root``."""
        data_path = self.read_path()  # dict of path lists
        data_path_pd = pd.DataFrame(data_path)
        # Row index where the training slice ends.
        train_slice_point = int(self.slice_train * self.data_length)
        # Row index where the validation slice ends; the rest is test data.
        validation_slice_point = int(
            (self.slice_train + self.slice_val) * self.data_length)

        train_csv = data_path_pd.iloc[:train_slice_point, :]
        validation_csv = data_path_pd.iloc[train_slice_point:validation_slice_point, :]
        test_csv = data_path_pd.iloc[validation_slice_point:, :]

        # No header and no index: every line is just "image_path,label_path".
        train_csv.to_csv(os.path.join(self.data_root, 'train.csv'),
                         header=False, index=False)
        validation_csv.to_csv(os.path.join(self.data_root, 'val.csv'),
                              header=False, index=False)
        test_csv.to_csv(os.path.join(self.data_root, 'test.csv'),
                        header=False, index=False)

2.数据预处理

颜色与分类标签的转换

语义分割主要是构建一个颜色图(colormap),对每一类分割的对象分别给予不同的颜色标注。

def colormap(n):
    """Build the first ``n`` entries of a bit-interleaved RGB palette.

    For every ``j`` in 0..7, bit ``3*j`` of the class index contributes
    ``1 << (7 - j)`` to red, bit ``3*j + 1`` to green and bit ``3*j + 2`` to
    blue.

    Args:
        n: Number of palette entries to generate.

    Returns:
        numpy.ndarray: ``uint8`` array of shape ``(n, 3)`` holding RGB rows.
    """
    palette = np.zeros([n, 3]).astype(np.uint8)
    for index in range(n):
        rgb = [0, 0, 0]
        for j in range(8):
            weight = 1 << (7 - j)
            for channel in range(3):
                # Pick out the single index bit that feeds this channel.
                bit = (index >> (3 * j + channel)) & 1
                rgb[channel] += weight * bit
        palette[index] = rgb
    return palette
   
class label2image():
    """Turn class-index maps into RGB images through a palette lookup.

    Args:
        num_classes: Number of leading palette rows to keep.
    """

    def __init__(self, num_classes=22):
        # NOTE(review): this generated palette is not the same as the
        # hard-coded list used by image2label (e.g. entry 5 is [128, 0, 128]
        # here but [0, 128, 128] there) - confirm which one is intended.
        full_palette = colormap(256)
        self.colormap = full_palette[:num_classes].astype('uint8')

    def __call__(self, label_pred, label_true):
        """Return ``(pred_rgb, true_rgb)`` by indexing the palette with both label maps."""
        palette = self.colormap
        return palette[label_pred], palette[label_true]

class image2label():
    """Convert an RGB label image into a 2-D array of class indices.

    A lookup table with one slot per possible 24-bit RGB value maps every
    palette colour to its class index. Colours that are not in the palette
    map to index 0.

    Args:
        num_classes: Number of leading palette colours to use.
    """

    def __init__(self, num_classes=22):
        # One RGB colour per class.
        colormap = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
                    [0, 0, 128], [0, 128, 128], [128, 128, 128], [192, 0, 0],
                    [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128],
                    [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0],
                    [0, 192, 0], [128, 64, 128], [0, 192, 128], [128, 192, 128],
                    [64, 64, 0], [192, 64, 0]]
        self.colormap = colormap[:num_classes]
        # Table with 256**3 entries, one per possible RGB combination.
        # uint8 is enough for the (at most 22) class ids and keeps the table
        # at ~16 MB instead of ~128 MB for the default float64.
        cm2lb = np.zeros(256 ** 3, dtype=np.uint8)
        for i, cm in enumerate(self.colormap):
            # Pack (R, G, B) into a single integer key and store the class id.
            cm2lb[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i
        self.cm2lb = cm2lb

    def __call__(self, image):
        """Map an image to class indices.

        Args:
            image: Array-like whose last axis starts with the R, G, B
                channels; indexed as ``image[:, :, c]``.

        Returns:
            numpy.ndarray: ``int64`` array with one class index per pixel.
        """
        # int64 so that packing the three channels cannot overflow.
        image = np.array(image, dtype=np.int64)
        idx = (image[:, :, 0] * 256 + image[:, :, 1]) * 256 + image[:, :, 2]
        # Look up the class id of every packed colour.
        label = np.array(self.cm2lb[idx], dtype=np.int64)
        return label

图片裁剪

class RandomCrop(object):
    """Randomly crop an image and its label at the same location.

    Args:
        size: Crop size as ``(height, width)``, or a single int for a square
            crop.
    """

    def __init__(self, size):
        self.size = size

    @staticmethod
    def get_params(img, output_size):
        """Pick a random crop window inside ``img``.

        Args:
            img: Object exposing ``size`` as ``(width, height)``.
            output_size: ``(height, width)`` of the crop, or an int for a
                square crop.

        Returns:
            tuple: ``(top, left, height, width)`` of the crop window.

        Raises:
            ValueError: If the requested crop is larger than the image.
        """
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        w, h = img.size
        th, tw = output_size
        if th > h or tw > w:
            # Fail with a readable message instead of randint's
            # "empty range" error.
            raise ValueError(
                'crop size (h=%d, w=%d) exceeds image size (h=%d, w=%d)'
                % (th, tw, h, w))
        if w == tw and h == th:
            return 0, 0, h, w
        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def __call__(self, img, label):
        """Crop ``img`` and ``label`` with one shared random window."""
        i, j, h, w = self.get_params(img, self.size)
        # crop() takes a (left, upper, right, lower) box.
        box = (j, i, j + w, i + h)
        return img.crop(box), label.crop(box)

3.数据加载

class CustomDataset(Dataset):
    """Dataset yielding ``(image_tensor, label_tensor)`` pairs listed in a CSV.

    The CSV is read without a header row: column 0 holds image paths and
    column 1 holds label paths.

    Args:
        data_root_csv: Path to the CSV file.
        input_width: Width of the random crop.
        input_height: Height of the random crop.
        test: Accepted for interface compatibility; not used here.
    """

    def __init__(self, data_root_csv, input_width, input_height, test=False):
        # Run the parent class initialiser as well.
        super(CustomDataset, self).__init__()
        self.data_root_csv = data_root_csv
        # header=None: the CSV has no header line, so the first row is a
        # sample and must not be consumed as column names.
        self.data_all = pd.read_csv(self.data_root_csv, header=None)
        self.image_list = list(self.data_all.iloc[:, 0])
        self.label_list = list(self.data_all.iloc[:, 1])
        self.width = input_width
        self.height = input_height
        # Colour-to-class converter, built on first use and then reused so
        # its lookup table is not rebuilt for every sample.
        self._label_encoder = None

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        img = Image.open(self.image_list[index]).convert('RGB')
        label = Image.open(self.label_list[index]).convert('RGB')

        # RandomCrop unpacks its size as (height, width).
        img, label = self.train_transform(
            img, label, crop_size=(self.height, self.width))

        return img, label

    def train_transform(self, image, label, crop_size=(256, 256)):
        """Crop both inputs identically, then convert them to tensors.

        Args:
            image: RGB input image.
            label: RGB label image.
            crop_size: ``(height, width)`` of the random crop.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the normalised
            tensor and ``label`` is a long tensor of class indices.
        """
        # RandomCrop(crop_size) builds the transform; the second call applies it.
        image, label = RandomCrop(crop_size)(image, label)
        tfs = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        image = tfs(image)
        encoder = getattr(self, '_label_encoder', None)
        if encoder is None:
            encoder = self._label_encoder = image2label()
        label = encoder(label)
        label = torch.from_numpy(label).long()
        return image, label

4.Unet 网络结构

双卷积结构

class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by the second convolution.
        mid_channels: Channels between the two convolutions; falls back to
            ``out_channels`` when falsy.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        hidden = mid_channels or out_channels
        stages = [
            nn.Conv2d(in_channels, hidden, kernel_size=3, padding=1),
            nn.BatchNorm2d(hidden),
            nn.ReLU(inplace=True),
            nn.Conv2d(hidden, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both stages."""
        out = self.double_conv(x)
        return out

下采样

class Down(nn.Module):
    """Downscaling step: 2x2 max pooling followed by a ``DoubleConv``.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by the double convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Pool ``x`` and apply the double convolution."""
        out = self.maxpool_conv(x)
        return out

上采样

class Up(nn.Module):
    """Upscaling step: upsample, pad to the skip tensor, concatenate, convolve.

    Args:
        in_channels: Channels after concatenating skip and upsampled tensors.
        out_channels: Channels produced by the double convolution.
        bilinear: Use bilinear ``nn.Upsample`` when truthy, otherwise a
            transposed convolution that halves the channel count.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        half = in_channels // 2
        if not bilinear:
            self.up = nn.ConvTranspose2d(
                in_channels, half, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            self.up = nn.Upsample(
                scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half)

    def forward(self, x1, x2):
        """Upsample ``x1``, align it with ``x2`` and fuse both.

        Args:
            x1: Tensor to upsample.
            x2: Skip-connection tensor; its dims 2 and 3 define the target
                size.
        """
        upsampled = self.up(x1)
        # Size gap along dim 2 (rows) and dim 3 (columns).
        gap_rows = x2.size()[2] - upsampled.size()[2]
        gap_cols = x2.size()[3] - upsampled.size()[3]
        left = gap_cols // 2
        top = gap_rows // 2

        # F.pad takes the padding for the last dimension first.
        upsampled = F.pad(
            upsampled, [left, gap_cols - left, top, gap_rows - top])
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)

输出

class OutConv(nn.Module):
    """Final 1x1 convolution mapping feature channels to output channels.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        projection = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.conv = projection

    def forward(self, x):
        """Apply the 1x1 convolution to ``x``."""
        out = self.conv(x)
        return out

整体结构

class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        # (原文代码在此处被截断:self.outc 的定义以及 forward 方法缺失)

相关文章:

UNET图像语义分割入门深度学习

Unet++语义分割网络(网络结构分析+代码分析)

「深度学习一遍过」必修18:基于pytorch的语义分割模型实现

深度学习系列分割网络模型(FCNUnetUnet++SegNetRefineNet)

基于深度学习的图像语义分割技术概述之背景与深度网络架构

Unet 语义分割模型(Keras)| 以细胞图像为例