Section 7: CNN Exercise 2 (Hand Gesture Recognition)

Posted by 快乐江湖


1: Introduction to the Sebastien Marcel Static Hand Posture Database (a static hand gesture dataset)

The Sebastien Marcel Static Hand Posture Database provides six hand postures (shown in the figure below), representing:

  • A
  • B
  • C
  • five
  • point
  • V

The images are in .ppm format:

  • PBM is a bitmap (black and white only, no gray levels)
  • PGM is a grayscale image
  • PPM is a full-color pixmap built from the three RGB channels

After downloading and extracting the data, the directory layout is as follows.
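PIL can read .ppm files directly. As a quick sanity check (not part of the original post; the path below is just one example file, assuming the archive was extracted under ./data):

from PIL import Image

# open one sample image and inspect its format, color mode and size
img = Image.open("./data/Marcel-Train/A/A-train0577.ppm")
print(img.format, img.mode, img.size)  # expected: PPM RGB (width, height)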

2: Model Design

The network is structured as follows.

The structure of the MTB block is shown in the figure below.

3: Code

(1) Data preprocessing script

preprocess.py: the number of images per folder in the raw dataset varies, and the test set additionally contains two kinds of test images taken under different conditions (complex and uniform). We therefore merge the original training and test sets, then re-split the combined data into training, validation, and test sets at an 8:1:1 ratio. This produces three files, train_hand_gesture.txt, eval_hand_gesture.txt, and test_hand_gesture.txt; all subsequent steps operate on these three files rather than on the raw dataset.

Taking train_hand_gesture.txt as an example, each line contains an image's class id and its path:

  • “A”:0
  • “B”:1
  • “C”:2
  • “Five”:3
  • “Point”:4
  • “V”:5
0|./data/Marcel-Train\A\A-train1043.ppm
0|./data/Marcel-Train\A\A-train0577.ppm
2|./data/Marcel-Train\C\C-train455.ppm
0|./data/Marcel-Train\A\A-train1291.ppm
4|./data/Marcel-Train\Point\Point-train0220.ppm
0|./data/Marcel-Train\A\A-train1072.ppm
0|./data/Marcel-Train\A\A-train0599.ppm
4|./data/Marcel-Train\Point\Point-train0060.ppm
2|./data/Marcel-Test\C\complex\C-complex07.ppm
4|./data/Marcel-Train\Point\Point-train0944.ppm
4|./data/Marcel-Train\Point\Point-train1333.ppm
4|./data/Marcel-Train\Point\Point-train0778.ppm
0|./data/Marcel-Train\A\A-train0795.ppm
3|./data/Marcel-Train\Five\Five-train148.ppm
3|./data/Marcel-Train\Five\Five-train394.ppm
3|./data/Marcel-Train\Five\Five-train441.ppm
...
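Each line can be parsed by splitting on the "|" separator. As an optional check (not part of the original scripts), the class balance of each metafile can be tallied like this, assuming the three files sit in the working directory:

from collections import Counter

# count how many samples of each class a metadata file contains
def class_counts(meta_path):
    with open(meta_path, 'r') as fr:
        return Counter(line.strip().split('|')[0] for line in fr if line.strip())

print(class_counts('train_hand_gesture.txt'))
print(class_counts('eval_hand_gesture.txt'))
print(class_counts('test_hand_gesture.txt'))

Because the split is done per class, each file should contain roughly 80%, 10%, and 10% of every class respectively.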

The full code is shown below, with comments at the key points.

import os
import json
from Parameters import parameters
import random
from PIL import Image

random.seed(parameters.seed)


# recursively collect every file under root whose extension is in suffix, returning the list of paths
def recursive_fetching(root, suffix):
    all_file_path = []

    # get_all_files is called recursively
    def get_all_files(path):
        all_file_list = os.listdir(path)
        # iterate over all files and directories under path
        for file in all_file_list:
            filepath = os.path.join(path, file)
            # if it is a directory, recurse into it
            if os.path.isdir(filepath):
                get_all_files(filepath)
            # if it is a file, record its full path in all_file_path
            elif os.path.isfile(filepath):
                all_file_path.append(filepath)

    # start the recursion from the root directory
    get_all_files(root)
    # keep only the files whose extension matches suffix
    file_paths = [it for it in all_file_path if os.path.split(it)[-1].split('.')[-1].lower() in suffix]

    return file_paths

# load a metadata file: each line becomes [cls_id, path]
def load_meta(meta_path):
    with open(meta_path, 'r') as fr:
        return [line.strip().split('|') for line in fr.readlines()]

# load an image with PIL
def load_image(image_path):
    return Image.open(image_path)

# mapping between class names and ids
cls_mapper = {
    "clsToid": {"A": 0, "B": 1, "C": 2, "Five": 3, "Point": 4, "V": 5},
    "idTocls": {0: "A", 1: "B", 2: "C", 3: "Five", 4: "Point", 5: "V"}
}

if not os.path.exists(parameters.cls_mapper_path):
    json.dump(cls_mapper, open(parameters.cls_mapper_path, 'w'))

train_items = recursive_fetching(parameters.train_data_root, 'ppm')  # image paths under Marcel-Train
test_items = recursive_fetching(parameters.test_data_root, 'ppm')  # image paths under Marcel-Test
dataset = train_items + test_items  # merge the two sets
random.shuffle(dataset)  # shuffle the combined dataset
dataset_num = len(dataset)
print("Total number of images:", dataset_num)



"""
    最终dataset_dict大概长这样子
    dataset_dict = 
    0: ["./data/Marcel-Test/A/complex/A-complex32.ppm", "./data/Marcel-Test/A/complex/A-complex31.ppm", ...]
    1: ["./data/Marcel-Train/B/B-train119.ppm", "./data/Marcel-Test/B/uniform/B-uniform04.ppm", ...]
    ...
    5: [...]

"""
dataset_dict = 
for it in dataset:
    # 例如"./data/Marcel-Train/B/B-train119.ppm",cls_name就是B, cls_id就是1
    cls_name = os.path.split(it)[-1].split('-')[0]
    cls_id = cls_mapper["clsToid"][cls_name]
    # group every image of the same class into one list keyed by its cls_id (e.g. all "B" images under key 1)
    if cls_id not in dataset_dict:
        dataset_dict[cls_id] = [it]
    else:
        dataset_dict[cls_id].append(it)

# split each class list into train, eval, and test according to the ratios
train_ratio, eval_ratio, test_ratio = 0.8, 0.1, 0.1
train_set, eval_set, test_set = [], [], []

for idx, set_list in dataset_dict.items():
    length = len(set_list)
    train_num, eval_num = int(length*train_ratio), int(length*eval_ratio)
    test_num = length - train_num - eval_num
    random.shuffle(set_list)
    train_set.extend(set_list[:train_num])
    eval_set.extend(set_list[train_num:train_num+eval_num])
    test_set.extend(set_list[train_num+eval_num:])

random.shuffle(train_set)
random.shuffle(eval_set)
random.shuffle(test_set)
# print(train_set)
# print(eval_set)
# print(test_set)
# print(len(train_set) + len(eval_set) + len(test_set))

# write the metadata files

with open(parameters.metadata_train_path, 'w') as fw:
    for path in train_set:
        cls_name = os.path.split(path)[-1].split('-')[0]
        cls_id = cls_mapper["clsToid"][cls_name]
        fw.write("%d|%s\\n" % (cls_id, path))


with open(parameters.metadata_eval_path, 'w') as fw:
    for path in eval_set:
        cls_name = os.path.split(path)[-1].split('-')[0]
        cls_id = cls_mapper["clsToid"][cls_name]
        fw.write("%d|%s\\n" % (cls_id, path))

with open(parameters.metadata_test_path, 'w') as fw:
    for path in test_set:
        cls_name = os.path.split(path)[-1].split('-')[0]
        cls_id = cls_mapper["clsToid"][cls_name]
        fw.write("%d|%s\\n" % (cls_id, path))



# sanity check: inspect the color mode and size of every training image
mode_set, size_set = [], []
for _, path in load_meta(parameters.metadata_train_path):
    img = load_image(path)
    mode_set.append(img.mode)
    size_set.append(img.size)


print(set(mode_set), set(size_set))


The result shows that all of these images are in RGB mode but their sizes vary, so they need to be resized to a uniform size when they are loaded later.
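To also see how the sizes are distributed, a small addition (not in the original script) can be appended after the loop above:

from collections import Counter

# tally the most frequent (width, height) pairs among the training images
size_counter = Counter(size_set)
print(size_counter.most_common(5))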

(2) Writing MyDataset

PyTorch's DataLoader has to be given a dataset object, here our own MyDataset, which subclasses torch.utils.data.Dataset and must override at least __getitem__(self, idx) and __len__(self). While writing MyDataset we also introduce data augmentation, i.e. transforms.Compose.

import torch
from torch.utils.data import DataLoader
from Parameters import parameters
from preprocess import load_meta, load_image
from torchvision import transforms

transform_train = transforms.Compose(
    [
        transforms.Resize((112, 112)),  # make sure every input image is 112x112
        transforms.RandomRotation(degrees=45),  # reduce the impact of tilted images
        transforms.GaussianBlur(kernel_size=(3, 3)),  # reduce the impact of blurry images
        transforms.RandomHorizontalFlip(),  # account for both left and right hands
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # normalize
    ]
)

transform_test = transforms.Compose(
    [
        transforms.Resize((112, 112)),  # make sure every input image is 112x112
        # transforms.RandomRotation(degrees=45),
        # transforms.GaussianBlur(kernel_size=(3, 3)),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # normalize
    ]
)

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, metadata_path):
        self.dataset = load_meta(metadata_path)  # [(0, image_path), (), ...]
        self.metadata_path = metadata_path

    def __getitem__(self, idx):
        item = self.dataset[idx]
        cls_id, path = int(item[0]), item[1]
        img = load_image(path)

        if self.metadata_path == parameters.metadata_train_path or self.metadata_path == parameters.metadata_eval_path:
            return transform_train(img), cls_id

        # the test set does not use data augmentation
        return transform_test(img), cls_id

    def __len__(self):
        return len(self.dataset)
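A DataLoader can then wrap MyDataset in the usual way. The batch size and worker count below are illustrative rather than values from the original Parameters file, and the import assumes the class above is saved as MyDataset.py:

from torch.utils.data import DataLoader

from Parameters import parameters
from MyDataset import MyDataset  # assumption: the class above lives in MyDataset.py

# illustrative settings; the original post keeps most hyperparameters in Parameters
train_loader = DataLoader(MyDataset(parameters.metadata_train_path),
                          batch_size=32, shuffle=True, num_workers=2)

for images, labels in train_loader:
    print(images.shape, labels.shape)  # e.g. torch.Size([32, 3, 112, 112]) and torch.Size([32])
    break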

(3) Model code

import torch
import torch.nn as nn
import torch.nn.functional as F

from Parameters import parameters

# define the mish activation function
def mish(x):
    return x * torch.tanh(F.softplus(x))

# wrap mish as an nn.Module
class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x):
        return mish(x)


# depthwise separable convolution
class DSConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size):
        super(DSConv2d, self).__init__()
        # kernel_size must be odd so that "same" padding works
        assert kernel_size % 2 == 1, "kernel_size must be odd"
        # depthwise convolution: one filter per input channel, channel count unchanged
        self.depth_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            padding=(kernel_size//2, kernel_size//2),
            groups=in_channels
        )
        self.pointwise_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1
        )

    def forward(self, input_x):
        out = self.depth_conv(input_x)
        out = self.pointwise_conv(out)

        return out
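As a quick illustration of why the depthwise separable factorization is used (this check is not part of the original post; it assumes it is run in the same file as the imports and the DSConv2d class above), its parameter count can be compared with a plain convolution of the same shape:

# compare parameter counts for a 128 -> 128 channel, 3x3 convolution
ds_conv = DSConv2d(in_channels=128, out_channels=128, kernel_size=3)
full_conv = nn.Conv2d(128, 128, kernel_size=3, padding=1)

def num_params(m):
    return sum(p.numel() for p in m.parameters())

print(num_params(ds_conv), num_params(full_conv))  # roughly 18k vs. 148k parameters

# both keep the spatial size unchanged thanks to the padding
x = torch.randn(1, 128, 28, 28)
print(ds_conv(x).shape, full_conv(x).shape)  # torch.Size([1, 128, 28, 28]) for both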

# MTB block (a residual structure with two parallel branches)
class MTB(nn.Module):
    def __init__(self, in_channels):
        super(MTB, self).__init__()

        self.left_flow = nn.Sequential(
            # pointwise (1x1) convolution
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=1),
            nn.BatchNorm2d(in_channels),
            Mish(),
            # depthwise separable convolution
            DSConv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=3),
            nn.BatchNorm2d(in_channels),
            Mish(),
            # 7x7 convolution
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=7, padding=(7//2, 7//2)),
        )

        self.right_flow = nn.Sequential(
            # 7x7 convolution
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=7, padding=(7 // 2, 7 // 2)),
            nn.BatchNorm2d(in_channels),
            Mish(),
            # depthwise separable convolution
            DSConv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=3),
            nn.BatchNorm2d(in_channels),
            Mish(),
            # pointwise (1x1) convolution
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=1),
        )

    def forward(self, input_ft):
        left = self.left_flow(input_ft)
        right = self.right_flow(input_ft)
        out = left + right + input_ft

        out = mish(out)
        return out
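Since both branches and the skip connection keep the channel count and spatial size unchanged, an MTB block leaves the feature-map shape intact, so it can be inserted between any two layers. A quick check (illustrative, run alongside the class above):

block = MTB(in_channels=64)
x = torch.randn(2, 64, 56, 56)
print(block(x).shape)  # torch.Size([2, 64, 56, 56]): same shape as the input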


# [N, 3, 112, 112] -> [N, 256, 7, 7]
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=parameters.data_channels, out_channels=64, kernel_size=3, padding=(3//2, 3//2)),
            nn.BatchNorm2d(64),
            Mish(),
            # MTB block
            MTB(in_channels=64),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=(3 // 2, 3 // 2)),
            nn.BatchNorm2d(128),
            Mish(),
            # MTB block
            MTB(in_channels=128),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=(3 // 2, 3 // 2)),
            nn.BatchNorm2d(256),
            Mish(),
            # MTB block
            MTB(in_channels=256),
            nn.MaxPool2d(kernel_size=2, stride=2),

            MTB(in_channels=256),
            MTB(in_channels=256),
            nn.MaxPool2d(kernel_size=2, stride=2)  # [N, 256, 14, 14] -> [N, 256, 7, 7]
        )
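
        # What follows is a sketch of how the network could be completed; the
        # classifier head below (including its layer sizes) is an assumption for
        # illustration, not the layout from the original network figure.
        self.fc = nn.Sequential(
            nn.Linear(256 * 7 * 7, 1024),
            Mish(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 6)  # six gesture classes
        )

    def forward(self, x):
        out = self.conv(x)               # [N, 3, 112, 112] -> [N, 256, 7, 7]
        out = out.view(out.size(0), -1)  # flatten to [N, 256*7*7]
        return self.fc(out)              # class logits, shape [N, 6]


# quick shape check (illustrative)
if __name__ == '__main__':
    net = Net()
    dummy = torch.randn(1, 3, 112, 112)
    print(net(dummy).shape)  # expected: torch.Size([1, 6])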
