mmaction2 SlowFast Behavior Analysis (Commercial Grade): Configuration File and Custom AVA Dataset Loading Explained
Posted by WinstonYF
[mmaction2 SlowFast Behavior Analysis (Commercial Grade)] Series Index
1. Configuration File
The configuration file below is a custom one; for background, see the mmaction2 documentation on the config system for spatio-temporal action detection.
# Model settings
model = dict(  # Model configuration
    type='FastRCNN',  # Type of spatio-temporal detector
    backbone=dict(  # Backbone settings
        type='ResNet3dSlowFast',  # Backbone name
        pretrained=None,  # URL or file path of a pretrained model
        resample_rate=4,  # tau, the parameter τ in the SlowFast paper
        speed_ratio=4,  # alpha, the parameter α in the paper
        channel_ratio=8,  # beta_inv, the inverse of the paper's β
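        # Note: in mmaction2's ResNet3dSlowFast, the slow pathway keeps every
        # resample_rate-th input frame and the fast pathway keeps every
        # (resample_rate / speed_ratio)-th frame. With clip_len=32 (set in the
        # pipelines below), that is 32 / 4 = 8 frames for the slow pathway and
        # 32 / 1 = 32 frames for the fast one; the fast pathway's width is
        # 1/channel_ratio of the slow pathway's.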
        slow_pathway=dict(  # Slow pathway
            type='resnet3d',  # Uses a ResNet3d network
            depth=50,  # Network depth: 50
            pretrained=None,  # No pretrained model
            lateral=True,  # Whether to use lateral connections
            conv1_kernel=(1, 7, 7),  # Kernel size of the first conv layer
            dilations=(1, 1, 1, 1),  # Dilation of each stage. Default: (1, 1, 1, 1)
            conv1_stride_t=1,  # Temporal stride of the first conv layer
            pool1_stride_t=1,  # Temporal stride of the first pooling layer
            inflate=(0, 0, 1, 1),  # Inflate dims of each block. Default: (0, 0, 1, 1)
            spatial_strides=(1, 2, 2, 1)),  # Spatial strides of the residual blocks of each stage. Default: (1, 2, 2, 2)
        fast_pathway=dict(  # Fast pathway
            type='resnet3d',  # Uses a ResNet3d network
            depth=50,  # Network depth: 50
            pretrained=None,  # No pretrained model
            lateral=False,  # Whether to use lateral connections
            base_channels=8,  # Number of base channels
            conv1_kernel=(5, 7, 7),  # Kernel size of the first conv layer
            conv1_stride_t=1,  # Temporal stride of the first conv layer
            pool1_stride_t=1,  # Temporal stride of the first pooling layer
            spatial_strides=(1, 2, 2, 1))),  # Spatial strides of the residual blocks of each stage. Default: (1, 2, 2, 2)
    roi_head=dict(  # roi_head settings
        type='Via3RoIHead',  # roi_head name
        bbox_roi_extractor=dict(  # bbox_roi_extractor settings
            type='SingleRoIExtractor3D',  # bbox_roi_extractor name
            roi_layer_type='RoIAlign',  # Type of the RoI op
            output_size=8,  # Output feature size of the RoI op
            with_temporal_pool=True),  # Whether to pool over the temporal dimension
        bbox_head=dict(  # bbox_head settings
            type='BBoxHeadAVA',  # bbox_head name
            in_channels=2304,  # Number of input feature channels: 2048 + 256
            num_classes=7,  # Number of action classes + 1 (background)
            topk=(1, 1),  # Parameter for evaluating multilabel accuracy. Default: (3, 5)
            multilabel=True,  # Whether the dataset is multilabel
            dropout_ratio=0.5)  # Dropout ratio
    ),
    # Training and testing settings of the model
    train_cfg=dict(  # Training hyperparameters for FastRCNN
        rcnn=dict(  # rcnn training settings
            assigner=dict(  # assigner settings
                type='MaxIoUAssignerAVA',  # assigner name
                pos_iou_thr=0.9,  # Positive IoU threshold: IoU > pos_iou_thr -> positive
                neg_iou_thr=0.9,  # Negative IoU threshold: IoU < neg_iou_thr -> negative
                min_pos_iou=0.9),  # Minimum acceptable IoU for a positive sample
            sampler=dict(  # sampler settings
                type='RandomSampler',  # sampler name
                num=32,  # Sampler batch size
                pos_fraction=1,  # Fraction of positive bounding boxes in the sample
                neg_pos_ub=-1,  # Upper bound on the negative-to-positive sample ratio
                add_gt_as_proposals=True),  # Whether to add ground-truth boxes as proposals
            pos_weight=1.0,  # Loss weight of positive samples
            debug=False)),  # Whether to run in debug mode
    test_cfg=dict(  # Testing hyperparameters for FastRCNN
        rcnn=dict(  # rcnn testing settings
            action_thr=0.0)))  # Score threshold for keeping an action
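Once written to a config file, this model dict is turned into a network through mmaction2's registry. A minimal sketch, with a hypothetical config path (note that train_cfg/test_cfg are nested inside the model dict in this config):

from mmcv import Config
from mmaction.models import build_model

# Hypothetical path -- substitute the actual config file of this project
cfg = Config.fromfile('configs/detection/interaction/slowfast_interaction.py')
# train_cfg and test_cfg already live inside cfg.model here,
# so they need not be passed separately
model = build_model(cfg.model)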
# Image normalization settings
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
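These mean/std values are the standard ImageNet RGB statistics, and to_bgr=False keeps the channel order as RGB. Conceptually, the Normalize step in the pipelines below performs per-channel standardization, as in this minimal NumPy sketch:

import numpy as np

# One frame, (H, W, C) in RGB, as loaded by RawFrameDecode
img = np.random.randint(0, 256, (256, 256, 3)).astype(np.float32)
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
normalized = (img - mean) / std  # zero mean, unit variance per channel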
train_pipeline = [  # List of preprocessing steps in the training data pipeline
    dict(  # SampleVia3Frames settings
        type='SampleVia3Frames',  # Selects which video frames to sample
        clip_len=32,  # Number of frames in each output clip
        frame_interval=2),  # Temporal interval between sampled frames
    dict(  # RawFrameDecode settings
        type='RawFrameDecode'),  # Loads and decodes the selected frames
    dict(  # RandomRescale settings
        type='RandomRescale',  # Randomly rescales the short edge within a given range
        scale_range=(256, 320)),  # Short-edge range for RandomRescale
    dict(  # RandomCrop settings
        type='RandomCrop',  # Randomly crops a patch of the given size
        size=256),  # Crop size
    dict(  # Flip settings
        type='Flip',  # Image flipping
        flip_ratio=0.5),  # Probability of applying the flip
    dict(  # Normalize settings
        type='Normalize',  # Image normalization
        mean=[123.675, 116.28, 103.53],  # Normalization mean
        std=[58.395, 57.12, 57.375],  # Normalization std
        to_bgr=False),
    dict(  # FormatShape settings
        type='FormatShape',  # Reformats the images into the given input layout
        input_format='NCTHW',  # Final image layout
        collapse=True),  # Collapse the N dimension when N == 1
    dict(  # Rename settings
        type='Rename',  # Renames keys
        mapping=dict(imgs='img')),  # Renaming map
    dict(  # ToTensor settings
        type='ToTensor',  # Converts values of the given keys to Tensor
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),  # Keys to convert to Tensor
    dict(  # ToDataContainer settings
        type='ToDataContainer',  # Wraps the given fields in DataContainer objects
        fields=[  # Field dicts
            dict(key=['proposals', 'gt_bboxes', 'gt_labels'],  # Keys to wrap in DataContainer
                 stack=False)  # Whether to stack these tensors
        ]),
    dict(  # Collect settings
        type='Collect',  # Decides which keys are passed to the spatio-temporal detector
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],  # Input keys
        meta_keys=['original_shape', 'img_shape', 'flip_direction', 'img_norm_cfg']  # Input meta keys
    )
]
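In mmaction2, each of these dicts is built through the pipeline registry and chained by Compose, with every step reading and writing a shared results dict. A minimal sketch, assuming the project's custom steps (SampleVia3Frames, etc.) are registered:

from mmaction.datasets.pipelines import Compose

# Build every step from its dict and chain them in order
pipeline = Compose(train_pipeline)
# The dataset seeds `results` with the keys the first steps expect
# (frame directory, total frame count, proposals, GT boxes, ...), then:
#   results = pipeline(results)
# After FormatShape with collapse=True, results['img'] has shape (C, T, H, W).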
val_pipeline = [  # List of preprocessing steps in the validation data pipeline
    dict(  # SampleVia3Frames settings
        type='SampleVia3Frames',  # Selects which video frames to sample
        clip_len=32,  # Number of frames in each output clip
        frame_interval=2),  # Temporal interval between sampled frames
    dict(  # RawFrameDecode settings
        type='RawFrameDecode'),  # Loads and decodes the selected frames
    dict(  # Resize settings
        type='Resize',  # Resizes the images
        scale=(-1, 256)),  # Short edge 256, aspect ratio kept
    dict(  # Normalize settings
        type='Normalize',  # Image normalization
        mean=[123.675, 116.28, 103.53],  # Normalization mean
        std=[58.395, 57.12, 57.375],
        to_bgr=False),
    dict(  # FormatShape settings
        type='FormatShape',  # Reformats the images into the given input layout
        input_format='NCTHW',  # Final image layout
        collapse=True),  # Collapse the N dimension when N == 1
    dict(  # Rename settings
        type='Rename',  # Renames keys
        mapping=dict(imgs='img')),  # Renaming map
    dict(  # ToTensor settings
        type='ToTensor',  # Converts values of the given keys to Tensor
        keys=['img', 'proposals']),  # Keys to convert to Tensor
    dict(  # ToDataContainer settings
        type='ToDataContainer',  # Wraps the given fields in DataContainer objects
        fields=[  # Fields to wrap in DataContainer
            dict(  # Field dict
                key='proposals',  # Key to wrap in DataContainer
                stack=False)]),  # Whether to stack these tensors
    dict(  # Collect settings
        type='Collect',  # Decides which keys are passed to the spatio-temporal detector
        keys=['img', 'proposals'],  # Input keys
        meta_keys=['img_shape'],  # Input meta keys
        nested=True)  # Whether to wrap the data in a nested list
]
dataset_type = 'VIA3Dataset'  # Dataset type for training, validation, and testing
train_images_root = './Datasets/Interaction/images/train'  # 'data/Interaction/images/train'
train_annotations_root = './Datasets/Interaction/annotations/train'  # 'data/Interaction/annotations/train'
test_images_root = './Datasets/Interaction/images/test'  # 'data/Interaction/images/test'
test_annotations_root = './Datasets/Interaction/annotations/test'  # 'data/Interaction/annotations/test'
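These paths imply a directory layout roughly as follows (inferred from the config; verify against your own checkout):

Datasets/Interaction/
    images/
        train/          # extracted frames for the training sequences
        test/           # extracted frames for the test sequences
    annotations/
        train/          # seqN.json and seqN_proposal.json for each training sequence
        test/           # seqN.json and seqN_proposal.json for each test sequence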
train_seq1_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq1.json',
proposal_file=train_annotations_root + '/seq1_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq2_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq2.json',
proposal_file=train_annotations_root + '/seq2_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq3_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq3.json',
proposal_file=train_annotations_root + '/seq3_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq4_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq4.json',
proposal_file=train_annotations_root + '/seq4_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq5_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq5.json',
proposal_file=train_annotations_root + '/seq5_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq6_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq6.json',
proposal_file=train_annotations_root + '/seq6_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq7_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq7.json',
proposal_file=train_annotations_root + '/seq7_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq8_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq8.json',
proposal_file=train_annotations_root + '/seq8_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq9_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq9.json',
proposal_file=train_annotations_root + '/seq9_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq10_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq10.json',
proposal_file=train_annotations_root + '/seq10_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq11_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq11.json',
proposal_file=train_annotations_root + '/seq11_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq12_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq12.json',
proposal_file=train_annotations_root + '/seq12_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq13_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq13.json',
proposal_file=train_annotations_root + '/seq13_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq14_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq14.json',
proposal_file=train_annotations_root + '/seq14_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq15_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq15.json',
proposal_file=train_annotations_root + '/seq15_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq16_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq16.json',
proposal_file=train_annotations_root + '/seq16_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq17_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq17.json',
proposal_file=train_annotations_root + '/seq17_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq18_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq18.json',
proposal_file=train_annotations_root + '/seq18_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq19_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq19.json',
proposal_file=train_annotations_root + '/seq19_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq20_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq20.json',
proposal_file=train_annotations_root + '/seq20_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
test_seq5_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq5.json',
proposal_file=test_annotations_root + '/seq5_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
test_seq10_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq10.json',
proposal_file=test_annotations_root + '/seq10_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
test_seq15_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq15.json',
proposal_file=test_annotations_root + '/seq15_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
test_seq20_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq20.json',
proposal_file=test_annotations_root + '/seq20_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
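The twenty train configs and four test configs above differ only in the sequence index. Since mmcv config files are executed as ordinary Python, the same dicts could equally be generated in a loop; a sketch of that equivalent form:

def make_seq_cfg(i, images_root, annotations_root, pipeline):
    # One per-sequence dataset config, identical to the handwritten dicts above
    return dict(
        type=dataset_type,
        data_prefix=images_root,
        ann_file=f'{annotations_root}/seq{i}.json',
        proposal_file=f'{annotations_root}/seq{i}_proposal.json',
        attribute='person',
        custom_classes=None,
        pipeline=pipeline)

train_seq_cfgs = [make_seq_cfg(i, train_images_root, train_annotations_root,
                               train_pipeline) for i in range(1, 21)]
test_seq_cfgs = [make_seq_cfg(i, test_images_root, test_annotations_root,
                              val_pipeline) for i in (5, 10, 15, 20)]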
# Data loading settings
data = dict(
    # videos_per_gpu=8,
    # workers_per_gpu=10,
Related articles in this series:
(WIP) 03 mmaction2 SlowFast Behavior Analysis (Commercial Grade): Training on a Custom AVA Dataset
mmaction2 SlowFast Behavior Analysis (Commercial Grade): Project Download
mmaction2 SlowFast Behavior Analysis (Commercial Grade): Custom AVA Dataset, Part 1
mmaction2 Beginner Tutorial: SlowFast Training Configuration, Log Analysis, and Test Result Analysis