mmaction2 SlowFast Behavior Analysis (Commercial Grade): Configuration File and Custom AVA Dataset Loading Explained
Posted by WinstonYF
[mmaction2 SlowFast Behavior Analysis (Commercial Grade)] Series Index
1. Configuration File
The configuration file below is a custom one; for background, see the mmaction2 documentation on the config system for spatio-temporal action detection.
# Model settings
model = dict(  # Model configuration
    type='FastRCNN',  # Type of spatio-temporal detector
    backbone=dict(  # Backbone settings
        type='ResNet3dSlowFast',  # Backbone name
        pretrained=None,  # URL or file path of a pretrained model
        resample_rate=4,  # tau, the parameter τ in the SlowFast paper
        speed_ratio=4,  # alpha, the parameter α in the paper
        channel_ratio=8,  # beta_inv, the inverse of the paper's β
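        # Note: in mmaction2's ResNet3dSlowFast, the slow pathway keeps every
        # resample_rate-th input frame and the fast pathway keeps every
        # (resample_rate / speed_ratio)-th frame. With clip_len=32 (set in the
        # pipelines below), that is 32 / 4 = 8 frames for the slow pathway and
        # 32 / 1 = 32 frames for the fast one; the fast pathway's width is
        # 1/channel_ratio of the slow pathway's.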
        slow_pathway=dict(  # Slow pathway
            type='resnet3d',  # Uses a ResNet3d network
            depth=50,  # Network depth: 50
            pretrained=None,  # No pretrained model
            lateral=True,  # Whether to use lateral connections
            conv1_kernel=(1, 7, 7),  # Kernel size of the first conv layer
            dilations=(1, 1, 1, 1),  # Dilation of each stage. Default: (1, 1, 1, 1)
            conv1_stride_t=1,  # Temporal stride of the first conv layer
            pool1_stride_t=1,  # Temporal stride of the first pooling layer
            inflate=(0, 0, 1, 1),  # Inflate dims of each block. Default: (0, 0, 1, 1)
            spatial_strides=(1, 2, 2, 1)),  # Spatial strides of the residual blocks of each stage. Default: (1, 2, 2, 2)
        fast_pathway=dict(  # Fast pathway
            type='resnet3d',  # Uses a ResNet3d network
            depth=50,  # Network depth: 50
            pretrained=None,  # No pretrained model
            lateral=False,  # Whether to use lateral connections
            base_channels=8,  # Number of base channels
            conv1_kernel=(5, 7, 7),  # Kernel size of the first conv layer
            conv1_stride_t=1,  # Temporal stride of the first conv layer
            pool1_stride_t=1,  # Temporal stride of the first pooling layer
            spatial_strides=(1, 2, 2, 1))),  # Spatial strides of the residual blocks of each stage. Default: (1, 2, 2, 2)
    roi_head=dict(  # roi_head settings
        type='Via3RoIHead',  # roi_head name
        bbox_roi_extractor=dict(  # bbox_roi_extractor settings
            type='SingleRoIExtractor3D',  # bbox_roi_extractor name
            roi_layer_type='RoIAlign',  # Type of the RoI op
            output_size=8,  # Output feature size of the RoI op
            with_temporal_pool=True),  # Whether to pool over the temporal dimension
        bbox_head=dict(  # bbox_head settings
            type='BBoxHeadAVA',  # bbox_head name
            in_channels=2304,  # Number of input feature channels: 2048 + 256
            num_classes=7,  # Number of action classes + 1 (background)
            topk=(1, 1),  # Parameter for evaluating multilabel accuracy. Default: (3, 5)
            multilabel=True,  # Whether the dataset is multilabel
            dropout_ratio=0.5)  # Dropout ratio
    ),
    # Training and testing settings of the model
    train_cfg=dict(  # Training hyperparameters for FastRCNN
        rcnn=dict(  # rcnn training settings
            assigner=dict(  # assigner settings
                type='MaxIoUAssignerAVA',  # assigner name
                pos_iou_thr=0.9,  # Positive IoU threshold: IoU > pos_iou_thr -> positive
                neg_iou_thr=0.9,  # Negative IoU threshold: IoU < neg_iou_thr -> negative
                min_pos_iou=0.9),  # Minimum acceptable IoU for a positive sample
            sampler=dict(  # sampler settings
                type='RandomSampler',  # sampler name
                num=32,  # Sampler batch size
                pos_fraction=1,  # Fraction of positive bounding boxes in the sample
                neg_pos_ub=-1,  # Upper bound on the negative-to-positive sample ratio
                add_gt_as_proposals=True),  # Whether to add ground-truth boxes as proposals
            pos_weight=1.0,  # Loss weight of positive samples
            debug=False)),  # Whether to run in debug mode
    test_cfg=dict(  # Testing hyperparameters for FastRCNN
        rcnn=dict(  # rcnn testing settings
            action_thr=0.0)))  # Score threshold for keeping an action
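Once written to a config file, this model dict is turned into a network through mmaction2's registry. A minimal sketch, with a hypothetical config path (note that train_cfg/test_cfg are nested inside the model dict in this config):

from mmcv import Config
from mmaction.models import build_model

# Hypothetical path -- substitute the actual config file of this project
cfg = Config.fromfile('configs/detection/interaction/slowfast_interaction.py')
# train_cfg and test_cfg already live inside cfg.model here,
# so they need not be passed separately
model = build_model(cfg.model)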
# Image normalization settings
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
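These mean/std values are the standard ImageNet RGB statistics, and to_bgr=False keeps the channel order as RGB. Conceptually, the Normalize step in the pipelines below performs per-channel standardization, as in this minimal NumPy sketch:

import numpy as np

# One frame, (H, W, C) in RGB, as loaded by RawFrameDecode
img = np.random.randint(0, 256, (256, 256, 3)).astype(np.float32)
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
normalized = (img - mean) / std  # zero mean, unit variance per channel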
train_pipeline = [  # List of preprocessing steps in the training data pipeline
    dict(  # SampleVia3Frames settings
        type='SampleVia3Frames',  # Selects which video frames to sample
        clip_len=32,  # Number of frames in each output clip
        frame_interval=2),  # Temporal interval between sampled frames
    dict(  # RawFrameDecode settings
        type='RawFrameDecode'),  # Loads and decodes the selected frames
    dict(  # RandomRescale settings
        type='RandomRescale',  # Randomly rescales the short edge within a given range
        scale_range=(256, 320)),  # Short-edge range for RandomRescale
    dict(  # RandomCrop settings
        type='RandomCrop',  # Randomly crops a patch of the given size
        size=256),  # Crop size
    dict(  # Flip settings
        type='Flip',  # Image flipping
        flip_ratio=0.5),  # Probability of applying the flip
    dict(  # Normalize settings
        type='Normalize',  # Image normalization
        mean=[123.675, 116.28, 103.53],  # Normalization mean
        std=[58.395, 57.12, 57.375],  # Normalization std
        to_bgr=False),
    dict(  # FormatShape settings
        type='FormatShape',  # Reformats the images into the given input layout
        input_format='NCTHW',  # Final image layout
        collapse=True),  # Collapse the N dimension when N == 1
    dict(  # Rename settings
        type='Rename',  # Renames keys
        mapping=dict(imgs='img')),  # Renaming map
    dict(  # ToTensor settings
        type='ToTensor',  # Converts values of the given keys to Tensor
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),  # Keys to convert to Tensor
    dict(  # ToDataContainer settings
        type='ToDataContainer',  # Wraps the given fields in DataContainer objects
        fields=[  # Field dicts
            dict(key=['proposals', 'gt_bboxes', 'gt_labels'],  # Keys to wrap in DataContainer
                 stack=False)  # Whether to stack these tensors
        ]),
    dict(  # Collect settings
        type='Collect',  # Decides which keys are passed to the spatio-temporal detector
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],  # Input keys
        meta_keys=['original_shape', 'img_shape', 'flip_direction', 'img_norm_cfg']  # Input meta keys
    )
]
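In mmaction2, each of these dicts is built through the pipeline registry and chained by Compose, with every step reading and writing a shared results dict. A minimal sketch, assuming the project's custom steps (SampleVia3Frames, etc.) are registered:

from mmaction.datasets.pipelines import Compose

# Build every step from its dict and chain them in order
pipeline = Compose(train_pipeline)
# The dataset seeds `results` with the keys the first steps expect
# (frame directory, total frame count, proposals, GT boxes, ...), then:
#   results = pipeline(results)
# After FormatShape with collapse=True, results['img'] has shape (C, T, H, W).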
val_pipeline = [  # List of preprocessing steps in the validation data pipeline
    dict(  # SampleVia3Frames settings
        type='SampleVia3Frames',  # Selects which video frames to sample
        clip_len=32,  # Number of frames in each output clip
        frame_interval=2),  # Temporal interval between sampled frames
    dict(  # RawFrameDecode settings
        type='RawFrameDecode'),  # Loads and decodes the selected frames
    dict(  # Resize settings
        type='Resize',  # Resizes the images
        scale=(-1, 256)),  # Short edge 256, aspect ratio kept
    dict(  # Normalize settings
        type='Normalize',  # Image normalization
        mean=[123.675, 116.28, 103.53],  # Normalization mean
        std=[58.395, 57.12, 57.375],
        to_bgr=False),
    dict(  # FormatShape settings
        type='FormatShape',  # Reformats the images into the given input layout
        input_format='NCTHW',  # Final image layout
        collapse=True),  # Collapse the N dimension when N == 1
    dict(  # Rename settings
        type='Rename',  # Renames keys
        mapping=dict(imgs='img')),  # Renaming map
    dict(  # ToTensor settings
        type='ToTensor',  # Converts values of the given keys to Tensor
        keys=['img', 'proposals']),  # Keys to convert to Tensor
    dict(  # ToDataContainer settings
        type='ToDataContainer',  # Wraps the given fields in DataContainer objects
        fields=[  # Fields to wrap in DataContainer
            dict(  # Field dict
                key='proposals',  # Key to wrap in DataContainer
                stack=False)]),  # Whether to stack these tensors
    dict(  # Collect settings
        type='Collect',  # Decides which keys are passed to the spatio-temporal detector
        keys=['img', 'proposals'],  # Input keys
        meta_keys=['img_shape'],  # Input meta keys
        nested=True)  # Whether to wrap the data in a nested list
]
dataset_type = 'VIA3Dataset'  # Dataset type for training, validation, and testing
train_images_root = './Datasets/Interaction/images/train'  # 'data/Interaction/images/train'
train_annotations_root = './Datasets/Interaction/annotations/train'  # 'data/Interaction/annotations/train'
test_images_root = './Datasets/Interaction/images/test'  # 'data/Interaction/images/test'
test_annotations_root = './Datasets/Interaction/annotations/test'  # 'data/Interaction/annotations/test'
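These paths imply a directory layout roughly as follows (inferred from the config; verify against your own checkout):

Datasets/Interaction/
    images/
        train/          # extracted frames for the training sequences
        test/           # extracted frames for the test sequences
    annotations/
        train/          # seqN.json and seqN_proposal.json for each training sequence
        test/           # seqN.json and seqN_proposal.json for each test sequence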
train_seq1_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq1.json',
proposal_file=train_annotations_root + '/seq1_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq2_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq2.json',
proposal_file=train_annotations_root + '/seq2_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq3_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq3.json',
proposal_file=train_annotations_root + '/seq3_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq4_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq4.json',
proposal_file=train_annotations_root + '/seq4_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq5_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq5.json',
proposal_file=train_annotations_root + '/seq5_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq6_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq6.json',
proposal_file=train_annotations_root + '/seq6_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq7_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq7.json',
proposal_file=train_annotations_root + '/seq7_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq8_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq8.json',
proposal_file=train_annotations_root + '/seq8_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq9_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq9.json',
proposal_file=train_annotations_root + '/seq9_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq10_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq10.json',
proposal_file=train_annotations_root + '/seq10_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq11_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq11.json',
proposal_file=train_annotations_root + '/seq11_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq12_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq12.json',
proposal_file=train_annotations_root + '/seq12_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq13_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq13.json',
proposal_file=train_annotations_root + '/seq13_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq14_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq14.json',
proposal_file=train_annotations_root + '/seq14_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq15_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq15.json',
proposal_file=train_annotations_root + '/seq15_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq16_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq16.json',
proposal_file=train_annotations_root + '/seq16_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq17_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq17.json',
proposal_file=train_annotations_root + '/seq17_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq18_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq18.json',
proposal_file=train_annotations_root + '/seq18_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq19_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq19.json',
proposal_file=train_annotations_root + '/seq19_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
train_seq20_cfg = dict(
type=dataset_type,
data_prefix=train_images_root,
ann_file=train_annotations_root + '/seq20.json',
proposal_file=train_annotations_root + '/seq20_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=train_pipeline)
test_seq5_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq5.json',
proposal_file=test_annotations_root + '/seq5_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
test_seq10_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq10.json',
proposal_file=test_annotations_root + '/seq10_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
test_seq15_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq15.json',
proposal_file=test_annotations_root + '/seq15_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
test_seq20_cfg = dict(
type=dataset_type,
data_prefix=test_images_root,
ann_file=test_annotations_root + '/seq20.json',
proposal_file=test_annotations_root + '/seq20_proposal.json',
# custom_classes=['None', 'handshake', 'point', 'push'],
attribute='person',
custom_classes=None,
pipeline=val_pipeline)
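The twenty train configs and four test configs above differ only in the sequence index. Since mmcv config files are executed as ordinary Python, the same dicts could equally be generated in a loop; a sketch of that equivalent form:

def make_seq_cfg(i, images_root, annotations_root, pipeline):
    # One per-sequence dataset config, identical to the handwritten dicts above
    return dict(
        type=dataset_type,
        data_prefix=images_root,
        ann_file=f'{annotations_root}/seq{i}.json',
        proposal_file=f'{annotations_root}/seq{i}_proposal.json',
        attribute='person',
        custom_classes=None,
        pipeline=pipeline)

train_seq_cfgs = [make_seq_cfg(i, train_images_root, train_annotations_root,
                               train_pipeline) for i in range(1, 21)]
test_seq_cfgs = [make_seq_cfg(i, test_images_root, test_annotations_root,
                              val_pipeline) for i in (5, 10, 15, 20)]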
# Data loading settings
data = dict(
    # videos_per_gpu=8,
    # workers_per_gpu=10,
Related articles in this series:
(WIP) 03 mmaction2 SlowFast Behavior Analysis (Commercial Grade): Training on a Custom AVA Dataset
mmaction2 SlowFast Behavior Analysis (Commercial Grade): Project Download
mmaction2 SlowFast Behavior Analysis (Commercial Grade): Custom AVA Dataset, Part 1
mmaction2 Beginner Tutorial: SlowFast Training Configuration, Log Analysis, and Test Result Analysis