Deeplab v3 : 源码分析
Posted 明天去哪
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Deeplab v3 : 源码分析相关的知识,希望对你有一定的参考价值。
代码: https://github.com/tensorflow/models/tree/master/research/deeplab
train.py主要函数及注释如下
main()
# 配置GPU
config = slim.deployment.model_deploy.DeploymentConfig(xxx) # Create a DeploymentConfig for multi-gpu
# 获取slim数据集实例
dataset = deeplab.datasets.segmentation_dataset.get_dataset(xxx) # Gets an instance of slim dataset
# 得到数据
samples = input_generator.get(dataset, xxx)
# Creates a queue to prefetch tensors from `tensors`
inputs_queue = prefetch_queue.prefetch_queue(samples, capacity=128 * config.num_clones)
# 在每个clone(设备)上调用_build_deeplab构建模型
clones = Clone(_build_deeplab(inputs_queue, xxx), scope, device)
learning_rate = train_utils.get_model_learning_rate(xxx)
slim.learning.train(xxx)
deeplab.datasets.segmentation_dataset.get_dataset(dataset_name, split_name, dataset_dir):
# 将example反序列化成存储之前的格式。由tf完成
keys_to_features
# 将反序列化的数据组装成更高级的格式。由slim完成
items_to_handlers
# 解码器,进行解码
decoder = tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
return dataset.Dataset(xxx)
deeplab.utils.input_generator.get(dataset, xxx)
# provider对象根据dataset信息读取数据
data_provider = slim.dataset_data_provider.DatasetDataProvider(dataset, xxx)
# 获取数据,获取到的数据是单个数据,还需要对数据进行预处理,组合数据
image, height, width = data_provider.get([common.IMAGE, common.HEIGHT, common.WIDTH])
original_image, image, label = input_preprocess.preprocess_image_and_label(xxx)
return tf.train.batch(xxx)
_build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label)
# 获取数据
samples = inputs_queue.dequeue()
model_options = common.ModelOptions(xxx)
# 构建模型
outputs_to_scales_to_logits = model.multi_scale_logits(xxx)
eval.py剖析
1.基本结构如下
def main(_):
# 获得contrib.slim数据集实例
dataset = segmentation_dataset.get_dataset(FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)
# todo: 为什么要求用tf.Graph(), 这个地方不用tf.Graph()也是可以正常运行的
with tf.Graph().as_default():
# 得到数据
samples = input_generator.get(
dataset,
FLAGS.eval_crop_size,
FLAGS.eval_batch_size,
min_resize_value=FLAGS.min_resize_value,
max_resize_value=FLAGS.max_resize_value,
resize_factor=FLAGS.resize_factor,
dataset_split=FLAGS.eval_split,
is_training=False,
model_variant=FLAGS.model_variant)
# 修改一些默认配置参数
model_options = common.ModelOptions(
outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
crop_size=FLAGS.eval_crop_size,
atrous_rates=FLAGS.atrous_rates,
output_stride=FLAGS.output_stride)
if tuple(FLAGS.eval_scales) == (1.0,):
tf.logging.info('Performing single-scale test.')
predictions = model.predict_labels(samples[common.IMAGE], model_options, image_pyramid=FLAGS.image_pyramid)
else:
tf.logging.info('Performing multi-scale test.')
predictions = model.predict_labels_multi_scale(samples[common.IMAGE], model_options=model_options, eval_scales=FLAGS.eval_scales, add_flipped_images=FLAGS.add_flipped_images)
# flatten
predictions = tf.reshape(predictions, shape=[-1])
labels = tf.reshape(samples[common.LABEL], shape=[-1])
# 设置哪些位置计算miou
weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))
# Set ignore_label regions to label 0, because metrics.mean_iou requires
# range of labels = [0, dataset.num_classes). Note the ignore_label regions
# are not evaluated since the corresponding regions contain weights = 0.
labels = tf.where(
tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)
# 定义评估指标
metric_map = {}
metric_map['miou_1.0'] = tf.metrics.mean_iou(predictions, labels, dataset.num_classes, weights=weights)
metrics_to_values, metrics_to_updates = (tf.contrib.metrics.aggregate_metric_map(metric_map))
num_batches = int(math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))
tf.contrib.slim.evaluation.evaluation_loop(
master=FLAGS.master,
checkpoint_dir=FLAGS.checkpoint_dir,
logdir=FLAGS.eval_logdir,
num_evals=num_batches,
eval_op=list(metrics_to_updates.values()),
max_number_of_evaluations=num_eval_iters,
eval_interval_secs=FLAGS.eval_interval_secs)
# 返回contrib.slim数据集实例
def deeplab.datasets.segmentation_dataset.get_dataset(dataset_name, split_name, dataset_dir):
splits_to_sizes = {'train': 2975, 'val': 500}
num_classes = 19
ignore_label = 255
file_pattern = '/home/sjming/Documents/deeplearning/semantic-segmentation/cityscapes/tfrecord/val-*'
# tf.FixedLenFeature(x,x,x): 将example反序列化成存储之前的格式,将前边的类赋值给feature对象,由tf完成
keys_to_features = {
    'image/encoded': tf.FixedLenFeature(
        (), tf.string, default_value=''),
    'image/filename': tf.FixedLenFeature(
        (), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature(
        (), tf.string, default_value='jpeg'),
    'image/height': tf.FixedLenFeature(
        (), tf.int64, default_value=0),
    'image/width': tf.FixedLenFeature(
        (), tf.int64, default_value=0),
    'image/segmentation/class/encoded': tf.FixedLenFeature(
        (), tf.string, default_value=''),
    'image/segmentation/class/format': tf.FixedLenFeature(
        (), tf.string, default_value='png'),
}
# 将反序列化的数据组装成更高级的格式。由contrib.slim完成
items_to_handlers = {
    'image': tfexample_decoder.Image(
        image_key='image/encoded',
        format_key='image/format',
        channels=3),
    'image_name': tfexample_decoder.Tensor('image/filename'),
    'height': tfexample_decoder.Tensor('image/height'),
    'width': tfexample_decoder.Tensor('image/width'),
    'labels_class': tfexample_decoder.Image(
        image_key='image/segmentation/class/encoded',
        format_key='image/segmentation/class/format',
        channels=1),
}
# 解码器
decoder = tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
# 返回contrib.slim数据集实例
return dataset.Dataset(
data_sources=file_pattern,
reader=tf.TFRecordReader,
decoder=decoder,
num_samples=splits_to_sizes[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
ignore_label=ignore_label,
num_classes=num_classes,
name=dataset_name,
multi_label=True)
# This function gets the dataset split for semantic segmentation. In
# particular, it is a wrapper of (1) dataset_data_provider which returns the raw
# dataset split, (2) input_preprocess which preprocesses the raw data, and (3) the
# Tensorflow operation of batching the preprocessed data. Then, the output could
# be directly used by training, evaluation or visualization.
def deeplab.utils.input_generator.get(dataset,
crop_size,
batch_size,
min_resize_value=None,
max_resize_value=None,
resize_factor=None,
min_scale_factor=1.,
max_scale_factor=1.,
scale_factor_step_size=0,
num_readers=1,
num_threads=1,
dataset_split=None,
is_training=True,
model_variant=None):
# 创建DatasetDataProvider
data_provider = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(dataset,num_readers=num_readers,num_epochs=None if is_training else 1,shuffle=is_training)
image, height, width = data_provider.get([common.IMAGE, common.HEIGHT, common.WIDTH])
label, = data_provider.get([common.LABELS_CLASS])
# 数据处理
original_image, image, label = input_preprocess.preprocess_image_and_label(
image,
label,
crop_height=crop_size[0],
crop_width=crop_size[1],
min_resize_value=min_resize_value,
max_resize_value=max_resize_value,
resize_factor=resize_factor,
min_scale_factor=min_scale_factor,
max_scale_factor=max_scale_factor,
scale_factor_step_size=scale_factor_step_size,
ignore_label=dataset.ignore_label,
is_training=is_training,
model_variant=model_variant)
sample = {
    common.IMAGE: image,
    common.IMAGE_NAME: image_name,
    common.HEIGHT: height,
    common.WIDTH: width,
    common.LABEL: label,
}
return tf.train.batch(
sample,
batch_size=batch_size,
num_threads=num_threads,
capacity=32 * batch_size,
allow_smaller_final_batch=not is_training,
dynamic_pad=True)
# 获得分割结果
def deeplab.model.predict_labels(images, model_options, image_pyramid=None):
# 得到各输出在各尺度下的logits, 例如形状为(?, 129, 129, 19)
outputs_to_scales_to_logits = multi_scale_logits(
images,
model_options=model_options,
image_pyramid=image_pyramid,
is_training=False,
fine_tune_batch_norm=False)
# 对logits上采样后取argmax, 得到predictions(具体代码此处省略)
return predictions
def model.predict_labels_multi_scale(images,model_options, eval_scales=(1.0,), add_flipped_images=False):
for i, image_scale in enumerate(eval_scales):
with tf.variable_scope(tf.get_variable_scope(), reuse=True if i else None):
outputs_to_scales_to_logits = multi_scale_logits(
images,
model_options=model_options,
image_pyramid=[image_scale],
is_training=False,
fine_tune_batch_norm=False)
# 对logits做双线性(bilinear)插值缩放, 并按输出类型收集到outputs_to_predictions中(具体代码此处省略)
for output in sorted(outputs_to_predictions):
predictions = outputs_to_predictions[output]
# Compute average prediction across different scales and flipped images.
predictions = tf.reduce_mean(tf.concat(predictions, 4), axis=4)
outputs_to_predictions[output] = tf.argmax(predictions, 3)
return outputs_to_predictions
2.简单说明
学习本部分代码时,可以参考tf.contrib.slim的相关内容。deeplab官方提供的代码基本按照slim库的官方文档编写,可参考这篇文章: https://blog.csdn.net/u014451076/article/details/80706318 ,可以当做模板使用。
3.代码修改
[后续补充]
存在的问题
- tensorboard不能显示dilation,dilation始终为1
这个可能是tensorboard的问题,显示的时候无法显示正确的dilation,总是[1,1,1,1],实际上在exit flow/block2中的卷积的dilation应该是[1,2,2,1].
- deeplabv3的串行的aspp
在本程序中体现在每个flow的每个block中的三个卷积:这三个卷积可以使用multi-grid,即分别设置不同的空洞率(atrous rate),从而实现串行的ASPP.不过官方提供的代码并没有实践这一点,可以自行实现.
以上是关于Deeplab v3 : 源码分析的主要内容,如果未能解决你的问题,请参考以下文章
MATLAB深度学习采用 Deeplab v3+ 实现全景分割
MATLAB深度学习采用 Deeplab v3+ 实现全景分割