1、数据源:包括所有原始数据,分组后的数据;
2、数据预处理:包括数据的准备,数据的导入,数据的提取,数据的分组(训练与测试);
3、配置文件:包括各种参数与超参数,如:训练周期,训练步长,批量数据,学习率,卷积核大小,全连接大小,训练模型存放路径(checkpoint),摘要存放路径(summary)等;
4、基础网络:包括基本的网络组件与基础网络结构;
5、训练主文件:主入口,用于搭建生成图(graph),会话(sess),数据导入模型训练,GPU配置,训练过程打印等
三、代码结构
以下为原始文件夹:
./data
-- ./InstanceSegmentationClass
-- ./JPEGImages
-- ./SegmentationClass
-- datasets_gen_culane.py 用于从上面三个图片目录生成list.txt,train.txt,test.txt
# coding=utf-8
# create date: 12/5/2018   modified date: 2/12/2019   author: jim.chen
"""Dataset preparation for the CULane-style folder layout.

Generates list.txt (one "image seg_gt inst_gt" triple per line) and a 9:1
train.txt / val.txt split, plus helpers that keep the image, binary-mask and
instance-mask folders consistent with each other.
"""
import os
import glob
import random
import math
import cv2
import numpy as np


def gen_list_txt(rela_dir, img_dir, img_seg_dir, img_inst_dir):
    """Write list.txt and split it into train.txt / val.txt (9:1, sorted).

    :param rela_dir: path prefix written in front of every entry
    :param img_dir: folder of raw .jpg images
    :param img_seg_dir: folder of binary ground-truth .png masks
    :param img_inst_dir: folder of instance ground-truth .png masks
    """
    list_txt = "list.txt"
    png_list_path = glob.glob(img_seg_dir + '/*.png')
    print("gen_list_txt png_list_path:", png_list_path)
    with open(list_txt, "w") as w_f:
        for png in png_list_path:
            stem = os.path.splitext(os.path.basename(png))[0]
            # One record per line (was ' '-terminated, which collapsed the
            # whole file onto a single line and broke the per-line split below
            # and the per-line reader in data_processor.DataSet).
            w_f.write(rela_dir + img_dir + '/' + stem + '.jpg' + ' '
                      + rela_dir + png + ' '
                      + rela_dir + img_inst_dir + '/' + stem + '.png' + '\n')
    with open(list_txt, "r") as r_f:
        png_list = [line for line in r_f]
    png_list.sort()
    print("gen_list_txt len(png_list):", len(png_list))
    # 90% of the records (sorted) become the training split.
    train = random.sample(png_list, int(math.floor(len(png_list) * 9 / 10)))
    train.sort()
    print("gen_list_txt train:", train)
    val = sorted(set(png_list).difference(train))
    print("gen_list_txt val:", val)
    # Explicit (name, records) pairs replace the eval()-based dispatch.
    for split_name, records in (('train', train), ('val', val)):
        with open(split_name + '.txt', 'w') as w1_f:
            w1_f.writelines(records)


def sync_gt_2_img(img_dir, img_seg_dir, img_inst_dir):
    """Delete raw images that lack a matching binary or instance ground truth."""
    cwd = os.getcwd()
    print("sync_gt_2_img img_dir:", img_dir, " img_seg_dir:", img_seg_dir,
          " img_inst_dir:", img_inst_dir)
    img_full_dir = os.path.join(cwd, img_dir)
    img_seg_full_dir = os.path.join(cwd, img_seg_dir)
    img_inst_full_dir = os.path.join(cwd, img_inst_dir)
    for img in os.listdir(img_full_dir):
        img_basename = os.path.splitext(img)[0]
        img_full_path = os.path.join(img_full_dir, img)
        img_seg_full_path = os.path.join(img_seg_full_dir, img_basename + '.png')
        img_inst_full_path = os.path.join(img_inst_full_dir, img_basename + '.png')
        # Original checked only the instance mask (and removed the image
        # twice); check both ground-truth files, remove once.
        if not (os.path.exists(img_seg_full_path)
                and os.path.exists(img_inst_full_path)):
            print("sync_gt_2_img missing gt for:", img_basename)
            if os.path.exists(img_full_path):
                os.remove(img_full_path)


def sync_seg_2_inst(img_seg_dir, img_inst_dir):
    """Delete binary masks that have no matching instance mask."""
    cwd = os.getcwd()
    print("sync_seg_2_inst img_seg_dir:", img_seg_dir, " img_inst_dir:", img_inst_dir)
    img_seg_full_dir = os.path.join(cwd, img_seg_dir)
    img_inst_full_dir = os.path.join(cwd, img_inst_dir)
    for img in os.listdir(img_seg_full_dir):
        img_basename = os.path.splitext(img)[0]
        # Use the listed file itself (the original rebuilt the path with a
        # '.jpg' extension although the seg folder holds '.png' masks).
        img_seg_full_path = os.path.join(img_seg_full_dir, img)
        img_inst_full_path = os.path.join(img_inst_full_dir, img_basename + '.png')
        if not os.path.exists(img_inst_full_path) and os.path.exists(img_seg_full_path):
            print("sync_seg_2_inst os.remove(img_seg_full_path):", img_seg_full_path)
            os.remove(img_seg_full_path)


def gen_seg_color(img_inst_dir, img_seg_dir):
    """Derive binary (0/255) masks from instance masks: any non-zero pixel -> 255."""
    cwd = os.getcwd()
    in_path = os.path.join(cwd, img_inst_dir)
    print(in_path)
    out_path = os.path.join(cwd, img_seg_dir)
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    for l, file_name in enumerate(os.listdir(in_path)):
        img_instance = cv2.imread(os.path.join(in_path, file_name))
        print("l:", l, " img_instance.shape:", img_instance.shape)
        # Vectorized replacement of the per-pixel double loop; writing the
        # single-channel mask directly equals the old white-RGB -> gray path.
        mask = (img_instance[:, :, 0] != 0).astype(np.uint8) * 255
        cv2.imwrite(os.path.join(out_path, file_name), mask)
    print("generate segment finished!")


def gen_inst_color(img_inst_dir):
    """Remap instance label ids 2..6 to distinct gray levels for visualization."""
    cwd = os.getcwd()
    in_path = os.path.join(cwd, img_inst_dir)
    print(in_path)
    out_path = os.path.join(cwd, "img_inst_new")
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    # label id -> output gray value (same mapping as the original if-chain)
    gray_map = {2: 20, 3: 70, 4: 120, 5: 170, 6: 220}
    for l, file_name in enumerate(os.listdir(in_path)):
        img_instance = cv2.imread(os.path.join(in_path, file_name))
        print("l:", l, " img_instance.shape:", img_instance.shape)
        labels = img_instance[:, :, 0]
        out = np.zeros(labels.shape, dtype=np.uint8)
        for label, gray in gray_map.items():
            out[labels == label] = gray
        cv2.imwrite(os.path.join(out_path, file_name), out)
    print("generate instance finished!")


def detect_invalid_img(img_path):
    """Return True when the mask at img_path has no foreground (all zeros)."""
    img_instance = cv2.imread(img_path)
    print("detect_invalid_img img_instance.shape:", img_instance.shape)
    # Vectorized: any non-zero pixel means the mask is valid.
    return not np.any(img_instance[:, :, 0])


def filter_invalid_img(img_test_dir, img_seg_dir, img_inst_dir):
    """Delete masks in img_test_dir that contain no foreground at all."""
    cwd = os.getcwd()
    in_path = os.path.join(cwd, img_test_dir)
    in_path_dir = os.listdir(in_path)
    print("filter_invalid_img inPathDir:", in_path_dir)
    for file_name in in_path_dir:
        img_path = os.path.join(in_path, file_name)
        isdel = detect_invalid_img(img_path)
        if isdel:
            print("filter_invalid_img isdel:", isdel)
            os.remove(img_path)


def main():
    """Entry point: generate the list/train/val files for the dataset."""
    print("main begin")
    rela_dir = "data/datasets_culane_all/"
    img_dir = "image"
    img_seg_dir = "gt_image_binary"
    img_inst_dir = "gt_image_instance"
    gen_list_txt(rela_dir, img_dir, img_seg_dir, img_inst_dir)
    #sync_gt_2_img(img_dir, img_seg_dir, img_inst_dir)
    #gen_seg_color(img_inst_dir, img_seg_dir)
    #gen_inst_color(img_inst_dir)
    #filter_invalid_img(img_dir, img_seg_dir, img_inst_dir)
    #sync_seg_2_inst(img_dir, img_inst_dir)
    print("main end")


if __name__ == '__main__':
    main()
--list.txt
data/datasets_culane/JPEGImages/0000.jpg data/datasets_culane/SegmentationClass/0000.png data/datasets_culane/InstanceSegmentationClass/0000.png data/datasets_culane/JPEGImages/0001.jpg data/datasets_culane/SegmentationClass/0001.png data/datasets_culane/InstanceSegmentationClass/0001.png data/datasets_culane/JPEGImages/0002.jpg data/datasets_culane/SegmentationClass/0002.png data/datasets_culane/InstanceSegmentationClass/0002.png data/datasets_culane/JPEGImages/0003.jpg data/datasets_culane/SegmentationClass/0003.png data/datasets_culane/InstanceSegmentationClass/0003.png data/datasets_culane/JPEGImages/0004.jpg data/datasets_culane/SegmentationClass/0004.png data/datasets_culane/InstanceSegmentationClass/0004.png data/datasets_culane/JPEGImages/0005.jpg data/datasets_culane/SegmentationClass/0005.png data/datasets_culane/InstanceSegmentationClass/0005.png
--train.txt
data/datasets_culane/JPEGImages/0000.jpg data/datasets_culane/SegmentationClass/0000.png data/datasets_culane/InstanceSegmentationClass/0000.png data/datasets_culane/JPEGImages/0001.jpg data/datasets_culane/SegmentationClass/0001.png data/datasets_culane/InstanceSegmentationClass/0001.png data/datasets_culane/JPEGImages/0002.jpg data/datasets_culane/SegmentationClass/0002.png data/datasets_culane/InstanceSegmentationClass/0002.png data/datasets_culane/JPEGImages/0004.jpg data/datasets_culane/SegmentationClass/0004.png data/datasets_culane/InstanceSegmentationClass/0004.png
--val.txt
data/datasets_culane/JPEGImages/0005.jpg data/datasets_culane/SegmentationClass/0005.png data/datasets_culane/InstanceSegmentationClass/0005.png
data/datasets_culane/JPEGImages/0003.jpg data/datasets_culane/SegmentationClass/0003.png data/datasets_culane/InstanceSegmentationClass/0003.png
./data_provider
--data_processor.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Dataset reader serving shuffled (image, binary label, instance label) batches."""
import os.path as ops
import cv2
import numpy as np
try:
    from cv2 import cv2
except ImportError:
    pass


class DataSet(object):
    """Lane-detection dataset backed by a text index file.

    Every line of the index file holds three whitespace-separated paths:
    raw image, binary ground truth, instance ground truth.
    """

    def __init__(self, dataset_info_file):
        self._gt_img_list, self._gt_label_binary_list, self._gt_label_instance_list = \
            self._init_dataset(dataset_info_file)
        self._random_dataset()
        self._next_batch_loop_count = 0

    def _init_dataset(self, dataset_info_file):
        """Parse the index file into three parallel path lists."""
        assert ops.exists(dataset_info_file), '{:s} not exist'.format(dataset_info_file)
        img_paths, binary_paths, instance_paths = [], [], []
        with open(dataset_info_file, 'r') as file:
            for _info in file:
                fields = _info.strip(' ').split()
                img_paths.append(fields[0])
                binary_paths.append(fields[1])
                instance_paths.append(fields[2])
        return img_paths, binary_paths, instance_paths

    def _random_dataset(self):
        """Shuffle the three path lists with one shared permutation."""
        assert len(self._gt_img_list) == len(self._gt_label_binary_list) \
            == len(self._gt_label_instance_list)
        order = np.random.permutation(len(self._gt_img_list))
        self._gt_img_list = [self._gt_img_list[i] for i in order]
        self._gt_label_binary_list = [self._gt_label_binary_list[i] for i in order]
        self._gt_label_instance_list = [self._gt_label_instance_list[i] for i in order]

    def next_batch(self, batch_size):
        """Load and return the next (images, binary labels, instance labels) batch.

        :param batch_size: number of samples per batch
        :raises ValueError: if batch_size exceeds the dataset size
        """
        assert len(self._gt_label_binary_list) == len(self._gt_label_instance_list) \
            == len(self._gt_img_list)
        total = len(self._gt_label_binary_list)
        idx_start = batch_size * self._next_batch_loop_count
        idx_end = idx_start + batch_size
        if idx_start == 0 and idx_end > total:
            raise ValueError('Batch size cant be more than total numbers')
        if idx_end > total:
            # Epoch exhausted: reshuffle and restart from the beginning.
            self._random_dataset()
            self._next_batch_loop_count = 0
            return self.next_batch(batch_size)
        gt_imgs = [cv2.imread(path, cv2.IMREAD_COLOR)
                   for path in self._gt_img_list[idx_start:idx_end]]
        gt_labels_binary = []
        for path in self._gt_label_binary_list[idx_start:idx_end]:
            label_img = cv2.imread(path, cv2.IMREAD_COLOR)
            label_binary = np.zeros([label_img.shape[0], label_img.shape[1]],
                                    dtype=np.uint8)
            # Every non-black pixel is marked as foreground (class 1).
            fg = np.where((label_img[:, :, :] != [0, 0, 0]).all(axis=2))
            label_binary[fg] = 1
            gt_labels_binary.append(label_binary)
        gt_labels_instance = [cv2.imread(path, cv2.IMREAD_UNCHANGED)
                              for path in self._gt_label_instance_list[idx_start:idx_end]]
        self._next_batch_loop_count += 1
        return gt_imgs, gt_labels_binary, gt_labels_instance


if __name__ == '__main__':
    val = DataSet('/media/baidu/Data/Semantic_Segmentation/TUSimple_Lane_Detection/training/val.txt')
    b1, b2, b3 = val.next_batch(50)
    c1, c2, c3 = val.next_batch(50)
    dd, d2, d3 = val.next_batch(50)
./config
--global_config.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Global train/test configuration.

Consumers obtain the configuration via: ``from config import cfg``.
"""
from easydict import EasyDict as edict

__C = edict()
cfg = __C

# ---------------- Train options ----------------
__C.TRAIN = edict()
__C.TRAIN.EPOCHS = 200010             # total shadownet training epochs
__C.TRAIN.DISPLAY_STEP = 1            # log display interval (steps)
__C.TRAIN.TEST_DISPLAY_STEP = 1000    # test display interval during training
__C.TRAIN.MOMENTUM = 0.9              # optimizer momentum
__C.TRAIN.LEARNING_RATE = 0.0005      # initial learning rate
__C.TRAIN.GPU_MEMORY_FRACTION = 0.85  # GPU memory fraction while training
__C.TRAIN.TF_ALLOW_GROWTH = True      # tensorflow allow_growth while training
__C.TRAIN.BATCH_SIZE = 1              # training batch size
__C.TRAIN.VAL_BATCH_SIZE = 1          # validation batch size
__C.TRAIN.LR_DECAY_STEPS = 410000     # learning-rate decay steps
__C.TRAIN.LR_DECAY_RATE = 0.1         # learning-rate decay rate
__C.TRAIN.CLASSES_NUMS = 2            # number of classes
__C.TRAIN.IMG_HEIGHT = 256            # input image height
__C.TRAIN.IMG_WIDTH = 512             # input image width

# ---------------- Test options ----------------
__C.TEST = edict()
__C.TEST.GPU_MEMORY_FRACTION = 0.8    # GPU memory fraction while testing
__C.TEST.TF_ALLOW_GROWTH = True       # tensorflow allow_growth while testing
__C.TEST.BATCH_SIZE = 1               # test batch size
./encoder_decoder_model
--cnn_basenet.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
The base convolution neural networks
mainly implement some useful cnn functions
"""
import tensorflow as tf
import numpy as np


class CNNBaseModel(object):
    """
    Base model for other specific cnn ctpn_models
    """

    def __init__(self):
        pass

    @staticmethod
    def conv2d(inputdata, out_channel, kernel_size, padding='SAME', stride=1,
               w_init=None, b_init=None, split=1, use_bias=True,
               data_format='NHWC', name=None):
        """2-D convolution; ``split`` > 1 performs a grouped convolution.

        :param inputdata: 4-D input tensor
        :param out_channel: number of output channels
        :param kernel_size: int or [h, w] kernel size
        :param split: number of convolution groups
        :return: convolution output tensor
        """
        with tf.variable_scope(name):
            in_shape = inputdata.get_shape().as_list()
            channel_axis = 3 if data_format == 'NHWC' else 1
            in_channel = in_shape[channel_axis]
            assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
            assert in_channel % split == 0
            assert out_channel % split == 0

            padding = padding.upper()

            # Fix: use integer division -- `in_channel / split` yields a
            # float in Python 3 and TF rejects float dimensions.
            if isinstance(kernel_size, list):
                filter_shape = [kernel_size[0], kernel_size[1]] + \
                    [in_channel // split, out_channel]
            else:
                filter_shape = [kernel_size, kernel_size] + \
                    [in_channel // split, out_channel]

            if isinstance(stride, list):
                strides = [1, stride[0], stride[1], 1] if data_format == 'NHWC' \
                    else [1, 1, stride[0], stride[1]]
            else:
                strides = [1, stride, stride, 1] if data_format == 'NHWC' \
                    else [1, 1, stride, stride]

            if w_init is None:
                w_init = tf.contrib.layers.variance_scaling_initializer()
            if b_init is None:
                b_init = tf.constant_initializer()

            w = tf.get_variable('W', filter_shape, initializer=w_init)
            b = None
            if use_bias:
                b = tf.get_variable('b', [out_channel], initializer=b_init)

            if split == 1:
                conv = tf.nn.conv2d(inputdata, w, strides, padding,
                                    data_format=data_format)
            else:
                # Grouped convolution: split input and kernels, convolve
                # per group, then concatenate along the channel axis.
                inputs = tf.split(inputdata, split, channel_axis)
                kernels = tf.split(w, split, 3)
                outputs = [tf.nn.conv2d(i, k, strides, padding, data_format=data_format)
                           for i, k in zip(inputs, kernels)]
                conv = tf.concat(outputs, channel_axis)

            ret = tf.identity(tf.nn.bias_add(conv, b, data_format=data_format)
                              if use_bias else conv, name=name)
        return ret

    @staticmethod
    def relu(inputdata, name=None):
        """ReLU activation."""
        return tf.nn.relu(features=inputdata, name=name)

    @staticmethod
    def sigmoid(inputdata, name=None):
        """Sigmoid activation."""
        return tf.nn.sigmoid(x=inputdata, name=name)

    @staticmethod
    def maxpooling(inputdata, kernel_size, stride=None, padding='VALID',
                   data_format='NHWC', name=None):
        """Max pooling; stride defaults to the kernel size."""
        padding = padding.upper()
        if stride is None:
            stride = kernel_size
        if isinstance(kernel_size, list):
            kernel = [1, kernel_size[0], kernel_size[1], 1] if data_format == 'NHWC' \
                else [1, 1, kernel_size[0], kernel_size[1]]
        else:
            kernel = [1, kernel_size, kernel_size, 1] if data_format == 'NHWC' \
                else [1, 1, kernel_size, kernel_size]
        if isinstance(stride, list):
            strides = [1, stride[0], stride[1], 1] if data_format == 'NHWC' \
                else [1, 1, stride[0], stride[1]]
        else:
            strides = [1, stride, stride, 1] if data_format == 'NHWC' \
                else [1, 1, stride, stride]
        return tf.nn.max_pool(value=inputdata, ksize=kernel, strides=strides,
                              padding=padding, data_format=data_format, name=name)

    @staticmethod
    def avgpooling(inputdata, kernel_size, stride=None, padding='VALID',
                   data_format='NHWC', name=None):
        """Average pooling; stride defaults to the kernel size."""
        if stride is None:
            stride = kernel_size
        kernel = [1, kernel_size, kernel_size, 1] if data_format == 'NHWC' \
            else [1, 1, kernel_size, kernel_size]
        strides = [1, stride, stride, 1] if data_format == 'NHWC' \
            else [1, 1, stride, stride]
        return tf.nn.avg_pool(value=inputdata, ksize=kernel, strides=strides,
                              padding=padding, data_format=data_format, name=name)

    @staticmethod
    def globalavgpooling(inputdata, data_format='NHWC', name=None):
        """Global average pooling over the spatial dimensions."""
        assert inputdata.shape.ndims == 4
        assert data_format in ['NHWC', 'NCHW']
        axis = [1, 2] if data_format == 'NHWC' else [2, 3]
        return tf.reduce_mean(input_tensor=inputdata, axis=axis, name=name)

    @staticmethod
    def layernorm(inputdata, epsilon=1e-5, use_bias=True, use_scale=True,
                  data_format='NHWC', name=None):
        """Layer normalization over all non-batch dimensions of a 2-D/4-D tensor."""
        shape = inputdata.get_shape().as_list()
        ndims = len(shape)
        assert ndims in [2, 4]
        mean, var = tf.nn.moments(inputdata, list(range(1, len(shape))), keep_dims=True)
        if data_format == 'NCHW':
            channnel = shape[1]
            new_shape = [1, channnel, 1, 1]
        else:
            channnel = shape[-1]
            new_shape = [1, 1, 1, channnel]
        if ndims == 2:
            new_shape = [1, channnel]
        if use_bias:
            beta = tf.get_variable('beta', [channnel],
                                   initializer=tf.constant_initializer())
            beta = tf.reshape(beta, new_shape)
        else:
            beta = tf.zeros([1] * ndims, name='beta')
        if use_scale:
            gamma = tf.get_variable('gamma', [channnel],
                                    initializer=tf.constant_initializer(1.0))
            gamma = tf.reshape(gamma, new_shape)
        else:
            gamma = tf.ones([1] * ndims, name='gamma')
        return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma,
                                         epsilon, name=name)

    @staticmethod
    def instancenorm(inputdata, epsilon=1e-5, data_format='NHWC',
                     use_affine=True, name=None):
        """Instance normalization for a 4-D tensor.

        :raises ValueError: if input is not 4-D or channel count is unknown
        """
        shape = inputdata.get_shape().as_list()
        if len(shape) != 4:
            raise ValueError("Input data of instancebn layer has to be 4D tensor")
        if data_format == 'NHWC':
            axis = [1, 2]
            ch = shape[3]
            new_shape = [1, 1, 1, ch]
        else:
            axis = [2, 3]
            ch = shape[1]
            new_shape = [1, ch, 1, 1]
        if ch is None:
            raise ValueError("Input of instancebn require known channel!")
        mean, var = tf.nn.moments(inputdata, axis, keep_dims=True)
        if not use_affine:
            return tf.divide(inputdata - mean, tf.sqrt(var + epsilon), name='output')
        beta = tf.get_variable('beta', [ch], initializer=tf.constant_initializer())
        beta = tf.reshape(beta, new_shape)
        gamma = tf.get_variable('gamma', [ch], initializer=tf.constant_initializer(1.0))
        gamma = tf.reshape(gamma, new_shape)
        return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma,
                                         epsilon, name=name)

    @staticmethod
    def dropout(inputdata, keep_prob, noise_shape=None, name=None):
        """Standard dropout with the given keep probability."""
        return tf.nn.dropout(inputdata, keep_prob=keep_prob,
                             noise_shape=noise_shape, name=name)

    @staticmethod
    def fullyconnect(inputdata, out_dim, w_init=None, b_init=None,
                     use_bias=True, name=None):
        """Fully connected layer; flattens all non-batch dimensions first."""
        shape = inputdata.get_shape().as_list()[1:]
        if None not in shape:
            inputdata = tf.reshape(inputdata, [-1, int(np.prod(shape))])
        else:
            # Unknown static dims: flatten dynamically at run time.
            inputdata = tf.reshape(inputdata,
                                   tf.stack([tf.shape(inputdata)[0], -1]))
        if w_init is None:
            w_init = tf.contrib.layers.variance_scaling_initializer()
        if b_init is None:
            b_init = tf.constant_initializer()
        ret = tf.layers.dense(inputs=inputdata,
                              activation=lambda x: tf.identity(x, name='output'),
                              use_bias=use_bias, name=name,
                              kernel_initializer=w_init,
                              bias_initializer=b_init,
                              trainable=True, units=out_dim)
        return ret

    @staticmethod
    def layerbn(inputdata, is_training, name):
        """Batch normalization layer."""
        return tf.layers.batch_normalization(inputs=inputdata,
                                             training=is_training, name=name)

    @staticmethod
    def squeeze(inputdata, axis=None, name=None):
        """Remove size-1 dimensions."""
        return tf.squeeze(input=inputdata, axis=axis, name=name)

    @staticmethod
    def deconv2d(inputdata, out_channel, kernel_size, padding='SAME', stride=1,
                 w_init=None, b_init=None, use_bias=True, activation=None,
                 data_format='channels_last', trainable=True, name=None):
        """Transposed 2-D convolution."""
        with tf.variable_scope(name):
            in_shape = inputdata.get_shape().as_list()
            channel_axis = 3 if data_format == 'channels_last' else 1
            in_channel = in_shape[channel_axis]
            assert in_channel is not None, "[Deconv2D] Input cannot have unknown channel!"
            padding = padding.upper()
            if w_init is None:
                w_init = tf.contrib.layers.variance_scaling_initializer()
            if b_init is None:
                b_init = tf.constant_initializer()
            ret = tf.layers.conv2d_transpose(inputs=inputdata, filters=out_channel,
                                             kernel_size=kernel_size, strides=stride,
                                             padding=padding, data_format=data_format,
                                             activation=activation, use_bias=use_bias,
                                             kernel_initializer=w_init,
                                             bias_initializer=b_init,
                                             trainable=trainable, name=name)
        return ret

    @staticmethod
    def dilation_conv(input_tensor, k_size, out_dims, rate, padding='SAME',
                      w_init=None, b_init=None, use_bias=False, name=None):
        """Atrous (dilated) convolution with dilation ``rate``."""
        with tf.variable_scope(name):
            in_shape = input_tensor.get_shape().as_list()
            in_channel = in_shape[3]
            assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
            padding = padding.upper()
            if isinstance(k_size, list):
                filter_shape = [k_size[0], k_size[1]] + [in_channel, out_dims]
            else:
                filter_shape = [k_size, k_size] + [in_channel, out_dims]
            if w_init is None:
                w_init = tf.contrib.layers.variance_scaling_initializer()
            if b_init is None:
                b_init = tf.constant_initializer()
            w = tf.get_variable('W', filter_shape, initializer=w_init)
            b = None
            if use_bias:
                b = tf.get_variable('b', [out_dims], initializer=b_init)
            conv = tf.nn.atrous_conv2d(value=input_tensor, filters=w, rate=rate,
                                       padding=padding, name='dilation_conv')
            if use_bias:
                ret = tf.add(conv, b)
            else:
                ret = conv
        return ret

    @staticmethod
    def spatial_dropout(input_tensor, keep_prob, is_training, name, seed=1234):
        """Spatial dropout: drops entire feature maps during training only."""
        tf.set_random_seed(seed=seed)

        def f1():
            # Inference path: identity.
            with tf.variable_scope(name):
                return input_tensor

        def f2():
            # Training path: sample one keep/drop bit per feature map.
            with tf.variable_scope(name):
                num_feature_maps = [tf.shape(input_tensor)[0],
                                    tf.shape(input_tensor)[3]]
                random_tensor = keep_prob
                random_tensor += tf.random_uniform(num_feature_maps, seed=seed,
                                                   dtype=input_tensor.dtype)
                binary_tensor = tf.floor(random_tensor)
                binary_tensor = tf.reshape(binary_tensor,
                                           [-1, 1, 1, tf.shape(input_tensor)[3]])
                ret = input_tensor * binary_tensor
                return ret

        output = tf.cond(is_training, f2, f1)
        return output

    @staticmethod
    def lrelu(inputdata, name, alpha=0.2):
        """Leaky ReLU with negative slope ``alpha``."""
        with tf.variable_scope(name):
            return tf.nn.relu(inputdata) - alpha * tf.nn.relu(-inputdata)
--vgg_scnn_encoder.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""VGG16 encoder with SCNN (Spatial CNN) slice-by-slice message passing."""
from collections import OrderedDict
import tensorflow as tf
import glog as log
import math
import sys
sys.path.append('encoder_decoder_model')
import cnn_basenet


class VGG16Encoder(cnn_basenet.CNNBaseModel):
    """VGG16-style feature encoder that inserts SCNN vertical and horizontal
    message-passing convolutions after conv stage 6_1."""

    def __init__(self, phase):
        """
        :param phase: string tensor, 'train' or 'test'; drives batch-norm mode
        """
        super(VGG16Encoder, self).__init__()
        self._train_phase = tf.constant('train', dtype=tf.string)
        self._test_phase = tf.constant('test', dtype=tf.string)
        self._phase = phase
        self._is_training = self._init_phase()

    def _init_phase(self):
        # True while the phase tensor equals the train constant.
        return tf.equal(self._phase, self._train_phase)

    def _conv_stage(self, input_tensor, k_size, out_dims, name, stride=1, pad='SAME'):
        """conv -> batch norm -> relu"""
        with tf.variable_scope(name):
            conv = self.conv2d(inputdata=input_tensor, out_channel=out_dims,
                               kernel_size=k_size, stride=stride,
                               use_bias=False, padding=pad, name='conv')
            bn = self.layerbn(inputdata=conv, is_training=self._is_training, name='bn')
            relu = self.relu(inputdata=bn, name='relu')
            return relu

    def _fc_stage(self, input_tensor, out_dims, name, use_bias=False):
        """fully-connect -> batch norm -> relu"""
        with tf.variable_scope(name):
            fc = self.fullyconnect(inputdata=input_tensor, out_dim=out_dims,
                                   use_bias=use_bias, name='fc')
            bn = self.layerbn(inputdata=fc, is_training=self._is_training, name='bn')
            relu = self.relu(inputdata=bn, name='relu')
            return relu

    def scnn_u2d_d2u(self, input_tensor):
        """SCNN vertical message passing: top-to-bottom then bottom-to-top.

        Each height slice is updated with information propagated from its
        neighbour through a shared 1x9 convolution (scope reuse shares the
        graph per direction; the weight tensors w_ud/w_du are created once).
        """
        output_list_old = []
        output_list_new = []
        shape_list = input_tensor.get_shape().as_list()
        log.info("scnn_u2d_d2u shape_list:{:}".format(shape_list))
        h_size = input_tensor.get_shape().as_list()[1]
        log.info("scnn_u2d_d2u h_size:{:}".format(h_size))
        channel_size = input_tensor.get_shape().as_list()[3]
        # up2down conv: slice the tensor along the height axis
        for i in range(h_size):
            output_list_old.append(tf.expand_dims(input_tensor[:, i, :, :], axis=1))
        output_list_new.append(tf.expand_dims(input_tensor[:, 0, :, :], axis=1))
        # shared 1x9 kernel for the downward pass
        w_ud = tf.get_variable(
            'w_ud', [1, 9, channel_size, channel_size],
            initializer=tf.random_normal_initializer(
                0, math.sqrt(2.0 / (9 * channel_size * channel_size * 2))))
        with tf.variable_scope("scnn_u2d"):
            scnn_u2d = tf.add(
                tf.nn.relu(tf.nn.conv2d(output_list_old[0], w_ud, [1, 1, 1, 1], 'SAME')),
                output_list_old[1])
            output_list_new.append(scnn_u2d)
        for i in range(2, h_size):
            with tf.variable_scope("scnn_u2d", reuse=True):
                scnn_u2d = tf.add(
                    tf.nn.relu(tf.nn.conv2d(output_list_new[i - 1], w_ud, [1, 1, 1, 1], 'SAME')),
                    output_list_old[i])
                output_list_new.append(scnn_u2d)
        # down2up conv: walk the updated slices in reverse order
        output_list_old = output_list_new
        output_list_new = []
        length = h_size - 1
        output_list_new.append(output_list_old[length])
        w_du = tf.get_variable(
            'w_du', [1, 9, channel_size, channel_size],
            initializer=tf.random_normal_initializer(
                0, math.sqrt(2.0 / (9 * channel_size * channel_size * 2))))
        with tf.variable_scope('scnn_d2u'):
            scnn_d2u = tf.add(
                tf.nn.relu(tf.nn.conv2d(output_list_old[length], w_du, [1, 1, 1, 1], 'SAME')),
                output_list_old[length - 1])
            output_list_new.append(scnn_d2u)
        for i in range(2, h_size):
            with tf.variable_scope("scnn_d2u", reuse=True):
                scnn_d2u = tf.add(
                    tf.nn.relu(tf.nn.conv2d(output_list_new[i - 1], w_du, [1, 1, 1, 1], 'SAME')),
                    output_list_old[length - i])
                output_list_new.append(scnn_d2u)
        output_list_new.reverse()
        #log.info("scnn_u2d_d2u output_list_new:{:}".format(output_list_new))
        # re-assemble the slices into a (batch, h, w, c) tensor
        out_tensor = tf.stack(output_list_new, axis=1)
        out_tensor = tf.squeeze(out_tensor, axis=2)
        return out_tensor

    def scnn_l2r_r2l(self, input_tensor):
        """SCNN horizontal message passing: left-to-right then right-to-left.

        Mirrors scnn_u2d_d2u but slices along the width axis with a shared
        9x1 convolution kernel per direction (w_lr / w_rl).
        """
        output_list_old = []
        output_list_new = []
        shape_list = input_tensor.get_shape().as_list()
        log.info("scnn_l2r_r2l shape_list:{:}".format(shape_list))
        w_size = input_tensor.get_shape().as_list()[2]
        log.info("scnn_l2r_r2l w_size:{:}".format(w_size))
        channel_size = input_tensor.get_shape().as_list()[3]
        # left2right conv: slice the tensor along the width axis
        for i in range(w_size):
            output_list_old.append(tf.expand_dims(input_tensor[:, :, i, :], axis=2))
        output_list_new.append(tf.expand_dims(input_tensor[:, :, 0, :], axis=2))
        w_lr = tf.get_variable(
            'w_lr', [9, 1, channel_size, channel_size],
            initializer=tf.random_normal_initializer(
                0, math.sqrt(2.0 / (9 * channel_size * channel_size * 5))))
        with tf.variable_scope("scnn_l2r"):
            scnn_l2r = tf.add(
                tf.nn.relu(tf.nn.conv2d(output_list_old[0], w_lr, [1, 1, 1, 1], 'SAME')),
                output_list_old[1])
            output_list_new.append(scnn_l2r)
        for i in range(2, w_size):
            with tf.variable_scope("scnn_l2r", reuse=True):
                scnn_l2r = tf.add(
                    tf.nn.relu(tf.nn.conv2d(output_list_new[i - 1], w_lr, [1, 1, 1, 1], 'SAME')),
                    output_list_old[i])
                output_list_new.append(scnn_l2r)
        #log.info("output_list_new:{:}".format(output_list_new))
        # right2left conv
        output_list_old = output_list_new
        output_list_new = []
        length = w_size - 1
        output_list_new.append(output_list_old[length])
        w_rl = tf.get_variable(
            'w_rl', [9, 1, channel_size, channel_size],
            initializer=tf.random_normal_initializer(
                0, math.sqrt(2.0 / (9 * channel_size * channel_size * 5))))
        with tf.variable_scope('scnn_r2l'):
            scnn_r2l = tf.add(
                tf.nn.relu(tf.nn.conv2d(output_list_old[length], w_rl, [1, 1, 1, 1], 'SAME')),
                output_list_old[length - 1])
            output_list_new.append(scnn_r2l)
        for i in range(2, w_size):
            with tf.variable_scope("scnn_r2l", reuse=True):
                scnn_r2l = tf.add(
                    tf.nn.relu(tf.nn.conv2d(output_list_new[i - 1], w_rl, [1, 1, 1, 1], 'SAME')),
                    output_list_old[length - i])
                output_list_new.append(scnn_r2l)
        output_list_new.reverse()
        out_tensor = tf.stack(output_list_new, axis=2)
        out_tensor = tf.squeeze(out_tensor, axis=3)
        return out_tensor

    def encode(self, input_tensor, name):
        """Run the VGG16+SCNN encoder.

        :param input_tensor: 4-D image tensor (batch, h, w, 3)
        :param name: variable scope name
        :return: OrderedDict with 'conv_3_3', 'pool3', 'pool4', 'pool5'
                 entries, each holding {'data': tensor, 'shape': list}
        """
        ret = OrderedDict()
        with tf.variable_scope(name):
            # conv stage 1_1
            conv_1_1 = self._conv_stage(input_tensor=input_tensor, k_size=3,
                                        out_dims=64, name='conv1_1')
            log.info("encode conv_1_1:{:}".format(conv_1_1.get_shape().as_list()))
            # conv stage 1_2
            conv_1_2 = self._conv_stage(input_tensor=conv_1_1, k_size=3,
                                        out_dims=64, name='conv1_2')
            log.info("encode conv_1_2:{:}".format(conv_1_2.get_shape().as_list()))
            # pool stage 1
            pool1 = self.maxpooling(inputdata=conv_1_2, kernel_size=2,
                                    stride=2, name='pool1')
            log.info("encode pool1:{:}".format(pool1.get_shape().as_list()))
            # conv stage 2_1
            conv_2_1 = self._conv_stage(input_tensor=pool1, k_size=3,
                                        out_dims=128, name='conv2_1')
            log.info("encode conv_2_1:{:}".format(conv_2_1.get_shape().as_list()))
            # conv stage 2_2
            conv_2_2 = self._conv_stage(input_tensor=conv_2_1, k_size=3,
                                        out_dims=128, name='conv2_2')
            log.info("encode conv_2_2:{:}".format(conv_2_2.get_shape().as_list()))
            # pool stage 2
            pool2 = self.maxpooling(inputdata=conv_2_2, kernel_size=2,
                                    stride=2, name='pool2')
            log.info("encode pool2:{:}".format(pool2.get_shape().as_list()))
            # conv stage 3_1
            conv_3_1 = self._conv_stage(input_tensor=pool2, k_size=3,
                                        out_dims=256, name='conv3_1')
            log.info("encode conv_3_1:{:}".format(conv_3_1.get_shape().as_list()))
            # conv_stage 3_2
            conv_3_2 = self._conv_stage(input_tensor=conv_3_1, k_size=3,
                                        out_dims=256, name='conv3_2')
            log.info("encode conv_3_2:{:}".format(conv_3_2.get_shape().as_list()))
            # conv stage 3_3
            conv_3_3 = self._conv_stage(input_tensor=conv_3_2, k_size=3,
                                        out_dims=256, name='conv3_3')
            log.info("encode conv_3_3:{:}".format(conv_3_3.get_shape().as_list()))
            ret['conv_3_3'] = dict()
            ret['conv_3_3']['data'] = conv_3_3
            ret['conv_3_3']['shape'] = conv_3_3.get_shape().as_list()
            # pool stage 3
            pool3 = self.maxpooling(inputdata=conv_3_3, kernel_size=2,
                                    stride=2, name='pool3')
            log.info("encode pool3:{:}".format(pool3.get_shape().as_list()))
            ret['pool3'] = dict()
            ret['pool3']['data'] = pool3
            ret['pool3']['shape'] = pool3.get_shape().as_list()
            # conv stage 4_1
            conv_4_1 = self._conv_stage(input_tensor=pool3, k_size=3,
                                        out_dims=512, name='conv4_1')
            log.info("encode conv_4_1:{:}".format(conv_4_1.get_shape().as_list()))
            # conv stage 4_2
            conv_4_2 = self._conv_stage(input_tensor=conv_4_1, k_size=3,
                                        out_dims=512, name='conv4_2')
            log.info("encode conv_4_2:{:}".format(conv_4_2.get_shape().as_list()))
            # conv stage 4_3
            conv_4_3 = self._conv_stage(input_tensor=conv_4_2, k_size=3,
                                        out_dims=512, name='conv4_3')
            log.info("encode conv_4_3:{:}".format(conv_4_3.get_shape().as_list()))
            # pool stage 4
            pool4 = self.maxpooling(inputdata=conv_4_3, kernel_size=2,
                                    stride=2, name='pool4')
            log.info("encode pool4:{:}".format(pool4.get_shape().as_list()))
            ret['pool4'] = dict()
            ret['pool4']['data'] = pool4
            ret['pool4']['shape'] = pool4.get_shape().as_list()
            # conv stage 5_1
            conv_5_1 = self._conv_stage(input_tensor=pool4, k_size=3,
                                        out_dims=512, name='conv5_1')
            log.info("encode conv_5_1:{:}".format(conv_5_1.get_shape().as_list()))
            # conv stage 5_2
            conv_5_2 = self._conv_stage(input_tensor=conv_5_1, k_size=3,
                                        out_dims=512, name='conv5_2')
            log.info("encode conv_5_2:{:}".format(conv_5_2.get_shape().as_list()))
            # conv stage 5_3
            conv_5_3 = self._conv_stage(input_tensor=conv_5_2, k_size=3,
                                        out_dims=512, name='conv5_3')
            log.info("encode conv_5_3:{:}".format(conv_5_3.get_shape().as_list()))
            # conv stage 6_1 -- reduce channels before the SCNN passes
            conv_6_1 = self._conv_stage(input_tensor=conv_5_3, k_size=3,
                                        out_dims=128, name='conv6_1')
            log.info("encode conv_6_1:{:}".format(conv_6_1.get_shape().as_list()))
            # SCNN message passing: vertical then horizontal
            scnn_ud = self.scnn_u2d_d2u(conv_6_1)
            log.info("encode scnn_ud:{:}".format(scnn_ud.get_shape().as_list()))
            scnn_lr = self.scnn_l2r_r2l(scnn_ud)
            log.info("encode scnn_lr:{:}".format(scnn_lr.get_shape().as_list()))
            # pool stage 5
            pool5 = self.maxpooling(inputdata=scnn_lr, kernel_size=2,
                                    stride=2, name='pool5')
            log.info("encode pool5:{:}".format(pool5.get_shape().as_list()))
            ret['pool5'] = dict()
            ret['pool5']['data'] = pool5
            ret['pool5']['shape'] = pool5.get_shape().as_list()
            # fc stage 1
            # fc6 = self._fc_stage(input_tensor=pool5, out_dims=4096, name='fc6',
            #                      use_bias=False, flags=flags)
            # fc stage 2
            # fc7 = self._fc_stage(input_tensor=fc6, out_dims=4096, name='fc7',
            #                      use_bias=False, flags=flags)
            return ret


if __name__ == '__main__':
    a = tf.placeholder(dtype=tf.float32, shape=[1, 2048, 2048, 3], name='input')
    encoder = VGG16Encoder(phase=tf.constant('train', dtype=tf.string))
    ret = encoder.encode(a, name='encode')
    for layer_name, layer_info in ret.items():
        print('layer name: {:s} shape: {}'.format(layer_name, layer_info['shape']))
--dense_encoder.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""DenseNet-based encoder."""
import tensorflow as tf
from collections import OrderedDict
#from encoder_decoder_model import cnn_basenet
import cnn_basenet


class DenseEncoder(cnn_basenet.CNNBaseModel):
    """Encoder built from DenseNet dense blocks and transition layers."""

    def __init__(self, l, n, growthrate, phase, with_bc=False, bc_theta=0.5):
        """
        :param l: total network depth L
        :param n: number of dense blocks N
        :param growthrate: growth rate k of each dense block
        :param phase: 'train' / 'test' string tensor controlling batch norm
        :param with_bc: use the bottleneck-compression (DenseNet-BC) variant
        :param bc_theta: compression factor applied in transitions when with_bc
        """
        super(DenseEncoder, self).__init__()
        self._L = l
        self._block_depth = int((l - n - 1) / n)
        self._N = n
        self._growthrate = growthrate
        self._with_bc = with_bc
        self._phase = phase
        self._train_phase = tf.constant('train', dtype=tf.string)
        self._test_phase = tf.constant('test', dtype=tf.string)
        self._is_training = self._init_phase()
        self._bc_theta = bc_theta
        return

    def _init_phase(self):
        # True while the phase tensor equals the train constant.
        return tf.equal(self._phase, self._train_phase)

    def __str__(self):
        return ('A densenet with net depth: {:d} block nums: '
                '{:d} growth rate: {:d} block depth: {:d}'.format(
                    self._L, self._N, self._growthrate, self._block_depth))

    def _composite_conv(self, inputdata, out_channel, name):
        """Composite function: BN -> ReLU -> (1x1 bottleneck when BC) -> 3x3 conv."""
        with tf.variable_scope(name):
            bn_1 = self.layerbn(inputdata=inputdata,
                                is_training=self._is_training, name='bn_1')
            relu_1 = self.relu(bn_1, name='relu_1')
            if not self._with_bc:
                # Plain variant: straight 3x3 convolution.
                return self.conv2d(inputdata=relu_1, out_channel=out_channel,
                                   kernel_size=3, stride=1, padding='SAME',
                                   use_bias=False, name='conv_2')
            # BC variant: 1x1 bottleneck before the 3x3 convolution.
            conv_1 = self.conv2d(inputdata=relu_1, out_channel=out_channel,
                                 kernel_size=1, padding='SAME', stride=1,
                                 use_bias=False, name='conv_1')
            bn_2 = self.layerbn(inputdata=conv_1,
                                is_training=self._is_training, name='bn_2')
            relu_2 = self.relu(inputdata=bn_2, name='relu_2')
            return self.conv2d(inputdata=relu_2, out_channel=out_channel,
                               kernel_size=3, stride=1, padding='SAME',
                               use_bias=False, name='conv_2')

    def _denseconnect_layers(self, inputdata, name):
        """One densely-connected layer: composite conv, then concat with its input."""
        with tf.variable_scope(name):
            conv_out = self._composite_conv(inputdata=inputdata,
                                            name='composite_conv',
                                            out_channel=self._growthrate)
            return tf.concat(values=[conv_out, inputdata], axis=3,
                             name='concatenate')

    def _transition_layers(self, inputdata, name):
        """Transition (pooling) layer from the DenseNet paper.

        BN -> 1x1 conv -> 2x2 average pooling; the BC variant compresses the
        channel count by the factor theta.
        """
        input_channels = inputdata.get_shape().as_list()[3]
        out_channels = int(input_channels * self._bc_theta) if self._with_bc \
            else input_channels
        with tf.variable_scope(name):
            bn = self.layerbn(inputdata=inputdata,
                              is_training=self._is_training, name='bn')
            conv = self.conv2d(inputdata=bn, out_channel=out_channels,
                               kernel_size=1, stride=1, use_bias=False,
                               name='conv')
            return self.avgpooling(inputdata=conv, kernel_size=2,
                                   stride=2, name='avgpool')

    def _dense_block(self, inputdata, name):
        """Stack self._block_depth dense layers (DenseNet figure 1)."""
        block_input = inputdata
        with tf.variable_scope(name):
            for i in range(self._block_depth):
                block_input = self._denseconnect_layers(
                    inputdata=block_input,
                    name='{:s}_layer_{:d}'.format(name, i + 1))
        return block_input

    def encode(self, input_tensor, name):
        """DenseNet encoding.

        :param input_tensor: 4-D input image tensor
        :param name: variable scope name
        :return: OrderedDict of per-block {'data': tensor, 'shape': list}
        """
        encode_ret = OrderedDict()
        with tf.variable_scope(name):
            # Initial 3x3 conv with 16 output channels
            # (DenseNet paper, Implementation Details).
            dense_block_input = self.conv2d(inputdata=input_tensor,
                                            out_channel=16, kernel_size=3,
                                            use_bias=False, name='conv1')
            for block_index in range(self._N):
                block_name = 'Dense_Block_{:d}'.format(block_index + 1)
                # dense connectivity followed by the transition part
                block_out = self._dense_block(inputdata=dense_block_input,
                                              name=block_name)
                block_out = self._transition_layers(inputdata=block_out,
                                                    name=block_name)
                dense_block_input = block_out
                encode_ret[block_name] = {
                    'data': block_out,
                    'shape': block_out.get_shape().as_list(),
                }
        return encode_ret


if __name__ == '__main__':
    input_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 384, 1248, 3],
                                  name='input_tensor')
    encoder = DenseEncoder(l=100, growthrate=16, with_bc=True,
                           phase=tf.constant('train'), n=5)
    ret = encoder.encode(input_tensor=input_tensor, name='Dense_Encode')
    for layer_name, layer_info in ret.items():
        print('layer_name: {:s} shape: {}'.format(layer_name, layer_info['shape']))
--fcn_decoder.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FCN-style decoder: fuses encoder feature maps from deep to shallow with
learned upsampling (deconvolution), as in "Fully Convolutional Networks for
Semantic Segmentation".
"""
import tensorflow as tf
#from encoder_decoder_model import cnn_basenet
#from encoder_decoder_model import vgg_encoder
#from encoder_decoder_model import dense_encoder
import cnn_basenet
import vgg_encoder
import dense_encoder


class FCNDecoder(cnn_basenet.CNNBaseModel):
    """
    FCN decoder that restores encoded feature maps to input resolution.
    """

    def __init__(self, phase):
        """
        :param phase: tf.string tensor holding 'train' or 'test'
        """
        super(FCNDecoder, self).__init__()
        self._train_phase = tf.constant('train', dtype=tf.string)
        self._phase = phase
        self._is_training = self._init_phase()

    def _init_phase(self):
        """
        :return: bool tensor, True while training
        """
        return tf.equal(self._phase, self._train_phase)

    def decode(self, input_tensor_dict, decode_layer_list, name):
        """
        Decode the encoded feature maps back with deconvolution.
        :param input_tensor_dict: dict {layer_name: {'data': tensor, ...}}
        :param decode_layer_list: layer names to decode, ordered from deep to
               shallow, e.g. ['pool5', 'pool4', 'pool3']
        :param name: variable scope name
        :return: dict with 'score', 'fuse_i', 'logits' and 'deconv' entries
        """
        ret = dict()

        with tf.variable_scope(name):
            # score stage 1
            input_tensor = input_tensor_dict[decode_layer_list[0]]['data']
            score = self.conv2d(inputdata=input_tensor, out_channel=64,
                                kernel_size=1, use_bias=False, name='score_origin')
            ret['score'] = dict()
            ret['score']['data'] = score
            ret['score']['shape'] = score.get_shape().as_list()

            decode_layer_list = decode_layer_list[1:]
            print("len(decode_layer_list):", len(decode_layer_list))
            for i in range(len(decode_layer_list)):
                # upsample 2x, then fuse with the next shallower skip feature
                deconv = self.deconv2d(inputdata=score, out_channel=64,
                                       kernel_size=4, stride=2, use_bias=False,
                                       name='deconv_{:d}'.format(i + 1))
                input_tensor = input_tensor_dict[decode_layer_list[i]]['data']
                score = self.conv2d(inputdata=input_tensor, out_channel=64,
                                    kernel_size=1, use_bias=False,
                                    name='score_{:d}'.format(i + 1))
                fused = tf.add(deconv, score, name='fuse_{:d}'.format(i + 1))
                score = fused
                ret['fuse_{:d}'.format(i + 1)] = dict()
                ret['fuse_{:d}'.format(i + 1)]['data'] = fused
                ret['fuse_{:d}'.format(i + 1)]['shape'] = fused.get_shape().as_list()

            # final 8x upsampling back to input resolution, then 2-class score
            deconv_final = self.deconv2d(inputdata=score, out_channel=64,
                                         kernel_size=16, stride=8, use_bias=False,
                                         name='deconv_final')
            score_final = self.conv2d(inputdata=deconv_final, out_channel=2,
                                      kernel_size=1, use_bias=False,
                                      name='score_final')
            # The original code assigned the raw tensors to ret['logits'] /
            # ret['deconv'] and immediately overwrote them with these dicts;
            # the dead assignments have been removed.
            # NOTE(review): merge_model.compute_loss reads inference_ret['logits']
            # as a plain tensor — confirm whether the dict wrapper here is the
            # intended contract for that caller.
            ret['logits'] = dict()
            ret['logits']['data'] = score_final
            ret['logits']['shape'] = score_final.get_shape().as_list()
            ret['deconv'] = dict()
            ret['deconv']['data'] = deconv_final
            ret['deconv']['shape'] = deconv_final.get_shape().as_list()

        return ret


if __name__ == '__main__':
    # use distinct names so the imported modules are not shadowed
    encoder_vgg = vgg_encoder.VGG16Encoder(phase=tf.constant('train', tf.string))
    encoder_dense = dense_encoder.DenseEncoder(l=40, growthrate=12,
                                               with_bc=True,
                                               phase='train', n=5)
    decoder = FCNDecoder(phase='train')

    in_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 256, 512, 3],
                               name='input')

    vgg_encode_ret = encoder_vgg.encode(in_tensor, name='vgg_encoder')
    dense_encode_ret = encoder_dense.encode(in_tensor, name='dense_encoder')
    decode_ret = decoder.decode(vgg_encode_ret, name='decoder',
                                decode_layer_list=['pool5', 'pool4', 'pool3'])
    for layer_name, layer_info in decode_ret.items():
        print('layer name: {:s} shape: {}'.format(layer_name, layer_info['shape']))
./merge_model
--merge_model.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
LaneNet merged model: encoder + FCN decoder + binary/instance segmentation
losses.
"""
import tensorflow as tf

from encoder_decoder_model import vgg_encoder
from encoder_decoder_model import fcn_decoder
from encoder_decoder_model import dense_encoder
from encoder_decoder_model import cnn_basenet
from lanenet_model import lanenet_discriminative_loss
from encoder_decoder_model import vgg_scnn_encoder
import glog


class LaneNet(cnn_basenet.CNNBaseModel):
    """
    Semantic segmentation model combining a configurable encoder
    ('vgg', 'vgg_scnn' or 'dense') with an FCN decoder.
    """

    def __init__(self, phase, net_flag='vgg'):
        """
        :param phase: tf.string tensor holding 'train' or 'test'
        :param net_flag: which base network to use as the encoder
        """
        super(LaneNet, self).__init__()
        self._net_flag = net_flag
        self._phase = phase
        if self._net_flag == 'vgg':
            self._encoder = vgg_encoder.VGG16Encoder(phase=phase)
        elif self._net_flag == 'vgg_scnn':
            self._encoder = vgg_scnn_encoder.VGG16Encoder(phase=phase)
        elif self._net_flag == 'dense':
            self._encoder = dense_encoder.DenseEncoder(l=20, growthrate=8,
                                                       with_bc=True,
                                                       phase=phase,
                                                       n=5)
        self._decoder = fcn_decoder.FCNDecoder(phase=phase)
        return

    def __str__(self):
        """
        :return: short description of the configured encoder
        """
        info = 'Semantic Segmentation use {:s} as basenet to encode'.format(self._net_flag)
        return info

    def _build_model(self, input_tensor, name):
        """
        Forward pass: encode then decode.
        :param input_tensor: input image tensor NHWC
        :param name: variable scope name
        :return: decoder output dict
        """
        with tf.variable_scope(name):
            # first encode
            encode_ret = self._encoder.encode(input_tensor=input_tensor,
                                              name='encode')

            # second decode; both vgg variants expose the same pool layers,
            # so the two branches are merged into one
            if self._net_flag.lower() in ('vgg', 'vgg_scnn'):
                decode_ret = self._decoder.decode(input_tensor_dict=encode_ret,
                                                  name='decode',
                                                  decode_layer_list=['pool5',
                                                                     'pool4',
                                                                     'pool3'])
                return decode_ret
            elif self._net_flag.lower() == 'dense':
                decode_ret = self._decoder.decode(input_tensor_dict=encode_ret,
                                                  name='decode',
                                                  decode_layer_list=['Dense_Block_5',
                                                                     'Dense_Block_4',
                                                                     'Dense_Block_3'])
                return decode_ret

    def compute_loss(self, input_tensor, binary_label, instance_label, name):
        """
        Compute the LaneNet total loss.
        :param input_tensor: input image tensor NHWC
        :param binary_label: binary segmentation ground truth
        :param instance_label: instance segmentation ground truth
        :param name: variable scope name
        :return: dict with total / binary / discriminative losses and logits
        """
        with tf.variable_scope(name):
            # forward pass to obtain logits
            # NOTE(review): this reads inference_ret['logits'] / ['deconv'] as
            # plain tensors — confirm the decoder's return contract matches.
            inference_ret = self._build_model(input_tensor=input_tensor,
                                              name='inference')
            glog.info('compute_loss inference_ret:{:}'.format(inference_ret))
            # binary segmentation loss
            decode_logits = inference_ret['logits']
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[binary_label.get_shape().as_list()[0] *
                       binary_label.get_shape().as_list()[1] *
                       binary_label.get_shape().as_list()[2]])
            glog.info('compute_loss binary_label_plain:{:}'.format(binary_label_plain))
            # class weighting: inverse-log frequency of each label value
            unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            glog.info('compute_loss counts:{:}'.format(counts))
            inverse_weights = tf.divide(1.0,
                                        tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                                                      tf.constant(1.02))))
            glog.info('compute_loss inverse_weights:{:}'.format(inverse_weights))
            inverse_weights = tf.gather(inverse_weights, binary_label)
            glog.info('compute_loss gather inverse_weights:{:}'.format(inverse_weights))
            binary_segmentation_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=binary_label, logits=decode_logits, weights=inverse_weights)
            glog.info('compute_loss binary_segmenatation_loss:{:}'.format(binary_segmentation_loss))
            binary_segmentation_loss = tf.reduce_mean(binary_segmentation_loss)
            glog.info('compute_loss reduce_mean binary_segmenatation_loss:{:}'.format(binary_segmentation_loss))

            # discriminative (instance) loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv, out_channel=4,
                                        kernel_size=1, use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')
            # compute discriminative loss over the embedding
            image_shape = (pix_embedding.get_shape().as_list()[1],
                           pix_embedding.get_shape().as_list()[2])
            glog.info('compute_loss image_shape:{:}'.format(image_shape))
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 4, image_shape,
                    0.5, 3.0, 1.0, 1.0, 0.001)
            glog.info('compute_loss disc_loss:{:}'.format(disc_loss))

            # combine losses: L2 regularization over all non-BN variables
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = 0.5 * binary_segmentation_loss + 0.5 * disc_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmentation_loss,
                'discriminative_loss': disc_loss
            }

            return ret

    def inference(self, input_tensor, name):
        """
        Inference pass: binary segmentation mask plus pixel embedding.
        :param input_tensor: input image tensor NHWC
        :param name: variable scope name
        :return: (binary_seg_ret, pix_embedding)
        """
        with tf.variable_scope(name):
            # forward pass to obtain logits
            inference_ret = self._build_model(input_tensor=input_tensor,
                                              name='inference')
            # binary segmentation prediction
            decode_logits = inference_ret['logits']
            binary_seg_ret = tf.nn.softmax(logits=decode_logits)
            binary_seg_ret = tf.argmax(binary_seg_ret, axis=-1)

            # pixel embedding
            decode_deconv = inference_ret['deconv']
            pix_embedding = self.conv2d(inputdata=decode_deconv, out_channel=4,
                                        kernel_size=1, use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')

            return binary_seg_ret, pix_embedding


if __name__ == '__main__':
    model = LaneNet(tf.constant('train', dtype=tf.string))
    input_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3],
                                  name='input')
    binary_label = tf.placeholder(dtype=tf.int64, shape=[1, 256, 512, 1],
                                  name='label')
    instance_label = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 1],
                                    name='label')
    ret = model.compute_loss(input_tensor=input_tensor,
                             binary_label=binary_label,
                             instance_label=instance_label, name='loss')
    for vv in tf.trainable_variables():
        if 'bn' in vv.name:
            continue
        print(vv.name)
--discriminative_loss.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Discriminative loss for instance segmentation
(De Brabandere et al., "Semantic Instance Segmentation with a
Discriminative Loss Function", eq. (1)).
"""
import tensorflow as tf
import glog


def discriminative_loss_single(
        prediction,
        correct_label,
        feature_dim,
        label_shape,
        delta_v,
        delta_d,
        param_var,
        param_dist,
        param_reg):
    """
    Instance segmentation loss for a single image, eq. (1) of the paper.
    :param prediction: inference of network
    :param correct_label: instance label
    :param feature_dim: feature dimension of prediction
    :param label_shape: shape of label
    :param delta_v: cut off variance distance
    :param delta_d: cut off cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    """
    # flatten pixels to one row
    correct_label = tf.reshape(
        correct_label, [label_shape[1] * label_shape[0]])
    reshaped_pred = tf.reshape(
        prediction, [label_shape[1] * label_shape[0], feature_dim])

    # count instances
    unique_labels, unique_id, counts = tf.unique_with_counts(correct_label)
    counts = tf.cast(counts, tf.float32)
    num_instances = tf.size(unique_labels)
    glog.info('discriminative_loss_single counts:{:} num_instances:{:}'.format(counts, num_instances))
    # mean embedding vector per instance
    segmented_sum = tf.unsorted_segment_sum(
        reshaped_pred, unique_id, num_instances)
    mu = tf.div(segmented_sum, tf.reshape(counts, (-1, 1)))
    mu_expand = tf.gather(mu, unique_id)

    # intra-cluster variance term l_var: hinge on distance to cluster mean
    distance = tf.norm(tf.subtract(mu_expand, reshaped_pred), axis=1)
    distance = tf.subtract(distance, delta_v)
    distance = tf.clip_by_value(distance, 0., distance)
    distance = tf.square(distance)

    l_var = tf.unsorted_segment_sum(distance, unique_id, num_instances)
    l_var = tf.div(l_var, counts)
    l_var = tf.reduce_sum(l_var)
    l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))

    # inter-cluster distance term l_dist: all pairs of cluster means
    mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
    mu_band_rep = tf.tile(mu, [1, num_instances])
    mu_band_rep = tf.reshape(
        mu_band_rep, (num_instances * num_instances, feature_dim))

    mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)

    # drop the zero rows (self-pairs) from the pairwise differences
    intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1)
    zero_vector = tf.zeros(1, dtype=tf.float32)
    bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
    mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)

    mu_norm = tf.norm(mu_diff_bool, axis=1)
    mu_norm = tf.subtract(2. * delta_d, mu_norm)
    mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm)
    mu_norm = tf.square(mu_norm)

    l_dist = tf.reduce_mean(mu_norm)

    # regularization term from the original Discriminative Loss paper
    l_reg = tf.reduce_mean(tf.norm(mu, axis=1))

    # combine terms with the weights given in the original paper
    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg


def discriminative_loss(prediction, correct_label, feature_dim, image_shape,
                        delta_v, delta_d, param_var, param_dist, param_reg):
    """
    Iterate discriminative_loss_single over the batch as described in the paper.
    :return: discriminative loss and its three components
    """

    def cond(label, batch, out_loss, out_var, out_dist, out_reg, i):
        # loop while i < batch size
        return tf.less(i, tf.shape(batch)[0])

    def body(label, batch, out_loss, out_var, out_dist, out_reg, i):
        disc_loss, l_var, l_dist, l_reg = discriminative_loss_single(
            prediction[i], correct_label[i], feature_dim, image_shape,
            delta_v, delta_d, param_var, param_dist, param_reg)

        out_loss = out_loss.write(i, disc_loss)
        out_var = out_var.write(i, l_var)
        out_dist = out_dist.write(i, l_dist)
        out_reg = out_reg.write(i, l_reg)

        return label, batch, out_loss, out_var, out_dist, out_reg, i + 1

    # TensorArray is a data structure that support dynamic writing
    output_ta_loss = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    output_ta_var = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    output_ta_dist = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    output_ta_reg = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    _, _, out_loss_op, out_var_op, out_dist_op, out_reg_op, _ = tf.while_loop(
        cond, body, [correct_label, prediction,
                     output_ta_loss, output_ta_var,
                     output_ta_dist, output_ta_reg, 0])
    out_loss_op = out_loss_op.stack()
    out_var_op = out_var_op.stack()
    out_dist_op = out_dist_op.stack()
    out_reg_op = out_reg_op.stack()

    disc_loss = tf.reduce_mean(out_loss_op)
    l_var = tf.reduce_mean(out_var_op)
    l_dist = tf.reduce_mean(out_dist_op)
    l_reg = tf.reduce_mean(out_reg_op)

    return disc_loss, l_var, l_dist, l_reg
--postprocess.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
LaneNet post-processing: morphological cleanup + connected-component filtering
of the binary segmentation output.
"""
import numpy as np
import matplotlib.pyplot as plt
import cv2
import glog
try:
    from cv2 import cv2
except ImportError:
    pass


class LaneNetPoseProcessor(object):
    """
    Post-processor for LaneNet binary segmentation masks.
    """

    def __init__(self):
        """
        """
        pass

    @staticmethod
    def _morphological_process(image, kernel_size=5):
        """
        Close small holes in the binary mask with a morphological close.
        :param image: binary mask (any dtype; converted to uint8)
        :param kernel_size: elliptical structuring element size
        :return: closed single-channel uint8 image
        """
        # BUGFIX: the original used `image.dtype is not np.uint8`, which is an
        # identity test between a dtype object and a type and is always True,
        # so the conversion ran unconditionally; compare with != instead.
        if image.dtype != np.uint8:
            image = np.array(image, np.uint8)
        glog.info("_morphological_process image shape len:{:d}".format(len(image.shape)))
        if len(image.shape) == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        glog.info("_morphological_process image shape len:{:d}".format(len(image.shape)))
        kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE,
                                           ksize=(kernel_size, kernel_size))
        # close operation fille hole
        closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1)
        return closing

    @staticmethod
    def _connect_components_analysis(image):
        """
        Run connected-component analysis on a (grayscale) mask.
        :param image: single- or three-channel mask
        :return: cv2.connectedComponentsWithStats result tuple
        """
        glog.info("_connect_components_analysis image shape len:{:d}".format(len(image.shape)))
        if len(image.shape) == 3:
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray_image = image
        return cv2.connectedComponentsWithStats(gray_image, connectivity=8,
                                                ltype=cv2.CV_32S)

    def postprocess(self, image, minarea_threshold=15):
        """
        Clean a binary mask: morphological close, then drop tiny components.
        :param image: binary segmentation mask
        :param minarea_threshold: minimum connected-component area to keep
        :return: cleaned mask
        """
        # morphological cleanup first
        morphological_ret = self._morphological_process(image, kernel_size=5)
        glog.info("postprocess image shape len:{:d}".format(len(image.shape)))
        # connected-component analysis
        connect_components_analysis_ret = self._connect_components_analysis(image=morphological_ret)
        glog.info("postprocess connect_components_analysis_ret:{:}".format(connect_components_analysis_ret))
        # zero out components whose area is below the threshold
        labels = connect_components_analysis_ret[1]
        stats = connect_components_analysis_ret[2]
        glog.info("postprocess labels:{:}".format(labels))
        glog.info("postprocess stats:{:}".format(stats))
        for index, stat in enumerate(stats):
            # stat[4] is cv2.CC_STAT_AREA
            if stat[4] <= minarea_threshold:
                idx = np.where(labels == index)
                morphological_ret[idx] = 0
        return morphological_ret


if __name__ == '__main__':
    processor = LaneNetPoseProcessor()
    image = cv2.imread('D:/Code/github/tf_lanenet/data/training_data_example/gt_image_binary/0000.png',
                       cv2.IMREAD_UNCHANGED)  # IMREAD_GRAYSCALE
    postprocess_ret = processor.postprocess(image)
    plt.figure('src')
    plt.imshow(image)
    plt.figure('post')
    plt.imshow(postprocess_ret)
    plt.show()
--cluster.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Clustering stage of LaneNet instance segmentation: group pixel embeddings
into individual lane instances.
"""
import numpy as np
import glog as log
import matplotlib.pyplot as plt
from sklearn.cluster import MeanShift
from sklearn.cluster import DBSCAN
import time
import warnings
import cv2
import glog
try:
    from cv2 import cv2
except ImportError:
    pass


class LaneNetCluster(object):
    """
    Instance-segmentation clusterer: one color per detected lane.
    """

    def __init__(self):
        """
        """
        # fixed palette: at most 8 lanes can be drawn
        self._color_map = [np.array([255, 0, 0]),
                           np.array([0, 255, 0]),
                           np.array([0, 0, 255]),
                           np.array([125, 125, 0]),
                           np.array([0, 125, 125]),
                           np.array([125, 0, 125]),
                           np.array([50, 100, 50]),
                           np.array([100, 50, 100])]
        pass

    @staticmethod
    def _cluster(prediction, bandwidth):
        """
        Mean-shift clustering of the embedding vectors (paper, Section II).
        :param prediction: [N, feature_dim] embedding vectors
        :param bandwidth: mean-shift bandwidth
        :return: (num_clusters, labels, cluster_centers)
        """
        ms = MeanShift(bandwidth, bin_seeding=True)
        # log.info('Starting Mean shift clustering ...')
        tic = time.time()
        try:
            ms.fit(prediction)
        except ValueError as err:
            log.error(err)
            return 0, [], []
        # log.info('Mean Shift took: {:.5f}s'.format(time.time() - tic))
        labels = ms.labels_
        cluster_centers = ms.cluster_centers_

        num_clusters = cluster_centers.shape[0]
        # log.info('Number of clusters: {:d}'.format(num_clusters))
        return num_clusters, labels, cluster_centers

    @staticmethod
    def _cluster_v2(prediction):
        """
        dbscan cluster
        :param prediction:
        :return:
        """
        db = DBSCAN(eps=0.7, min_samples=200).fit(prediction)
        db_labels = db.labels_
        unique_labels = np.unique(db_labels)
        unique_labels = [tmp for tmp in unique_labels if tmp != -1]
        log.info('聚类簇个数为: {:d}'.format(len(unique_labels)))

        num_clusters = len(unique_labels)
        cluster_centers = db.components_

        return num_clusters, db_labels, cluster_centers

    @staticmethod
    def _get_lane_area(binary_seg_ret, instance_seg_ret):
        """
        Collect embedding vectors of all lane pixels selected by the binary
        segmentation mask.
        :param binary_seg_ret: binary mask (1 = lane pixel)
        :param instance_seg_ret: per-pixel embedding image
        :return: (embedding features float32 array, pixel coordinates int64 array)
        """
        idx = np.where(binary_seg_ret == 1)
        print("_get_lane_area idx:", idx)
        print("_get_lane_area idx len:", len(idx))
        print("_get_lane_area idx len[0]:", len(idx[0]))
        print("_get_lane_area idx len[1]:", len(idx[1]))
        lane_embedding_feats = []
        lane_coordinate = []
        for i in range(len(idx[0])):
            lane_embedding_feats.append(instance_seg_ret[idx[0][i], idx[1][i]])
            lane_coordinate.append([idx[0][i], idx[1][i]])
        return (np.array(lane_embedding_feats, np.float32),
                np.array(lane_coordinate, np.int64))

    @staticmethod
    def _thresh_coord(coord):
        """
        Filter lane coordinate points: lanes are assumed continuous, so the
        coordinates should vary smoothly without jumps.
        :param coord: [(x, y)]
        :return: filtered coordinates
        """
        pts_x = coord[:, 0]
        mean_x = np.mean(pts_x)
        idx = np.where(np.abs(pts_x - mean_x) < mean_x)
        return coord[idx[0]]

    @staticmethod
    def _lane_fit(lane_pts):
        """
        Polynomial (cubic) fit of a single lane's points.
        :param lane_pts: lane pixel coordinates
        :return: zip of fitted (x, y) pairs
        """
        if not isinstance(lane_pts, np.ndarray):
            lane_pts = np.array(lane_pts, np.float32)

        x = lane_pts[:, 0]
        y = lane_pts[:, 1]
        x_fit = []
        y_fit = []
        with warnings.catch_warnings():
            warnings.filterwarnings('error')
            try:
                f1 = np.polyfit(x, y, 3)
                p1 = np.poly1d(f1)
                x_min = int(np.min(x))
                x_max = int(np.max(x))
                x_fit = []
                for i in range(x_min, x_max + 1):
                    x_fit.append(i)
                y_fit = p1(x_fit)
            except Warning as e:
                # fit was ill-conditioned: fall back to the raw points
                x_fit = x
                y_fit = y
            finally:
                return zip(x_fit, y_fit)

    def get_lane_mask(self, binary_seg_ret, instance_seg_ret):
        """
        Draw one colored polyline per clustered lane instance.
        :param binary_seg_ret: binary segmentation mask
        :param instance_seg_ret: per-pixel embedding image
        :return: BGR mask image with lanes drawn
        """
        lane_embedding_feats, lane_coordinate = self._get_lane_area(binary_seg_ret,
                                                                    instance_seg_ret)

        num_clusters, labels, cluster_centers = self._cluster(lane_embedding_feats,
                                                              bandwidth=1.5)

        # if more than eight clusters are found, keep the eight clusters with
        # the most member pixels (the color map holds eight entries)
        if num_clusters > 8:
            cluster_sample_nums = []
            for i in range(num_clusters):
                cluster_sample_nums.append(len(np.where(labels == i)[0]))
            sort_idx = np.argsort(-np.array(cluster_sample_nums, np.int64))
            # BUGFIX: original sliced sort_idx[0:4], keeping only four clusters
            # despite the stated intent (and palette size) of eight
            cluster_index = np.array(range(num_clusters))[sort_idx[0:8]]
        else:
            cluster_index = range(num_clusters)

        mask_image = np.zeros(shape=[binary_seg_ret.shape[0],
                                     binary_seg_ret.shape[1], 3],
                              dtype=np.uint8)

        for index, i in enumerate(cluster_index):
            idx = np.where(labels == i)
            coord = lane_coordinate[idx]
            # coord = self._thresh_coord(coord)
            coord = np.flip(coord, axis=1)
            # coord = (coord[:, 0], coord[:, 1])
            color = (int(self._color_map[index][0]),
                     int(self._color_map[index][1]),
                     int(self._color_map[index][2]))
            coord = np.array([coord])
            cv2.polylines(img=mask_image, pts=coord, isClosed=False,
                          color=color, thickness=2)
            # mask_image[coord] = color
        return mask_image


if __name__ == '__main__':
    binary_seg_image = cv2.imread('D:/Code/github/tf_lanenet/data/training_data_example/gt_image_binary/0000.png',
                                  cv2.IMREAD_GRAYSCALE)
    print("binary_seg_image shape:", binary_seg_image.shape)
    binary_seg_image[np.where(binary_seg_image == 255)] = 1
    print("binary_seg_image np.where(binary_seg_image == 255):",
          np.where(binary_seg_image == 255))
    instance_seg_image = cv2.imread('D:/Code/github/tf_lanenet/data/training_data_example/gt_image_instance/0000.png',
                                    cv2.IMREAD_UNCHANGED)
    glog.info("__name__ instance_seg_image shape len:{:d}".format(len(instance_seg_image.shape)))
    instance_seg_image = cv2.cvtColor(instance_seg_image, cv2.COLOR_GRAY2BGR)
    glog.info("__name__ instance_seg_image shape len:{:d}".format(len(instance_seg_image.shape)))
    ele_mex = np.max(instance_seg_image, axis=(0, 1))
    print("ele_mex:", ele_mex)
    for i in range(3):
        if ele_mex[i] == 0:
            scale = 1
        else:
            scale = 255 / ele_mex[i]
        instance_seg_image[:, :, i] *= int(scale)
    embedding_image = np.array(instance_seg_image, np.uint8)
    cluster = LaneNetCluster()
    mask_image = cluster.get_lane_mask(binary_seg_ret=binary_seg_image,
                                       instance_seg_ret=instance_seg_image)
    det_img = embedding_image + mask_image
    plt.figure('det_img')
    plt.imshow(det_img[:, :, (2, 1, 0)])
    #plt.figure('embedding')
    #plt.imshow(embedding_image[:, :, (2, 1, 0)])
    #plt.figure('mask_image')
    #plt.imshow(mask_image[:, :, (2, 1, 0)])
    plt.show()
--train_lane_scnn.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
LaneNet training entry point: builds the graph, runs the train/val loop,
writes summaries and periodic checkpoints.
"""
import argparse
import math
import os
import os.path as ops
import time

import cv2
import glog as log
import numpy as np
import tensorflow as tf

from config import global_config
from lanenet_model import lanenet_merge_model
from data_provider import lanenet_data_processor

CFG = global_config.cfg
# BGR channel means used for VGG preprocessing
VGG_MEAN = [103.939, 116.779, 123.68]


def init_args():
    """
    Parse command-line arguments.
    :return: argparse.Namespace
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_dir', type=str, default='data/datasets_culane',
                        help='The training dataset dir path')
    parser.add_argument('--net', type=str, default='vgg',
                        help='Which base net work to use')
    parser.add_argument('--weights_path', type=str,
                        default='model/lanenet_culane_vgg_2019-02-02-14-05-16.ckpt-200000',
                        help='The pretrained weights path')
    return parser.parse_args()


def minmax_scale(input_arr):
    """
    Linearly rescale an array into [0, 255].
    :param input_arr: numpy array
    :return: rescaled array
    """
    min_val = np.min(input_arr)
    max_val = np.max(input_arr)
    output_arr = (input_arr - min_val) * 255.0 / (max_val - min_val)
    return output_arr


def train_net(dataset_dir, weights_path=None, net_flag='vgg'):
    """
    Build the LaneNet graph and run the training loop.
    :param dataset_dir: directory containing train.txt / val.txt
    :param net_flag: choose which base network to use
    :param weights_path: optional checkpoint to resume from
    :return: None
    """
    train_dataset_file = ops.join(dataset_dir, 'train.txt')
    val_dataset_file = ops.join(dataset_dir, 'val.txt')
    print('train_dataset_file:', train_dataset_file)
    print('val_dataset_file:', val_dataset_file)

    assert ops.exists(train_dataset_file)
    # fail fast if the validation list is missing as well
    assert ops.exists(val_dataset_file)

    train_dataset = lanenet_data_processor.DataSet(train_dataset_file)
    val_dataset = lanenet_data_processor.DataSet(val_dataset_file)

    with tf.device('/gpu:1'):
        input_tensor = tf.placeholder(dtype=tf.float32,
                                      shape=[CFG.TRAIN.BATCH_SIZE,
                                             CFG.TRAIN.IMG_HEIGHT,
                                             CFG.TRAIN.IMG_WIDTH, 3],
                                      name='input_tensor')
        binary_label_tensor = tf.placeholder(dtype=tf.int64,
                                             shape=[CFG.TRAIN.BATCH_SIZE,
                                                    CFG.TRAIN.IMG_HEIGHT,
                                                    CFG.TRAIN.IMG_WIDTH, 1],
                                             name='binary_input_label')
        instance_label_tensor = tf.placeholder(dtype=tf.float32,
                                               shape=[CFG.TRAIN.BATCH_SIZE,
                                                      CFG.TRAIN.IMG_HEIGHT,
                                                      CFG.TRAIN.IMG_WIDTH],
                                               name='instance_input_label')
        phase = tf.placeholder(dtype=tf.string, shape=None, name='net_phase')

        net = lanenet_merge_model.LaneNet(net_flag=net_flag, phase=phase)

        # calculate the loss
        compute_ret = net.compute_loss(input_tensor=input_tensor,
                                       binary_label=binary_label_tensor,
                                       instance_label=instance_label_tensor,
                                       name='lanenet_model')
        total_loss = compute_ret['total_loss']
        binary_seg_loss = compute_ret['binary_seg_loss']
        disc_loss = compute_ret['discriminative_loss']
        pix_embedding = compute_ret['instance_seg_logits']

        # calculate the accuracy: fraction of true lane pixels predicted lane
        out_logits = compute_ret['binary_seg_logits']
        out_logits = tf.nn.softmax(logits=out_logits)
        out_logits_out = tf.argmax(out_logits, axis=-1)
        #out = tf.argmax(out_logits, axis=-1)
        #out = tf.expand_dims(out, axis=-1)
        out = tf.expand_dims(out_logits_out, axis=-1)

        idx = tf.where(tf.equal(binary_label_tensor, 1))
        pix_cls_ret = tf.gather_nd(out, idx)
        accuracy = tf.count_nonzero(pix_cls_ret)
        accuracy = tf.divide(accuracy,
                             tf.cast(tf.shape(pix_cls_ret)[0], tf.int64))

        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(CFG.TRAIN.LEARNING_RATE,
                                                   global_step, 100000, 0.1,
                                                   staircase=True)
        # ensure batch-norm statistics update before the train op
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=0.9).minimize(
                    loss=total_loss,
                    var_list=tf.trainable_variables(),
                    global_step=global_step)

    # Set tf saver
    saver = tf.train.Saver()
    model_save_dir = 'model/lanenet_culane'
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'lanenet_culane_{:s}_{:s}.ckpt'.format(net_flag,
                                                        str(train_start_time))
    model_save_path = ops.join(model_save_dir, model_name)

    # Set tf summary
    tboard_save_path = 'tboard/lanenet_culane/{:s}'.format(net_flag)
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)
    train_cost_scalar = tf.summary.scalar(name='train_cost', tensor=total_loss)
    val_cost_scalar = tf.summary.scalar(name='val_cost', tensor=total_loss)
    train_accuracy_scalar = tf.summary.scalar(name='train_accuracy', tensor=accuracy)
    val_accuracy_scalar = tf.summary.scalar(name='val_accuracy', tensor=accuracy)
    train_binary_seg_loss_scalar = tf.summary.scalar(name='train_binary_seg_loss',
                                                     tensor=binary_seg_loss)
    val_binary_seg_loss_scalar = tf.summary.scalar(name='val_binary_seg_loss',
                                                   tensor=binary_seg_loss)
    train_instance_seg_loss_scalar = tf.summary.scalar(name='train_instance_seg_loss',
                                                       tensor=disc_loss)
    val_instance_seg_loss_scalar = tf.summary.scalar(name='val_instance_seg_loss',
                                                     tensor=disc_loss)
    learning_rate_scalar = tf.summary.scalar(name='learning_rate',
                                             tensor=learning_rate)
    train_merge_summary_op = tf.summary.merge([train_accuracy_scalar,
                                               train_cost_scalar,
                                               learning_rate_scalar,
                                               train_binary_seg_loss_scalar,
                                               train_instance_seg_loss_scalar])
    val_merge_summary_op = tf.summary.merge([val_accuracy_scalar,
                                             val_cost_scalar,
                                             val_binary_seg_loss_scalar,
                                             val_instance_seg_loss_scalar])

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess_config.gpu_options.allocator_type = 'BFC'

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(tboard_save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = CFG.TRAIN.EPOCHS

    log.info('Global configuration is as follows:')
    log.info(CFG)

    with sess.as_default():
        tf.train.write_graph(graph_or_graph_def=sess.graph, logdir='',
                             name='{:s}/lanenet_model.pb'.format(model_save_dir))

        if weights_path is None:
            log.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            log.info('Restore model from last model checkpoint {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        # load pretrained parameters
        # BUGFIX: the original passed net_flag as a stray positional arg with
        # no format placeholder, which raises at log-record formatting time;
        # use logging's lazy %-style args instead.
        log.info('jim.chen train_net net_flag: %s', net_flag)
        if net_flag == 'vgg' and weights_path is None:
            pretrained_weights = np.load('./data/vgg16.npy',
                                         encoding='latin1').item()
            log.info('jim.chen train_net net_flag is 1vgg')
            for vv in tf.trainable_variables():
                weights_key = vv.name.split('/')[-3]
                try:
                    weights = pretrained_weights[weights_key][0]
                    _op = tf.assign(vv, weights)
                    sess.run(_op)
                except Exception as e:
                    # variable has no matching pretrained weight; skip it
                    continue

        train_cost_time_mean = []
        val_cost_time_mean = []
        for epoch in range(train_epochs):
            # training part
            t_start = time.time()

            with tf.device('/cpu:0'):
                gt_imgs, binary_gt_labels, instance_gt_labels = \
                    train_dataset.next_batch(CFG.TRAIN.BATCH_SIZE)
                gt_imgs = [cv2.resize(tmp,
                                      dsize=(CFG.TRAIN.IMG_WIDTH,
                                             CFG.TRAIN.IMG_HEIGHT),
                                      dst=tmp,
                                      interpolation=cv2.INTER_LINEAR)
                           for tmp in gt_imgs]
                gt_imgs = [tmp - VGG_MEAN for tmp in gt_imgs]
                binary_gt_labels = [cv2.resize(tmp,
                                               dsize=(CFG.TRAIN.IMG_WIDTH,
                                                      CFG.TRAIN.IMG_HEIGHT),
                                               dst=tmp,
                                               interpolation=cv2.INTER_NEAREST)
                                    for tmp in binary_gt_labels]
                binary_gt_labels = [np.expand_dims(tmp, axis=-1)
                                    for tmp in binary_gt_labels]
                instance_gt_labels = [cv2.resize(tmp,
                                                 dsize=(CFG.TRAIN.IMG_WIDTH,
                                                        CFG.TRAIN.IMG_HEIGHT),
                                                 dst=tmp,
                                                 interpolation=cv2.INTER_NEAREST)
                                      for tmp in instance_gt_labels]
            phase_train = 'train'

            _, c, train_accuracy, train_summary, binary_loss, instance_loss, \
                embedding, binary_seg_img = \
                sess.run([optimizer, total_loss, accuracy,
                          train_merge_summary_op, binary_seg_loss,
                          disc_loss, pix_embedding, out_logits_out],
                         feed_dict={input_tensor: gt_imgs,
                                    binary_label_tensor: binary_gt_labels,
                                    instance_label_tensor: instance_gt_labels,
                                    phase: phase_train})

            if math.isnan(c) or math.isnan(instance_loss) or math.isnan(binary_loss):
                log.error('cost is: {:.5f}'.format(c))
                log.error('binary cost is: {:.5f}'.format(binary_loss))
                log.error('instance cost is: {:.5f}'.format(instance_loss))
                cv2.imwrite('nan_image.png', gt_imgs[0] + VGG_MEAN)
                cv2.imwrite('nan_instance_label.png', instance_gt_labels[0])
                cv2.imwrite('nan_binary_label.png', binary_gt_labels[0] * 255)
                return

            if epoch % 100 == 0:
                cv2.imwrite('image.png', gt_imgs[0] + VGG_MEAN)
                cv2.imwrite('binary_label.png', binary_gt_labels[0] * 255)
                cv2.imwrite('instance_label.png', instance_gt_labels[0])
                cv2.imwrite('binary_seg_img.png', binary_seg_img[0] * 255)

                for i in range(4):
                    embedding[0][:, :, i] = minmax_scale(embedding[0][:, :, i])
                embedding_image = np.array(embedding[0], np.uint8)
                cv2.imwrite('embedding.png', embedding_image)

            cost_time = time.time() - t_start
            train_cost_time_mean.append(cost_time)
            summary_writer.add_summary(summary=train_summary, global_step=epoch)

            # validation part
            with tf.device('/cpu:0'):
                gt_imgs_val, binary_gt_labels_val, instance_gt_labels_val = \
                    val_dataset.next_batch(CFG.TRAIN.VAL_BATCH_SIZE)
                gt_imgs_val = [cv2.resize(tmp,
                                          dsize=(CFG.TRAIN.IMG_WIDTH,
                                                 CFG.TRAIN.IMG_HEIGHT),
                                          dst=tmp,
                                          interpolation=cv2.INTER_LINEAR)
                               for tmp in gt_imgs_val]
                gt_imgs_val = [tmp - VGG_MEAN for tmp in gt_imgs_val]
                binary_gt_labels_val = [cv2.resize(tmp,
                                                   dsize=(CFG.TRAIN.IMG_WIDTH,
                                                          CFG.TRAIN.IMG_HEIGHT),
                                                   dst=tmp)
                                        for tmp in binary_gt_labels_val]
                binary_gt_labels_val = [np.expand_dims(tmp, axis=-1)
                                        for tmp in binary_gt_labels_val]
                instance_gt_labels_val = [cv2.resize(tmp,
                                                     dsize=(CFG.TRAIN.IMG_WIDTH,
                                                            CFG.TRAIN.IMG_HEIGHT),
                                                     dst=tmp,
                                                     interpolation=cv2.INTER_NEAREST)
                                          for tmp in instance_gt_labels_val]
            phase_val = 'test'

            t_start_val = time.time()
            c_val, val_summary, val_accuracy, val_binary_seg_loss, \
                val_instance_seg_loss = \
                sess.run([total_loss, val_merge_summary_op, accuracy,
                          binary_seg_loss, disc_loss],
                         feed_dict={input_tensor: gt_imgs_val,
                                    binary_label_tensor: binary_gt_labels_val,
                                    instance_label_tensor: instance_gt_labels_val,
                                    phase: phase_val})

            if epoch % 100 == 0:
                cv2.imwrite('test_image.png', gt_imgs_val[0] + VGG_MEAN)

            summary_writer.add_summary(val_summary, global_step=epoch)

            cost_time_val = time.time() - t_start_val
            val_cost_time_mean.append(cost_time_val)

            if epoch % CFG.TRAIN.DISPLAY_STEP == 0:
                log.info('Epoch: {:d} total_loss= {:6f} binary_seg_loss= {:6f} '
                         'instance_seg_loss= {:6f} accuracy= {:6f}'
                         ' mean_cost_time= {:5f}s '.
                         format(epoch + 1, c, binary_loss, instance_loss,
                                train_accuracy, np.mean(train_cost_time_mean)))
                train_cost_time_mean.clear()

            if epoch % CFG.TRAIN.TEST_DISPLAY_STEP == 0:
                log.info('Epoch_Val: {:d} total_loss= {:6f} binary_seg_loss= {:6f} '
                         'instance_seg_loss= {:6f} accuracy= {:6f} '
                         'mean_cost_time= {:5f}s '.
                         format(epoch + 1, c_val, val_binary_seg_loss,
                                val_instance_seg_loss, val_accuracy,
                                np.mean(val_cost_time_mean)))
                val_cost_time_mean.clear()

            if epoch % 2000 == 0:
                saver.save(sess=sess, save_path=model_save_path,
                           global_step=epoch)
    sess.close()

    return


if __name__ == '__main__':
    # init args
    args = init_args()

    # train lanenet
    train_net(args.dataset_dir, args.weights_path, net_flag=args.net)
以下是模型训练过程中生成的文件夹:
./summary
./figure
./checkpoint
在主目录下,执行python train_lanenet_scnn.py,没有问题的话,可以开始训练了...
下一篇: