tensorflow not training (only biases change)

【Title】tensorflow not training (only biases change)
【Posted】2017-09-14 06:10:30
【Question】

I want to train a convolutional network to output a number between 0 and 100. But the model very quickly stops updating its weights, and only the biases in the fully connected layers keep changing. I can't understand why.

Image of the weights:

I have played around with different numbers of layers and so on, but I always run into the same problem: only the FC biases change.
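
(One minimal way to double-check this would be to log gradient histograms for every trainable variable; the sketch below assumes the tensor returned by cost() further down is kept in a variable cost_op, and it has to be added after the cost/optimizer are defined:)

# Sketch (not in the original code): per-variable gradient histograms, so
# TensorBoard shows whether the conv weights receive any gradient at all.
grads = tf.gradients(cost_op, tf.trainable_variables())
for g, v in zip(grads, tf.trainable_variables()):
    if g is not None:
        tf.summary.histogram(v.name.replace(':', '_') + '_gradient', g)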

This is the current code I'm testing. I've removed things like dropout; overfitting is not a concern at the moment. In fact, I want to try to overfit the data so I can see that my model can learn anything at all.

from __future__ import print_function

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


###################################################################################
############################# Read Data ###########################################

with tf.name_scope("READ_DATA"):

  def read_my_file_format(filename_queue):
    reader = tf.WholeFileReader()
    key, record_string = reader.read(filename_queue)
    split_res = tf.string_split([key],'_')
    key = split_res.values[5]
    example = tf.image.decode_png(record_string)
    example = tf.image.rgb_to_grayscale(example, name=None)

    processed_example = resize_img(example)
    processed_example = reshape_img(processed_example)
    return processed_example, key


  def resize_img(imgg):
    return tf.image.resize_images(imgg,[102,525])

  def reshape_img(imgg):
    return tf.reshape(imgg,shape=[102,525,1])


  def input_pipeline( bsize=30, num_epochs=None):
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once("./png_imgs/*.png"), num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)

    min_after_dequeue = bsize
    capacity = min_after_dequeue + 3 * 8

    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label], batch_size=bsize, capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return  example_batch, label_batch

  imb_batch1,label_batch1 = input_pipeline()

  single_img, single_lbl = input_pipeline(bsize=1)

############################# Read Data ###########################################
###################################################################################



# Parameters
#learning_rate = 0.0001
training_iters = 200000
batch_size = 30

# Network Parameters
n_input = 600*300*3   # (not used below; the images actually fed are 102x525x1)
n_classes = 1         # single regression output (a number between 0 and 100)
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, 102,525,1])
y = tf.placeholder(tf.float32, [None, 1])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
learning_rate = tf.placeholder(tf.float32) 


# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')


# Create model
def conv_net(x, dropout):

  # Convolution Layer
    with tf.variable_scope('conv1') as scope:
      w = tf.get_variable('weights',[5,5,1,32], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[32],initializer=tf.random_normal_initializer())
      conv1 = conv2d(x,w,b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)

    with tf.variable_scope('conv2') as scope:
      w = tf.get_variable('weights',[5,5,32,32], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[32],initializer=tf.random_normal_initializer())
      conv2 = conv2d(conv1,w,b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)

    with tf.name_scope("Maxpool"):
      conv2 = maxpool2d(conv2,k=2)

    with tf.variable_scope('FC1') as scope:
      w = tf.get_variable('weights',[32*263*51,64], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[64],initializer=tf.random_normal_initializer())
      FC1 = tf.reshape(conv2,[-1,w.get_shape().as_list()[0]])
      FC1 = tf.add(tf.matmul(FC1,w),b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)


    with tf.variable_scope('FC2') as scope:
      w = tf.get_variable('weights',[64,1], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[1],initializer=tf.random_normal_initializer())
      FC2 = tf.add(tf.matmul(FC1,w),b)
      tf.summary.histogram('weights',w)
      tf.summary.histogram('biases',b)

    return FC2


# Construct model
pred = conv_net(x, keep_prob)

def cost():
  with tf.name_scope("Cost"):

    diff = tf.abs(tf.subtract(y,pred))
    cost=tf.reduce_mean(diff)
    print(cost)
  tf.summary.histogram('Label',y)
  tf.summary.histogram('predicted',pred)
  tf.summary.scalar('cost',cost)
  return cost

with tf.name_scope("Optimizer"):
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost())
 # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost())


# Initializing the variables
saver = tf.train.Saver()
init = tf.global_variables_initializer()
merged = tf.summary.merge_all()


# Launch the graph
with tf.Session() as sess:

    sess.run(init)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    writer = tf.summary.FileWriter("/tmp/tensorboard/log01")
    writer.add_graph(sess.graph)
    step = 1
    l_rate= 0.1

    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        print("step: ",step)
        batch_x, batch_y = sess.run([imb_batch1,label_batch1])

        batch_y = batch_y.reshape(-1,1) 
        if step % 100 == 0 :
          l_rate = l_rate/5

        if l_rate < 0.000001 :
          l_rate= 0.000001

        if step > 20:
          _,sumry = sess.run([optimizer,merged], feed_dict={x: batch_x, y: batch_y,
                                         keep_prob: dropout, learning_rate: l_rate})
          writer.add_summary(sumry,step)
        else :
          sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                         keep_prob: dropout, learning_rate: l_rate})


        step += 1

    print("Training Done!")



    coord.request_stop()
    coord.join(threads)

Is there some silly mistake somewhere in the code that is causing this?

【Comments】:

My hunch: to me this could indicate that the learning rate is too large. Since you change the learning rate during training... maybe you could try a constant LR and see whether the problem persists? (a sketch of this is shown after the comments)

@Simmeman did you solve your problem? If you did, please share your solution, because I'm hitting this error too.

@ashishkarel I wasn't able to solve it. I gave up on this approach and used a pretrained model, changing only the fully connected layers.

@ashishkarel did you manage to figure out what your problem was (without using a pretrained model)?

@mamafoku Sorry, I couldn't find a solution.
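
(A minimal sketch of the constant-learning-rate suggestion from the first comment, reusing the names from the question's training loop; the fixed value 1e-4 is only an assumed starting point:)

# Sketch: train with a fixed learning rate instead of dividing it by 5
# every 100 steps, to rule out the decay schedule as the culprit.
l_rate = 1e-4  # assumed value, tune as needed
while step * batch_size < training_iters:
    batch_x, batch_y = sess.run([imb_batch1, label_batch1])
    batch_y = batch_y.reshape(-1, 1)
    sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                   keep_prob: dropout,
                                   learning_rate: l_rate})  # no decay
    step += 1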

【Answer 1】:

Your first fully connected layer has no non-linearity, so it adds nothing compared to having only a single fully connected layer.
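
(For concreteness, a minimal sketch of that change in the question's conv_net: wrap FC1 in a ReLU and leave everything else as posted.)

    with tf.variable_scope('FC1') as scope:
      w = tf.get_variable('weights',[32*263*51,64], initializer=tf.contrib.layers.xavier_initializer())
      b = tf.get_variable('biases',[64],initializer=tf.random_normal_initializer())
      FC1 = tf.reshape(conv2,[-1,w.get_shape().as_list()[0]])
      FC1 = tf.nn.relu(tf.add(tf.matmul(FC1,w),b))   # non-linearity added here

With the ReLU in place, the hidden FC layer can actually learn something that a single output layer could not.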

【Discussion】:
