解释 Tensorboard 分布 - 权重不变，只有偏差

Posted 2023-03-27

技术标签:

【中文标题】解释 Tensorboard 分布 - 权重不变，只有偏差【英文标题】：Interpreting Tensorboard Distributions - Weights not Changing, only Biases 【发布时间】：2017-11-13 09:44:44 【问题描述】：

我有一个组织如下的神经网络：

conv1 - pool1 - local response normalization (lrn1) - conv2 - lrn2 - pool2 -
conv3 - pool3 - conv4 - pool4 - conv5 - pool5 - dense layer (local1) - 
local2 - softmax

查看张量板的分布后，我得到以下信息：

下图是随着时间的推移激活的输出。

因此，从损失图中可以看出，网络正在学习。此外，所有偏差的分布图都清楚地表明它们在学习过程中被不断修改。但是权重呢？看起来它们并没有随着时间的推移而改变。我从这些图中得出的结论合乎逻辑吗？请注意，我只发布了权重和偏差图像的一个子集。其余所有权重的图都与我在这里展示的相似：偏差似乎在学习，而权重却没有！

这是我构建计算图的方式：

# Training hyper-parameters.
learning_rate = 1e-4  # step size handed to the optimizer
batch_size = 1024
# A single output unit; the original note says the one value is what the "retrainer" needs.
n_classes = 1

# Convolution kernels for conv1..conv5, keyed by layer and consumed by tf.nn.conv2d in
# inference(). Each shape is [filter_height, filter_width, in_channels, out_channels];
# every kernel uses a (non-uniform) Xavier initializer.
weights = {
    'weights_conv1': tf.get_variable(name='weights1', shape=[5, 5, 3, 128], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32)),
    'weights_conv2': tf.get_variable(name='weights2', shape=[3, 3, 128, 128], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32)),
    'weights_conv3': tf.get_variable(name='weights3', shape=[3, 3, 128, 256], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32)),
    'weights_conv4': tf.get_variable(name='weights4', shape=[3, 3, 256, 256], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32)),
    'weights_conv5': tf.get_variable(name='weights5', shape=[3, 3, 256, 256], dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32)),
}

# Per-channel biases for conv1..conv5, one entry per output channel of the matching kernel.
# All are trainable and start at the constant 0.1.
biases = {
    'bc1': tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32), trainable=True, name='biases1'),
    'bc2': tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32), trainable=True, name='biases2'),
    'bc3': tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32), trainable=True, name='biases3'),
    'bc4': tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32), trainable=True, name='biases4'),
    'bc5': tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32), trainable=True, name='biases5'),
}

def inference(frames):
    """Build the forward pass of the network and return its linear output.

    The graph is: input normalization -> conv1/ReLU -> LRN -> max-pool ->
    conv2/ReLU -> LRN -> max-pool -> conv3/ReLU -> conv4/ReLU -> conv5/ReLU ->
    max-pool -> two fully connected ReLU layers of 2046 units -> a final
    linear layer with ``n_classes`` outputs. Histogram summaries are recorded
    for every layer output, every pre-activation and every weight/bias tensor.

    Args:
        frames: Input image batch. Presumably NHWC with 3 channels (the first
            kernel has 3 input channels) and pixel values in [0, 255] -- confirm
            against the input pipeline.

    Returns:
        The output of the last linear layer, shape ``[batch, n_classes]``.
        No softmax (or any other activation) is applied to it here.
    """
    tf.summary.image('frame_resized', frames, max_outputs=32)
    # Center around 128 and scale by 255.
    frame_normalized_sub = tf.subtract(frames, tf.constant(128, dtype=tf.float32))
    frame_normalized = tf.divide(frame_normalized_sub, tf.constant(255.0), name='image_normalization')

    # conv1 (stride 4)
    with tf.name_scope('conv1') as scope:
        conv_2d_1 = tf.nn.conv2d(frame_normalized, weights['weights_conv1'], strides=[1, 4, 4, 1], padding='SAME')
        conv_2d_1_plus_bias = tf.nn.bias_add(conv_2d_1, biases['bc1'])
        conv1 = tf.nn.relu(conv_2d_1_plus_bias, name=scope)

    tf.summary.histogram('con1_output_distribution', conv1)
    tf.summary.histogram('con1_before_relu', conv_2d_1_plus_bias)

    # norm1
    with tf.name_scope('norm1'):
        norm1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
    tf.summary.histogram('norm1_output_distribution', norm1)

    # pool1
    with tf.name_scope('pool1'):
        pool1 = tf.nn.max_pool(norm1,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID',
                               name='pool1')
    tf.summary.histogram('pool1_output_distribution', pool1)

    # conv2
    with tf.name_scope('conv2') as scope:
        conv_2d_2 = tf.nn.conv2d(pool1, weights['weights_conv2'], strides=[1, 1, 1, 1], padding='SAME')
        conv_2d_2_plus_bias = tf.nn.bias_add(conv_2d_2, biases['bc2'])
        conv2 = tf.nn.relu(conv_2d_2_plus_bias, name=scope)

    tf.summary.histogram('conv2_output_distribution', conv2)
    tf.summary.histogram('con2_before_relu', conv_2d_2_plus_bias)

    # norm2
    with tf.name_scope('norm2'):
        norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')
    tf.summary.histogram('norm2_output_distribution', norm2)

    # pool2
    with tf.name_scope('pool2'):
        pool2 = tf.nn.max_pool(norm2,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID',
                               name='pool2')
    tf.summary.histogram('pool2_output_distribution', pool2)

    # conv3
    with tf.name_scope('conv3') as scope:
        conv_2d_3 = tf.nn.conv2d(pool2, weights['weights_conv3'], strides=[1, 1, 1, 1], padding='SAME')
        conv_2d_3_plus_bias = tf.nn.bias_add(conv_2d_3, biases['bc3'])
        conv3 = tf.nn.relu(conv_2d_3_plus_bias, name=scope)

    tf.summary.histogram('con3_output_distribution', conv3)
    tf.summary.histogram('con3_before_relu', conv_2d_3_plus_bias)

    # conv4
    with tf.name_scope('conv4') as scope:
        conv_2d_4 = tf.nn.conv2d(conv3, weights['weights_conv4'], strides=[1, 1, 1, 1], padding='SAME')
        conv_2d_4_plus_bias = tf.nn.bias_add(conv_2d_4, biases['bc4'])
        conv4 = tf.nn.relu(conv_2d_4_plus_bias, name=scope)

    tf.summary.histogram('con4_output_distribution', conv4)
    tf.summary.histogram('con4_before_relu', conv_2d_4_plus_bias)

    # conv5
    with tf.name_scope('conv5') as scope:
        conv_2d_5 = tf.nn.conv2d(conv4, weights['weights_conv5'], strides=[1, 1, 1, 1], padding='SAME')
        conv_2d_5_plus_bias = tf.nn.bias_add(conv_2d_5, biases['bc5'])
        conv5 = tf.nn.relu(conv_2d_5_plus_bias, name=scope)

    tf.summary.histogram('con5_output_distribution', conv5)
    tf.summary.histogram('con5_before_relu', conv_2d_5_plus_bias)

    # pool3 (third pooling layer; its op is named 'pool5' because it follows conv5)
    pool3 = tf.nn.max_pool(conv5,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding='VALID',
                           name='pool5')
    tf.summary.histogram('pool3_output_distribution', pool3)

    # local1
    with tf.variable_scope('local1') as scope:
        # Move everything into depth so we can perform a single matrix multiply.
        shape_d = pool3.get_shape()
        shape = shape_d[1] * shape_d[2] * shape_d[3]
        # NOTE(review): the flattened width is hard-coded to 1024 instead of being
        # derived from pool3's static shape (only printed below). If the product of
        # pool3's height, width and channels is not 1024, the -1 in the reshape
        # silently changes the batch dimension instead of failing -- confirm against
        # the actual input size.
        tf_shape = 1024

        print("shape:", shape, shape_d[1], shape_d[2], shape_d[3])

        reshape = tf.reshape(pool3, [-1, tf_shape])
        weight_local1 = \
            tf.get_variable(name='weight_local1', shape=[tf_shape, 2046], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32))
        bias_local1 = tf.Variable(tf.constant(0.1, tf.float32, [2046]), trainable=True, name='bias_local1')
        local1_before_relu = tf.matmul(reshape, weight_local1) + bias_local1
        local1 = tf.nn.relu(local1_before_relu, name=scope.name)

    tf.summary.histogram('local1_output_distribution', local1)
    tf.summary.histogram('local1_before_relu', local1_before_relu)

    tf.summary.histogram('local1_weights', weight_local1)
    tf.summary.histogram('local1_biases', bias_local1)

    # local2
    with tf.variable_scope('local2') as scope:
        weight_local2 = \
            tf.get_variable(name='weight_local2', shape=[2046, 2046], dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False, dtype=tf.float32))
        bias_local2 = tf.Variable(tf.constant(0.1, tf.float32, [2046]), trainable=True, name='bias_local2')
        local2_before_relu = tf.matmul(local1, weight_local2) + bias_local2
        local2 = tf.nn.relu(local2_before_relu, name=scope.name)

    tf.summary.histogram('local2_output_distribution', local2)
    tf.summary.histogram('local2_before_relu', local2_before_relu)

    tf.summary.histogram('local2_weights', weight_local2)
    tf.summary.histogram('local2_biases', bias_local2)

    # linear Wx + b
    with tf.variable_scope('softmax_linear') as scope:
        # 1.0 / 1024 keeps this a float division even under Python 2, where
        # 1 / 1024 evaluates to 0 and would initialise every weight to zero.
        weight_softmax = \
            tf.Variable(
                tf.truncated_normal([2046, n_classes], stddev=1.0 / 1024, dtype=tf.float32), name='weight_softmax')
        bias_softmax = tf.Variable(tf.constant(0.0, tf.float32, [n_classes]), trainable=True, name='bias_softmax')
        softmax_linear = tf.add(tf.matmul(local2, weight_softmax), bias_softmax, name=scope.name)

    tf.summary.histogram('softmax_output_distribution', softmax_linear)
    tf.summary.histogram('softmax_weights', weight_softmax)
    tf.summary.histogram('softmax_biases', bias_softmax)

    # Histograms of the shared convolution kernels and biases.
    tf.summary.histogram('weights_conv1', weights['weights_conv1'])
    tf.summary.histogram('weights_conv2', weights['weights_conv2'])
    tf.summary.histogram('weights_conv3', weights['weights_conv3'])
    tf.summary.histogram('weights_conv4', weights['weights_conv4'])
    tf.summary.histogram('weights_conv5', weights['weights_conv5'])

    tf.summary.histogram('biases_conv1', biases['bc1'])
    tf.summary.histogram('biases_conv2', biases['bc2'])
    tf.summary.histogram('biases_conv3', biases['bc3'])
    tf.summary.histogram('biases_conv4', biases['bc4'])
    tf.summary.histogram('biases_conv5', biases['bc5'])

    return softmax_linear

# Confidence-weighted mean absolute error. The original note called this RMSE, but the
# square root was taken per element before averaging, which is simply |error|.
with tf.name_scope('loss'):
    # The dimension of cost is [batch_size, 1]: every example has one output and a batch
    # is a number of examples.
    # tf.abs yields the same values as sqrt(square(x)) but avoids that form's
    # undefined (NaN) gradient when an error is exactly zero.
    cost = tf.abs(tf.subtract(predictions, y_valence))
    cost_scalar = tf.reduce_mean(tf.multiply(cost, confidence_holder), reduction_indices=0)
    # Only axis 0 is reduced, so cost_scalar is still indexable; [0] picks the value to log.
    tf.summary.scalar("loss", cost_scalar[0])

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost_scalar)

非常感谢任何帮助！

【问题讨论】：

也许权重没有改变，是因为大部分激活值都在 0 附近。不确定。您确定计算图中使用了这些权重吗？如果是，请附上代码（或简化版）？我猜权重的形状是正确的，因为在用带摘要的代码运行 mnist 后，张量板中的权重图也呈现出相似的形状（似乎变化不大），而与此同时，该图显示损失在下降、准确度在提高。 @I.Ayoub 我的卷积网络也有类似的问题。你解决了这个问题吗？你的学习率似乎很好而且足够小，但为什么权重直方图保持不变？会不会是 epoch 数不够大？ @mamafoku，我发现这种行为总是如此。当您在 MNIST 数据集上训练模型时，您也会得到几乎相同形状的权重直方图/分布。我没有发布答案，因为我对这个问题还没有足够充分的解答。 【参考方案1】：

来自https://jhui.github.io/2017/03/12/TensorBoard-visualize-your-learning/

我认为分布图只是按训练步数（step）展示直方图的另一种方式。

我猜中间最红的那条线表示直方图中的最大值，两侧各四条线按 0、25%、50%、75% 的百分比均匀划分。

【讨论】：

以上是关于解释 Tensorboard 分布 - 权重不变，只有偏差的主要内容，如果未能解决你的问题，请参考以下文章

Tensorboard 权重直方图仅最后一层可见变化

Tensorboard 图片特征图权重参数可视化

在 tensorboard 中可视化 batch_norm 参数

Tensorboard 1.0 直方图选项卡

梯度分布上的 TensorBoard 常数尖峰