TensorFlow 多 GPU MNIST 分类器:准确率低
Posted
技术标签:
【中文标题】TensorFlow 多 GPU MNIST 分类器:准确率低【英文标题】:Tensorflow multi-GPU MNIST classifier: low accuracy 【发布时间】:2018-05-29 04:40:58 【问题描述】:我在用 TensorFlow 实现多 GPU 的 MNIST 分类器时遇到了问题。代码运行没有报错,但准确率很低(30%)。我是 TensorFlow 的新手,所以不知道问题出在哪里。GPU:2x GTX 1080 Ti。
我找到了几个关于多 GPU 的教程,但是代码很难理解。出于这个原因,我正在尝试从头开发 MNIST CNN 分类器。
from __future__ import print_function
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime
def average_gradients(tower_grads):
    """Average the gradient of every shared variable across all towers.

    Args:
        tower_grads: List with one entry per tower. Each entry is the list of
            (gradient, variable) pairs produced for that tower; every tower
            must list the variables in the same order.

    Returns:
        List of (gradient, variable) pairs, one per variable, where the
        gradient is the mean of that variable's gradients over all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # Add a leading 'tower' dimension to each gradient so they can be
        # concatenated and averaged over that dimension.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(axis=0, values=grads), 0)
        # The variables are shared across towers, so the first tower's
        # reference to the variable is as good as any other.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    # Return only after every variable has been processed: returning from
    # inside the loop would yield the averaged gradient of the first variable
    # only, leaving all other variables without updates.
    return average_grads
# Inputs, optimizer and the shared model parameters are created on the CPU.
with tf.device('/cpu:0'):
    x = tf.placeholder(tf.float32, [None, 784], name='x')
    x_img = tf.reshape(x, [-1, 28, 28, 1])
    # tf.split(value, 2) cuts the tensor into two equal parts along its first
    # axis, so the fed batch size must be even. One half goes to each tower.
    x_dict = {}
    x_dict['x0'], x_dict['x1'] = tf.split(x_img, 2)
    y_dict = {}
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    y_dict['y0'], y_dict['y1'] = tf.split(y, 2)
    opt = tf.train.GradientDescentOptimizer(0.01)
    keep_prob = tf.placeholder(tf.float32)
    # Two 5x5 convolution layers (1->32->64 channels) followed by two fully
    # connected layers (7*7*64 -> 1024 -> 10).
    w0 = tf.get_variable('w0', initializer=tf.truncated_normal([5, 5, 1, 32], stddev=0.1))
    b0 = tf.get_variable('b0', initializer=tf.zeros([32]))
    w1 = tf.get_variable('w1', initializer=tf.truncated_normal([5, 5, 32, 64], stddev=0.1))
    b1 = tf.get_variable('b1', initializer=tf.zeros([64]))
    w2 = tf.get_variable('w2', initializer=tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1))
    b2 = tf.get_variable('b2', initializer=tf.zeros([1024]))
    w3 = tf.get_variable('w3', initializer=tf.truncated_normal([1024, 10], stddev=0.1))
    b3 = tf.get_variable('b3', initializer=tf.zeros([10]))
    # Collects the per-tower (gradient, variable) lists.
    grads = []
def conv2d(xx, W):
    """Apply a stride-1, 'SAME'-padded 2-D convolution of `xx` with filter `W`."""
    return tf.nn.conv2d(xx, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(xx):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to `xx`."""
    return tf.nn.max_pool(xx, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def model_forward(xx):
    """Run the CNN forward pass on an image batch and return class scores.

    Uses the module-level parameters w0..w3 / b0..b3 and the `keep_prob`
    placeholder, so every call shares the same weights.

    Args:
        xx: Image tensor reshaped to [-1, 28, 28, 1].

    Returns:
        Tensor with 10 sigmoid-activated scores per example.
    """
    h_conv1 = tf.nn.relu(conv2d(xx, w0) + b0)
    h_pool1 = max_pool_2x2(h_conv1)
    h_conv2 = tf.nn.relu(conv2d(h_pool1, w1) + b1)
    h_pool2 = max_pool_2x2(h_conv2)
    # Two 2x2 poolings reduce 28x28 to 7x7; flatten for the dense layers.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w2) + b2)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    # NOTE(review): the output is an element-wise sigmoid, so the 10 scores
    # are independent and do not form a probability distribution over classes.
    y = tf.nn.sigmoid(tf.matmul(h_fc1_drop, w3) + b3)
    return y
# Build one tower per GPU; tower i consumes half i of the input batch and
# contributes its own list of (gradient, variable) pairs.
for i in range(0, 2):
    with tf.device('/gpu:{0}'.format(i)):
        with tf.variable_scope('scope_gpu_{0}'.format(i)):
            yy = model_forward(x_dict['x{0}'.format(i)])
            # NOTE(review): yy comes from a sigmoid, not a softmax, so this
            # expression is not a proper categorical cross-entropy, and
            # tf.log(yy) is unbounded when a score reaches 0.
            cross_entropy = tf.reduce_mean(
                -tf.reduce_sum(y_dict['y{0}'.format(i)] * tf.log(yy), reduction_indices=[1]))
            grads.append(opt.compute_gradients(cross_entropy, tf.trainable_variables()))

# Average the tower gradients and apply the update on the CPU.
with tf.device('/cpu:0'):
    grad = average_gradients(grads)
    train_step = opt.apply_gradients(grad)
    # Accuracy is measured on the first half of whatever batch is fed,
    # because only x_dict['x0'] / y_dict['y0'] are used here.
    yy = model_forward(x_dict['x0'])
    correct_prediction = tf.equal(tf.argmax(yy, 1), tf.argmax(y_dict['y0'], 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
def main():
    """Train the multi-GPU model on MNIST and print test accuracy periodically.

    Runs 10000 training steps with batches of 100 examples, printing the
    accuracy on the test set every 200 steps and the total elapsed time at
    the end. The graph is also written out for TensorBoard.
    """
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('C:\\tmp\\test\\', graph=tf.get_default_graph())
        t1_1 = datetime.datetime.now()
        for step in range(0, 10000):
            batch_x, batch_y = mnist.train.next_batch(100)
            # Dropout is active (keep_prob=0.5) only while training.
            sess.run(train_step, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
            if (step % 200) == 0:
                print(step, sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}))
        t2_1 = datetime.datetime.now()
        print("Computation time: " + str(t2_1 - t1_1))
        # Flush and release the event file once training is done.
        writer.close()


if __name__ == "__main__":
    main()
【问题讨论】:
【参考方案1】:我注意到的问题:
您的交叉熵损失是错误的(有关详细信息,请参阅 this question;简而言之,您正在计算二元交叉熵)。
我放弃了手动梯度计算,转而使用 tf.train.AdamOptimizer。
我放弃了对 x 的输入拆分(这不是在 TensorFlow 中进行分布式计算的正确方法)。
即使在一个 GPU 上,结果模型也很容易达到 99% 的准确率。
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime
# Input placeholders: flattened images, one-hot labels and the dropout
# keep probability.
x = tf.placeholder(tf.float32, [None, 784], name='x')
x_img = tf.reshape(x, [-1, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, 10], name='y')
keep_prob = tf.placeholder(tf.float32)

# Standard deviation shared by all truncated-normal weight initializers.
stddev = 0.1


def _weight(name, shape):
    """Create a variable initialized from a truncated normal of width `stddev`."""
    return tf.get_variable(name, initializer=tf.truncated_normal(shape, stddev=stddev))


def _bias(name, size):
    """Create a zero-initialized bias variable with `size` entries."""
    return tf.get_variable(name, initializer=tf.zeros([size]))


# Parameters are created in the same order as the layers use them:
# conv 5x5 (1->32), conv 5x5 (32->64), dense (7*7*64->1024), dense (1024->10).
w0 = _weight('w0', [5, 5, 1, 32])
b0 = _bias('b0', 32)
w1 = _weight('w1', [5, 5, 32, 64])
b1 = _bias('b1', 64)
w2 = _weight('w2', [7 * 7 * 64, 1024])
b2 = _bias('b2', 1024)
w3 = _weight('w3', [1024, 10])
b3 = _bias('b3', 10)
def conv2d(xx, W):
    """Apply a stride-1, 'SAME'-padded 2-D convolution of `xx` with filter `W`."""
    return tf.nn.conv2d(xx, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(xx):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to `xx`."""
    return tf.nn.max_pool(xx, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def model_forward(xx):
    """Run the CNN forward pass on an image batch and return raw logits.

    Uses the module-level parameters w0..w3 / b0..b3 and the `keep_prob`
    placeholder.

    Args:
        xx: Image tensor reshaped to [-1, 28, 28, 1].

    Returns:
        Tensor with 10 unnormalized scores (logits) per example; no
        activation is applied to the last layer.
    """
    h_conv1 = tf.nn.relu(conv2d(xx, w0) + b0)
    h_pool1 = max_pool_2x2(h_conv1)
    h_conv2 = tf.nn.relu(conv2d(h_pool1, w1) + b1)
    h_pool2 = max_pool_2x2(h_conv2)
    # Two 2x2 poolings reduce 28x28 to 7x7; flatten for the dense layers.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w2) + b2)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    y = tf.matmul(h_fc1_drop, w3) + b3
    return y
# Logits for the fed batch.
yy = model_forward(x_img)

# Element-wise sigmoid cross-entropy on the logits, averaged over every
# example and class.
per_element_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=yy)
loss = tf.reduce_mean(per_element_loss)

# Adam with its default hyper-parameters performs the weight update.
optimizer = tf.train.AdamOptimizer()
train_step = optimizer.minimize(loss)

# A prediction is correct when the highest-scoring class matches the label.
predicted_class = tf.argmax(yy, 1)
true_class = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
def main():
    """Train the single-device model on MNIST and print test accuracy.

    Runs 10000 training steps with batches of 100 examples, printing the
    accuracy on the test set every 200 steps and the total elapsed time at
    the end.
    """
    mnist = input_data.read_data_sets("/home/maxim/p/data/mnist-tf", one_hot=True)
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        t1_1 = datetime.datetime.now()
        for step in range(0, 10000):
            batch_x, batch_y = mnist.train.next_batch(100)
            # Dropout is active (keep_prob=0.5) only while training.
            sess.run(train_step, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
            if (step % 200) == 0:
                print(step, sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}))
        t2_1 = datetime.datetime.now()
        print("Computation time: " + str(t2_1 - t1_1))


if __name__ == "__main__":
    main()
现在,如果您真的想要它,您可以进行数据或模型并行处理以利用您的 GPU 能力(有关于它的 a great post,但有时由于托管问题而无法正确呈现)。
【讨论】:
您好,感谢您的评论。问题在于多 GPU 计算。在本教程中是手动计算梯度、对其取平均,并在 CPU 上更新权重:github.com/normanheckscher/mnist-multi-gpu 我想使用 softmax 函数,但由于报错无法在多 GPU 上使用,所以我使用了 sigmoid。 我对多 GPU 版本更感兴趣。我不确定如何解决 GPU 之间的权重和偏差共享等问题。
【参考方案2】:除了前两个答案中提到的几点之外,请看一下 average_gradients 函数中的 return average_grads,它在第一个 for 循环的第一次迭代中就返回了,这意味着梯度只会应用于第一个变量(可能是 w0)。因此只有 w0 得到更新,其余变量保持其初始值(随机/零),所以您的准确率非常低。
【讨论】:
【参考方案3】:这是因为模型在 CPU 和其他 GPU 设备上的推理使用不同的权重和偏差。
例如:
# Build one tower per GPU; tower i consumes half i of the input batch.
for i in range(0, 2):
    with tf.device('/gpu:{0}'.format(i)):
        with tf.variable_scope('scope_gpu_{0}'.format(i)) as infer_scope:
            yy = model_forward(x_dict['x{0}'.format(i)])
            # Mark the scope as reusing variables once the tower's forward
            # pass has been built.
            infer_scope.reuse_variables()
            cross_entropy = tf.reduce_mean(
                -tf.reduce_sum(y_dict['y{0}'.format(i)] * tf.log(yy), reduction_indices=[1]))
            grads.append(opt.compute_gradients(cross_entropy, tf.trainable_variables()))
您获得低准确度的原因是:如果没有调用 reuse_variables(),而您又在每个 epoch 内调用模型推理,该图就会创建一个权重和偏差被随机初始化的新模型,而这并不是您想要的。
【讨论】:
以上是关于TensorFlow 多 GPU MNIST 分类器:准确率低的主要内容,如果未能解决你的问题,请参考以下文章
深度学习基于tensorflow的服装图像分类训练(数据集:Fashion-MNIST)
求助Tensorflow下跑mnist手写体数据集遇到Cuda compute capability问题
深度学习原理与框架-Tensorflow卷积神经网络-神经网络mnist分类
具有推理功能的 TensorFlow + Keras 多 GPU 模型
win10下通过Anaconda安装TensorFlow-GPU1.3版本,并配置pycharm运行Mnist手写识别程序