mnist实例
Posted yangxiaoling
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了mnist实例相关的知识,希望对你有一定的参考价值。
与滑动平均有关的两个报错:
1.
Traceback (most recent call last):
  File "/home/error/PycharmProjects/tensortry/MNIST_data/train.py", line 65, in <module>
    tf.app.run()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/platform/app.py", line 124, in run
    _sys.exit(main(argv))
  File "/home/error/PycharmProjects/tensortry/MNIST_data/train.py", line 62, in main
    train(mnist)
  File "/home/error/PycharmProjects/tensortry/MNIST_data/train.py", line 26, in train
    avg_op = avg_class.apply(tf.trainable_variables())
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/moving_averages.py", line 381, in apply
    var.name)
TypeError: The variables must be half, float, or double: Variable:0
从报错来看,问题与滑动平均有关,是数据类型出了问题。排查后发现是变量global_step导致的:global_step是由tf.Variable定义的,参数trainable的默认值为True,而trainable为True的变量会自动添加到tf.trainable_variables()中;global_step是整型变量,而滑动平均只支持half、float、double类型的变量,于是报错。因此需要做以下修改:
# Before (raises TypeError): trainable defaults to True, so the integer
# step counter ends up in tf.trainable_variables().
# global_step = tf.Variable(tf.constant(0))
global_step = tf.Variable(tf.constant(0), trainable=False)

# The moving average has to be set up as its own op, because the averaged
# values are what gets applied to the test data.
avg_class = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
avg_op = avg_class.apply(tf.trainable_variables())
2.
/usr/bin/python3.5 /home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py
Extracting /home/error/MNIST_DATA/train-images-idx3-ubyte.gz
Extracting /home/error/MNIST_DATA/train-labels-idx1-ubyte.gz
Extracting /home/error/MNIST_DATA/t10k-images-idx3-ubyte.gz
Extracting /home/error/MNIST_DATA/t10k-labels-idx1-ubyte.gz
2018-09-01 16:25:36.504699: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2
2018-09-01 16:25:36.526808: W tensorflow/core/framework/op_kernel.cc:1198] Not found: Key layer1/biases/ExponentialMovingAverage not found in checkpoint
2018-09-01 16:25:36.527364: W tensorflow/core/framework/op_kernel.cc:1198] Not found: Key layer2/weights/ExponentialMovingAverage not found in checkpoint
2018-09-01 16:25:36.529878: W tensorflow/core/framework/op_kernel.cc:1198] Not found: Key layer2/biases/ExponentialMovingAverage not found in checkpoint
2018-09-01 16:25:36.531286: W tensorflow/core/framework/op_kernel.cc:1198] Not found: Key layer1/weights/ExponentialMovingAverage not found in checkpoint
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1350, in _do_call
    return fn(*args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1329, in _run_fn
    status, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.NotFoundError: Key layer1/biases/ExponentialMovingAverage not found in checkpoint
    [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py", line 52, in <module>
    tf.app.run()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/platform/app.py", line 124, in run
    _sys.exit(main(argv))
  File "/home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py", line 49, in main
    evaluate(mnist)
  File "/home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py", line 37, in evaluate
    saver.restore(sess, ckpt.model_checkpoint_path)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1686, in restore
    {self.saver_def.filename_tensor_name: save_path})
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 895, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1128, in _run
    feed_dict_tensor, options, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1344, in _do_run
    options, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1363, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key layer1/biases/ExponentialMovingAverage not found in checkpoint
    [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "/home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py", line 52, in <module>
    tf.app.run()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/platform/app.py", line 124, in run
    _sys.exit(main(argv))
  File "/home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py", line 49, in main
    evaluate(mnist)
  File "/home/error/PycharmProjects/tensortry/MNIST_data/evaluate.py", line 28, in evaluate
    saver = tf.train.Saver(variables_to_restore)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1239, in __init__
    self.build()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1248, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1284, in _build
    build_save=build_save, build_restore=build_restore)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 765, in _build_internal
    restore_sequentially, reshape)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 428, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 268, in restore_op
    [spec.tensor.dtype])[0])
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1031, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Key layer1/biases/ExponentialMovingAverage not found in checkpoint
    [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
从报错来看,问题出在加载模型时找不到滑动平均变量(影子变量)。原因是:程序本应对所有可训练变量(包括权重和偏置项)求滑动平均,但代码写成了下面这样——把求滑动平均放在了创建模型之前,这时权重和偏置项变量还没创建,当然也就没有对应的影子变量了:
# NOTE: this ordering is the bug being illustrated. apply() runs before
# inference() has created the layer variables, so tf.trainable_variables()
# does not contain them yet and no shadow variables are created for them.
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
variable_averages_op = variable_averages.apply(tf.trainable_variables())  # moving averages only apply to real-valued (float) variables
y = inference.inference(x, regularizer)  # creates 4 variables
解决方法:把 y = inference.inference(x, regularizer) 这一行放到创建 variable_averages 之前,即先创建模型变量,再对可训练变量求滑动平均。
完整样例:
inference.py
# -*- coding: utf-8 -*-
"""Forward pass of a two-layer fully connected network (TensorFlow 1.x graph API)."""
import tensorflow as tf

INPUT_NODE = 784
OUTPUT_NODE = 10
LAYERS_NODE = 500  # number of hidden-layer nodes


def get_weight_variables(shape, regularizer):
    """Create (or fetch) the ``weights`` variable of the current variable scope.

    Args:
        shape: Shape of the weight matrix.
        regularizer: Callable mapping the weight tensor to a regularization
            loss, or a falsy value to skip regularization.

    Returns:
        The ``weights`` variable, initialized from a truncated normal
        distribution with stddev 0.1.
    """
    weights = tf.get_variable(
        'weights',
        shape=shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    if regularizer:
        # Collected here so the caller can sum everything in 'losses' later.
        tf.add_to_collection('losses', regularizer(weights))
    return weights


def inference(x, regularizer):
    """Build the forward pass and return the output of the second layer.

    Creates four variables: ``layer1/weights``, ``layer1/biases``,
    ``layer2/weights`` and ``layer2/biases``.

    Args:
        x: Input tensor; its last dimension must match ``INPUT_NODE`` for the
            matmul against the first weight matrix.
        regularizer: Passed through to ``get_weight_variables``; may be None.

    Returns:
        The second layer's output tensor (no activation applied).
    """
    with tf.variable_scope('layer1'):
        weights = get_weight_variables([INPUT_NODE, LAYERS_NODE], regularizer)
        biases = tf.get_variable(
            'biases',
            shape=[LAYERS_NODE],
            initializer=tf.constant_initializer(0.0),
        )
        layer1 = tf.nn.relu(tf.matmul(x, weights) + biases)

    with tf.variable_scope('layer2'):
        weights = get_weight_variables([LAYERS_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable(
            'biases',
            shape=[OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0),
        )
        layer2 = tf.matmul(layer1, weights) + biases

    return layer2
train.py
# -*- coding: utf-8 -*-
"""Train the MNIST network defined in ``inference`` and checkpoint it periodically."""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensortry.MNIST_data import inference
import os

REGULARIZER_RATE = 0.0001  # regularization coefficient
MOVING_AVERAGE_DECAY = 0.99  # decay rate of the moving average

LEARNING_RATE_BASE = 0.8  # base learning rate
LEARNING_RATE_DECAY = 0.99  # decay rate of the learning rate

BATCH_SIZE = 100
TRAINING_STEPS = 10000

MODEL_SAVE_PATH = '/home/error/model2/'
MODEL_NAME = 'model.ckpt'


def train(mnist):
    """Build the training graph, run ``TRAINING_STEPS`` steps and save checkpoints.

    Every 1000 iterations the batch loss and accuracy are printed and a
    checkpoint is written under ``MODEL_SAVE_PATH``.

    Args:
        mnist: Dataset object as returned by ``input_data.read_data_sets``;
            this function uses ``mnist.train.num_examples`` and
            ``mnist.train.next_batch``.
    """
    x = tf.placeholder(tf.float32, shape=[None, inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, shape=[None, inference.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZER_RATE)

    # trainable=False keeps the integer step counter out of
    # tf.trainable_variables(), which the moving average is applied to below.
    global_step = tf.Variable(0, trainable=False)

    # The model must be built BEFORE the moving average is applied, otherwise
    # the layer variables do not exist yet and get no shadow variables.
    y = inference.inference(x, regularizer)  # creates 4 variables

    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # Moving averages only apply to real-valued (float) variables.
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Loss function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y)
    # Take the mean; otherwise the individual losses have different shapes
    # and summing them into the total loss raises an error.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    tf.add_to_collection('losses', cross_entropy_mean)
    loss = tf.add_n(tf.get_collection('losses'))

    # Learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True,
    )

    # Backpropagation
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Group the gradient step and the moving-average update into one op.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    # Saver.save() fails if the parent directory of the checkpoint is missing,
    # so make sure it exists before training starts.
    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, accuracy_value, step = sess.run(
                [train_op, loss, accuracy, global_step],
                feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print('after %d training step, loss in training batch is %g, accuracy is %g' % (step, loss_value, accuracy_value))

                # Persist the model. Passing global_step appends the step
                # count to the file name, e.g. `model.ckpt-1000` is the model
                # obtained after 1000 training steps.
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)


def main(argv=None):
    """Load the MNIST data with one-hot labels and run training."""
    mnist = input_data.read_data_sets('/home/error/MNIST_DATA', one_hot=True)
    train(mnist)


if __name__ == '__main__':
    tf.app.run()
evaluate.py
# -*- coding: utf-8 -*-
"""Periodically evaluate the newest training checkpoint on the validation set."""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensortry.MNIST_data import inference
from tensortry.MNIST_data import train
import time

EVAL_INTERVAL_SECS = 10


def evaluate(mnist):
    """Restore the latest checkpoint every ``EVAL_INTERVAL_SECS`` seconds and print validation accuracy.

    Loops until no checkpoint can be found under ``train.MODEL_SAVE_PATH``,
    in which case a message is printed and the function returns.

    Args:
        mnist: Dataset object as returned by ``input_data.read_data_sets``;
            this function uses ``mnist.validation.images`` and
            ``mnist.validation.labels``.
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, shape=[None, inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, shape=[None, inference.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}

        # No regularizer: the regularization loss is irrelevant at evaluation time.
        y = inference.inference(x, None)

        # Accuracy
        correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Moving average: map each model variable to the name of its shadow
        # variable so the averaged values are loaded from the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        # Persistence
        saver = tf.train.Saver(variables_to_restore)

        while True:
            with tf.Session() as sess:
                # Uses the `checkpoint` file to locate the newest model in the
                # directory; returns a CheckpointState. Its
                # model_checkpoint_path looks like
                # /home/error/model2/model.ckpt-9001, and
                # all_model_checkpoint_paths lists the retained models
                # (5 in the author's run).
                ckpt = tf.train.get_checkpoint_state(train.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the newest model.
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Recover the training step count from the file name.
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    print('after %s training step, validation accuracy is %g' % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return
            time.sleep(EVAL_INTERVAL_SECS)


def main(argv=None):
    """Load the MNIST data with one-hot labels and start the evaluation loop."""
    mnist = input_data.read_data_sets('/home/error/MNIST_DATA', one_hot=True)
    evaluate(mnist)


if __name__ == '__main__':
    tf.app.run()
以上是关于mnist实例的主要内容,如果未能解决你的问题,请参考以下文章