MNIST机器学习入门

Posted 2021-02-12 chenxiangzhen

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了MNIST机器学习入门相关的知识，希望对你有一定的参考价值。

1.1.1 简介

下载MNIST数据集，并打印一些基本信息：

python download.py

# 从tensorflow.examples.tutorials.mnist引入模块，这是TensorFlow为了教学MNIST而提前编制的程序
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from MNIST_data/ with one-hot labels. According to the tutorial,
# the data files are downloaded automatically when they are not present yet.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Report the image and label array shapes of the training, validation and
# test splits, in that order.
for split in (mnist.train, mnist.validation, mnist.test):
    print(split.images.shape)
    print(split.labels.shape)

# Show the first training sample: its pixel vector first, then its label vector.
first_image = mnist.train.images[0, :]
first_label = mnist.train.labels[0, :]
print(first_image)
print(first_label)

1.1.2 实验：将MNIST数据集保存为图片

python save_pic.py

from tensorflow.examples.tutorials.mnist import input_data
import scipy.misc
import os

# Load the MNIST dataset.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Images are written below MNIST_data/raw/; create the folder when it is missing.
save_dir = "MNIST_data/raw/"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Export the first 20 training images.
for index in range(20):
    # Row `index` (0-based) of mnist.train.images is one flattened image;
    # reshape it back into a 28x28 grid.
    pixels = mnist.train.images[index, :].reshape(28, 28)
    # Files are named mnist_train_0.jpg, mnist_train_1.jpg, ...
    target_path = os.path.join(save_dir, "mnist_train_%d.jpg" % index)
    # Convert the array to an image object, then save it to disk.
    # NOTE(review): scipy.misc.toimage is not available in newer SciPy
    # releases -- confirm the installed SciPy version still provides it.
    picture = scipy.misc.toimage(pixels, cmin=0.0, cmax=1.0)
    picture.save(target_path)

print("Please check: %s " % save_dir)

1.1.3 图像标签的独热表示

打印MNIST数据集中图片的标签：

python label.py

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

# Load the MNIST dataset with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Print the digit label of each of the first 20 training images.
for index in range(20):
    # One-hot row, e.g. (0, 1, 0, 0, 0, 0, 0, 0, 0, 0).
    encoded = mnist.train.labels[index, :]
    # Exactly one entry is 1 and the rest are 0, so the position of the
    # maximum is the original digit label.
    digit = np.argmax(encoded)
    print("mnist_train_%d.jpg label: %d" % (index, digit))

1.2.1 Softmax 回归

python softmax_regression.py

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Placeholder x: the images to classify, one flattened 784-value row each.
x = tf.placeholder(tf.float32, [None, 784])
# Weights.
W = tf.Variable(tf.zeros([784, 10]))
# Biases.
b = tf.Variable(tf.zeros([10]))
# Unnormalised class scores, kept separate so the loss can be computed from
# them directly.
logits = tf.matmul(x, W) + b
# Model output: class probabilities.
y_hat = tf.nn.softmax(logits)
# Ground-truth image labels.
y = tf.placeholder(tf.float32, [None, 10])

# Cross-entropy loss -sum(y * log(y_hat)), summed over the whole batch.
# It is computed from the logits because taking tf.log of the softmax output
# yields 0 * log(0) = NaN as soon as a probability underflows to 0.
# The loss is summed (not averaged) over the batch, so the 0.01 learning rate
# below applies to the batch-summed gradient.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# Optimise the model parameters with stochastic gradient descent.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# A prediction is correct when the most probable class equals the labelled class.
correct_prediction = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y, 1))

# Accuracy is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Run everything inside a context-managed session so that the session is
# closed even if training raises.
with tf.Session() as sess:
    # Initialise all variables.
    sess.run(tf.global_variables_initializer())
    print("Start training......")

    # 1000 gradient-descent steps on mini-batches of 100 images.
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})

    # Report the accuracy on the test set.
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))

1.2.2 两层卷积网络分类

python convolutional.py

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


def weight_variable(shape):
    """
    Create a weight variable with random initial values.

    The values are drawn from a truncated normal distribution with a standard
    deviation of 0.1. As noted in the tutorial, samples lying more than two
    standard deviations from the mean are discarded and drawn again.

    :param shape: shape of the variable to create
    :return: a tf.Variable initialised with the sampled values
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """
    Create a bias variable whose entries all start at 0.1.

    :param shape: shape of the variable to create
    :return: a tf.Variable initialised with the constant 0.1
    """
    return tf.Variable(tf.constant(0.1, shape=shape))


def conv2d(x, W):
    """
    Apply a 2-D convolution with stride 1 and "SAME" padding.

    :param x: [batch_size, in_height, in_width, n_channels]
    :param W: [filter_height, filter_width, in_channels, out_channels]
    :return: the result of tf.nn.conv2d
    """
    # strides is a 1-D list of length 4 whose first and last entries must be 1;
    # the usual layout is [1, stride, stride, 1].
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")


def max_pool_2_2(x):
    """
    Apply max pooling with a 2x2 window, stride 2 and "SAME" padding.

    :param x: input tensor to pool
    :return: the result of tf.nn.max_pool
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")


if __name__ == "__main__":
    # Training schedule. Named here so that the loop below and its comments
    # cannot drift apart.
    num_steps = 1000
    batch_size = 50
    report_every = 100

    # Load the data with one-hot encoded labels.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # Placeholder for the flattened input images (28 * 28 = 784 values each).
    x = tf.placeholder(tf.float32, [None, 784])
    # Placeholder for the matching labels (10 classes).
    y = tf.placeholder(tf.float32, [None, 10])

    # Restore every flat row to a 28x28 image with a single channel.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer.
    # Filter shape: [filter_height, filter_width, in_channels, out_channels]
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2_2(h_conv1)

    # Second convolutional layer.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2_2(h_conv2)

    # Fully connected layer producing a 1024-dimensional vector. The two
    # stride-2 poolings shrink 28x28 to 7x7, and the second convolution has
    # 64 output channels, hence 7 * 7 * 64 inputs.
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout. keep_prob is a placeholder fed with 0.5 for training steps and
    # 1.0 whenever accuracy is evaluated.
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024-dimensional vector to 10 values, one per class.
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Rather than applying softmax and then computing the cross entropy by
    # hand, compute it directly from the logits.
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Accuracy: the fraction of samples whose highest-scoring class matches the label.
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Run inside a context-managed session so that the session is closed even
    # if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train for num_steps steps.
        for i in range(num_steps):
            batch = mnist.train.next_batch(batch_size)
            # Every report_every steps, report the accuracy on the current
            # training batch (with dropout disabled).
            if i % report_every == 0:
                train_accuracy = sess.run(accuracy, feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
                print("Step %d, training accuracy %g" % (i, train_accuracy))
            sess.run(train_step, feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})

        # Final accuracy on the test set (dropout disabled).
        test_accuracy = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})
        print("Test accuracy %g" % test_accuracy)

可能出现的错误

下载数据集时可能出现网络问题，可以用下面两种方法中的一种解决：1. 使用合适的代理；2. 在MNIST的官方网站上下载文件train-images-idx3-ubyte.gz、train-labels-idx1-ubyte.gz、t10k-images-idx3-ubyte.gz、t10k-labels-idx1-ubyte.gz，并将它们存储在MNIST_data/文件夹中。

拓展阅读

本章介绍的MNIST 数据集经常被用来检验机器学习模型的性能，在它的官网（地址：http://yann.lecun.com/exdb/mnist/ ）中，可以找到多达68 种模型在该数据集上的准确率数据，包括相应的论文出处。这些模型包括线性分类器、K 近邻方法、普通的神经网络、卷积神经网络等。
本章的两个MNIST 程序实际上来自于TensorFlow 官方的两个新手教程，地址为https://www.tensorflow.org/get_started/mnist/beginners 和 https://www.tensorflow.org/get_started/mnist/pros 。读者可以将本书的内容和官方的教程对照起来进行阅读。这两个新手教程的中文版地址为http://www.tensorfly.cn/tfdoc/tutorials/mnist_beginners.html 和http://www.tensorfly.cn/tfdoc/tutorials/mnist_pros.html。
本章简要介绍了TensorFlow 的tf.Tensor 类。tf.Tensor 类是TensorFlow的核心类，常用的占位符（tf.placeholder）、变量（tf.Variable）都可以看作特殊的Tensor。读者可以参阅https://www.tensorflow.org/programmers_guide/tensors 来更深入地学习它的原理。
常用tf.Variable 类来存储模型的参数，读者可以参阅https://www.tensorflow.org/programmers_guide/variables 详细了解它的运行机制，文档的中文版地址为http://www.tensorfly.cn/tfdoc/how_tos/variables.html。
只有通过会话（Session）才能计算出tf.Tensor 的值。强烈建议读者在学习完tf.Tensor 和tf.Variable 后，阅读https://www.tensorflow.org/programmers_guide/graphs 中的内容，该文档描述了TensorFlow 中计算图和会话的基本运行原理，对理解TensorFlow 的底层原理有很大帮助。

以上是关于MNIST机器学习入门的主要内容，如果未能解决你的问题，请参考以下文章