我的模型的损失值为0，但它只是将所有输入分类到同一个类中，出了什么问题？

Question

我训练了这个模型来对数据集中的图像进行分类。当未训练权重时，损失值似乎正常，但在第一个时期之后，损失减少到0，并且所有输入图像被分类为0级。

如果添加了正则化，则权重更新更慢，但最终得到相同的结果，比如分类为0级且损失值为0的所有图像。

import tensorflow as tf
from tensorflow import keras
import numpy as np

EPOCH = 10
BATCH_SIZE = 30
DATA_SIZE = 60000
REGULARIZER = 0.001


def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

    train_images = train_images / 255.0
    test_images = test_images / 255.0

    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))

    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.float32, (None, 1))

    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)

    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)

    y_spy = tf.nn.softmax(y, axis=1)

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y)
    # loss = tf.reduce_mean(ce) + tf.add_n(tf.get_collection('losses'))
    loss = tf.reduce_mean(ce)
    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        print('current out loss: ', end='')
        print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
        pre = sess.run(predict, feed_dict={x: test_images})
        miss = pre - test_labels
        print('right number: ', end='')
        print((np.sum(miss == 0)))

        for epoch in range(EPOCH):
            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _ = sess.run([train_step], feed_dict={x: train_images[start:end],
                                                                                                y_: train_labels[start:end]})
            print('epochs %d :' % epoch)
            print('current in loss: ', end='')
            print(sess.run(loss, feed_dict={x: train_images[start:end],
                                            y_: train_labels[start:end]}))
            print('current out loss: ', end='')
            print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
            miss = sess.run(predict, feed_dict={x: test_images}) - test_labels
            print('right number: ', end='')
            print((np.sum(miss == 0)))


if __name__ == "__main__":
    main()