How to interpret the loss function in the categorical generative adversarial net?


【Title】How to interpret the loss function in the categorical generative adversarial net? 【Posted】2017-11-08 04:23:15 【Question description】:

So I have been implementing the categorical generative adversarial net described here.

[Jost T. Springenberg. Unsupervised and Semi-supervised Learning with Categorical Generative Adversarial Networks, April 2016.]

[formula image: the CatGAN discriminator and generator objectives from page 6 of the paper]
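
Since the formula image does not render here, this is my best reconstruction of the objectives (written out from memory, so the notation and the exact sign of the λ term may differ slightly from the paper):

L_D = \max_D \; H_{\mathcal{X}}\!\left[p(y \mid D)\right]
      - \mathbb{E}_{x \sim \mathcal{X}}\!\left[ H\!\left[p(y \mid x, D)\right] \right]
      + \mathbb{E}_{z \sim P(z)}\!\left[ H\!\left[p(y \mid G(z), D)\right] \right]
      + \lambda \, \mathbb{E}_{(x,y) \sim \mathcal{X}^{L}}\!\left[ \mathrm{CE}\!\left[y, p(y \mid x, D)\right] \right]

L_G = \min_G \; - H_{\mathcal{G}}\!\left[p(y \mid D)\right]
      + \mathbb{E}_{z \sim P(z)}\!\left[ H\!\left[p(y \mid G(z), D)\right] \right]

Here H[\cdot] is Shannon entropy, H_{\mathcal{X}} and H_{\mathcal{G}} are the entropies of the class distribution marginalized over real and generated samples, and CE is the cross-entropy on the labeled subset \mathcal{X}^{L}.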

This is the loss function introduced on page 6. The problem is that the formula uses arg max, which is strange, because most of the optimizers I can use in frameworks such as TensorFlow only perform arg min (minimization).
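
The only workaround I can think of is the usual one: maximize an objective by minimizing its negation. A minimal, self-contained sketch of that idea (the toy parameter and objective here are made up purely for illustration):

import tensorflow as tf

# Toy parameter and objective, used only to illustrate turning arg max into arg min.
theta = tf.Variable(0.0)
objective_to_maximize = -tf.square(theta - 3.0)  # maximized at theta == 3

# TensorFlow optimizers only minimize, so "arg max f" is implemented as
# "arg min(-f)": minimizing the negated objective maximizes the original one.
train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(-objective_to_maximize)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):
        sess.run(train_op)
    print(sess.run(theta))  # converges towards 3.0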

So could anyone tell me how to implement this formula?

Here is the code I have implemented.

import tensorflow as tf
import numpy as np
import PIL.Image as Image
# constants
X_dim = 256
Y_dim = 2
Z_dim = 256 * 256
value_lambda = 1.0

X = tf.placeholder(tf.float32, shape=[None, X_dim, X_dim, 1])
Y = tf.placeholder(tf.float32, shape=[None, Y_dim])
Z = tf.placeholder(tf.float32, shape=[None, Z_dim])

initializer = tf.contrib.layers.variance_scaling_initializer
activation_function = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(0.5)

custom_filter = np.ones(shape=[32, 256, 256, 1], dtype=np.float32)
custom_filter[:, 255, :, :] = 0
custom_filter[:, :, 255, :] = 0

custom_filter = tf.constant(custom_filter, dtype=tf.float32)
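# custom_filter zeroes out the last row and the last column of each 256x256
# feature map; it is multiplied with the generator output below.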


def discriminator(x, name=None):
    with tf.name_scope(name, "discriminator", [x]) as scope:

        D_conv_1 = tf.layers.conv2d(inputs=x, filters=16, kernel_size=[
                                    5, 5], padding='SAME', activation=activation_function, kernel_regularizer=regularizer)
        # [256, 256]
        D_mean_pool_1 = tf.nn.pool(D_conv_1, window_shape=[
                                   2, 2], pooling_type='AVG', padding='VALID', strides=[2, 2])
        # [128, 128]
        D_conv_2 = tf.layers.conv2d(D_mean_pool_1, filters=32, kernel_size=[
                                    3, 3], padding='SAME', activation=activation_function, kernel_regularizer=regularizer)
        # [128, 128]
        D_mean_pool_2 = tf.nn.pool(D_conv_2, window_shape=[
                                   2, 2], pooling_type='AVG', padding='VALID', strides=[2, 2])
        # [64, 64]
        D_conv_3 = tf.layers.conv2d(D_mean_pool_2, filters=64, kernel_size=[
                                    3, 3], padding='SAME', activation=activation_function, kernel_regularizer=regularizer)
        # [64, 64]
        D_mean_pool_3 = tf.nn.pool(D_conv_3, window_shape=[
                                   2, 2], pooling_type='AVG', padding='VALID', strides=[2, 2])
        # [32, 32]
        D_conv_4 = tf.layers.conv2d(D_mean_pool_3, filters=128, kernel_size=[
                                    3, 3], padding='SAME', activation=activation_function, kernel_regularizer=regularizer)
        # [32, 32]
        D_mean_pool_4 = tf.nn.pool(D_conv_4, window_shape=[
                                   2, 2], pooling_type='AVG', padding='VALID', strides=[2, 2])
        # [16, 16]
        D_conv_5 = tf.layers.conv2d(D_mean_pool_4, filters=256, kernel_size=[
                                    3, 3], padding='SAME', activation=activation_function,  kernel_regularizer=regularizer)
        # [16, 16]
        D_mean_pool_5 = tf.nn.pool(D_conv_5, window_shape=[
                                   4, 4], pooling_type='AVG', padding='VALID', strides=[4, 4])
        # [4, 4]
        D_conv_6 = tf.layers.conv2d(D_mean_pool_5, filters=2, kernel_size=[
                                    3, 3], padding='SAME', activation=activation_function,  kernel_regularizer=regularizer)
        # [4, 4]
        D_mean_pool_6 = tf.nn.pool(D_conv_6, window_shape=[
                                   4, 4], pooling_type='AVG', padding='VALID', strides=[4, 4])
        # [1, 1], and finally, [batch_size][1][1][2]
        D_logit = tf.reshape(D_mean_pool_6, shape=[32, 2])
        # [batch_size][2]

        return D_logit

        '''
        D_hidden_layer_1 = tf.layers.dense(
            inputs=x, units=255, activation=activation_function)
        D_hidden_layer_2 = tf.layers.dense(
            inputs=D_hidden_layer_1, units=16, activation=activation_function)
        D_logit = tf.layers.dense(inputs=D_hidden_layer_2, units=Y_dim,
                                  activation=activation_function)

        return D_logit
        '''


def generator(z, name=None):
    with tf.name_scope(name, "generator", [z]) as scope:
        # z: [32, 65536] (Z_dim = 256 * 256)
        input = tf.reshape(z, shape=[32, 256, 256, 1])
        # input: [32, 256, 256, 1]
        G_conv_1 = tf.layers.conv2d(input, filters=96, kernel_size=[
                                    8, 8], padding='SAME', activation=activation_function)
        # [32, 256, 256, 96]
        # G_upscaled_1 = tf.image.resize_bicubic(images=G_conv_1, size=[128, 128])
        # [32, 128, 128, 96]
        G_conv_2 = tf.layers.conv2d(G_conv_1, filters=64, kernel_size=[
                                    5, 5], padding='SAME', activation=activation_function)
        # [32, 256, 256, 64]
        # G_upscaled_2 = tf.image.resize_bicubic(G_conv_2, size=[256, 256])
        # [32, 256, 256, 64]
        G_conv_3 = tf.layers.conv2d(G_conv_2, filters=64, kernel_size=[
                                    5, 5], padding='SAME', activation=activation_function)
        # [32, 256, 256, 64]
        G_conv_4 = tf.layers.conv2d(G_conv_3, filters=1, kernel_size=[
                                    5, 5], padding='SAME', activation=activation_function)

        # [32, 256, 256, 1]
        G_logit = G_conv_4 * custom_filter
        # [32, 256, 256, 1], but filtered out the last column and row

        return G_logit

        '''
        G_hidden_layer_1 = tf.layers.dense(
            inputs=z, units=255, activation=activation_function)
        G_outputs = tf.layers.dense(inputs=G_hidden_layer_1, units=X_dim,
                                    activation=activation_function)

        return G_outputs
        '''


with tf.name_scope("training") as scope:
    # Getting samples from random data
    G_sample = generator(Z)
    # Getting logits
    D_logit_real = discriminator(X)
    D_logit_fake = discriminator(G_sample)

    # Applying softmax
    D_proba_real = tf.nn.softmax(logits=D_logit_real)
    D_proba_real = tf.clip_by_value(
        D_proba_real, clip_value_min=1e-4, clip_value_max=1.0)
    D_proba_fake = tf.nn.softmax(logits=D_logit_fake)
    D_proba_fake = tf.clip_by_value(
        D_proba_fake, clip_value_min=1e-4, clip_value_max=1.0)

    with tf.name_scope("category_1") as sub_scope:
        # Conditional entropy H[p(y|x, D)] averaged over the real samples
        D_log_real = tf.log(D_proba_real)
        D_entrophy_real = D_proba_real * D_log_real
        D_mean_real = tf.reduce_sum(D_entrophy_real, axis=1)
        D_mean_real = -D_mean_real
        D_entrophy_real_mean = tf.reduce_mean(D_mean_real, axis=0)
        D_entrophy_real_mean = tf.reshape(D_entrophy_real_mean, shape=[1])

    with tf.name_scope("category_2") as sub_scope:
        # Conditional entropy H[p(y|G(z), D)] averaged over the generated samples
        G_log_fake = tf.log(D_proba_fake)
        G_entrophy_fake = D_proba_fake * G_log_fake
        G_mean = tf.reduce_sum(G_entrophy_fake, axis=1)
        G_mean = -G_mean
        G_entrophy_fake_mean = tf.reduce_mean(G_mean, axis=0)
        G_entrophy_fake_mean = tf.reshape(G_entrophy_fake_mean, shape=[1])

    with tf.name_scope("category_3") as sub_scope:
        # Entropy of the marginal (batch-averaged) class distribution
        D_class_mean = tf.reduce_mean(D_proba_real, axis=0, keep_dims=True)
        D_class_mean_log = tf.log(D_class_mean)
        D_class_entropy = D_class_mean * D_class_mean_log
        D_class = tf.reduce_sum(D_class_entropy, axis=1)
        D_class = -D_class
        D_class = tf.reshape(D_class, shape=[1])

        G_class_mean = tf.reduce_mean(D_proba_fake, axis=0, keep_dims=True)
        G_class_mean_log = tf.log(G_class_mean)
        G_class_entrophy = G_class_mean * G_class_mean_log
        G_class = tf.reduce_sum(G_class_entrophy, axis=1)
        G_class = -G_class
        G_class = tf.reshape(G_class, shape=[1])

    with tf.name_scope("supervised") as sub_scope:
        # Cross-entropy between the labels Y and p(y|x, D) for labeled data
        D_labeled = Y * D_log_real
        D_cross_entrophy = tf.reduce_sum(D_labeled, axis=1)
        D_cross_entrophy = -D_cross_entrophy
        D_supervised = tf.reduce_mean(D_cross_entrophy, axis=0)
        D_supervised_weighted = value_lambda * D_supervised
        D_supervised_weighted = tf.reshape(D_supervised_weighted, shape=[1])

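    # The paper states the discriminator objective as an arg max; AdamOptimizer
    # only minimizes, so the combined objective is negated below and the negated
    # value is what actually gets minimized.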
    D_loss = D_class - D_entrophy_real_mean + \
        G_entrophy_fake_mean + D_supervised_weighted
    G_loss = -G_class + G_entrophy_fake_mean
    D_loss = -D_loss

    D_solver = tf.train.AdamOptimizer().minimize(D_loss)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss)

# with tf.name_scope("testing") as scope:

【Question comments】:

What have you tried? Let's see some code.

@Alex Added the code!

That's a lot of code, @user3551261. What would help is a minimal set of code and the expected output. I suspect you only need 2 or 3 lines of code and 2 or 3 lines of sample output. If you are really asking how to use arg_max instead of arg_min, a very short example would be far more helpful. I'd like to help, but the prospect of running all of the code you posted above is a daunting place to start.

【Answer 1】:

I did some research and asked a few questions of a friend who does deep learning research at a large company. It turns out that generative adversarial networks are not well suited to classification work. So I changed my mind and implemented the classifier with GoogLeNet instead. Problem solved!

【Discussion】:
