"weights can not be broadcast to values" ValueError when trying to use a Masking layer in TensorFlow
- Based on the Alex Graves paper on this topic, I am trying to re-implement a handwriting generation network. I am using a dataset from the IAM handwriting database, which contains recordings of pen positions on a whiteboard, with 2 values recorded per timestep (the x and y coordinates).
- Timesteps are collected into strokes (a series of points during which the pen touches the whiteboard).
- Strokes are collected into lines of text.
- Each line of text is treated as a separate batch.
- Additionally, I modified an implementation of a Mixture Density Network (MDN) layer for my own purposes.
(here my assumptions begin)
- Since the sequences vary in length, each sequence has to be pre-padded.
- I think the padded values then have to be masked out with tf.keras.layers.Masking() (a minimal sketch of this setup follows the error message below). However, when I include this layer, the code breaks at the stage where the custom loss function is added, raising the exception:
ValueError: weights can not be broadcast to values. values.rank=0. weights.rank=2. values.shape=(). weights.shape=(8, 1939).
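To illustrate what I mean by pre-padding and masking, here is a minimal sketch; the toy lengths and the (x, y, end_of_stroke) layout of each timestep are assumptions for the example:
import numpy as np
import tensorflow as tf

# Two toy "lines" of different lengths; each timestep is an
# (x, y, end_of_stroke) triple.
lines = [np.random.rand(5, 3), np.random.rand(8, 3)]

# Pre-pad with zeros so both lines share the same number of timesteps.
padded = tf.keras.preprocessing.sequence.pad_sequences(
    lines, padding='pre', dtype='float64')  # shape (2, 8, 3)

# Masking flags the all-zero padded timesteps so downstream layers skip them.
masked = tf.keras.layers.Masking(mask_value=0.0)(tf.constant(padded))
print(masked._keras_mask)  # (2, 8) boolean mask, False on the padded steps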
Full code:
MDN implementation
# -*-encoding: utf-8-*-
# Author: Danil Kovalenko
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow_probability import distributions as tfd
def elu_plus_one_plus_epsilon(x):
"""ELU activation with a very small addition to help prevent
NaN in loss."""
return keras.backend.elu(x) + 1 + keras.backend.epsilon()
def biased_softmax(bias=0):
def activation(x):
return keras.activations.softmax(x * (1. + bias))
return activation
def biased_exp(bias=0):
def activation(x):
return tf.exp(x - bias)
return activation
class MDN(layers.Layer):
"""A Mixture Density Network Layer for Keras.
This layer has a few tricks to avoid NaNs in the loss function when training:
- Activation for variances is ELU + 1 + 1e-8 (to avoid very small values)
    - Mixture weights (pi) are trained as logits, not in the softmax space.
A loss function needs to be constructed with the same output dimension and number of mixtures.
A sampling function is also provided to sample from distribution parametrized by the MDN outputs.
"""
def __init__(self, output_dimension, num_mixtures, bias=0, **kwargs):
self.output_dim = output_dimension
self.num_mix = num_mixtures
self.bias = bias
with tf.name_scope('MDN'):
# end of stroke probability
self.mdn_e = layers.Dense(1, name='mdn_e', activation='sigmoid')
# mixing values, logits
self.mdn_pi = layers.Dense(self.num_mix, name='mdn_pi',
activation=biased_softmax(bias))
# means
self.mdn_mu = layers.Dense(self.output_dim * self.num_mix,
name='mdn_mu1')
            # standard deviations
self.mdn_std = layers.Dense(self.output_dim * self.num_mix,
name='mdn_std1',
activation=elu_plus_one_plus_epsilon)
# correlation
# self.mdn_rho = layers.Dense(self.num_mix, name='mdn_rho',
# activation='tanh')
self.layers = [self.mdn_e, self.mdn_pi, self.mdn_mu,
self.mdn_std,
# self.mdn_rho,
]
super(MDN, self).__init__(**kwargs)
def build(self, input_shape):
with tf.name_scope('layers'):
for layer in self.layers:
layer.build(input_shape)
super(MDN, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
tf.print(mask)
return mask
def call(self, x, mask=None):
tf.print(mask)
with tf.name_scope('MDN'):
mdn_out = layers.concatenate([l(x) for l in self.layers],
name='mdn_outputs')
return mdn_out
def get_config(self):
config = {
"output_dimension": self.output_dim,
"num_mixtures": self.num_mix,
"bias": self.bias
}
base_config = super(MDN, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def get_mixture_loss_func(output_dim, num_mixes, eps=1e-8):
"""
    Construct a loss function for the MDN layer, parametrised
    by the number of mixtures.
"""
def mdn_loss_func(y_true, y_pred):
# Split the inputs into parameters, 1 for end-of-stroke, `num_mixes`
# for other
# y_true = tf.reshape(tensor=y_true, shape=y_pred.shape)
y_pred = tf.reshape(y_pred,
[-1, (2 * num_mixes * output_dim + 1) + num_mixes],
name='reshape_ypreds')
y_true = tf.reshape(y_true,
[-1, output_dim + 1],
name='reshape_ytrue')
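        # Per-timestep parameter layout of y_pred:
        # [e | pi (num_mixes) | mu (num_mixes * output_dim) | std (num_mixes * output_dim)]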
out_e, out_pi, out_mus, out_stds = tf.split(
y_pred,
num_or_size_splits=[1,
num_mixes,
num_mixes * output_dim,
num_mixes * output_dim],
name='mdn_coef_split',
axis=-1
)
cat = tfd.Categorical(logits=out_pi)
components_splits = [output_dim] * num_mixes
mus = tf.split(out_mus, num_or_size_splits=components_splits, axis=1)
stds = tf.split(out_stds, num_or_size_splits=components_splits, axis=1)
components = [tfd.MultivariateNormalDiag(loc=mu_i, scale_diag=std_i)
for mu_i, std_i in zip(mus, stds)]
mix = tfd.Mixture(cat=cat, components=components)
xs, ys, es = tf.unstack(y_true, axis=-1)
X = tf.stack((xs, ys), axis=-1)
stroke = tfd.Bernoulli(logits=out_e)
loss1 = tf.negative(mix.log_prob(X))
loss2 = tf.negative(stroke.log_prob(es))
loss = tf.add(loss1, loss2)
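        # NOTE: reduce_mean collapses the loss over all axes to a rank-0
        # scalar; Keras then tries to broadcast the (batch_size, time_steps)
        # mask weights from the Masking layer against that scalar.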
loss = tf.reduce_mean(loss)
return loss
# Actually return the loss function
with tf.name_scope('MDN'):
return mdn_loss_func
def get_mixture_mse_accuracy(output_dim, num_mixes):
"""
Construct an MSE accuracy function for the MDN layer
    that takes one sample and compares it to the true value.
"""
# Construct a loss function with the right number of mixtures and outputs
def mse_func(y_true, y_pred):
# Reshape inputs in case this is used in a TimeDistributed layer
y_pred = tf.reshape(y_pred,
[-1, (2 * num_mixes * output_dim + 1) + num_mixes],
name='reshape_ypreds')
y_true = tf.reshape(y_true,
[-1, output_dim + 1],
name='reshape_ytrue')
out_e, out_pi, out_mus, out_stds = tf.split(
y_pred,
num_or_size_splits=[1,
num_mixes,
num_mixes * output_dim,
num_mixes * output_dim],
name='mdn_coef_split',
axis=-1
)
cat = tfd.Categorical(logits=out_pi)
components_splits = [output_dim] * num_mixes
mus = tf.split(out_mus, num_or_size_splits=components_splits, axis=1)
stds = tf.split(out_stds, num_or_size_splits=components_splits, axis=1)
components = [tfd.MultivariateNormalDiag(loc=mu_i, scale_diag=std_i)
for mu_i, std_i in zip(mus, stds)]
mix = tfd.Mixture(cat=cat, components=components)
stroke = tfd.Bernoulli(logits=out_e)
pos_samp = mix.sample()
stroke_samp = tf.cast(stroke.sample(), tf.float32)
samp = tf.concat((pos_samp, stroke_samp), axis=-1)
mse = tf.reduce_mean(tf.square(samp - y_true), axis=-1)
        # TODO: temperature adjustment for the sampling function.
return mse
# Actually return the loss_func
with tf.name_scope('MDNLayer'):
return mse_func
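As a side note, the loss function can be exercised on its own, without the model. Below is a minimal sanity-check sketch; the batch size, timestep count, output_dim and num_mixes are arbitrary assumptions, and get_mixture_loss_func is the function defined above:
import tensorflow as tf

# Arbitrary toy shapes (assumptions): batch=4, timesteps=6,
# output_dim=2 (x, y), num_mixes=5.
output_dim, num_mixes = 2, 5
params = 2 * num_mixes * output_dim + num_mixes + 1  # 26 values per timestep

loss_fn = get_mixture_loss_func(output_dim, num_mixes)
y_true = tf.random.uniform([4, 6, output_dim + 1])
# Uniform positives keep the std slices of y_pred valid.
y_pred = tf.random.uniform([4, 6, params], minval=0.1, maxval=1.0)
# Prints a rank-0 scalar, since the function reduces over all axes -- the
# same rank-0 "values" the traceback at the end complains about.
print(loss_fn(y_true, y_pred))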
Network
#!/usr/bin/env python3
# -*-encoding: utf-8-*-
# Author: Danil Kovalenko
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Masking, add, Input, concatenate
from tensorflow.keras.models import Sequential, Model
from mdn import MDN, get_mixture_loss_func
from custom_mdn import MDN as _MDN, get_mixture_loss_func as _get_mixture_loss_func, get_mixture_mse_accuracy
def get_lstm(amt, params):
return [LSTM(**params) for i in range(amt)]
def define_model2(N, batch_size, time_steps, vector_size, num_mixtures):
"+ skip connections"
lstm_params = {'units': N,
'activation': 'tanh',
'return_sequences': True,
'batch_input_shape': (batch_size, time_steps, vector_size)
}
enter = Input(batch_shape=(batch_size, time_steps, vector_size))
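    # Mask out the pre-padded timesteps (feature vectors equal to (0, 0, 0)).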
mask = Masking(mask_value=(0, 0, 0),
batch_input_shape=(batch_size,time_steps, vector_size)
)(enter)
raw_lstm1, raw_lstm2, raw_lstm3 = get_lstm(3, lstm_params)
input_proxy = Dense(N)(mask)
lstm1 = raw_lstm1(mask)
lvl1_out = add([input_proxy, lstm1])
lstm2 = raw_lstm2(lvl1_out)
lvl2_out = add([input_proxy, lstm2])
lstm3 = raw_lstm3(lvl2_out)
lvl3_out = add([input_proxy, lstm3])
out_proxy = Dense(vector_size)
lstm1_proxy = out_proxy(lstm1)
lstm2_proxy = out_proxy(lstm2)
lstm3_proxy = out_proxy(lstm3)
out_dense = Dense(units=vector_size, activation='linear')(lvl3_out)
out_proxy = add([out_dense, lstm1_proxy, lstm2_proxy, lstm3_proxy])
out = _MDN(vector_size - 1, num_mixtures)(out_proxy)
m = Model(inputs=enter, outputs=out)
m.compile(optimizer='rmsprop',
loss=_get_mixture_loss_func(vector_size - 1, num_mixtures),
# metrics=[get_mixture_mse_accuracy(vector_size - 1, num_mixtures), ]
)
return m
if __name__ == '__main__':
N = 10
with h5py.File('../dataset.h5', 'r') as f:
X = f['lines'][:]
X = X[:200]
batch_size = 8
_, time_steps, vector_size = X.shape
m = define_model2(N, batch_size, time_steps - 1, vector_size, 5)
# print(m.summary())
size = X.shape[0] - X.shape[0] % batch_size
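    # Next-step prediction: inputs are timesteps [0, T-1), targets are timesteps [1, T).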
X_train = X[:size, :-1, :]
Y_train = X[:size, 1:, :]
X_train = tf.convert_to_tensor(X_train.astype(np.float64))
Y_train = tf.convert_to_tensor(Y_train.astype(np.float64))
m.fit(X_train, Y_train,
batch_size=None, epochs=1)
m.save('hwg_model3.h5')
Exception
Traceback (most recent call last):
File "/home/godspell/UserData/Scripts/studyprojects4/handwritings_gen/models/rnn_model.py", line 66, in <module>
m = define_model2(N, batch_size, time_steps - 1, vector_size, 5)
File "/home/godspell/UserData/Scripts/studyprojects4/handwritings_gen/models/rnn_model.py", line 53, in define_model2
loss=_get_mixture_loss_func(vector_size - 1, num_mixtures),
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 446, in compile
self._compile_weights_loss_and_weighted_metrics()
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1592, in _compile_weights_loss_and_weighted_metrics
self.total_loss = self._prepare_total_loss(masks)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1656, in _prepare_total_loss
reduction=losses_utils.ReductionV2.NONE)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/losses_utils.py", line 107, in compute_weighted_loss
losses, sample_weight)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/losses/util.py", line 148, in scale_losses_by_sample_weight
sample_weight = weights_broadcast_ops.broadcast_weights(sample_weight, losses)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/weights_broadcast_ops.py", line 167, in broadcast_weights
with ops.control_dependencies((assert_broadcastable(weights, values),)):
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/weights_broadcast_ops.py", line 103, in assert_broadcastable
weights_rank_static, values.shape, weights.shape))
ValueError: weights can not be broadcast to values. values.rank=0. weights.rank=2. values.shape=(). weights.shape=(8, 1939).