Decoder targets required for RNN inference
Posted: 2019-04-20 20:40:50

Question:

I have been experimenting with the deepfix tool (https://bitbucket.org/iiscseal/deepfix), a seq2seq model for correcting common programming errors.
I made changes so that the code is compatible with TF-1.12, because the original code uses tensorflow.contrib.seq2seq functions that TF-1.12 does not support (they exist only in TF-1.0.x). The main change is the seq2seq_model class defined in neural_net/train.py. The modified code is below. I am new to TensorFlow RNNs, and I coded the decoder part with the help of code found online.
import math

import numpy as np
import tensorflow as tf
from tensorflow.contrib import seq2seq

# _new_RNN_cell is a helper defined elsewhere in neural_net/train.py.


class seq2seq_model():

    PAD = 0
    EOS = 1

    def __init__(self, vocab_size, embedding_size, max_output_seq_len,
                 cell_type='LSTM', memory_dim=300, num_layers=4, dropout=0.2,
                 attention=True,
                 scope=None,
                 verbose=False):
        assert 0 <= dropout and dropout <= 1, '0 <= dropout <= 1, you passed dropout={}'.format(
            dropout)

        tf.set_random_seed(1189)

        self.attention = attention
        self.max_output_seq_len = max_output_seq_len
        self.memory_dim = memory_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.scope = scope

        if dropout != 0:
            self.keep_prob = tf.placeholder(tf.float32)
        else:
            self.keep_prob = None

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size

        self.encoder_cell = _new_RNN_cell(
            memory_dim, num_layers, cell_type, dropout, self.keep_prob)
        self.decoder_cell = _new_RNN_cell(
            memory_dim, num_layers, cell_type, dropout, self.keep_prob)

        self._make_graph()

        if self.scope is not None:
            saver_vars = [var for var in tf.global_variables(
            ) if var.name.startswith(self.scope)]
        else:
            saver_vars = tf.global_variables()

        if verbose:
            print 'root-scope:', self.scope
            print "\n\nDiscovered %d saver variables." % len(saver_vars)
            for each in saver_vars:
                print each.name

        self.saver = tf.train.Saver(saver_vars, max_to_keep=5)
    @property
    def decoder_hidden_units(self):
        return self.memory_dim

    def _make_graph(self):
        self._init_placeholders()
        self._init_decoder_train_connectors()
        self._init_embeddings()
        self._init_simple_encoder()
        self._init_decoder()
        self._init_optimizer()

    def _init_placeholders(self):
        """ Everything is time-major """
        self.encoder_inputs = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='encoder_inputs',
        )
        self.encoder_inputs_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='encoder_inputs_length',
        )
        self.decoder_targets = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='decoder_targets'
        )
        self.decoder_targets_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='decoder_targets_length',
        )
    def _init_decoder_train_connectors(self):
        with tf.name_scope('decoderTrainFeeds'):
            sequence_size, batch_size = tf.unstack(
                tf.shape(self.decoder_targets), name='decoder_targets_shape')

            EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
            PAD_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.PAD

            # Decoder train inputs: the targets shifted right by one, with EOS prepended.
            self.decoder_train_inputs = tf.concat(
                [EOS_SLICE, self.decoder_targets], axis=0, name="decoder_train_inputs")
            self.decoder_train_length = self.decoder_targets_length + 1

            # Decoder train targets: the targets padded by one step, with an EOS
            # written at each sequence's end position via a one-hot mask.
            decoder_train_targets = tf.concat(
                [self.decoder_targets, PAD_SLICE], axis=0)
            decoder_train_targets_seq_len, _ = tf.unstack(
                tf.shape(decoder_train_targets))
            decoder_train_targets_eos_mask = tf.one_hot(self.decoder_train_length - 1,
                                                        decoder_train_targets_seq_len,
                                                        on_value=self.EOS, off_value=self.PAD,
                                                        dtype=tf.int32)
            decoder_train_targets_eos_mask = tf.transpose(
                decoder_train_targets_eos_mask, [1, 0])
            decoder_train_targets = tf.add(decoder_train_targets,
                                           decoder_train_targets_eos_mask,
                                           name="decoder_train_targets")

            self.decoder_train_targets = decoder_train_targets

            self.loss_weights = tf.ones([
                batch_size,
                tf.reduce_max(self.decoder_train_length)
            ], dtype=tf.float32, name="loss_weights")
    def _init_embeddings(self):
        with tf.variable_scope("embedding") as scope:
            sqrt3 = math.sqrt(3)
            initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)
            self.embedding_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[self.vocab_size, self.embedding_size],
                initializer=initializer,
                dtype=tf.float32)
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                self.embedding_matrix, self.encoder_inputs,
                name="encoder_inputs_embedded")
            self.decoder_train_inputs_embedded = tf.nn.embedding_lookup(
                self.embedding_matrix, self.decoder_train_inputs,
                name="decoder_train_inputs_embedded")

    def _init_simple_encoder(self):
        with tf.variable_scope("Encoder") as scope:
            (self.encoder_outputs, self.encoder_state) = (
                tf.nn.dynamic_rnn(cell=self.encoder_cell,
                                  inputs=self.encoder_inputs_embedded,
                                  sequence_length=self.encoder_inputs_length,
                                  time_major=True,
                                  dtype=tf.float32)
            )
    def _init_decoder(self):
        with tf.variable_scope("decoder") as scope:
            # def output_fn(outputs):
            #     return tf.contrib.layers.fully_connected(outputs, self.vocab_size, scope=scope,
            #                                              name="output_fn")

            sequence_size, batch_size = tf.unstack(
                tf.shape(self.decoder_targets), name='decoder_targets_shape')

            train_helper = seq2seq.TrainingHelper(
                inputs=self.decoder_train_inputs_embedded,
                sequence_length=self.decoder_train_length,
                time_major=True,
                name="train_helper")

            pred_helper = seq2seq.SampleEmbeddingHelper(
                embedding=self.embedding_matrix,
                start_tokens=tf.ones([batch_size], dtype=tf.int32) * self.EOS,
                end_token=self.EOS)
                # name="pred_helper")

            def _decode(helper, scope, reuse=None):
                with tf.variable_scope(scope, reuse=reuse):
                    attention_states = tf.transpose(
                        self.encoder_outputs, [1, 0, 2])
                    attention_mechanism = seq2seq.BahdanauAttention(
                        num_units=self.decoder_hidden_units, memory=attention_states,
                        name="attention_mechanism")
                    attention_cell = seq2seq.AttentionWrapper(
                        self.decoder_cell, attention_mechanism,
                        name="attention_wrapper")
                    out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                        attention_cell, self.vocab_size, reuse=reuse)
                        # name="output_cell")
                    decoder = seq2seq.BasicDecoder(
                        cell=out_cell, helper=helper,
                        initial_state=out_cell.zero_state(
                            dtype=tf.float32, batch_size=batch_size))
                        # name="decoder")
                    outputs = seq2seq.dynamic_decode(
                        decoder=decoder, output_time_major=True,
                        impute_finished=True)
                        # name="outputs")
                    return outputs

            (self.decoder_logits_train, self.decoder_state_train, _) = _decode(
                train_helper, "decoder")
            (self.decoder_logits_inference, self.decoder_state_inference, _) = _decode(
                pred_helper, "decoder", reuse=True)

            self.decoder_logits_train = self.decoder_logits_train.rnn_output
            self.decoder_logits_inference = self.decoder_logits_inference.rnn_output
            # self.decoder_logits_train = output_fn(self.decoder_outputs_train)

            self.decoder_prediction_train = tf.argmax(
                self.decoder_logits_train, axis=-1, name='decoder_prediction_train')

            scope.reuse_variables()

            self.decoder_prediction_inference = tf.argmax(
                self.decoder_logits_inference, axis=-1,
                name='decoder_prediction_inference')
    def _init_optimizer(self):
        logits = tf.transpose(self.decoder_logits_train, [1, 0, 2])
        targets = tf.transpose(self.decoder_train_targets, [1, 0])
        self.loss = seq2seq.sequence_loss(logits=logits, targets=targets,
                                          weights=self.loss_weights)
        self.optimizer = tf.train.AdamOptimizer()
        gvs = self.optimizer.compute_gradients(self.loss)

        def ClipIfNotNone(grad):
            if grad is None:
                return grad
            return tf.clip_by_value(grad, -1., 1.)

        # capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
        capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs]
        self.train_op = self.optimizer.apply_gradients(capped_gvs)
    def make_feed_dict(self, x, x_len, y, y_len):
        feed_dict = {
            self.encoder_inputs: x,
            self.encoder_inputs_length: x_len,
            self.decoder_targets: y,
            self.decoder_targets_length: y_len,
        }
        if self.dropout != 0:
            feed_dict.update({self.keep_prob: 1.0 - self.dropout})
        return feed_dict
    def load_parameters(self, sess, filename):
        self.saver.restore(sess, filename)

    def save_parameters(self, sess, filename, global_step=None):
        self.saver.save(sess, filename, global_step=global_step)

    def train_step(self, session, x, x_len, y, y_len):
        feed_dict = self.make_feed_dict(x, x_len, y, y_len)
        _, loss = session.run([self.train_op, self.loss], feed_dict)
        return loss

    def validate_step(self, session, x, x_len, y, y_len):
        feed_dict = self.make_feed_dict(x, x_len, y, y_len)
        loss, decoder_prediction, decoder_train_targets = session.run(
            [self.loss,
             self.decoder_prediction_inference,
             self.decoder_train_targets], feed_dict)
        return loss, np.array(decoder_prediction).T, np.array(decoder_train_targets).T
    def sample(self, session, X, X_len):
        feed_dict = {self.encoder_inputs: X,
                     self.encoder_inputs_length: X_len}
        if self.dropout != 0:
            feed_dict.update({self.keep_prob: 1.0})
        decoder_prediction = session.run(
            self.decoder_prediction_inference, feed_dict)
        return np.array(decoder_prediction).T
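
For context, a minimal usage sketch of the class above (the vocabulary size, shapes, and random data here are hypothetical, not the actual deepfix training harness; all arrays are time-major, i.e. [max_seq_len, batch_size], to match the placeholders):

    model = seq2seq_model(vocab_size=1000, embedding_size=50,
                          max_output_seq_len=100)

    x = np.random.randint(2, 1000, size=(20, 32))   # encoder token ids
    x_len = np.full(32, 20, dtype=np.int32)
    y = np.random.randint(2, 1000, size=(25, 32))   # decoder target ids
    y_len = np.full(32, 25, dtype=np.int32)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        loss = model.train_step(sess, x, x_len, y, y_len)  # feeds decoder_targets; works
        preds = model.sample(sess, x, x_len)               # feeds no targets; raises the error below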
This code has a few problems:

- Main problem: the seq2seq.train_step() and seq2seq.validate_step() functions work, but when I do actual inference with seq2seq.sample() I get an error asking me to feed a value for decoder_targets. This is unexpected behavior, since the SampleEmbeddingHelper used for inference should not need decoder targets. The error:

      InvalidArgumentError (see above for traceback): You must feed a value
      for placeholder tensor 'ids/decoder_targets' with dtype int32 and
      shape [?,?]
          [[node ids/decoder_targets (defined at .../code/neural_net/train.py:241)
            = Placeholder[dtype=DT_INT32, shape=[?,?],
            _device="/job:localhost/replica:0/task:0/device:CPU:0"]]]
- When I try GreedyEmbeddingHelper instead of SampleEmbeddingHelper and then run the decoder_logits_inference op, the machine hangs and runs out of memory after a while, although SampleEmbeddingHelper works fine. (The swap itself is shown in the sketch after this list.)
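
For reference, the helper swap described in the second point is a one-line change in _init_decoder (a sketch, keeping the posted code's start/end token convention; GreedyEmbeddingHelper takes the same constructor arguments as SampleEmbeddingHelper but takes the argmax token instead of sampling):

    # Greedy decoding instead of sampling; same start/end token convention.
    pred_helper = seq2seq.GreedyEmbeddingHelper(
        embedding=self.embedding_matrix,
        start_tokens=tf.ones([batch_size], dtype=tf.int32) * self.EOS,
        end_token=self.EOS)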
Answer 1:

Well, SampleEmbeddingHelper does require decoder targets, since it mixes parts of GreedyEmbeddingHelper (inference mode) and tf.contrib.seq2seq.TrainingHelper (teacher forcing). I think you just need to use GreedyEmbeddingHelper.

At the very beginning the parameters are completely random (if not pretrained); you have probably seen that the output of a seq2seq model over its first few iterations is a complete mess. So when you use GreedyEmbeddingHelper, which generates each result from the previous output, and of course nobody has taught it "where to stop", it will usually loop on endlessly until your memory is exhausted. To solve this, set an upper limit on sentence length in tf.contrib.seq2seq.dynamic_decode; the parameter is maximum_iterations, as shown in the tf.contrib.seq2seq.dynamic_decode documentation.
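
For example, the dynamic_decode call inside _decode could be capped like this (a sketch; the bound shown reuses max_output_seq_len, which the constructor accepts but never uses — any fixed upper bound would do):

    # Cap the number of decoding steps so greedy inference cannot loop forever.
    outputs = seq2seq.dynamic_decode(
        decoder=decoder, output_time_major=True,
        impute_finished=True,
        maximum_iterations=self.max_output_seq_len)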
Comments:
Hey, I solved the problem. SampleEmbeddingHelper does not require decoder targets. The issue was that I was computing the batch size from the decoder targets, which introduced a graph dependency and caused the error. It's fixed now. I'll also look into the second problem based on your suggestion. Thanks for the reply.
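
For readers hitting the same error: the dependency described in this comment is the batch_size that _init_decoder unstacks from tf.shape(self.decoder_targets), from which the inference helper's start_tokens are built. A minimal sketch of the fix is to derive the batch size from a tensor that is always fed:

    # In _init_decoder: take the batch size from the encoder inputs (always fed),
    # so the inference path no longer touches the decoder_targets placeholder.
    sequence_size, batch_size = tf.unstack(
        tf.shape(self.encoder_inputs), name='encoder_inputs_shape')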