python: implementing the simplest seq2seq with TF
# seq2seq
# A minimal seq2seq that mimics text processing; there is no real text data here,
# the input is already a sequence of token indices.
# The words are not embedded with a pre-trained word2vec model, so there is no ready-made
# embedding for each word. Instead, the one-hot indices act as the first layer of the
# encoder-decoder input, the embedding matrix is the second layer of the seq2seq, and it is
# trained jointly with the whole model, rather than coming from an already trained
# word embedding matrix as is normally done.
# As in an ordinary seq2seq, though, the input is a tensor of indices of shape
# [batch_size, max_sequence_length].
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
PAD = 0
EOS = 1
vocab_size = 10
input_embedding_size = 20
encoder_hidden_units = 20
decoder_hidden_units = 20
batch_size = 100
# Generator of raw data: each call to next() yields batch_size records; every record is a list
# whose length varies within [length_from, length_to] and whose values lie in [vocab_lower, vocab_upper).
def random_sequences(length_from, length_to, vocab_lower, vocab_upper, batch_size):
    def random_length():
        if length_from == length_to:
            return length_from
        return np.random.randint(length_from, length_to + 1)
    while True:
        yield [
            np.random.randint(low=vocab_lower, high=vocab_upper, size=random_length()).tolist()
            for _ in range(batch_size)
        ]
batches = random_sequences(length_from=3, length_to=10,
                           vocab_lower=2, vocab_upper=10,
                           batch_size=batch_size)
def make_batch(inputs, max_sequence_length=None):
    sequence_lengths = [len(seq) for seq in inputs]
    batch_size = len(inputs)
    if max_sequence_length is None:
        max_sequence_length = max(sequence_lengths)
    inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32)
    for i, seq in enumerate(inputs):
        for j, element in enumerate(seq):
            inputs_batch_major[i, j] = element
    # Transpose each batch to time-major layout: downstream processing reads the data row by row,
    # so row i must hold the i-th token of every record in the batch, not one whole record.
    inputs_time_major = inputs_batch_major.swapaxes(0, 1)
    return inputs_time_major, sequence_lengths
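For example, a quick check of make_batch on two toy sequences (the demo values below are made up) shows both the zero-padding and the batch-major to time-major transpose:

# Toy check of make_batch; the input values are arbitrary demo data.
demo_time_major, demo_lengths = make_batch([[2, 3, 4], [5, 6]])
print(demo_time_major)
# [[2 5]
#  [3 6]
#  [4 0]]   <- shape [max_sequence_length, batch_size]; PAD=0 fills the shorter sequence
print(demo_lengths)  # [3, 2]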
train_graph = tf.Graph()
with train_graph.as_default():
    # shape=(None, None) is allowed: both the time and batch dimensions stay dynamic
    encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs')
    decoder_targets = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_targets')
    # embeddings is a trainable parameter matrix (trainable=True by default). Because there is no
    # pre-built word-vector table, this embedding layer sits right after the encoder-decoder
    # inputs and is trained together with the model, which hurts quality
    # (see the pretrained-embedding sketch after the graph definition below).
    embeddings = tf.Variable(tf.random_uniform([vocab_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32)
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)
    encoder_cell = tf.contrib.rnn.LSTMCell(encoder_hidden_units)
    encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell, encoder_inputs_embedded,
        dtype=tf.float32, time_major=True,
    )
    decoder_cell = tf.contrib.rnn.LSTMCell(decoder_hidden_units)
    decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell, decoder_inputs_embedded,
        initial_state=encoder_final_state,
        dtype=tf.float32, time_major=True, scope="plain_decoder",
    )
    # A fully connected layer maps the hidden states to the output (vocabulary) dimension;
    # its nonlinearity can be chosen via the activation_fn argument.
    decoder_logits = tf.contrib.layers.fully_connected(decoder_outputs, vocab_size)
    decoder_prediction = tf.argmax(decoder_logits, 2)
    # Compute the loss; the targets are first converted to one-hot vectors.
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32),
        logits=decoder_logits,
    )
    # Average loss over the batch_size records of this batch.
    # tf.nn.softmax/sigmoid_cross_entropy_with_logits is usually followed by tf.reduce_mean/sum/min/max/...
    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)
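As a side note (not part of the script above): if a pre-trained word embedding matrix were available, for example one exported from word2vec, the trainable embeddings variable above could be replaced by a frozen one. A minimal sketch, assuming the matrix has already been loaded into a hypothetical numpy array pretrained_embeddings of shape [vocab_size, input_embedding_size]:

# Sketch only: use a pre-trained embedding matrix instead of learning one from scratch.
# pretrained_embeddings is a hypothetical numpy array of shape [vocab_size, input_embedding_size].
embeddings = tf.Variable(initial_value=pretrained_embeddings,
                         trainable=False,  # freeze the embeddings during seq2seq training
                         dtype=tf.float32,
                         name='embeddings')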
loss_track = []
epochs = 3001
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        batch = next(batches)
        encoder_inputs_, _ = make_batch(batch)
        decoder_targets_, _ = make_batch([sequence + [EOS] for sequence in batch])
        decoder_inputs_, _ = make_batch([[EOS] + sequence for sequence in batch])
        feed_dict = {encoder_inputs: encoder_inputs_,
                     decoder_inputs: decoder_inputs_,
                     decoder_targets: decoder_targets_,
                     }
        _, l = sess.run([train_op, loss], feed_dict)
        loss_track.append(l)
        if epoch == 0 or epoch % 1000 == 0:
            print('loss: {}'.format(sess.run(loss, feed_dict)))
            predict_ = sess.run(decoder_prediction, feed_dict)
            for i, (inp, pred) in enumerate(zip(feed_dict[encoder_inputs].T, predict_.T)):
                print('input > {}'.format(inp))
                print('predicted > {}'.format(pred))
                if i >= 20:
                    break
plt.plot(loss_track)
plt.show()
# The program lacks tf.summary and tf.add_to_collection operations, as well as any serious
# data preprocessing; a minimal tf.summary sketch follows below.
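A minimal sketch of how tf.summary could be wired into the script above; the variable names and the log directory './seq2seq_logs' are illustrative choices, not part of the original code:

# Sketch only: inside the `with train_graph.as_default():` block, after `loss` is defined
loss_summary = tf.summary.scalar('loss', loss)
merged_summaries = tf.summary.merge_all()

# Inside the session, before the training loop ('./seq2seq_logs' is an arbitrary log directory)
writer = tf.summary.FileWriter('./seq2seq_logs', graph=train_graph)

# Inside the training loop, in place of the plain `sess.run([train_op, loss], feed_dict)` call
summary_, _, l = sess.run([merged_summaries, train_op, loss], feed_dict)
writer.add_summary(summary_, epoch)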