Adding a teacher_forcing mechanism to a PyTorch seq2seq model
Posted by liuxinyu12378
Here the teacher forcing mechanism is added inside the decoding loop, so the decision is re-sampled at every time step; this works when the ground-truth target for each step is known (i.e., during training).
If the per-step target is not fixed, the teacher-forcing decision has to be made outside the loop instead, once for the whole sequence (a sketch of that variant follows below).
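For reference, this is a minimal, self-contained sketch of the "outside the loop" variant, where teacher forcing is decided once per batch before decoding starts. It is not the post's code: the function signature, the teacher_forcing_ratio argument, and the shapes in the comments are illustrative assumptions.

import random

import torch
import torch.nn.functional as F


def decode_with_teacher_forcing(embedding, gru, fc, encoder_hidden, target,
                                max_len, sos_idx, teacher_forcing_ratio=0.5):
    """Greedy decoding where the teacher-forcing decision is made once,
    before the loop, rather than at every time step.
    gru is assumed to be an nn.GRU(..., batch_first=True)."""
    batch_size = encoder_hidden.size(1)
    decoder_hidden = encoder_hidden                        # [1, batch_size, hidden_size]
    decoder_input = torch.full((batch_size, 1), sos_idx, dtype=torch.long)
    outputs = []

    # One decision for the whole sequence (outside the time-step loop)
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for t in range(max_len):
        embedded = embedding(decoder_input)                # [batch_size, 1, emb_dim]
        out, decoder_hidden = gru(embedded, decoder_hidden)
        step_logits = F.log_softmax(fc(out.squeeze(1)), dim=-1)  # [batch_size, vocab_size]
        outputs.append(step_logits)

        if use_teacher_forcing:
            decoder_input = target[:, t].unsqueeze(-1)     # feed the ground-truth token
        else:
            decoder_input = step_logits.argmax(dim=-1, keepdim=True)  # feed the prediction

    return torch.stack(outputs, dim=1)                     # [batch_size, max_len, vocab_size]

It can be exercised with toy modules, e.g. nn.Embedding(20, 50), nn.GRU(50, 64, batch_first=True), nn.Linear(64, 20), an initial hidden state torch.zeros(1, 4, 64) and a target torch.randint(0, 20, (4, 10)).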
Changes in decoder.py
""" 实现解码器 """ import torch.nn as nn import config import torch import torch.nn.functional as F import numpy as np import random class Decoder(nn.Module): def __init__(self): super(Decoder, self).__init__() self.embedding = nn.Embedding(num_embeddings=len(config.ns), embedding_dim=50, padding_idx=config.ns.PAD) # 需要的hidden_state形状:[1,batch_size,64] self.gru = nn.GRU(input_size=50, hidden_size=64, num_layers=1, bidirectional=False, batch_first=True, dropout=0) # 假如encoder的hidden_size=64,num_layer=1 encoder_hidden :[2,batch_sizee,64] self.fc = nn.Linear(64, len(config.ns)) def forward(self, encoder_hidden,target): # 第一个时间步的输入的hidden_state decoder_hidden = encoder_hidden # [1,batch_size,encoder_hidden_size] # 第一个时间步的输入的input batch_size = encoder_hidden.size(1) decoder_input = torch.LongTensor([[config.ns.SOS]] * batch_size).to(config.device) # [batch_size,1] # print("decoder_input:",decoder_input.size()) # 使用全为0的数组保存数据,[batch_size,max_len,vocab_size] decoder_outputs = torch.zeros([batch_size, config.max_len, len(config.ns)]).to(config.device) for t in range(config.max_len): decoder_output_t, decoder_hidden = self.forward_step(decoder_input, decoder_hidden) decoder_outputs[:, t, :] = decoder_output_t # 获取当前时间步的预测值 value, index = decoder_output_t.max(dim=-1) if random.randint(0,100) >70: #teacher forcing机制 decoder_input = target[:,t].unsqueeze(-1) else: decoder_input = index.unsqueeze(-1) # [batch_size,1] # print("decoder_input:",decoder_input.size()) return decoder_outputs, decoder_hidden def forward_step(self, decoder_input, decoder_hidden): ‘‘‘ 计算一个时间步的结果 :param decoder_input: [batch_size,1] :param decoder_hidden: [batch_size,encoder_hidden_size] :return: ‘‘‘ decoder_input_embeded = self.embedding(decoder_input) # print("decoder_input_embeded:",decoder_input_embeded.size()) out, decoder_hidden = self.gru(decoder_input_embeded, decoder_hidden) # out :【batch_size,1,hidden_size】 out_squeezed = out.squeeze(dim=1) # 去掉为1的维度 out_fc = F.log_softmax(self.fc(out_squeezed), dim=-1) # [bathc_size,vocab_size] # out_fc.unsqueeze_(dim=1) #[bathc_size,1,vocab_size] # print("out_fc:",out_fc.size()) return out_fc, decoder_hidden def evaluate(self, encoder_hidden): # 第一个时间步的输入的hidden_state decoder_hidden = encoder_hidden # [1,batch_size,encoder_hidden_size] # 第一个时间步的输入的input batch_size = encoder_hidden.size(1) decoder_input = torch.LongTensor([[config.ns.SOS]] * batch_size).to(config.device) # [batch_size,1] # print("decoder_input:",decoder_input.size()) # 使用全为0的数组保存数据,[batch_size,max_len,vocab_size] decoder_outputs = torch.zeros([batch_size, config.max_len, len(config.ns)]).to(config.device) decoder_predict = [] # [[],[],[]] #123456 ,targe:123456EOS,predict:123456EOS123 for t in range(config.max_len): decoder_output_t, decoder_hidden = self.forward_step(decoder_input, decoder_hidden) decoder_outputs[:, t, :] = decoder_output_t # 获取当前时间步的预测值 value, index = decoder_output_t.max(dim=-1) decoder_input = index.unsqueeze(-1) # [batch_size,1] # print("decoder_input:",decoder_input.size()) decoder_predict.append(index.cpu().detach().numpy()) # 返回预测值 decoder_predict = np.array(decoder_predict).transpose() # [batch_size,max_len] return decoder_outputs, decoder_predict
seq2seq.py
""" 完成seq2seq模型 """ import torch.nn as nn from encoder import Encoder from decoder import Decoder class Seq2Seq(nn.Module): def __init__(self): super(Seq2Seq, self).__init__() self.encoder = Encoder() self.decoder = Decoder() def forward(self, input, input_len,target): encoder_outputs, encoder_hidden = self.encoder(input, input_len) decoder_outputs, decoder_hidden = self.decoder(encoder_hidden,target) return decoder_outputs def evaluate(self, input, input_len): encoder_outputs, encoder_hidden = self.encoder(input, input_len) decoder_outputs, decoder_predict = self.decoder.evaluate(encoder_hidden) return decoder_outputs, decoder_predict
train.py
""" 进行模型的训练 """ import torch import torch.nn.functional as F from seq2seq import Seq2Seq from torch.optim import Adam from dataset import get_dataloader from tqdm import tqdm import config import numpy as np import pickle from matplotlib import pyplot as plt from eval import eval import os model = Seq2Seq().to(config.device) optimizer = Adam(model.parameters()) if os.path.exists("./models/model.pkl"): model.load_state_dict(torch.load("./models/model.pkl")) optimizer.load_state_dict(torch.load("./models/optimizer.pkl")) loss_list = [] def train(epoch): data_loader = get_dataloader(train=True) bar = tqdm(data_loader, total=len(data_loader)) for idx, (input, target, input_len, target_len) in enumerate(bar): input = input.to(config.device) target = target.to(config.device) input_len = input_len.to(config.device) optimizer.zero_grad() decoder_outputs = model(input, input_len,target) # [batch_Size,max_len,vocab_size] predict = decoder_outputs.view(-1, len(config.ns)) target = target.view(-1) loss = F.nll_loss(predict, target, ignore_index=config.ns.PAD) loss.backward() optimizer.step() loss_list.append(loss.item()) bar.set_description("epoch:{} idx:{} loss:{:.6f}".format(epoch, idx, np.mean(loss_list))) if idx % 100 == 0: torch.save(model.state_dict(), "./models/model.pkl") torch.save(optimizer.state_dict(), "./models/optimizer.pkl") pickle.dump(loss_list, open("./models/loss_list.pkl", "wb")) if __name__ == ‘__main__‘: for i in range(5): train(i) eval() plt.figure(figsize=(50, 8)) plt.plot(range(len(loss_list)), loss_list) plt.show()