Pytorch LSTM-VAE Sentence Generator: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
Posted: 2021-03-22 08:34:56

【Question】: I am trying to build an LSTM VAE as a learning step toward future work with PyTorch. I managed to get it working on some small test data, but now that I want to run it on my actual data I keep getting this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [10, 40]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Any help on how to fix this error, and more generally on making my code more efficient, would be great! Here is the relevant part of the traceback:
File "<ipython-input-16-7fe0e9e30e5d>", line 190, in <module>
rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
File "<ipython-input-16-7fe0e9e30e5d>", line 166, in train_batch
reconstruction, hidden, kld = model(x, G_inp, None, None)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 93, in forward
mu, logvar, z = self.encoder(x)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 37, in forward
out1, self.hidden = self.lstm(x, self.hidden)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py", line 582, in forward
self.dropout, self.training, self.bidirectional, self.batch_first)
(Triggered internally at /pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
allow_unreachable=True) # allow_unreachable flag
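For reference, the same class of error can be reproduced with a tiny standalone sketch that has nothing to do with my model (illustrative only): calling backward() a second time through a graph whose parameters were already updated in place by optimizer.step():

import torch

lin = torch.nn.Linear(3, 3)
opt = torch.optim.SGD(lin.parameters(), lr=0.1)

out = lin(torch.randn(2, 3)).sum()
out.backward(retain_graph=True)  # first backward, graph kept alive
opt.step()                       # lin.weight is updated in place, its version counter increases
out.backward()                   # second backward still needs the old weight -> same RuntimeError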
The code is below. (Please forgive all the .clone() calls; I read that they might be a solution, so I tested them everywhere, without any luck.)
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
from keras.preprocessing.text import Tokenizer
from keras import preprocessing
import torch.nn.functional as F
import math
import random
#encoder
class Encoder(nn.Module):
    def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size):
        super(Encoder, self).__init__()
        self.n_layers_E = n_layers_E
        self.n_hidden_E = n_hidden_E
        self.batch_size = batch_size
        self.dim_z = dim_z
        self.lstm = nn.LSTM(input_size = embedding_dim, hidden_size = n_hidden_E, num_layers = n_layers_E, batch_first = True, bidirectional = True)
        self.hidden_to_mu = nn.Linear(2*n_hidden_E, dim_z)
        self.hidden_to_logvar = nn.Linear(2*n_hidden_G, dim_z)
        self.hidden = (torch.zeros(2*n_layers_E, batch_size, n_hidden_E), torch.zeros(2*n_layers_E, batch_size, n_hidden_E))

    def forward(self, x):
        batch_size, n_seq, n_embed = x.size()
        #batch_size, n_seq = x.size()
        out1, self.hidden = self.lstm(x, self.hidden)
        e_hidden = self.hidden[0].view(batch_size, 2 * self.n_hidden_E).clone()
        #e_hidden = torch.cat(list(hidden), dim = 0)
        mu = self.hidden_to_mu(e_hidden)
        logvar = self.hidden_to_logvar(e_hidden)
        epsilon = torch.randn([batch_size, self.dim_z])
        z = mu + torch.exp(logvar*0.5)*epsilon
        return mu, logvar, z
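# Note: Encoder.hidden is created once in __init__ and then overwritten with the LSTM's
# final state on every forward call, so it is carried over from one batch to the next.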
class Generator(nn.Module):
    def __init__(self, n_hidden_G, n_layers_G, embedding_dim, dim_z, vocab_size, batch_size):
        super(Generator, self).__init__()
        self.n_hidden_G = n_hidden_G
        self.n_layers_G = n_layers_G
        self.n_z = dim_z
        self.batch_size = batch_size
        self.LSTM = nn.LSTM(input_size = embedding_dim + dim_z, hidden_size = n_hidden_G, num_layers = n_layers_G, batch_first = True)
        self.fc = nn.Linear(n_hidden_G, vocab_size)
        self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G),
                       torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))

    def forward(self, x, z, g_hidden = None):
        batch_size, n_seq, n_embed = x.size()
        #batch_size, n_seq = x.size()
        z = torch.cat([z]*n_seq, 1).view(batch_size, n_seq, self.n_z)
        x = torch.cat([x, z], dim = 2)
        if g_hidden is None:  #if we are validating
            self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G),
                           torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))
        else:  #if we are training
            self.hidden = g_hidden
        output, self.hidden = self.LSTM(x, self.hidden)
        output = self.fc(output)
        return output, self.hidden
class VAE(nn.Module):
    def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size, n_layers_G):
        super(VAE, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder = Encoder(embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size)
        self.generator = Generator(n_hidden_G, n_layers_G, embedding_dim, dim_z, vocab_size, batch_size)
        self.n_z = dim_z

    def forward(self, x, G_inp, z, G_hidden):
        if z is None:
            batch_size, n_seq = x.size()
            x = self.embedding(x)
            mu, logvar, z = self.encoder(x)
            kld = -0.5*torch.sum(logvar - mu.pow(2) - logvar.exp() + 1).mean()
        else:
            kld = None
        G_inp = self.embedding(G_inp)
        logit, G_hidden = self.generator(G_inp, z, G_hidden)
        return logit, G_hidden, kld
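# The kld term above is the closed-form KL divergence between N(mu, exp(logvar)) and N(0, I):
# KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar)); the trailing .mean() acts on a scalar and is a no-op.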
train_df = pd.read_csv("train.csv", header =None)[0:500]
test_df = pd.read_csv("test.csv",header =None)[0:500]
train = train_df.iloc[:,0]
max_words = 2000
max_len = 25
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(train)
sequences = tok.texts_to_sequences(train)
sequences_matrix = preprocessing.sequence.pad_sequences(sequences, maxlen = max_len)
#tok.sequences_to_texts(sequences)
n_hidden_E = 10
n_layers_E = 1
embedding_dim = 10
vocab_size = max_words
n_hidden_G = 10
n_layers_G = 2
dim_z = 10
train_size = len(train)
batch_size = 100
rec_coef = 7
lr = 0.01
epochs = 100
def create_generator_input(x, train):
    G_inp = x[:, 0:max_len-1].clone()  #input for generator should exclude last word of sequence
    # if train == False:
    #     return G_inp
    # r = np.random.rand(G_inp.size(0), G_inp.size(1))
    # #Perform word_dropout according to random values (r) generated for each word
    # for i in range(len(G_inp)):
    #     for j in range(1, G_inp.size(1)):
    #         if r[i, j] < opt.word_dropout and G_inp[i, j] not in [vocab.stoi[opt.pad_token], vocab.stoi[opt.end_token]]:
    #             G_inp[i, j] = vocab.stoi[opt.unk_token]
    return G_inp
def producebatches(x, batch_size):
    k = math.floor(x.shape[0]/batch_size)
    total = k*batch_size
    flatten = x[0:total].flatten()
    batches = flatten.reshape((k, batch_size, x.shape[1]))
    return batches
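# For example, if the csv provides the full 500 rows loaded above and batch_size = 100, the
# (500, 25) padded matrix is reshaped into batches of shape (5, 100, 25): 5 batches of 100 sequences.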
batches = producebatches(sequences_matrix, batch_size)
model = VAE(embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size,n_layers_G)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.BCELoss(reduction = 'sum')
model.train()
def train_batch(x, G_inp, step, train = True):
    reconstruction, hidden, kld = model(x, G_inp, None, None)
    reconstruction2 = reconstruction.view(-1, vocab_size).clone()  #converting into shape (batch_size*(n_seq-1), n_vocab) to facilitate performing F.cross_entropy()
    #y = x[:, 1:x.size(1)].clone()  #target for generator should exclude first word of sequence
    #y = y.contiguous().view(-1)
    G_inp2 = G_inp.contiguous().view(-1)  #converting into shape (batch_size*(n_seq-1),1) to facilitate performing F.cross_entropy()
    rec_loss = F.cross_entropy(reconstruction2, G_inp2)
    kld_coef = (math.tanh((step - 15000)/1000) + 1) / 2
    #kld_coef = min(1, step/(200000.0))
    loss = rec_coef*rec_loss + kld_coef*kld
    if train == True:
        torch.autograd.set_detect_anomaly(True)  #skip below step if we are performing validation
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
    return rec_loss.item(), kld.item()
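# Note: loss.backward(retain_graph=True) keeps this batch's graph alive, and the encoder's
# persistent self.hidden ties the next batch's graph to it, so a later backward pass can reach
# LSTM weights that optimizer.step() has already modified in place.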
for epoch in range(epochs):
    train_rec_loss = []
    train_kl_loss = []
    for i in range(batches.shape[0]):
        x = torch.tensor(batches[i], dtype = torch.long)
        G_inp = create_generator_input(x, train = True)
        rec_loss, kl_loss = train_batch(x, G_inp, epoch, train = True)
        train_rec_loss.append(rec_loss)
        train_kl_loss.append(kl_loss)
    train_rec_loss = np.mean(train_rec_loss)
    train_kl_loss = np.mean(train_kl_loss)
    print("No.", epoch, "T_rec:", '%.2f'%rec_loss, "T_kld:", '%.2f'%kl_loss)
def generate_sentences(n):
    model.eval()
    sentences = []
    for i in range(n):
        z = torch.randn([1, dim_z])
        hidden = (torch.zeros(n_layers_G, 1, n_hidden_G),
                  torch.zeros(n_layers_G, 1, n_hidden_G))
        G_inp = torch.LongTensor(1, 1).fill_(1)
        str_ind = []
        while len(str_ind) < 49:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = model(None, G_inp, z, hidden)
                probs = F.softmax(logit[0], dim=1)
                G_inp = torch.multinomial(probs, 1)
                str_ind.append(G_inp[0][0].item())
        sentences.append(str_ind)
    return sentences
t = generate_sentences(1)
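For reference, the sampled index lists in t can be mapped back to words with the tokenizer fitted above (a small usage sketch; sequences_to_texts is the Keras Tokenizer counterpart of texts_to_sequences):

generated_text = tok.sequences_to_texts(t)  #list of generated sentences as strings
print(generated_text)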
【Comments】:
【Answer 1】: First of all, you can re-initialize the hidden state after every epoch. This gets you past the error you are facing without any major changes:
for epoch in range(epochs):
    train_rec_loss = []
    train_kl_loss = []
    for i in range(batches.shape[0]):
        x = torch.tensor(batches[i], dtype = torch.long)
        G_inp = create_generator_input(x, train = True)
        rec_loss, kl_loss = train_batch(x, G_inp, epoch, train = True)
        train_rec_loss.append(rec_loss)
        train_kl_loss.append(kl_loss)
    model.hidden = (torch.zeros(n_layers_G, batch_size, n_hidden_G),
                    torch.zeros(n_layers_G, batch_size, n_hidden_G))
    train_rec_loss = np.mean(train_rec_loss)
    train_kl_loss = np.mean(train_kl_loss)
    print("No.", epoch, "T_rec:", '%.2f'%rec_loss, "T_kld:", '%.2f'%kl_loss)
Also, you can avoid in-place operations on activation functions and dropout (inplace = False), although I do not think that applies in your case.
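For completeness, a closely related variant that is commonly suggested for this error (a sketch of a possible modification to the Encoder and train_batch above, not something verified on the asker's data): build the recurrent hidden state inside forward instead of storing it on the module, and then drop retain_graph=True, so no backward pass ever reaches LSTM weights that optimizer.step() has already modified in place:

# Encoder.forward, modified so that no state from the previous batch's graph is reused
def forward(self, x):
    batch_size, n_seq, n_embed = x.size()
    hidden = (torch.zeros(2*self.n_layers_E, batch_size, self.n_hidden_E),
              torch.zeros(2*self.n_layers_E, batch_size, self.n_hidden_E))
    out1, hidden = self.lstm(x, hidden)
    e_hidden = hidden[0].view(batch_size, 2*self.n_hidden_E)
    mu = self.hidden_to_mu(e_hidden)
    logvar = self.hidden_to_logvar(e_hidden)
    epsilon = torch.randn([batch_size, self.dim_z])
    z = mu + torch.exp(logvar*0.5)*epsilon
    return mu, logvar, z

# train_batch, training branch: a single backward per step, no retained graph
optimizer.zero_grad()
loss.backward()      # retain_graph=True is no longer needed
optimizer.step()

The Generator already rebuilds self.hidden when g_hidden is None, so it needs no change for this.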
【Comments】: