Pytorch RNN 错误:RuntimeError:输入必须有 3 个维度得到 1

Posted

技术标签:

【中文标题】Pytorch RNN 错误:RuntimeError:输入必须有 3 个维度得到 1【英文标题】:Pytorch RNN error: RuntimeError: input must have 3 dimensions got 1 【发布时间】:2021-11-04 19:10:29 【问题描述】:

我正在尝试根据代码 here 训练 RNN

我还发现了两个类似的帖子,但无法从中推断出我应该如何解决我的问题 here 和 here

这个错误本身很容易理解:模型需要 3 个维度的输入,但我只给了 1 个维度。但是,我不知道应该在哪里修复这个问题。我知道一个好的 Stack Overflow 帖子应当包含数据,但我不确定如何在帖子中包含示例张量,对此表示抱歉。

我的输入是 300 维(300d)的词嵌入,我的输出是一个长度为 11 的独热(one-hot)编码向量,模型需要在这 11 个输出维度的每一个维度上做出分类选择。

我将从数据加载器开始,然后从那里开始编写代码。

from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Map-style dataset pairing each sample with its label.

    Args:
        dat: indexable collection of samples.
        labels: indexable collection of labels, aligned with ``dat``.
    """

    def __init__(self, dat, labels):
        self.labels = labels
        self.dat = dat

    def __len__(self):
        """Return the number of items, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``idx``-th item as ``{"Sample": ..., "Class": ...}``."""
        # The dict braces were missing in the original line, which made the
        # whole class a syntax error.
        return {"Sample": self.dat[idx], "Class": self.labels[idx]}

我将我的普通 RNN 定义如下。

class VanillaRNN(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear read-out layer.

    Args:
        input_size: number of features per time step.
        output_size: number of units produced by the linear layer.
        hidden_dim: size of the RNN hidden state.
        n_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        # Kept on the instance because forward()/init_hidden() need them.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True: the RNN consumes (batch, seq_len, input_size).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, inputs):
        """Run the RNN over ``inputs`` and project every time step.

        Args:
            inputs: 3-D tensor laid out as (batch, seq_len, input_size).

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            (batch * seq_len, output_size) and ``hidden`` is the final
            hidden state returned by the RNN.
        """
        batch_size = inputs.size(0)

        # Fresh zero state for every call.
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(inputs, hidden)

        # Collapse batch and time so the linear layer sees one row per step.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero initial hidden state of shape (n_layers, batch_size, hidden_dim).

        The tensor is created from one of the module's own parameters so it
        always shares their device and dtype; the original version always
        produced a CPU float32 tensor, which breaks after ``.to(device)`` on
        a GPU or after ``.double()``.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.n_layers, batch_size, self.hidden_dim)

和我的训练循环如下

def plot_train_val(x, train, val, train_label,
                   val_label, title, y_label,
                   color):
  """Draw a training curve and a dashed validation curve against ``x``.

  Both curves share ``color``; the legend is placed at the lower right, the
  x-axis is labelled 'epoch', and ``y_label`` / ``title`` are applied as given.
  """
  # (values, legend label, extra plot keyword arguments) for each curve.
  curves = (
    (train, train_label, {}),
    (val, val_label, {'linestyle': '--'}),
  )
  for values, legend_name, extra in curves:
    plt.plot(x, values, label=legend_name, color=color, **extra)

  plt.legend(loc='lower right')
  plt.xlabel('epoch')
  plt.ylabel(y_label)
  plt.title(title)


def count_parameters(model):
  """Return the total number of elements across the model's trainable parameters.

  Parameters whose ``requires_grad`` flag is false are not counted.
  """
  total = 0
  for param in model.parameters():
    if param.requires_grad:
      total += param.numel()
  return total


def init_weights(m):
  """Apply Xavier-uniform initialisation to the weight of Linear/Conv1d modules.

  Only modules whose exact type is ``nn.Linear`` or ``nn.Conv1d`` are
  touched; every other module is left unchanged.
  """
  if type(m) not in (nn.Linear, nn.Conv1d):
    return
  nn.init.xavier_uniform_(m.weight)



def _run_epoch(model, device, loader, criterion, optimizer=None):
  """Make one pass over ``loader`` and return (mean batch loss, accuracy).

  When ``optimizer`` is given the pass updates the model; otherwise it only
  measures loss and accuracy.
  """
  running_loss = 0.
  correct, total = 0, 0

  for batch in loader:
    text = batch["Sample"].to(device)
    # Flatten once so the loss and the accuracy use the same target shape.
    target = batch["Class"].to(device).long().view(-1)

    # A (batch, features) tensor is treated as sequences of length 1, which
    # gives the 3-D (batch, seq_len, features) layout a batch_first RNN needs.
    if text.dim() == 2:
      text = text.unsqueeze(1)

    if optimizer is not None:
      optimizer.zero_grad()

    # The model returns (output, hidden); only the output is scored.
    output, _ = model(text)
    loss = criterion(output, target)

    if optimizer is not None:
      loss.backward()
      optimizer.step()

    running_loss += loss.item()

    # Accuracy: the predicted class is the arg-max over dimension 1.
    _, predicted = torch.max(output, 1)
    total += target.size(0)
    correct += (predicted == target).sum().item()

  return running_loss / len(loader), correct / total


# Training function
def train(model, device, train_loader, valid_loader, epochs, learning_rate):
  """Train ``model`` with Adam and cross-entropy, validating after each epoch.

  Args:
    model: module whose forward pass returns an ``(output, hidden)`` pair.
    device: device that every batch is moved to.
    train_loader: iterable of dict batches with "Sample" and "Class" entries.
    valid_loader: iterable of the same kind, used for validation.
    epochs: number of passes over ``train_loader``.
    learning_rate: learning rate handed to Adam.

  Returns:
    Four lists with one entry per epoch: training loss, training accuracy,
    validation loss and validation accuracy (accuracies are fractions).

  NOTE(review): the targets are used as class indices, one per output row;
  confirm this matches how the labels are encoded.
  """
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  train_loss, validation_loss = [], []
  train_acc, validation_acc = [], []

  for epoch in range(epochs):
    # Training pass.
    model.train()
    epoch_loss, epoch_acc = _run_epoch(model, device, train_loader,
                                       criterion, optimizer)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    print(f'Epoch: {epoch + 1}, '
          f'Training Loss: {epoch_loss:.4f}, '
          f'Training Accuracy: {100 * epoch_acc: .2f}%')

    # Validation pass: no gradients and no optimizer.
    model.eval()
    with torch.no_grad():
      epoch_loss, epoch_acc = _run_epoch(model, device, valid_loader,
                                         criterion)
    validation_loss.append(epoch_loss)
    validation_acc.append(epoch_acc)

    print(f'Validation Loss: {epoch_loss:.4f}, '
          f'Validation Accuracy: {100 * epoch_acc: .2f}%')

  return train_loss, train_acc, validation_loss, validation_acc

当我使用以下命令运行模型时,我收到下面提供的错误。提前感谢您的帮助。

# Model hyperparameters
# vocab_size = len(word_array)
input_size = 300   # 300-d word embeddings
output_size = 11   # 11 output dimensions
hidden_dim = 100
n_layers = 2
epochs = 10
learning_rate = 1e-3

# Seed first, then build the model and move it to the target device.
set_seed(SEED)
vanilla_rnn_model = VanillaRNN(input_size, output_size, hidden_dim, n_layers)
vanilla_rnn_model.to(DEVICE)

# Start the clock and run training, keeping the per-epoch histories.
vanilla_rnn_start_time = time.time()
(vanilla_train_loss,
 vanilla_train_acc,
 vanilla_validation_loss,
 vanilla_validation_acc) = train(
    vanilla_rnn_model,
    DEVICE,
    train_loader,
    valid_loader,
    epochs=epochs,
    learning_rate=learning_rate,
)

错误:(

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-31-bfd2f8f3456f> in <module>()
     19                                                                                                valid_loader,
     20                                                                                                epochs = epochs,
---> 21                                                                                                learning_rate = learning_rate)
     22 print("--- Time taken to train = %s seconds ---" % (time.time() - vanilla_rnn_start_time))
     23 #test_accuracy = test(vanilla_rnn_model, DEVICE, test_iter)

6 frames
<ipython-input-30-db1fa6c8b625> in train(model, device, train_loader, valid_loader, epochs, learning_rate)
     45       # add micro for coding training loop
     46       optimizer.zero_grad()
---> 47       output, hideden = model(text)
     48       print(output.shape, target.shape, target.view(-1).shape)
     49       loss = criterion(output, target.view(-1))

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-26-c34b90b3cbc3> in forward(self, x)
     21 
     22         # Passing in the input and hidden state into the model and obtaining outputs
---> 23         out, hidden = self.rnn(x, hidden)
     24 
     25         # Reshaping the outputs such that it can be fit into the fully connected layer

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
    263         assert hx is not None
    264         input = cast(Tensor, input)
--> 265         self.check_forward_args(input, hx, batch_sizes)
    266         _impl = _rnn_impls[self.mode]
    267         if batch_sizes is None:

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
    227 
    228     def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]):
--> 229         self.check_input(input, batch_sizes)
    230         expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
    231 

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
    201             raise RuntimeError(
    202                 'input must have {} dimensions, got {}'.format(
--> 203                     expected_input_dim, input.dim()))
    204         if self.input_size != input.size(-1):
    205             raise RuntimeError(

RuntimeError: input must have 3 dimensions, got 1

【问题讨论】:

请更新完整的错误回溯。 @Berriel 刚刚更新了 OP 以包含该信息。谢谢! 在调用 model(text) 之前,text 的形状是什么?顺便说一句,这个回溯被 IPython 截断了 :( 我粘贴了完整的错误信息,对此感到抱歉。 @Berriel text 的大小为:torch.Size([300]),target 的大小为 torch.Size([11]) 【参考方案1】:

首先,您需要将数据集包装在适当的数据加载器中,您可以执行以下操作:

from torch.utils.data import DataLoader

# [...]

# Pick a batch size; 4 is used here purely as an example.
batch_size = 4

# Wrap the dataset in a DataLoader so iteration yields batches
# (what used to be called train_loader is now train_dset).
train_dset = CustomDataset(X2, y)
# The original line ended with an extra ")" and did not parse.
train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)

此时,text 现在应该是[4, 300]

然后,你说你的序列长度等于1。要修复错误,你可以使用unsqueeze添加长度维度:

# [...]
# Insert a length-1 sequence axis at position 1 before calling the model.
seq_batch = text.unsqueeze(1)
output, hideden = model(seq_batch)
# [...]

现在,text 的形状应该是 [4, 1, 300],共有 3 个维度,这正是 RNN 前向调用所期望的输入(您的 RNN 设置了 batch_first=True):

输入:当 batch_first=False 时形状为 (L, N, H_in) 的张量,当 batch_first=True 时形状为 (N, L, H_in) 的张量,包含输入序列的特征。 (...)

【讨论】:

感谢@Berriel 的耐心和周到的反馈!

以上是关于Pytorch RNN 错误:RuntimeError:输入必须有 3 个维度得到 1的主要内容,如果未能解决你的问题,请参考以下文章

使用 Pytorch 前向传播 RNN

在 pytorch 中使用我自己的数据集训练简单的 RNN

Pytorch_循环神经网络RNN

PyTorch建立RNN相关模型

PyTorch nn.RNN 参数全解析

小白学习PyTorch教程九基于Pytorch训练第一个RNN模型