RuntimeError: module must have its parameters and buffers on device cuda:1 (device_ids[0]) but found one of them on device: cpu

Posted: 2021-03-26 18:34:38

Question: I am wrapping all of my models in the nn.DataParallel class in my code, and it raises an ambiguous error.
The code:
import time
import os
import argparse
import numpy as np
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as LS
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as data
from torchvision import transforms
parser = argparse.ArgumentParser()
parser.add_argument(
    '--batch-size', '-N', type=int, default=16, help='batch size')
parser.add_argument(
    '--train', '-f', required=True, type=str, help='folder of training images')
parser.add_argument(
    '--max-epochs', '-e', type=int, default=4, help='max epochs')
parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
parser.add_argument('--cuda', '-g', action='store_true', help='enables cuda')
parser.add_argument(
    '--iterations', type=int, default=16, help='unroll iterations')
parser.add_argument('--checkpoint', type=int, help='unroll iterations')
args = parser.parse_args()
## load 32x32 patches from images
import dataset
train_transform = transforms.Compose([
    transforms.ToTensor(),
])
train_set = dataset.ImageFolder(root=args.train, transform=train_transform)
train_loader = data.DataLoader(
    dataset=train_set, batch_size=args.batch_size, shuffle=True, num_workers=1)
print('total images: {}; total batches: {}'.format(
    len(train_set), len(train_loader)))
## load networks on GPU
import network
print("Devices are ", torch.cuda.device_count())
torch.cuda.set_device(1)
encoder = nn.DataParallel(network.EncoderCell(), device_ids = [1, 3])
binarizer = nn.DataParallel(network.Binarizer(), device_ids = [1, 3])
decoder = nn.DataParallel(network.DecoderCell(), device_ids = [1, 3])
solver = optim.Adam(
    [
        {'params': encoder.parameters()},
        {'params': binarizer.parameters()},
        {'params': decoder.parameters()},
    ],
    lr=args.lr)
def resume(epoch=None):
    if epoch is None:
        s = 'iter'
        epoch = 0
    else:
        s = 'epoch'

    encoder.load_state_dict(
        torch.load('checkpoint/encoder_{}_{:08d}.pth'.format(s, epoch)))
    binarizer.load_state_dict(
        torch.load('checkpoint/binarizer_{}_{:08d}.pth'.format(s, epoch)))
    decoder.load_state_dict(
        torch.load('checkpoint/decoder_{}_{:08d}.pth'.format(s, epoch)))
def save(index, epoch=True):
    if not os.path.exists('checkpoint'):
        os.mkdir('checkpoint')

    if epoch:
        s = 'epoch'
    else:
        s = 'iter'

    torch.save(encoder.state_dict(),
               'checkpoint/encoder_{}_{:08d}.pth'.format(s, index))
    torch.save(binarizer.state_dict(),
               'checkpoint/binarizer_{}_{:08d}.pth'.format(s, index))
    torch.save(decoder.state_dict(),
               'checkpoint/decoder_{}_{:08d}.pth'.format(s, index))
# resume()
scheduler = LS.MultiStepLR(solver, milestones=[3, 10, 20, 50, 100], gamma=0.5)
last_epoch = 0
if args.checkpoint:
    resume(args.checkpoint)
    last_epoch = args.checkpoint
    scheduler.last_epoch = last_epoch - 1
for epoch in range(last_epoch + 1, args.max_epochs + 1):

    scheduler.step()

    for batch, data in enumerate(train_loader):
        batch_t0 = time.time()

        ## init lstm state
        encoder_h_1 = (Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()),
                       Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()))
        # print(encoder_h_1)
        encoder_h_2 = (Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()))
        encoder_h_3 = (Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()))

        decoder_h_1 = (Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()))
        decoder_h_2 = (Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()))
        decoder_h_3 = (Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()),
                       Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()))
        decoder_h_4 = (Variable(torch.zeros(data.size(0), 128, 16, 16).cuda()),
                       Variable(torch.zeros(data.size(0), 128, 16, 16).cuda()))

        patches = Variable(data.cuda())

        solver.zero_grad()

        losses = []
        res = patches - 0.5

        bp_t0 = time.time()
        for _ in range(args.iterations):
            encoded, encoder_h_1, encoder_h_2, encoder_h_3 = encoder(
                res, encoder_h_1, encoder_h_2, encoder_h_3)

            codes = binarizer(encoded)

            output, decoder_h_1, decoder_h_2, decoder_h_3, decoder_h_4 = decoder(
                codes, decoder_h_1, decoder_h_2, decoder_h_3, decoder_h_4)

            res = res - output
            losses.append(res.abs().mean())
        bp_t1 = time.time()

        loss = sum(losses) / args.iterations
        loss.backward()

        solver.step()

        batch_t1 = time.time()

        print(
            '[TRAIN] Epoch[{}]({}/{}); Loss: {:.6f}; Backpropagation: {:.4f} sec; Batch: {:.4f} sec'.
            format(epoch, batch + 1,
                   len(train_loader), loss.data, bp_t1 - bp_t0, batch_t1 -
                   batch_t0))
        print(('{:.4f} ' * args.iterations +
               '\n').format(*[l.data for l in losses]))

        index = (epoch - 1) * len(train_loader) + batch

        ## save checkpoint every 500 training steps
        if index % 500 == 0:
            save(0, False)

    save(epoch)
The traceback:
total images: 9271670; total batches: 579480
Devices are 4
/data1/khawar/khawar/Conference/CVPR/lib/python3.5/site-packages/torch/optim/lr_scheduler.py:82: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule.See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
"https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
Traceback (most recent call last):
File "train.py", line 152, in <module>
res, encoder_h_1, encoder_h_2, encoder_h_3)
File "/data1/khawar/khawar/Conference/CVPR/lib/python3.5/site-packages/torch/nn/modules/module.py", line 547, in __call__
result = self.forward(*input, **kwargs)
File "/data1/khawar/khawar/Conference/CVPR/lib/python3.5/site-packages/torch/nn/parallel/data_parallel.py", line 146, in forward
"them on device: ".format(self.src_device_obj, t.device))
RuntimeError: module must have its parameters and buffers on device cuda:1 (device_ids[0]) but found one of them on device: cpu
Comments:

Some of your tensors are set to run on CUDA and some are not. You cannot perform computations with tensors that are not on the same device, so check every tensor you create and move it to the device you want.

Yes, but how do I convert this code fully to nn.DataParallel? I have four GPUs. The code runs fine on 1 GPU, but one epoch takes 6 days.

Answer 1: Move the DataParallel modules to CUDA memory:
encoder = nn.DataParallel(network.EncoderCell(), device_ids = [1, 3]).cuda()
binarizer = nn.DataParallel(network.Binarizer(), device_ids = [1, 3]).cuda()
decoder = nn.DataParallel(network.DecoderCell(), device_ids = [1, 3]).cuda()
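A rough sketch of why this works, reusing the names from the question's own script (network, data): nn.DataParallel only scatters the inputs across device_ids; the wrapped module's parameters and buffers must already live on device_ids[0], here cuda:1, before forward() is called, otherwise exactly this RuntimeError is raised. Because the script calls torch.cuda.set_device(1), a bare .cuda() already lands on cuda:1, but an explicit device object keeps the intent visible:

# Sketch only, under the assumptions above; EncoderCell/Binarizer/DecoderCell
# are the modules defined in the question's network.py.
device = torch.device('cuda:1')    # must match device_ids[0]
torch.cuda.set_device(device)      # so bare .cuda() calls also target cuda:1

encoder = nn.DataParallel(network.EncoderCell().to(device), device_ids=[1, 3])
binarizer = nn.DataParallel(network.Binarizer().to(device), device_ids=[1, 3])
decoder = nn.DataParallel(network.DecoderCell().to(device), device_ids=[1, 3])

# Inputs and the LSTM state tuples are scattered by DataParallel itself,
# but creating them on the same primary device keeps everything consistent:
patches = data.to(device)

The same applies to the hidden-state tensors created inside the training loop: with set_device(1) in place, their existing .cuda() calls already put them on cuda:1.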