无法使用 Cuda 引用编译 dll

Posted 2023-02-16

技术标签:

【中文标题】无法使用 Cuda 引用编译 dll【英文标题】：Can't compile dll with Cuda references 【发布时间】：2017-03-12 19:54:54 【问题描述】：

我正在拼命地尝试用 CUDA 函数制作一个 dll，但我无法让它工作。

我尝试按照这里（Creating DLL from CUDA using nvcc）介绍的方法进行编译，但出现了以下错误：

nvcc : 
warning: __declspec attributes ignored
At line:1 char:1
+ nvcc -o  ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : NotSpecified: (...ributes ignored:String) [], RemoteException
    + FullyQualifiedErrorId : NativeCommandError



...\kernel.cu(81): warning: __declspec attributes ignored


...\cudaFFT.h(21): warning: __declspec attributes ignored


.../kernel.cu(81): warning: __declspec attributes ignored


nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
kernel.cu
   Création de la bibliothèque C:/Users/alombet/Documents/Visual Studio 2015/Projects/Test/kernel.lib et de l'objet C:/Users/alombet/Documents/Visual Studio 2015/Projects/Test/kernel.exp
tmpxft_00003b9c_00000000-30_kernel.obj : error LNK2019: symbole externe non résolu cufftPlan1d référencé dans la fonction AllocateMemoryForFFTs
tmpxft_00003b9c_00000000-30_kernel.obj : error LNK2019: symbole externe non résolu cufftExecD2Z référencé dans la fonction ComputeFFT
tmpxft_00003b9c_00000000-30_kernel.obj : error LNK2019: symbole externe non résolu cufftDestroy référencé dans la fonction DeAllocateMemoryForFFTs
C:/Users/alombet/Documents/Visual Studio 2015/Projects/Test/kernel.dll : fatal error LNK1120: 3 externes non résolus

首先，__declspec 似乎被忽略了；其次，链接时似乎找不到我用到的 CUDA 库（cuFFT）函数。我真的不习惯手动编译，通常都是依靠 IDE 来完成，所以在这里完全不知所措。

代码如下：

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <iostream>
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>


// Mark this translation unit as the one that builds the DLL, so that
// LIBRARY_API expands to __declspec(dllexport) below.
#define LIBRARY_EXPORTS 1

// LIBRARY_API: dllexport when LIBRARY_EXPORTS is defined, dllimport otherwise.
// LIBRARY_EXPORTS is defined unconditionally just above, so the dllimport
// branch is never taken in this file.
#ifdef LIBRARY_EXPORTS  
#define LIBRARY_API __declspec(dllexport)   
#else  
#define LIBRARY_API __declspec(dllimport)   
#endif 

#include "cudaFFT.h"

#ifdef __cplusplus
extern "C" 
#endif

/**
 * Returns the number of CUDA devices reported by cudaGetDeviceCount().
 *
 * Returns 0 when the query fails, so the caller never receives an
 * uninitialized count.
 *
 * LIBRARY_API is written before the return type: with the attribute placed
 * after it, nvcc reported "__declspec attributes ignored" for this file.
 */
LIBRARY_API int __cdecl numberOfGpus()
{
    int nDevices = 0;
    if (cudaGetDeviceCount(&nDevices) != cudaSuccess)
        return 0;

    return nDevices;
}



// Buffers for the real-valued input samples: host_input is allocated with
// cudaMallocHost and device_input with cudaMalloc in AllocateMemoryForFFTs.
cufftDoubleReal *host_input;
cufftDoubleReal *device_input;

// Buffers for the complex FFT output, allocated the same way as the inputs.
cufftDoubleComplex *host_output;
cufftDoubleComplex *device_output;

// cuFFT plan created by cufftPlan1d (CUFFT_D2Z) in AllocateMemoryForFFTs and
// destroyed by cufftDestroy in DeAllocateMemoryForFFTs.
cufftHandle plan;




/**
 * Allocates the host and device buffers and creates the batched 1-D
 * double-precision real-to-complex (CUFFT_D2Z) plan.
 *
 * @param maxSize   Transform length passed to cufftPlan1d; must be > 0.
 * @param maxBatch  Number of transforms in the batch; must be > 0.
 * @return cudaSuccess on success. cudaErrorInvalidValue for non-positive
 *         arguments, the failing CUDA allocation error, or - as in the
 *         original code - the cufftResult value cast to cudaError when plan
 *         creation fails (NOTE(review): that value is a cuFFT code, not a
 *         genuine CUDA runtime error).
 *
 * The file-scope buffer pointers are only updated when every step succeeds;
 * on failure everything allocated by this call is released so nothing leaks.
 *
 * LIBRARY_API is written before the return type: with the attribute placed
 * after it, nvcc reported "__declspec attributes ignored" for this file.
 */
LIBRARY_API cudaError __cdecl AllocateMemoryForFFTs(int maxSize, int maxBatch)
{
    if (maxSize <= 0 || maxBatch <= 0)
        return cudaErrorInvalidValue;

    const size_t width = (size_t)maxSize;
    const size_t height = (size_t)maxBatch;
    const size_t inputBytes = sizeof(cufftDoubleReal) * width * height;
    // A real-to-complex transform of length N yields N/2 + 1 complex values.
    const size_t outputBytes = sizeof(cufftDoubleComplex) * (width / 2 + 1) * height;

    cufftDoubleReal *hostIn = NULL;
    cufftDoubleComplex *hostOut = NULL;
    cufftDoubleReal *devIn = NULL;
    cufftDoubleComplex *devOut = NULL;

    // Each step runs only if all previous ones succeeded.
    cudaError err = cudaMallocHost((void **)&hostIn, inputBytes);
    if (err == cudaSuccess)
        err = cudaMallocHost((void **)&hostOut, outputBytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void **)&devIn, inputBytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void **)&devOut, outputBytes);
    if (err == cudaSuccess) {
        const cufftResult res = cufftPlan1d(&plan, maxSize, CUFFT_D2Z, maxBatch);
        if (res != CUFFT_SUCCESS)
            err = (cudaError)res;
    }

    if (err != cudaSuccess) {
        // Roll back whatever this call managed to allocate.
        if (devOut != NULL)
            cudaFree(devOut);
        if (devIn != NULL)
            cudaFree(devIn);
        if (hostOut != NULL)
            cudaFreeHost(hostOut);
        if (hostIn != NULL)
            cudaFreeHost(hostIn);
        return err;
    }

    host_input = hostIn;
    host_output = hostOut;
    device_input = devIn;
    device_output = devOut;
    return cudaSuccess;
}
    


/**
 * Returns the host input buffer (host_input) that ComputeFFT copies to the
 * device; the caller fills it with the samples to transform.
 *
 * LIBRARY_API is written before the return type: with the attribute placed
 * after `double*`, nvcc reported "__declspec attributes ignored".
 */
LIBRARY_API double* __cdecl GetInputDataPointer()
{
    return host_input;
}
    

/**
 * Copies the host input buffer to the device, executes the D2Z plan and
 * copies the complex spectrum back to the host output buffer.
 *
 * @param size    Number of real samples per transform; must be >= 0.
 * @param batch   Number of transforms; must be >= 0.
 * @param result  Out-parameter that receives host_output; must not be NULL.
 * @return cudaSuccess on success. cudaErrorInvalidValue for a NULL result
 *         pointer or negative sizes, the failing cudaMemcpy error, or - as in
 *         the original code - the cufftResult value cast to cudaError when
 *         the transform fails.
 *
 * NOTE(review): the plan is created in AllocateMemoryForFFTs with maxSize and
 * maxBatch; size and batch only control how many bytes are copied here.
 * Presumably callers are expected to pass the same values - confirm.
 *
 * LIBRARY_API is written before the return type: with the attribute placed
 * after it, nvcc reported "__declspec attributes ignored" for this file.
 */
LIBRARY_API cudaError __cdecl ComputeFFT(int size, int batch, double2** result)
{
    if (result == NULL || size < 0 || batch < 0)
        return cudaErrorInvalidValue;

    const size_t inputBytes = sizeof(cufftDoubleReal) * (size_t)size * (size_t)batch;
    // A real-to-complex transform of length N yields N/2 + 1 complex values.
    const size_t outputBytes = sizeof(cufftDoubleComplex) * (size_t)(size / 2 + 1) * (size_t)batch;

    cudaError err = cudaMemcpy(device_input, host_input, inputBytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
        return err;

    const cufftResult res = cufftExecD2Z(plan, device_input, device_output);
    if (res != CUFFT_SUCCESS)
        return (cudaError)res;

    err = cudaMemcpy(host_output, device_output, outputBytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        return err;

    *result = host_output;
    return cudaSuccess;
}
    

/**
 * Destroys the cuFFT plan and releases the host and device buffers.
 *
 * Every buffer pointer is reset to NULL after it is freed, so a second call
 * does not free the same memory twice and GetInputDataPointer no longer
 * returns a dangling pointer.
 *
 * LIBRARY_API is written before the return type: with the attribute placed
 * after it, nvcc reported "__declspec attributes ignored" for this file.
 */
LIBRARY_API void __cdecl DeAllocateMemoryForFFTs()
{
    cufftDestroy(plan);

    if (device_input != NULL) {
        cudaFree(device_input);
        device_input = NULL;
    }
    if (device_output != NULL) {
        cudaFree(device_output);
        device_output = NULL;
    }
    if (host_input != NULL) {
        cudaFreeHost(host_input);
        host_input = NULL;
    }
    if (host_output != NULL) {
        cudaFreeHost(host_output);
        host_output = NULL;
    }
}
    

#ifdef __cplusplus

#endif

【问题讨论】：

【参考方案1】：

好的，我发现了我的问题，我将解决方案留在这里以防它可以帮助某人。

我从 .cu 中删除了 LIBRARY_API 关键字；在 .h 中，我将 LIBRARY_API 移到了每个声明的开头。然后我更改了 VS 中的项目属性以生成 dll，并让 VS 来完成编译。

【讨论】：

以上是关于无法使用 Cuda 引用编译 dll的主要内容，如果未能解决你的问题，请参考以下文章