错误：Thrust 程序中未定义标识符“atomicOr”

Posted 2023-03-23

技术标签:

【中文标题】错误：Thrust 程序中未定义标识符“atomicOr”【英文标题】：error : identifier "atomicOr" is undefined in Thrust program 【发布时间】：2014-10-04 18:52:57 【问题描述】：

我发现，在用 Visual Studio 2012 编译的 Thrust 程序中，CUDA 的 atomicOr 函数无法被识别。

我已经读到在调用 NVidia nvcc 编译器时应该已经包含所有头文件。大多数关于这个问题的帖子都表明这一定意味着架构设置不正确。

我已经根据其他帖子使用这些设置进行了尝试： How to set CUDA compiler flags in Visual Studio 2010?

...以及使用： http://s1240.photobucket.com/user/fireshot8888/media/cuda_settings.png.html

main.cpp:

#include <thrust/device_vector.h>
#include <cstdlib>
#include <iostream>

#include "cuda.h"

using namespace std;

//Visual C++ compiled main function to launch the GPU calling code
int main(int argc, char *argv[])

    //Just some random data hand keyed to make it a complete example for stack overflow while not being too complicated
    float data[] = 1.2, 3.4, 3.4, 3.3, 4.4, 4.4, 4.4, 3.4, 4.4, 4.4,
    1.2, 3.4, 3.4, 3.3, 4.4, 4.4, 4.4, 3.4, 4.4, 4.4;

    thrust::host_vector<float> h_data(data, data+20);   //Holds the contents of the file as they are read; it will be cleared once we are done with it.

    const int numVars = 10;
    int numBins = 4;
    int rowCount = 2;

    doHistogramGPU(numVars, h_data, numBins, rowCount);


    return 0;

cuda.cu:

#include "cuda.h"

#include <iostream>

#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>

//I GAVE THIS A TRY BUT IT DID NOT FIX MY ISSUE::::
#include <cuda_runtime.h>
#include <cuda.h>

using namespace std;

//Function to call the kernel
void doHistogramGPU(int numVars, thrust::host_vector<float> h_buffer, int numBins, int numRecords)

    int dataSize = sizeof(BYTE_UNIT);
    int shiftSize = dataSize - 1;

    thrust::device_vector<float> d_buffer(h_buffer.begin(), h_buffer.end());

    int bitVectorSize = ceil(numRecords * numVars / (float)dataSize);

    thrust::device_vector<BYTE_UNIT> d_bitData(bitVectorSize * numBins);

    thrust::counting_iterator<int> counter(0);
    auto zipInFirst = thrust::make_zip_iterator(thrust::make_tuple(d_buffer.begin(), counter));
    auto zipInLast = thrust::make_zip_iterator(thrust::make_tuple(d_buffer.end(), counter + d_buffer.size()));


    float minValues[] = 579.8, 72.16, 0.000385, 7.576e-005, 6.954e-005, 0, 0, 2.602e-012, 1.946e-013, 7.393e-015;
    float maxValues[] = 1053, 22150, 0.7599, 0.7596, 0.24, 0.2398, 0.1623, 1.167e-007, 4.518e-006, 5.322e-008;

    //Get things loaded onto the device then call the kernel
    thrust::device_vector<float> d_minValues(minValues, minValues+10);
    thrust::device_vector<float> d_maxValues(maxValues, maxValues+10);

    thrust::device_ptr<float> minDevPtr = &d_minValues[0];
    thrust::device_ptr<float> maxDevPtr = &d_maxValues[0];
    thrust::device_ptr<BYTE_UNIT> dataDevPtr = &d_bitData[0];

    //Invoke the Thrust Kernel
    thrust::for_each(zipInFirst, zipInLast, BinFinder(thrust::raw_pointer_cast(dataDevPtr), thrust::raw_pointer_cast(minDevPtr), thrust::raw_pointer_cast(maxDevPtr), numVars, numBins, numRecords));

    cout << endl;

    return;

cuda.h:

#ifndef CUDA_H
#define CUDA_H

#include <thrust/device_vector.h>
#include <iostream>

//I tried these here, too...
#include <cuda_runtime.h>
#include <cuda.h>


using namespace std;


// Storage unit used for the packed bit vectors.
// NOTE(review): the width of `long` is platform dependent — confirm it is 32 bits on the target.
typedef long BYTE_UNIT; //32 bit storage

// Host-callable entry point that runs the binning on the GPU; defined in cuda.cu.
void doHistogramGPU(int numvars, thrust::host_vector<float> h_buffer, int numBins, int numRecords);

struct BinFinder

    BYTE_UNIT * data;
    float * rawMinVector;
    float * rawMaxVector;
    int numVars;
    int numBins;
    int numRecords;


    BinFinder(BYTE_UNIT * data, float * rawMinVector, float * rawMaxVector, int numVars, int numBins, int numRecords)
    
        this -> data = data;
        this -> rawMinVector = rawMinVector;
        this -> rawMaxVector = rawMaxVector;
        this -> numVars = numVars;
        this -> numBins = numBins;
        this -> numRecords = numRecords;
    

    //This kernel converts the multidimensional bin representation to a single dimensional representation
    template <typename Tuple>
    __device__ void operator()( Tuple param ) 
    
        int dataSize = sizeof(BYTE_UNIT);
        int shiftSize = dataSize - 1;

        int bitVectorSize = ceil(numRecords * numVars / float(dataSize));


        float value = thrust::get<0>(param);
        int id = thrust::get<1>(param); 

        //Look up the min and max values for this data column using the index
        float min = rawMinVector[id % numVars];
        float max = rawMaxVector[id % numVars];


        //Calculate the bin id
        float percentage = (value - min) / float(max - min);

        char bin = percentage * numBins;

        if (bin == numBins)
        
            bin--;
        

        //////////////////////////////////////////////////////////////
        //Set a 1 in the appropriate bitvector for the calculated bin
        //////////////////////////////////////////////////////////////      

        //What I originally tried to do that appeared to have generated race conditions (using data from a file):
        //data[bin * bitVectorSize + id / dataSize] |= (1 << (shiftSize - id % dataSize));   

        //What I've been trying to do now that generates a compilation error:
        atomicOr(data + (bin * bitVectorSize + id / dataSize), 1 << (shiftSize - id % dataSize)); //<----THIS DOESN'T COMPILE!!!!!!!!!



;


#endif

cuda.cu（包括我的 cuda.h 文件）的 nvcc 命令：

 "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/bin/nvcc.exe" "C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu" -c -o "C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/build/CMakeFiles/CudaLib.dir//Debug/CudaLib_generated_cuda.cu.obj" -ccbin "C:/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin" -m64 -Xcompiler ,\"/DWIN32\",\"/D_WINDOWS\",\"/W3\",\"/GR\",\"/EHsc\",\"/D_DEBUG\",\"/MDd\",\"/Zi\",\"/Ob0\",\"/Od\",\"/RTC1\" -DNVCC "-IC:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/include" "-IC:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/include"

nvcc 的完整错误输出：

1>nvcc : 警告 : 'compute_10' 和 'sm_10' 架构已弃用，可能会在未来的版本中删除。 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(107)：警告：变量“minValues”已声明但从未被引用 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(108)：警告：变量“maxValues”已声明但从未被引用 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(462)：警告：变量“shiftSize”已声明但从未被引用 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(602): 警告：对非常量的引用的初始值必须是左值 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(618)：警告：无法访问代码中的动态初始化 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(522)：警告：变量“shiftSize”已声明但从未被引用 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(975)：警告：对非常量的引用的初始值必须是左值 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(993)：警告：对非常量的引用的初始值必须是左值 1> 1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(1022)：警告：变量“shiftSize”已声明但从未被引用 1> 1>c:\users\datahead8888\documents\visual studio 2012\projects\thrust-space-data\src\cuda.h(188): 错误：标识符“atomicOr”未定义 1> 在以下期间检测到： 1> 实例化“void BinFinder::operator()(Tuple) [with Tuple=thrust::detail::tuple_of_iterator_references]” 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/detail/function.h(119): 这里 1> 实例化“结果推力::detail::device_function::operator()(const Argument &) const [with Function=BinFinder, Result=void, Argument=thrust::detail::tuple_of_iterator_references, int,thrust::null_type , 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type>]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/for_each.inl(82): 这里 1> 
实例化“thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure::result_typethrust::system::cuda::detail::for_each_n_detail::for_each_n_closure::operator()() [with RandomAccessIterator =thrust::zip_iterator>，thrust::counting_iterator，thrust::null_type，thrust::null_type，thrust::null_type，thrust::null_type，thrust::null_type，thrust::null_type，thrust::null_type，thrust:: null_type>>, Size=unsigned int, UnaryFunction=BinFinder, Context=thrust::system::cuda::detail::detail::blocked_thread_array]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/detail/launch_closure.inl(49): 这里 1> 实例化“无效推力::system::cuda::detail::detail::launch_closure_by_value(Closure) [with Closure=thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure>，推力::计数迭代器，推力：：null_type，推力：：null_type，推力：：null_type，推力：：null_type，推力：：null_type，推力：：null_type，推力：：null_type，推力：：null_type>>，无符号整数，BinFinder，推力::system::cuda::detail::detail::blocked_thread_array>]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/detail/launch_closure.inl(77)：这里 1> 实例化“thrust::system::cuda::detail::detail::closure_launcher_base::launch_function_tthrust::system::cuda::detail::detail::closure_launcher_base::get_launch_function() [with Closure=thrust ::system::cuda::detail::for_each_n_detail::for_each_n_closure>，thrust::counting_iterator，thrust::null_type，thrust::null_type，thrust::null_type，thrust::null_type，thrust::null_type，thrust:: null_type、thrust::null_type、thrust::null_type>>、无符号整数、BinFinder、thrust::system::cuda::detail::detail::blocked_thread_array>、launch_by_value=true]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/detail/launch_closure.inl(185)：这里 1> [未显示 2 个实例化上下文] 1> 实例化“thrust::tuplethrust::system::cuda::detail::for_each_n_detail::configure_launch(Size) [with Closure=thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure>,推力::counting_iterator, 推力::null_type, 
推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type>>, 无符号整数, BinFinder,thrust::system::cuda::detail::detail::blocked_thread_array>, Size=long long]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/for_each.inl(163)：这里 1> 实例化“RandomAccessIteratorthrust::system::cuda::detail::for_each_n(thrust::system::cuda::detail::execution_policy &, RandomAccessIterator, Size, UnaryFunction) [with DerivedPolicy=thrust::system: :cuda::detail::tag、RandomAccessIterator=thrust::zip_iterator>、thrust::counting_iterator、thrust::null_type、thrust::null_type、thrust::null_type、thrust::null_type、thrust::null_type、thrust:: null_type、thrust::null_type、thrust::null_type>>、Size=long long、UnaryFunction=BinFinder]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/for_each.inl(191)：这里 1> 实例化“RandomAccessIteratorthrust::system::cuda::detail::for_each(thrust::system::cuda::detail::execution_policy &, RandomAccessIterator, RandomAccessIterator, UnaryFunction) [with DerivedPolicy=thrust::system: :cuda::detail::tag、RandomAccessIterator=thrust::zip_iterator>、thrust::counting_iterator、thrust::null_type、thrust::null_type、thrust::null_type、thrust::null_type、thrust::null_type、thrust:: null_type、thrust::null_type、thrust::null_type>>、UnaryFunction=BinFinder]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/detail/for_each.inl(43): 这里 1> 实例化“InputIterator 推力::for_each(const推力::detail::execution_policy_base &, InputIterator, InputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=thrust:: zip_iterator>, 推力::counting_iterator, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type, 推力::null_type>>, UnaryFunction=BinFinder]" 1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/detail/for_each.inl(57): 这里 1> 
实例化“InputIteratorthrust::for_each(InputIterator, InputIterator, UnaryFunction) [with InputIterator=thrust::zip_iterator>,thrust::counting_iterator,thrust::null_type,thrust::null_type,thrust::null_type,thrust:: null_type、thrust::null_type、thrust::null_type、thrust::null_type、thrust::null_type>>、UnaryFunction=BinFinder]" 1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(597)：这里 1> 1> 在“C:/Users/DATAHE~1/AppData/Local/Temp/tmpxft_00001f78_00000000-8_cuda.cpp1.ii”的编译中检测到1个错误。 1> cuda.cu

【问题讨论】：

请提供演示问题的完整代码。另外，请编辑您的问题，以包括由 Visual Studio 发出的用于编译此文件的确切编译命令，以及确切的错误输出。另请注意，如果您是在字节指针上执行 atomicOr，CUDA 并没有提供这样的函数（no such function）。（我不知道 BYTE_UNIT 是什么，因为您没有提供完整的代码。）我创建了一个更简单的示例来显示编译问题，并讨论了我为什么一开始要使用 atomicOr。请编辑您的问题以包括由 Visual Studio 发出的用于编译此文件的确切编译命令，以及确切的错误输出。这应该只是从 Visual Studio 输出窗口复制粘贴的简单操作。我添加了 nvcc 命令和带有错误的详细输出。我实际上使用的是该项目的原始版本，因为在该项目的精简副本中，我无法让 CUDA 选项显示在 Visual Studio 里。我不确定在 Stack Overflow 中格式化输出的简单方法——看起来需要使用 >，并在每行 nvcc 输出的开头添加空格。我想一个很好的问题是：为什么在我按照屏幕截图和我链接的其他网站那样配置设置之后，它仍然会打印关于 compute_10 和 sm_10 架构的警告。 【参考方案1】：

atomicOr 未定义的原因是：您没有正确指定项目设置，使其针对支持原子操作的架构（cc1.1 或更高版本）进行编译。

您需要修改编译操作的设置，使其针对一个既受您的 GPU 支持、又支持原子操作的架构进行编译。

您的编译命令根本不包括架构开关，因此目标是默认架构 (cc1.0)。此架构不支持原子操作，并且在 CUDA 6 中已弃用，因此编译器会发出警告，让您知道您正在为已弃用的架构进行编译。

您需要研究可用的问题和文档以了解如何设置目标架构，并且您必须确保不包含 cc1.0，否则编译将失败。（例如，在您链接的this question 中，使用答案中讨论的方法，而不是问题中的方法。问题中描述的方法不起作用。阅读所有答案，注意可以进行此设置的项目属性位置和文件特定位置。）

如果您在配置这些设置时遇到困难，可以尝试打开一个依赖原子操作的 CUDA 示例项目，例如 simple atomic intrinsics，从该项目中删除现有代码，并将您的代码放入其中。这样，您就可以沿用该项目中已经正确配置、可使用原子操作的项目设置。

【讨论】：

以上是关于错误：Thrust 程序中未定义标识符“atomicOr”的主要内容，如果未能解决你的问题，请参考以下文章