即使在初始化结果参数之后，CUDA atomicAdd也会产生错误的结果

Question

atomicAdd产生错误的结果。我甚至在设备中初始化结果但仍然无法正常工作。这是为什么？

这是我调用的函数：

__global__
void getHammingDistance(char *str1, char *str2, int *result)
{
    int idx = blockIdx.x*blockDim.x+threadIdx.x;

    if (idx < 3)
    {
        if (str1[idx] != str2[idx])
        {
            atomicAdd(result, 1);

        }
    }
}

但atomicAdd给出了错误的结果。

int getDist()
{

    int k = 9;
    int min = INT_MAX;
    char *dev_str1, *dev_str2;
    int *dev_result;
    int blockSize = 1024;
    int gridSize = (int) ceil((float) 4 / blockSize);
    int result = 0;


    char *str1 = "AAA";
    char *str2 = "ATG";


    cudaMalloc((void**) &dev_str1, sizeof(char)  *20);
    cudaMalloc((void**) &dev_str2, sizeof(char)  * 20);
    cudaMalloc((void**) &dev_result, sizeof(int) * 10);

    cudaMemcpy(dev_str1, &str1, 20 * sizeof(char), cudaMemcpyHostToDevice);
    cudaMemcpy(dev_str2, &str2, 20 * sizeof(char), cudaMemcpyHostToDevice);
    cudaMemcpy(dev_result, &result,  10 * sizeof(int), cudaMemcpyHostToDevice);

    getHammingDistance<<<gridSize, blockSize>>>(dev_str1, dev_str2, dev_result);
    cudaMemcpy(&result, dev_result, 10 * sizeof(int), cudaMemcpyDeviceToHost);

    cout << result;
    cudaFree(dev_str1);
    cudaFree(dev_str2);
    cudaFree(dev_result);


    return result;
}

这是为什么？它应该工作。