如何减少推力的元组（也返回一个元组）？

Posted 2023-03-23

技术标签:

【中文标题】如何减少推力的元组（也返回一个元组）？【英文标题】：How to do tuple reduction in thrust (also return a tuple)? 【发布时间】：2020-09-06 05:38:43 【问题描述】：

假设我有一个由两个thrust::device_vector 组成的元组。我希望输出是一个由两个标量组成的元组，它们分别是两个向量的和。例如，

input tuple consisting of two vectors:
a: 3, 5, 2 
b: 6, 1, 7
output tuple consisting of two scalars:
10
14

我认为这应该非常简单，但不知何故我还没有找到方法。我的代码

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

template<typename T>
struct TestTuplePlus

    __host__ __device__
    thrust::tuple<T, T> operator()(thrust::tuple<T, T>& t0, thrust::tuple<T, T>& t1)
    
            return thrust::make_tuple(thrust::get<0>(t0) + thrust::get<0>(t1), thrust::get<1>(t0) + thrust::get<1>(t1));
    
;

int main()

  thrust::device_vector<float> a(3, 0);
  thrust::device_vector<float> b(3, 0);

  a[0] = 3;
  a[1] = 5;
  a[2] = 2;
  b[0] = 6;
  b[1] = 1;
  b[2] = 7;

  auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

  // reduce to a tuple
  thrust::tuple<float, float> result = thrust::reduce(begin, end, thrust::make_tuple<float,float>(0,0), TestTuplePlus<float>()); // produce compilation error

  return 0;

编译错误：

/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/function.h(96): error: function "TestTuplePlus<T>::operator() [with T=float]" cannot be called with the given argument list
            argument types are: (thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, thrust::detail::tuple_of_iterator_references<float &, float &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>)
            object type is: TestTuplePlus<float>
          detected during:
            instantiation of "Result thrust::detail::wrapped_function<Function, Result>::operator()(Argument1 &, const Argument2 &) const [with Function=TestTuplePlus<float>, Result=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, Argument1=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, Argument2=thrust::detail::tuple_of_iterator_references<thrust::device_reference<float>, thrust::device_reference<float>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/detail/sequential/reduce.h(61): here
            instantiation of "OutputType thrust::system::detail::sequential::reduce(thrust::system::detail::sequential::execution_policy<DerivedPolicy> &, InputIterator, InputIterator, OutputType, BinaryFunction) [with DerivedPolicy=thrust::detail::seq_t, InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, OutputType=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/reduce.inl(71): here
            instantiation of "T thrust::reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, T, BinaryFunction) [with DerivedPolicy=thrust::detail::seq_t, InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/reduce.h(1022): here
            instantiation of "T thrust::cuda_cub::reduce_n(thrust::cuda_cub::execution_policy<Derived> &, InputIt, Size, T, BinaryOp) [with Derived=thrust::cuda_cub::tag, InputIt=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Size=signed long, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryOp=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/reduce.h(1037): here
            instantiation of "T thrust::cuda_cub::reduce(thrust::cuda_cub::execution_policy<Derived> &, InputIt, InputIt, T, BinaryOp) [with Derived=thrust::cuda_cub::tag, InputIt=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryOp=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/reduce.inl(71): here
            instantiation of "T thrust::reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, T, BinaryFunction) [with DerivedPolicy=thrust::cuda_cub::tag, InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/reduce.inl(186): here
            instantiation of "T thrust::reduce(InputIterator, InputIterator, T, BinaryFunction) [with InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]"

说实话，我不知道如何解决它。

我确实找到了这个post，但我也没有编译它。

无论如何，有没有一种简单的方法可以在 cuda 中减少元组？

【问题讨论】：

你的原始代码对我来说编译得很好，使用 cuda-10.1.243。 【参考方案1】：

编译错误的出现，奇怪的是，由于推力实际上将不同的元组类型传递给你的函子的第一个和第二个参数。这可以从这个差异中推断出来：

error: function "TestTuplePlus<T>::operator() [with T=float]" cannot be called with the given argument list
        argument types are: (thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, thrust::detail::tuple_of_iterator_references<float &, float &, thrust::null_type, ...

我们被告知的第一个论点：

argument types are: (thrust::tuple<float, float, thrust::null_type, ...

我们被告知第二个论点：

thrust::detail::tuple_of_iterator_references<float &, float &, thrust::null_type, ...

第一个元组包含两个 float 数量。第二个元组包含两个 float 引用。这些不是同一类型的包。因此，没有一个单一的改编：

thrust::tuple<T, T>

可以同时符合这两种类型。因此，您的模板化仿函数的单个实例化不可能同时符合两者。

我们可以通过允许模板化函子有两种模板化类型来解决这个问题，每个参数一个。下面的代码演示了一种可能的解决方案：

$ cat t1727.cu
#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

struct TestTuplePlus

    template<typename T, typename T1>
    __host__ __device__
    thrust::tuple<T, T> operator()(thrust::tuple<T, T> t0, thrust::tuple<T1, T1> t1)
    
            return thrust::make_tuple(thrust::get<0>(t0) + thrust::get<0>(t1), thrust::get<1>(t0) + thrust::get<1>(t1));
    
;

int main()

  thrust::device_vector<float> a(3, 0);
  thrust::device_vector<float> b(3, 0);

  a[0] = 3;
  a[1] = 5;
  a[2] = 2;
  b[0] = 6;
  b[1] = 1;
  b[2] = 7;

  auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

  // reduce to a tuple
  thrust::tuple<float, float> result = thrust::reduce(begin, end, thrust::make_tuple<float,float>(0,0), TestTuplePlus()); // produce compilation error
  std::cout << "a sum: " << thrust::get<0>(result) << " b sum: " << thrust::get<1>(result) << std::endl;
  return 0;

$ nvcc -std=c++11 t1727.cu -o t1727
$ ./t1727
a sum: 10 b sum: 14
$

（CUDA 10.1.243）

我确信其他方法也是可行的。请注意，我选择将运算符本身而不是整个结构模板化。这消除了在主机代码中指定模板类型的需要。同样，我确信其他方法也是可行的。

我将无法回答与“为什么推力会以这种方式工作？”相关的问题

如果您觉得这种行为很麻烦，您可能希望提交thrust issue。

我不声明此代码或我发布的任何其他代码的正确性。使用我发布的任何代码的任何人都需要自担风险。我只是声称我试图解决原始帖子中的问题，并提供一些解释。我并不是说我的代码没有缺陷，或者它适用于任何特定目的。使用（或不使用）风险自负。

【讨论】：

【参考方案2】：

编译错误是由于您的仿函数中缺少“const”限定符，即它应该是：

thrust::tuple<T, T> operator()(const thrust::tuple<T, T>& t0, const thrust::tuple<T, T>& t1)

【讨论】：

以上是关于如何减少推力的元组（也返回一个元组）？的主要内容，如果未能解决你的问题，请参考以下文章