使用带步幅的 Thrust 计数迭代器
Posted
技术标签:
【中文标题】使用带步幅的 Thrust 计数迭代器【英文标题】:Using Thrust counting iterators with strides 【发布时间】:2014-09-11 06:40:05 【问题描述】:我正在寻找一种使用 thrust::counting_iterator 的方法,以便并行化以下 for 循环:
// Sequential loop to be parallelized: stride takes the values
// 0, M, 2*M, ..., (N-1)*M, i.e. N iterations in total.
for (int stride = 0; stride < N * M; stride += M) {
    // Body of the loop
}
代码如下:
struct functor ()
__host__ __device__ void operator() (const int i)
// Body of the loop
thrust::counting_iterator<int> it1(0);
thrust::counting_iterator<int> it2 = it1 + N * M;
thrust::for_each (it1 , it2 , functor());
我知道 counting_iterator 每次将迭代器递增 1,那么有没有办法让它每次递增 M?
【问题讨论】:
【参考方案1】:这是 Thrust 的 arbitrary transformation 示例与 strided range 示例的组合。
下面以如下变换为例:
D[i] = A[i] + B[i] * C[i]
代码如下:
#include <thrust/for_each.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/functional.h>
#include <thrust/fill.h>
// for printing
#include <thrust/copy.h>
#include <ostream>
#define STRIDE 2
template <typename Iterator>
class strided_range
public:
typedef typename thrust::iterator_difference<Iterator>::type difference_type;
struct stride_functor : public thrust::unary_function<difference_type,difference_type>
difference_type stride;
stride_functor(difference_type stride)
: stride(stride)
__host__ __device__
difference_type operator()(const difference_type& i) const
return stride * i;
;
typedef typename thrust::counting_iterator<difference_type> CountingIterator;
typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;
// type of the strided_range iterator
typedef PermutationIterator iterator;
// construct strided_range for the range [first,last)
strided_range(Iterator first, Iterator last, difference_type stride)
: first(first), last(last), stride(stride)
iterator begin(void) const
return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));
iterator end(void) const
return begin() + ((last - first) + (stride - 1)) / stride;
protected:
Iterator first;
Iterator last;
difference_type stride;
;
struct arbitrary_functor
template <typename Tuple>
__host__ __device__
void operator()(Tuple t)
// D[i] = A[i] + B[i] * C[i];
thrust::get<3>(t) = thrust::get<0>(t) + thrust::get<1>(t) * thrust::get<2>(t);
;
int main(void)
// allocate storage
thrust::device_vector<float> A(5);
thrust::device_vector<float> B(5);
thrust::device_vector<float> C(5);
thrust::device_vector<float> D(5);
// initialize input vectors
A[0] = 3; B[0] = 6; C[0] = 2;
A[1] = 4; B[1] = 7; C[1] = 5;
A[2] = 0; B[2] = 2; C[2] = 7;
A[3] = 8; B[3] = 1; C[3] = 4;
A[4] = 2; B[4] = 8; C[4] = 3;
typedef thrust::device_vector<float>::iterator Iterator;
strided_range<Iterator> posA(A.begin(), A.end(), STRIDE);
strided_range<Iterator> posB(B.begin(), B.end(), STRIDE);
strided_range<Iterator> posC(C.begin(), C.end(), STRIDE);
strided_range<Iterator> posD(D.begin(), D.end(), STRIDE);
// apply the transformation
thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(posA.begin(), posB.begin(), posC.begin(), posD.begin())),
thrust::make_zip_iterator(thrust::make_tuple(posA.end(), posB.end(), posC.end(), posD.end())),
arbitrary_functor());
// print the output
for(int i = 0; i < 5; i++)
std::cout << A[i] << " + " << B[i] << " * " << C[i] << " = " << D[i] << std::endl;
【讨论】:
【参考方案2】:为什么不在仿函数中将变量 i 乘以 M?
如果 M 在编译时已知,可以这样写:
struct functor
__host__ __device__ void operator() (const int my_i)
int i = my_i *M;
// Body of the loop
;
thrust::counting_iterator<int> it1(0);
thrust::counting_iterator<int> it2 = it1 + N;
thrust::for_each (it1 , it2 , functor());
如果 M 仅在运行时已知,我们可以将其作为初始化参数传递给仿函数:
// Functor that carries the stride as a member so it can be chosen at run time.
struct functor
{
    int my_M;  // stride applied to every counter value

    // The parameter is named `m` rather than `_M`: identifiers that start
    // with an underscore followed by an uppercase letter are reserved.
    functor(int m) : my_M(m) {}

    __host__ __device__ void operator() (const int my_i)
    {
        int i = my_i *my_M;
        // Body of the loop
    }
};

// Only N indices (0 .. N-1) are generated; each one is scaled by my_M inside
// the functor.
thrust::counting_iterator<int> it1(0);
thrust::counting_iterator<int> it2 = it1 + N;
thrust::for_each (it1 , it2 , functor(M));
您还可以将计数迭代器包装在转换迭代器中,它接受计数迭代器并将其乘以 M:
struct functor
__host__ __device__ void operator() (const int i)
// Body of the loop
;
using namespace thrust::placeholders;
thrust::counting_iterator<int> it1(0);
thrust::counting_iterator<int> it2 = it1 + N;
thrust::for_each (make_transform_iterator(it1, _1 * M) , thrust::make_transform_iterator(it2, _1 * M) , functor());
最后一个示例使用了 Thrust 的占位符表达式(placeholder expressions);也可以改用一个额外的普通仿函数等效地实现,该仿函数返回其输入实参与其初始化参数(即 M)的乘积。
这是一个完整的示例,展示了所有 3 种方法:
$ cat t492.cu
#include <stdio.h>
#include <thrust/transform.h>
#include <thrust/for_each.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/host_vector.h>
#include <thrust/functional.h>
#define N 5
#define M 4
using namespace thrust::placeholders;
struct my_functor_1
__host__ __device__ void operator() (const int i)
printf("functor 1 value: %d\n", i);
;
struct my_functor_2
__host__ __device__ void operator() (const int my_i)
int i = my_i*M;
printf("functor 2 value: %d\n", i);
;
// Prints the value it is called with multiplied by a stride chosen at
// construction time.
struct my_functor_3
{
    int my_M;  // stride applied to every value

    // The parameter is named `m` rather than `_M`: identifiers that start
    // with an underscore followed by an uppercase letter are reserved.
    my_functor_3(int m) : my_M(m) {}

    __host__ __device__ void operator() (const int my_i)
    {
        int i = my_i *my_M;
        printf("functor 3 value: %d\n", i);
    }
};
int main()
thrust::counting_iterator<int> it1(0);
thrust::counting_iterator<int> it2 = it1 + N;
thrust::for_each(thrust::host, it1, it2, my_functor_1());
thrust::for_each(thrust::host, it1, it2, my_functor_2());
thrust::for_each(thrust::host, it1, it2, my_functor_3(M));
thrust::for_each(thrust::host, thrust::make_transform_iterator(it1, _1 * M), thrust::make_transform_iterator(it2, _1 * M), my_functor_1());
return 0;
$ nvcc -arch=sm_20 -o t492 t492.cu
$ ./t492
functor 1 value: 0
functor 1 value: 1
functor 1 value: 2
functor 1 value: 3
functor 1 value: 4
functor 2 value: 0
functor 2 value: 4
functor 2 value: 8
functor 2 value: 12
functor 2 value: 16
functor 3 value: 0
functor 3 value: 4
functor 3 value: 8
functor 3 value: 12
functor 3 value: 16
functor 1 value: 0
functor 1 value: 4
functor 1 value: 8
functor 1 value: 12
functor 1 value: 16
$
【讨论】:
第三种方法(使用 make_transform_iterator)正是我想要的!谢谢。
以上是关于“使用带步幅的 Thrust 计数迭代器”的主要内容,如果未能解决你的问题,请参考以下文章。