OpenCL 双调排序 CPU 版

Posted cuancuancuanhao

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了OpenCL 双调排序 CPU 版相关的知识,希望对你有一定的参考价值。

● 学习了双调排序,参考(https://blog.csdn.net/xbinworld/article/details/76408595)

● 使用 CPU 排序的代码

  #include <stdio.h>
  #include <stdlib.h>
  2 
  /* Number of elements to sort. The sort/merge routines in this file split
   * their range in half at every level, so this must be a power of two. */
  #define LENGTH 1024
  /* Direction flags passed as the `dir` argument of the sort/merge routines. */
  #define ASCENDING   1
  #define DESCENDING  0

  /* Compile-time guard: a LENGTH that is not a positive power of two gives
   * this array type a negative size and stops the build. */
  typedef char length_must_be_a_power_of_two[(LENGTH > 0 && (LENGTH & (LENGTH - 1)) == 0) ? 1 : -1];

  /* The data being sorted; it is modified in place through this global. */
  int a[LENGTH];
  8 
  9 void compare(int i, int j, int dir)
 10 {
 11     if (dir == (a[i]>a[j]))
 12     {
 13         int h = a[i];
 14         a[i] = a[j];
 15         a[j] = h;
 16     }
 17 }
 18 
 /*
  * Merge step over cnt elements starting at index lo (start + length form).
  * Elements are first compare-exchanged across the widest gap, half the
  * range, and then each half is handled recursively with the same procedure
  * at a smaller gap.
  */
 void bitonicMerge01(int lo, int cnt, int dir)
 {
     if (cnt <= 1)
         return;

     const int half = cnt / 2;
     const int mid = lo + half;
     int idx = lo;

     while (idx < mid)
     {
         compare(idx, idx + half, dir);
         ++idx;
     }

     bitonicMerge01(lo, half, dir);
     bitonicMerge01(mid, half, dir);
 }
 30 
 31 void bitonicSort01(int lo, int cnt, int dir)// 先递归地要求小跨度区间依次排成 “升↗降↘升↗降↘” 再在较大跨度上进行合并
 32 {
 33     if (cnt > 1)
 34     {
 35         int k = cnt / 2;
 36         bitonicSort01(lo, k, ASCENDING);
 37         bitonicSort01(lo + k, k, DESCENDING);
 38         bitonicMerge01(lo, cnt, dir);
 39     }
 40 }
 41 
 /*
  * Merge step over the inclusive index range [l, r] in direction dir.
  * Elements are compare-exchanged across a gap of half the element count,
  * then each half is merged recursively.
  */
 void bitonicMerge02(int l, int r, const int dir)
 {
     if (r - l > 0)
     {
         /* Half of the element count (r - l + 1). For an even count this equals
          * the former (r - l) / 2 + 1; for an odd count that expression was one
          * too large and made compare() touch index r + 1, outside the range.
          * Odd counts are still not sorted correctly, they merely stay in range. */
         int stride = (r - l + 1) / 2;
         for (int i = l; i < l + stride; i++)
             compare(i, i + stride, dir);
         bitonicMerge02(l, l + stride - 1, dir);
         bitonicMerge02(l + stride, r, dir);
     }
 }
 53 
 54 void bitonicSort02(int l, int r, const int dir)
 55 {
 56     if (r - l > 0)
 57     {
 58         int rNew = l + (r - l) / 2;
 59         bitonicSort02(l, rNew, ASCENDING);
 60         bitonicSort02(rNew + 1, r, DESCENDING);
 61         bitonicMerge02(l, r, dir);
 62     }
 63 }
 64 
 /*
  * Merge step over the half-open index range [l, r) in direction dir.
  * Elements are compare-exchanged across a gap of half the range length,
  * then [l, l + gap) and [l + gap, r) are merged recursively.
  */
 void bitonicMerge03(int l, int r, const int dir)
 {
     if (r - l <= 1)
         return;

     const int gap = (r - l) / 2;
     const int mid = l + gap;
     int idx = l;

     while (idx < mid)
     {
         compare(idx, idx + gap, dir);
         ++idx;
     }

     bitonicMerge03(l, mid, dir);
     bitonicMerge03(mid, r, dir);
 }
 76 
 77 void bitonicSort03(int l, int r, const int dir)
 78 {
 79     if (r - l > 1)
 80     {
 81         int rNew = l + (r - l) / 2;
 82         bitonicSort03(l, rNew, ASCENDING);
 83         bitonicSort03(rNew, r, DESCENDING);
 84         bitonicMerge03(l, r, dir);
 85     }
 86 }
 87 
 88 int main()
 89 {
 90     int i, error;
 91     srand(97);    
 92     for (i = 0; i < LENGTH; a[i++] = rand());
 93     
 94     printf("\n");
 95     for (i = 0; i < LENGTH; i++)
 96     {
 97         printf("%5d,", a[i]);
 98         if ((i + 1) % 10 == 0)
 99             printf("\n");
100     }
101 
102     //bitonicSort01(0, LENGTH, ASCENDING);        // 使用起点和长度
103     //bitonicSort02(0, LENGTH - 1, ASCENDING);    // 使用左端点和右端点(都包含)
104     bitonicSort03(0, LENGTH, ASCENDING);          // 使用左端点和右端点(左包含右不包含)
105 
106     printf("\n");
107     for (i = 0, error = -1; i < LENGTH; i++)
108     {
109         printf("%5d,", a[i]);        
110         if (i < LENGTH - 1 && a[i] > a[i + 1])
111             error = i;
112         if ((i + 1) % 10 == 0)
113             printf("\n");
114     }
115     if (error != -1)
116         printf("\n\nerror at i==%d, a[i]==%d, a[i+1]==%d", error, a[error], a[error + 1]);
117 
118     getchar();
119     return 0;
120 }

● 输出结果(临时改为排序 64 个元素,每行显示 16个)

  355, 1738,17358,10050,26329,32242,15361,10484, 6442,24073,24274,32373,18693,32410,10489,26548,
21618,11292,16736, 2813,14168,26214, 3533,18194,25922,25449,31426,27518,28770,10781,31394,19238,
 8663,22972,   20,23841,26411,28975,30636,24521,14624,10949, 1225,12818,24492,16427, 1465,22299,
14146,10905, 8996,22531,23379,13129,28327,30718,23061,26669,21695,28328,21891,18657,26089, 7321,

   20,  355, 1225, 1465, 1738, 2813, 3533, 6442, 7321, 8663, 8996,10050,10484,10489,10781,10905,
10949,11292,12818,13129,14146,14168,14624,15361,16427,16736,17358,18194,18657,18693,19238,21618,
21695,21891,22299,22531,22972,23061,23379,23841,24073,24274,24492,24521,25449,25922,26089,26214,
26329,26411,26548,26669,27518,28327,28328,28770,28975,30636,30718,31394,31426,32242,32373,32410,

 

以上是关于OpenCL 双调排序 CPU 版的主要内容,如果未能解决你的问题,请参考以下文章

键/值数组的双调排序

ATI显卡纯OpenCL版驱动是啥意思?

三十分钟理解:双调排序Bitonic Sort,适合并行计算的排序算法

基于Batcher比较器的双调排序网络

我的 OpenCL 代码在 GPU 上比在 CPU 上慢

1 个 cpu 设备上 OpenCL 的并行性