常见排序算法导读(11)[桶排序]

Posted veli

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了常见排序算法导读(11)[桶排序]相关的知识,希望对你有一定的参考价值。

上一节讲了基数排序(Radix Sort),这一节介绍桶排序(Bucket Sort or Bin Sort)。和基数排序一样,桶排序也是一种分配式排序(distribution sort)。

桶排序(Bucket Sort)的基本思想

  1. 将待排对象序列按照一定hash算法分发到N个桶中
  2. 对每一个桶的待排对象进行排序
  3. 遍历N个桶,收集所有非空桶里的有序对象(子序列)组成一个统一的有序对象序列

在每一个桶中,如果采用链式存储的话,1.和2.可以合并在一起操作,也就是在分发的过程中保证每一个桶里的对象是桶内有序的。

例如: 设有5个桶, 待排对象序列为 {29, 25, 3, 49, 9, 37, 21, 43}

1. 分发(scatter) (注:图片来源戳这里)

2. 桶内排序(sort)

3. 收集(gather)

从上面的3张图中,我们可以很直观地了解桶排序的过程。在观看了动画Bucket Sort后,我决定采用动画中给出的hash算法和对每一个桶采用单链表存储结构给出C代码实现。动画中给出的hash算法如下:

Linked List Array index = Value * NUMBER_OF_ELEMENTS/(MAXIMUM_ARRAY_VALUE + 1)
e.g. (348 * 30)/1000 = 10
     (15  * 30)/1000 = 0
Note that NUMBER_OF_ELEMENTS is the number of buckets, which is 30.

桶排序的C代码实现

1. 基本排序原理介绍

/*
 * Bucket Sort
 *
 *      Bucket sort(or bin sort), is a sorting algorithm that works by
 *      distributing the elements of an array into a number of buckets.
 *      Each bucket is then sorted individually, either using a different
 *      sorting algorithm, or by recursively applying the bucket sorting
 *      algorithm.
 *
 *      Typically, bucket sort works as follows:
 *      1. Set up an array of initially empty "buckets"
 *      2. Scatter: go over the original array, putting each object in
 *                  its bucket
 *      3. Sort each non-empty bucket
 *      4. Gather : visit the buckets in order and put all elements back
 *                  into the original array
 *
 *      Note that step#2 and step#3 are merged into one step since we use
 *      a singly linked list per bucket for better performance. Right
 *      here we just use the insertion sort algorithm to build each singly
 *      linked list.
 *
 *      In addition, we define N(=10) buckets, and use such hash algorithm in
 *      the following,
 *              a) get max number of a[] as MAX
 *              b) get width of the max number (i.e. MAX) as WIDTH
 *                 e.g. MAX = 9,   WIDTH = 1;
 *                      MAX = 99,  WIDTH = 2;
 *                      MAX = 999, WIDTH = 3;
 *              c) index = a[i] * N / (10 ** WIDTH)
 *      then we can dispatch a[i] to bucket[index]
 */

2. 单链表定义及基本操作

 1 typedef struct list_s {
 2         int data;
 3         struct list_s *next;
 4 } list_t;
 5 
 6 static void
 7 list_init(list_t **head, list_t *node)
 8 {
 9         if (*head == NULL) {
10                 *head = node;
11                 return;
12         }
13 
14         /* get both prev and next of the node to insert */
15         list_t *node_prev = *head;
16         list_t *node_next = NULL;
17         for (list_t *p = *head; p != NULL; p = p->next) {
18                 if (p->data < node->data) {
19                         node_prev = p;
20                         continue;
21                 }
22 
23                 node_next = p;
24                 break;
25         }
26 
27         if (node_next == NULL) { /* append node to the tail */
28                 node_prev->next = node;
29         } else {
30                 if (node_next == node_prev) { /* == *head */
31                         node->next = *head;
32                         *head = node;
33                         return;
34                 }
35 
36                 /* node_prev -> node -> node_next */
37                 node_prev->next = node;
38                 node->next = node_next;
39         }
40 }
41 
42 static void
43 list_show(list_t *head)
44 {
45         if (head == NULL)
46                 return;
47 
48         for (list_t *p = head; p != NULL; p = p->next)
49                 printf("%d ", p->data);
50         printf("\\n");
51 }
52 
53 static void
54 list_fini(list_t *head)
55 {
56         list_t *p = head;
57         while (p != NULL) {
58                 list_t *q = p;
59                 p = p->next;
60                 free(q);
61         }
62 }

3. 核心步骤之一:分发scatter()

 1 /*
 2  * Get width of a number
 3  * e.g.
 4  *   for i in [  0 .. 9  ] // width = 1
 5  *   for i in [ 10 .. 99 ] // width = 2
 6  *   for i in [100 .. 999] // width = 3
 7  *   ...
 8  */
 9 static int
10 get_width_of_num(int num)
11 {
12         int w = 1;
13         for (int q = num / 10; q != 0; q /= 10)
14                 w++;
15         return w;
16 }
17 
/*
 * Compute the divisor ("hash base") used to map values to buckets.
 *
 * a - array of values
 * n - number of elements in a[]
 *
 * Returns 10**W where W is the number of decimal digits of the largest
 * element of a[], e.g. max = 49 -> 100, max = 999 -> 1000. For an empty
 * array 10 is returned so that the result is never zero.
 *
 * NOTE: the result is an int; if the maximum has so many digits that
 *       10**W does not fit into an int, the multiplication overflows.
 */
static int
get_hash_base(int a[], size_t n)
{
        /* nothing to inspect; a[0] must not be read */
        if (n == 0)
                return 10;

        /* get max one of a[] */
        int max = a[0];
        for (size_t i = 1; i < n; i++) {
                if (max < a[i])
                        max = a[i];
        }

        /* the width does not change inside the loop, compute it once */
        int width = get_width_of_num(max);

        /* get hash base which is 10**N, N=1, 2, ... */
        int base = 1;
        for (int i = 0; i < width; i++)
                base *= 10;

        return base;
}
35 
36 static void
37 scatter(list_t **bucket, size_t m, int a[], size_t n)
38 {
39         int base = get_hash_base(a, n);
40 
41         for (int i = 0; i < n; i++) {
42                 /* 1. new a node for a[i] */
43                 list_t *nodep = NULL;
44                 nodep = (list_t *)malloc(sizeof (list_t));
45                 if (nodep == NULL) /* error: failed to malloc */
46                         return;
47 
48                 nodep->data = a[i];
49                 nodep->next = NULL;
50 
51                 /* 2. dispatch the new node to bucket[j] */
52                 int j = a[i] * m / base;
53                 list_init(&(bucket[j]), nodep);
54         }
55 }

4. 核心步骤之二:收集gather()

 1 static void
 2 gather(list_t **bucket, size_t m, int a[], size_t n)
 3 {
 4         int k = 0;
 5         for (int i = 0; i < m; i++) {
 6                 if (bucket[i] == NULL)
 7                         continue;
 8 
 9                 for (list_t *p = bucket[i]; p != NULL; p = p->next) {
10                         a[k++] = p->data;
11 
12                         if (k >= n) /* overflow */
13                                 break;
14                 }
15 
16                 list_fini(bucket[i]);
17         }
18 }

5. 桶排序bucketsort()

 1 void
 2 bucketsort(int a[], size_t n)
 3 {
 4         /* alloc bucket[] */
 5 #define BUCKET_NUM 10
 6         list_t **bucket = (list_t **)malloc(sizeof (list_t *) * BUCKET_NUM);
 7         if (bucket == NULL) /* error: failed to malloc */
 8                 return;
 9         for (int i = 0; i < BUCKET_NUM; i++)
10                 bucket[i] = NULL;
11 
12         /* scatter elements in a[] to bucket[] */
13         scatter(bucket, BUCKET_NUM, a, n);
14 
15         /* gather a[] by walking bucket[] */
16         gather(bucket, BUCKET_NUM, a, n);
17 
18         free(bucket);
19 }

6. 完整的C代码

o bucketsort.c (或访问这里)

  1 /*
  2  * Bucket Sort
  3  *
  4  *      Bucket sort(or bin sort), is a sorting algorithm that works by
  5  *      distributing the elements of an array into a number of buckets.
  6  *      Each bucket is then sorted individually, either using a different
  7  *      sorting algorithm, or by recursively applying the bucket sorting
  8  *      algorithm.
  9  *
 10  *      Typically, bucket sort works as follows:
 11  *      1. Set up an array of initially empty "buckets"
 12  *      2. Scatter: go over the original array, putting each object in
 13  *                  its bucket
 14  *      3. Sort each non-empty bucket
 15  *      4. Gather : visit the buckets in order and put all elements back
 16  *                  into the original array
 17  *
 18  *      Note that step#2 and step#3 are merged into one step since we use
 19  *      a singly linked list per bucket for better performance. Right
 20  *      here we just use the insertion sort algorithm to build each singly
 21  *      linked list.
 22  *
 23  *      In addition, we define N(=10) buckets, and use such hash algorithm in
 24  *      the following,
 25  *              a) get max number of a[] as MAX
 26  *              b) get width of the max number (i.e. MAX) as WIDTH
 27  *                 e.g. MAX = 9,   WIDTH = 1;
 28  *                      MAX = 99,  WIDTH = 2;
 29  *                      MAX = 999, WIDTH = 3;
 30  *              c) index = a[i] * N / (10 ** WIDTH)
 31  *      then we can dispatch a[i] to bucket[index]
 32  */
 33 
 34 #include <stdio.h>
 35 #include <stdlib.h>
 36 #include <string.h>
 37 
 38 typedef enum bool_s {false, true} bool_t;
 39 
 40 bool_t g_isint = true;
 41 
 42 typedef struct list_s {
 43         int data;
 44         struct list_s *next;
 45 } list_t;
 46 
 47 static void
 48 list_init(list_t **head, list_t *node)
 49 {
 50         if (*head == NULL) {
 51                 *head = node;
 52                 return;
 53         }
 54 
 55         /* get both prev and next of the node to insert */
 56         list_t *node_prev = *head;
 57         list_t *node_next = NULL;
 58         for (list_t *p = *head; p != NULL; p = p->next) {
 59                 if (p->data < node->data) {
 60                         node_prev = p;
 61                         continue;
 62                 }
 63 
 64                 node_next = p;
 65                 break;
 66         }
 67 
 68         if (node_next == NULL) { /* append node to the tail */
 69                 node_prev->next = node;
 70         } else {
 71                 if (node_next == node_prev) { /* == *head */
 72                         node->next = *head;
 73                         *head = node;
 74                         return;
 75                 }
 76 
 77                 /* node_prev -> node -> node_next */
 78                 node_prev->next = node;
 79                 node->next = node_next;
 80         }
 81 }
 82 
 83 static void
 84 list_show(list_t *head)
 85 {
 86         if (head == NULL)
 87                 return;
 88 
 89         for (list_t *p = head; p != NULL; p = p->next)
 90                 printf("%d ", p->data);
 91         printf("\\n");
 92 }
 93 
 94 static void
 95 list_fini(list_t *head)
 96 {
 97         list_t *p = head;
 98         while (p != NULL) {
 99                 list_t *q = p;
100                 p = p->next;
101                 free(q);
102         }
103 }
104 
105 static void
106 show(int a[], size_t n)
107 {
108         if (g_isint) {
109                 for (int i = 0; i < n; i++)
110                         printf("%-2d ", a[i]);
111         } else {
112                 for (int i = 0; i < n; i++)
113                         printf("%-2c ", a[i]);
114         }
115         printf("\\n");
116 }
117 
/*
 * Count the decimal digits of a number.
 *
 *   num in [  0 .. 9  ]  ->  1
 *   num in [ 10 .. 99 ]  ->  2
 *   num in [100 .. 999]  ->  3
 *   ...
 *
 * The sign is not counted, so -12 also has width 2.
 */
static int
get_width_of_num(int num)
{
        int digits = 0;

        /* strip one digit per round; even 0 goes through one round */
        do {
                digits++;
                num /= 10;
        } while (num != 0);

        return digits;
}
134 
/*
 * Compute the divisor ("hash base") used to map values to buckets.
 *
 * a - array of values
 * n - number of elements in a[]
 *
 * Returns 10**W where W is the number of decimal digits of the largest
 * element of a[], e.g. max = 49 -> 100, max = 999 -> 1000. For an empty
 * array 10 is returned so that the result is never zero.
 *
 * NOTE: the result is an int; if the maximum has so many digits that
 *       10**W does not fit into an int, the multiplication overflows.
 */
static int
get_hash_base(int a[], size_t n)
{
        /* nothing to inspect; a[0] must not be read */
        if (n == 0)
                return 10;

        /* get max one of a[] */
        int max = a[0];
        for (size_t i = 1; i < n; i++) {
                if (max < a[i])
                        max = a[i];
        }

        /* the width does not change inside the loop, compute it once */
        int width = get_width_of_num(max);

        /* get hash base which is 10**N, N=1, 2, ... */
        int base = 1;
        for (int i = 0; i < width; i++)
                base *= 10;

        return base;
}
152 
153 static void
154 scatter(list_t **bucket, size_t m, int a[], size_t n)
155 {
156         int base = get_hash_base(a, n);
157 
158         for (int i = 0; i < n; i++) {
159                 /* 1. new a node for a[i] */
160                 list_t *nodep = NULL;
161                 nodep = (list_t *)malloc(sizeof (list_t));
162                 if (nodep == NULL) /* error: failed to malloc */
163                         return;
164 
165                 nodep->data = a[i];
166                 nodep->next = NULL;
167 
168                 /* 2. dispatch the new node to bucket[j] */
169                 int j = a[i] * m / base;
170                 list_init(&(bucket[j]), nodep);
171 
172                 /* NOTE: dump bucket[j] just for visual observation */
173                 printf("%d:%d\\t\\t%d\\tbucket[%d] : ", i, j, a[i], j);
174                 list_show(bucket[j]);
175         }
176 }
177 
178 static void
179 gather(list_t **bucket, size_t m, int a[], size_t n)
180 {
181         int k = 0;
182         for (int i = 0; i < m; i++) {
183                 if (bucket[i] == NULL)
184                         continue;
185 
186                 for (list_t *p = bucket[i]; p != NULL; p = p->next) {
187                         a[k++] = p->data;
188 
189                         if (k >= n) /* overflow */
190                                 break;
191                 }
192 
193                 list_fini(bucket[i]);
194         }
195 }
196 
197 void
198 bucketsort(int a[], size_t n)
199 {
200         /* alloc bucket[] */
201 #define BUCKET_NUM 10
202         list_t **bucket = (list_t **)malloc(sizeof (list_t *) * BUCKET_NUM);
203         if (bucket == NULL) /* error: failed to malloc */
204                 return;
205         for (int i = 0; i < BUCKET_NUM; i++)
206                 bucket[i] = NULL;
207 
208         /* scatter elements in a[] to bucket[] */
209         scatter(bucket, BUCKET_NUM, a, n);
210 
211         /* gather a[] by walking bucket[] */
212         gather(bucket, BUCKET_NUM, a, n);
213 
214         free(bucket);
215 }
216 
217 int
218 main(int argc, char *argv[])
219 {
220         if (argc < 2) {
221                 fprintf(stderr, "Usage: %s <C1> [C2] ...\\n", argv[0]);
222                 return -1;
223         }
224 
225         argc--;
226         argv++;
227 
228         int n = argc;
229         int *a = (int *)malloc(sizeof(int) * n);
230 #define VALIDATE(p) do { if (p == NULL) return -1; } while (0)
231         VALIDATE(a);
232 
233         char *s = getenv("ISINT");
234         if (s != NULL && strncmp(s, "true", 4) == 0)
235                 g_isint = true;
236         else if (s != NULL && strncmp(s, "false", 4) == 0)
237                 g_isint = false;
238 
239         if (g_isint) {
常见排序算法导读(10)[基数排序]

常见排序算法导读[简单选择排序]

算法渣-排序-桶排序

八十五再探希尔排序,桶排序,计数排序和基数排序

排序算法 (11.基数排序)

第六章 算法秘籍之桶排序