稀疏矩阵及其压缩格式

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了稀疏矩阵及其压缩格式相关的知识,希望对你有一定的参考价值。

参考技术A 一般情况下,稀疏矩阵指的是元素大部分是0的矩阵(有些资料定义非零元素不超过5%的矩阵,为稀疏矩阵), 矩阵的稀疏性可以用一个分数来量化,即矩阵中零元素的个数除以矩阵中元素的总数。存储稀疏矩阵时只描述其非零元素的值及所在位置, tensorflow的sparse_tensor类型还会存储稀疏矩阵的形状.

存储稀疏矩阵时常用的有如下三种压缩格式:

这种存储格式比较简单易懂,每一个元素需要用一个三元组来表示,分别是(行号,列号,数值),对应上图右边的一列。这种方式简单,但是记录单信息多(行列),每个三元组自己可以定位,因此空间不是最优。

这是经常用的一种,我们会经常在一些标准的线性代数库或者数值运算库中看到此方式存储;CSR是比较标准的一种,也需要三类数据来表达:数值,列号,以及行偏移。CSR不是三元组,而是整体的编码方式。数值和列号与COO一致,表示一个元素以及其列号,行偏移表示某一行的第一个元素在values里面的起始偏移位置。如上图中,第一行的第一个元素1在values中是第0个, 所以是0偏移,第二行元素第一个元素2是2偏移,第三行第一个元素5是4偏移,第4行第一个元素6是7偏移。在行偏移的最后补上矩阵总的元素个数,本例中一共是9个非零元素。

CSC是和CSR相对应的一种方式,即按列压缩的意思。

以上图中矩阵为例:
Column Offsets:[0 2 5 7 9]
Row Indices:[0 2 0 1 3 1 2 2 3]
Values: [1 5 7 2 6 8 3 9 4]
Values中的元素要按列写, 跟COO和CSR不同, 指定了Values的元素顺序之后就可以写Row Indices了, 然后根据每一列第一个元素在Values中的位置确定偏移量Column Offsets. 如第一列第一个元素1是0偏移, 第二列第一个元素7是2偏移, 第三列第一个元素8是5偏移, 第四列第一个元素9是7偏移, 共9个元素.

稀疏矩阵 part 5

? 目前为止能跑的所有代码及其结果(2019年2月24日),之后添加:DIA 乘法 GPU 版;其他维度的乘法(矩阵乘矩阵);其他稀疏矩阵格式之间的相互转化

  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <memory.h>
  4 #include <malloc.h>
  5 #include <math.h>
  6 #include <time.h>
  7 #include "cuda_runtime.h"
  8 #include "device_launch_parameters.h"
  9 
 10 #define ROW         (8192)
 11 #define COL         (8192)
 12 #define RATIO       0.1                         // 系数矩阵非零元素占比
 13 #define EPSILON     0.0001                      // 浮点数比较
 14 #define THREAD_SIZE 256
 15 #define SEED        1                           // (unsigned int)time(NULL)
 16 #define MAX(x,y)    (((x)>(y))?(x):(y))
 17 #define CEIL(x,y)   (int)(( x - 1 ) /  y + 1)
 18 #define INT                                     // 数字格式用 int 还是 float
 19 
 20 #ifdef INT
 21 typedef int format;
 22 #else
 23 typedef float format;
 24 #endif
 25 
 26 // 矩阵存储格式
 27 typedef struct          // 顺序格式
 28 {
 29     int     row;        // 行数
 30     int     col;        // 列数
 31     int     count;      // 非零元个数(用于转换,不用于计算)
 32     format  *data;      // 元素的值
 33 }
 34 MAT;
 35 
 36 typedef struct          // Compressed Sparse Row 格式
 37 {
 38     int     row;        // 行数
 39     int     col;        // 列数
 40     format  *data;      // 元素的值
 41     int     *index;     // 元素的列号
 42     int     *ptr;       // 每行首元在 index 中的下标,最后一个元素的值等于矩阵非零元个数 
 43 }
 44 CSR;
 45 
 46 typedef struct          // Compressed Sparse Row 格式
 47 {
 48     int     row;        // 行数
 49     int     col;        // 列数
 50     format  *data;      // 元素的值
 51     int     *index;     // 元素的行号
 52     int     *ptr;       // 每列首元在 index 中的下标,最后一个元素的值等于矩阵非零元个数 
 53 }
 54 CSC;
 55 
 56 
 57 typedef struct          // ELLPACK 格式
 58 {
 59     int     row;        // 行数
 60     int     col;        // 列数,相当于MAT格式的行数
 61     int     colOrigin;  // 原列数,相当于MAT格式的列数
 62     format  *data;      // 元素的值
 63     int     *index;     // 元素在MAT格式中的列号
 64 }
 65 ELL;
 66 
 67 typedef struct          // Coordinate 格式
 68 {
 69     int     row;        // 行数
 70     int     col;        // 列数
 71     int     count;      // 非零元个数
 72     int     *rowIndex;  // 行向量
 73     int     *colIndex;  // 列向量
 74     format  *data;      // 元素的值
 75 }
 76 COO;
 77 
 78 typedef struct              // Diagonal 格式
 79 {
 80     int     row;            // 行数
 81     int     col;            // 列数    
 82     int     colOrigin;      // 原列数
 83     format  *data;          // 元素的值
 84     int     *index;         // 原矩阵各对角线是否非零
 85 }
 86 DIA;
 87 
 88 // 全局指针
 89 __managed__ MAT *aMAT, *xMAT, *yMATRef, *yMATCal;
 90 __managed__ CSR *aCSR;
 91 __managed__ ELL *aELL;
 92 __managed__ COO *aCOO;
 93 
 94 // 通用函数
 95 __host__ __device__ inline void checkNULL(const void *input)            // 有点问题,设备函数无法使用 exit(1) 来推出
 96 {
 97     if (input == NULL)
 98         printf("
	find a NULL!");
 99     return;
100 }
101 
102 __host__ inline void checkCudaError(cudaError input)
103 {
104     if (input != cudaSuccess)
105     {
106         printf("
	find a cudaError!");
107         exit(1);
108     }
109     return;
110 }
111 
112 int checkEqual(const format * in1, const format * in2, const int length)// 注意两向量相等时返 0,否则返回“值不等的元素下标加一”
113 {
114     int i;
115     for (i = 0; i < length; i++)
116     {
117 #ifdef INT
118         if (in1[i] != in2[i])
119 #else
120         if (fabs(in1[i] - in2[i]) > EPSILON)
121 #endif
122         {
123             printf("
	Error at i = %d
	in1[i] = %10f, in2[i] = %10f
", i, (float)in1[i], (float)in2[i]);
124             return i + 1;
125         }
126     }
127     printf("
	Check output, passed.
");
128     return 0;
129 }
130 
131 // 打印矩阵
132 void print(const char* info, const MAT *in)// 打印MAT格式的矩阵
133 {
134     printf("%s
	MAT,
	row = %d, col = %d, count = %d", info, in->row, in->col, in->count);
135     printf("
	data =
	");
136     for (int i = 0; i < in->row * in->col; i++)
137     {
138 #ifdef INT
139         printf("%d ", in->data[i]);
140 #else
141         printf("%.2f ", in->data[i]);
142 #endif
143         if ((i + 1) % in->col == 0)
144             printf("
	");
145     }
146     printf("
");
147     return;
148 }
149 
150 void print(const char* info, const CSR *in)// 打印CSR格式的矩阵
151 {
152     printf("%s
	CSR,
	row = %d, col = %d", info, in->row, in->col);
153     printf("
	data =
	");
154     for (int i = 0; i < in->ptr[in->row]; i++)
155 #ifdef INT
156         printf("%d ", in->data[i]);
157 #else
158         printf("%.2f ", in->data[i]);
159 #endif
160     printf("
	index =
	");
161     for (int i = 0; i < in->ptr[in->row]; i++)
162         printf("%d ", in->index[i]);
163     printf("
	ptr =
	");
164     for (int i = 0; i < in->row + 1; i++)
165         printf("%d ", in->ptr[i]);
166     printf("

");
167     return;
168 }
169 
170 void print(const char* info, const ELL *in)// 打印ELL格式的矩阵
171 {
172     int i;
173     printf("%s
	ELL,
	row = %d, col = %d, colOrigin = %d", info, in->row, in->col, in->colOrigin);
174     printf("
	data =
	");
175     for (i = 0; i < in->row * in->col; i++)
176     {
177 #ifdef INT
178         printf("%d ", in->data[i]);
179 #else
180         printf("%.2f ", in->data[i]);
181 #endif
182         if ((i + 1) % in->col == 0)
183             printf("
	");
184     }
185     printf("index =
	");
186     for (i = 0; i < in->row * in->col; i++)
187     {
188         printf("%d ", in->index[i]);
189         if ((i + 1) % in->col == 0)
190             printf("
	");
191     }
192     printf("

");
193     return;
194 }
195 
196 void print(const char* info, const COO *in)// 打印COO格式的矩阵
197 {
198     int i;
199     printf("%s
	COO,
	row = %d, col = %d, count = %d", info, in->row, in->col, in->count);
200     printf("
	(rowIndex, colIndex, data) =
	");
201     for (i = 0; i < in->count; i++)
202     {
203 #ifdef INT
204         printf("(%d,%d,%d)
	", in->rowIndex[i], in->colIndex[i], in->data[i]);
205 #else
206         printf("(%d,%d,%.2f)
	", in->rowIndex[i], in->colIndex[i], in->data[i]);
207 #endif
208     }
209     printf("

");
210     return;
211 }
212 
213 void print(const char* info, const DIA *in)// 打印DIA格式的矩阵
214 {
215     int i;
216     printf("%s
	DIA,
	row = %d, col = %d, colOrigin = %d", info, in->row, in->col, in->colOrigin,in->colOrigin);
217     printf("
	data =
	");
218     int * inverseIndex = (int *)malloc(sizeof(int) * in->col);
219     for (int i = 0, j = 0; i < in->row + in->col - 1; i++)
220     {
221         if (in->index[i] == 1)
222         {
223             inverseIndex[j] = i;
224             j++;
225         }
226     }  
227     for (i = 0; i < in->row * in->col; i++)
228     {                                                                            
229         if (i / in->col < in->row - 1 - inverseIndex[i % in->col] || i / in->col > inverseIndex[in->col - 1] - inverseIndex[i % in->col])
230             printf("* ");
231         else
232 #ifdef INT
233             printf("%d ", in->data[i]);
234 #else
235             printf("%.2f ", in->data[i]);
236 #endif
237         if ((i + 1) % in->col == 0)
238             printf("
	");
239     }
240     printf("index =
	");
241     for (i = 0; i < in->row + in->col - 1; i++)    
242         printf("%d ", in->index[i]);       
243     printf("

");
244     free(inverseIndex);
245     return;
246 }
247 
248 // 矩阵初始化与清理
249 MAT *initializeMAT(const int row, const int col, const int count = 0)// 初始化MAT,注意非零元默认为 0
250 {
251     MAT *temp;
252     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(MAT)));
253     temp->row = row;
254     temp->col = col;
255     temp->count = count;
256     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
257     return temp;
258 }
259 
260 // 统计MAT形式的矩阵中的非零元素个数
261 #define COUNT_MAT(in)                               262 {                                                   263     checkNULL(in);                                  264     int i, zero;                                    265     for (i = zero = 0; i < in->row * in->col; i++)  266         zero += !in->data[i];                       267     in->count = in->row * in->col - zero;           268 }
269 
270 int freeMatrix(MAT *in)// 释放MAT
271 {
272     checkNULL(in);
273     cudaFree(in->data);
274     cudaFree(in);
275     return 0;
276 }
277 
278 CSR * initializeCSR(const int row, const int col, const int count)// 初始化CSR,要求给出 count
279 {
280     CSR *temp;
281     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(CSR)));
282     temp->row = row;
283     temp->col = col;
284     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * count));
285     checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(int) * count));
286     checkCudaError(cudaMallocManaged((void **)&temp->ptr, sizeof(int) * (row + 1)));
287     return temp;
288 }
289 
290 int freeMatrix(CSR *in)// 释放CSR
291 {
292     checkNULL(in);
293     cudaFree(in->data);
294     cudaFree(in->index);
295     cudaFree(in->ptr);
296     cudaFree(in);
297     return 0;
298 }
299 
300 ELL * initializeELL(const int row, const int col, const int colOrigin)// 初始化ELL,要求给出原列数
301 {
302     ELL *temp;
303     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(ELL)));
304     temp->row = row;
305     temp->col = col;
306     temp->colOrigin = colOrigin;
307     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
308     checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(int) * row * col));
309     return temp;
310 }
311 
312 int freeMatrix(ELL *in)// 释放ELL
313 {
314     cudaFree(in->data);
315     cudaFree(in->index);
316     cudaFree(in);
317     return 0;
318 }
319 
320 COO * initializeCOO(const int row, const int col, const int count)// 初始化COO,要求给出 count
321 {
322     COO * temp;
323     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(COO)));
324     temp->row = row;
325     temp->col = col;
326     temp->count = count;
327     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * count));
328     checkCudaError(cudaMallocManaged((void **)&temp->rowIndex, sizeof(int) * count));
329     checkCudaError(cudaMallocManaged((void **)&temp->colIndex, sizeof(int) * count));
330     return temp;
331 }
332 
333 int freeMatrix(COO *in)// 释放COO
334 {
335     checkNULL(in);
336     cudaFree(in->data);
337     cudaFree(in->rowIndex);
338     cudaFree(in->colIndex);
339     cudaFree(in);
340     return 0;
341 }
342 
343 DIA * initializeDIA(const int row, const int col, const int colOrigin)// 初始化DIA,要求给出原行数、原列数
344 {
345     DIA * temp;
346     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(DIA)));
347     temp->row = row;
348     temp->col = col;
349     temp->colOrigin = colOrigin;
350     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
351     checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(format) * (row + col - 1)));
352     return temp;
353 }
354 
355 int freeMatrix(DIA *in)// 释放DIA
356 {
357     checkNULL(in);
358     cudaFree(in->data);
359     cudaFree(in->index);
360     cudaFree(in);
361     return 0;
362 }
363 
364 // 矩阵格式的相互转化
365 CSR * MATToCSR(const MAT *in)                                       // MAT 转 CSR
366 {
367     checkNULL(in);
368     CSR * out = initializeCSR(in->row, in->col, in->count);
369     checkNULL(out);
370     
371     out->ptr[0] = 0;
372     for (int i = 0, j = 0, k = 1; i < in->row * in->col; i++)       // i 遍历 in->data
373     {
374         if (in->data[i] != 0)                                       // 找到非零元
375         {
376             if (j == in->count)                                     // 在 out->data 已经填满了的基础上又发现了非零元,错误
377                 return NULL;
378             out->data[j] = in->data[i];                             // 填充非零元素
379             out->index[j] = i % in->col;                            // 填充列号
380             j++;
381         }
382         if ((i + 1) % in->col == 0)                                 // 到了最后一列,写入行指针号
383             out->ptr[k++] = j;
384     }
385     return out;
386 }
387 
388 MAT * CSRToMAT(const CSR *in)                                       // CSR转MAT
389 {
390     checkNULL(in);
391     MAT *out = initializeMAT(in->row, in->col, in->ptr[in->row]);
392     checkNULL(out);
393 
394     memset(out->data, 0, sizeof(format) * in->row * in->col);
395     for (int i = 0; i < in->row; i++)                               // i 遍历行
396     {                                                
397         for (int j = in->ptr[i]; j < in->ptr[i + 1]; j++)           // j 遍历列 
398             out->data[i * in->col + in->index[j]] = in->data[j];
399     }
400     return out;
401 }
402 
403 ELL * MATToELL(const MAT *in)// MAT转ELL
404 {
405     checkNULL(in);
406 
407     int i, j, maxElement; 
408     for (i = j = maxElement = 0; i < in->row * in->col; i++)                    // i 遍历 in->data,j 记录该行非零元素数,maxElement 记录一行非零元素最大值
409     {     
410         if (in->data[i] != 0)                                                   // 找到非零元       
411             j++;                                                       
412         if ((i + 1) % in->col == 0)                                             // 行末,更新 maxElement                        
413         {
414             maxElement = MAX(j, maxElement);                    
415             j = 0;                                                              // 开始下一行之前清空 j
416         }
417     }
418     format* temp_data=(format *)malloc(sizeof(format) * in->row * maxElement);  // 临时数组,将列数压缩到 maxElement
419     checkNULL(temp_data);
420     int* temp_index = (int *)malloc(sizeof(int) * in->row * maxElement);
421     checkNULL(temp_index);
422     memset(temp_data, 0, sizeof(format) * in->row * maxElement);
423     memset(temp_index, 0, sizeof(int) * in->row * maxElement);
424     for (i = j = 0; i < in->row * in->col; i++)                                 // i 遍历 in->data,j 记录该行非零元素数,把 in 中每行的元素往左边推
425     {        
426         if (in->data[i] != 0)                                                   // 找到非零元
427         {
428             temp_data[i / in->col * maxElement + j] = in->data[i];              // 存放元素
429             temp_index[i / in->col * maxElement + j] = i % in->col;             // 记录所在的列号
430             j++;                                                    
431         }
432         if ((i + 1) % in->col == 0)                                             // 行末,将剩余位置的下标记作 -1,即无效元素
433         {            
434             for (j += i / in->col * in->col; j < maxElement * (i / in->col + 1); j++)   // 使得 j 指向本行最后一个非零元素之后的元素,再开始填充
435                 temp_index[j] = -1;                                 
436             j = 0;                                                              // 开始下一行之前清空 j
437         }
438     }    
439     ELL *out = initializeELL(maxElement, in->row, in->col);                     // 最终输出,如果不转置的话不要这部分
440     checkNULL(out);
441     for (i = 0; i < out->row * out->col; i++)                                   // 将 temp_data 和 temp_index 转置以提高缓存利用
442     {
443         out->data[i] = temp_data[i % out->col * out->row + i / out->col];
444         out->index[i] = temp_index[i % out->col * out->row + i / out->col];
445     }
446     free(temp_data);
447     free(temp_index);
448     return out;
449 }
450 
451 MAT * ELLToMAT(const ELL *in)                                                   // ELL转MAT
452 {
453     checkNULL(in);
454     MAT *out = initializeMAT(in->col, in->colOrigin);
455     checkNULL(out);
456 
457     for (int i = 0; i < in->row * in->col; i++)                                 // i 遍历 out->data 
458     {
459         if (in->index[i] < 0)                                                   // 注意跳过无效元素
460             continue;
461         out->data[i % in->col * in->colOrigin + in->index[i]] = in->data[i];
462     }
463     COUNT_MAT(out);
464     return out;
465 }
466 
467 COO * MATToCOO(const MAT *in)                               // MAT转COO
468 {
469     checkNULL(in);
470     COO *out = initializeCOO(in->row, in->col, in->count);
471 
472     for (int i=0, j = 0; i < in->row * in->col; i++)
473     {
474 #ifdef INT
475         if (in->data[i] != 0)
476 #else
477         if (fabs(in->data[i]) > EPSILON)
478 #endif
479         {
480             out->data[j] = in->data[i];
481             out->rowIndex[j] = i / in->col;
482             out->colIndex[j] = i % in->col;
483             j++;
484         }
485     }
486     return out;
487 }
488 
489 MAT * COOToMAT(const COO *in)                               // COO转MAT
490 {
491     checkNULL(in);
492     MAT *out = initializeMAT(in->row, in->col, in->count);
493     checkNULL(out);
494 
495     for (int i = 0; i < in->row * in->col; i++)
496         out->data[i] = 0;
497     for (int i = 0; i < in->count; i++)
498         out->data[in->rowIndex[i] * in->col + in->colIndex[i]] = in->data[i];
499     return out;
500 }
501 
502 DIA * MATToDIA(const MAT *in)                                       // MAT转DIA
503 {
504     checkNULL(in);
505 
506     int *index = (int *)malloc(sizeof(int)*(in->row + in->col - 1));
507     for (int diff = in->row - 1; diff > 0; diff--)                  // 左侧零对角线情况
508     {        
509         int flagNonZero = 0;
510         for (int i = 0; i < in->col && i + diff < in->row; i++)     // i 沿着对角线方向遍历 in->data,flagNonZero 记录该对角线是否全部为零元
511         {            
512 #ifdef INT
513             if (in->data[(i + diff) * in->col + i] != 0)
514 #else
515             if (fabs(in->data[(i + diff) * in->col + i]) > EPSILON)
516 #endif            
517                 flagNonZero = 1;
518         }
519         index[in->row - 1 - diff] = flagNonZero;                    // 标记该对角线上有非零元
520     }
521     for (int diff = in->col - 1; diff >= 0; diff--)                 // 右侧零对角线情况
522     {                                                                                                                 
523         int flagNonZero = 0;
524         for (int j = 0; j < in->row && j + diff < in->col; j++)
525         {
526 #ifdef INT
527             if (in->data[j * in->col + j + diff] != 0)
528 #else
529             if (fabs(in->data[j * in->col + j + diff]) > EPSILON)
530 #endif            
531                 flagNonZero = 1;
532         }
533         index[in->row - 1 + diff] = flagNonZero;                    // 标记该对角线上有非零元
534     }       
535     int *prefixSumIndex = (int *)malloc(sizeof(int)*(in->row + in->col - 1));
536     prefixSumIndex[0] = index[0];
537     for (int i = 1; i < in->row + in->col - 1; i++)                 // 闭前缀和,prefixSumIndex[i] 表示原矩阵第 0 ~ i 条对角线中共有多少条非零对角线(含)
538         prefixSumIndex[i] = prefixSumIndex[i-1] + index[i];         // index[in->row + in->col -2] 表示原矩阵非零对角线条数,等于 DIA 矩阵列数
539     DIA *out = initializeDIA(in->row, prefixSumIndex[in->row + in->col - 2], in->col);
540     checkNULL(out);
541 
542     memset(out->data, 0, sizeof(int)*out->row * out->col);
543     for (int i = 0; i < in->row + in->col - 1; i++)             
544         out->index[i] = index[i];                                   // index 搬进 out
545     for (int i = 0; i < in->row; i++)                               // i,j 遍历原矩阵,将元素搬进 out
546     {
547         for (int j = 0; j < in->col; j++)
548         {
549             int temp = j - i + in->row - 1;
550             if (index[temp] == 0)
551                 continue;            
552             out->data[i * out->col + (temp > 0 ? prefixSumIndex[temp - 1] : 0)] = in->data[i * in->col + j];    // 第 row - 1 行第 0 列元素 temp == 0,单独处理
553         }
554     }
555     free(index);
556     free(prefixSumIndex);
557     return out;
558 }
559 
560 MAT * DIAToMAT(const DIA *in)                                       // DIA转MAT
561 {
562     checkNULL(in);
563     MAT *out = initializeMAT(in->row, in->colOrigin);
564     checkNULL(out);
565 
566     int * inverseIndex = (int *)malloc(sizeof(int) * in->col);
567     for (int i = 0, j = 0; i < in->row + in->col - 1; i++)          // 求一个 index 的逆,即 DIA 中第 i 列对应原矩阵第 inverseIndex[i] 对角线
568     {                                                               // 原矩阵对角线编号 (row-1, 0) 为第 0 条,(0, 0) 为第 row - 1 条,(col-1, 0) 为第 row + col - 2 条
569         if (in->index[i] == 1)
570         {
571             inverseIndex[j] = i;
572             j++;
573         }
574     }
575     for (int i = 0; i < in->row; i++)                               // i 遍历 in->data 行,j 遍历 in->data 列
576     {
577         for (int j = 0; j < in->col; j++)
578         {
579             if (i < in->row - 1 - inverseIndex[j] || i > inverseIndex[in->col - 1] - inverseIndex[j])   // 跳过两边呈三角形的无效元素
580                 continue;
581             out->data[i * in->col + inverseIndex[j] - in->row + 1] = in->data[i * in->col + j];         // 利用 inverseIndex 来找钙元素在原距震中的位置
582         }
583     }
584     free(inverseIndex);
585     return out;
586 }
587 
588 // 各种形式的矩阵和一个向量的乘法
589 int dotCPU(const MAT *a, const MAT *x, MAT *y)      // CPU MAT 乘法
590 {
591     checkNULL(a); checkNULL(x); checkNULL(y);
592     if (a->col != x->row)
593     {
594         printf("dotMATCPU dimension mismatch!
");
595         return 1;
596     }
597     
598     y->row = a->row;
599     y->col = x->col;
600     for (int i = 0; i < a->row; i++)
601     {
602         format sum = 0;
603         for (int j = 0; j < a->col; j++)        
604             sum += a->data[i * a->col + j] * x->data[j];                
605         y->data[i] = sum;        
606     }
607     COUNT_MAT(y);
608     return 0;
609 }
610 
611 int dotCPU(const CSR *a, const MAT *x, MAT *y)      // CPU CSR 乘法
612 {
613     checkNULL(a); checkNULL(x); checkNULL(y);
614     if (a->col != x->row)
615     {
616         printf("dotCSRCPU dimension mismatch!
");
617         return 1;
618     }
619     
620     y->row = a->row;
621     y->col = x->col;
622     for (int i = 0; i < a->row; i++)                            // i 遍历 ptr,j 遍历行内数据,A 中为 0 的元素不参加乘法
623     {
624         format sum = 0;
625         for (int j = a->ptr[i]; j < a->ptr[i + 1]; j++)
626             sum += a->data[j] * x->data[a->index[j]];
627         y->data[i] = sum;
628     }
629     COUNT_MAT(y);
630     return 0;
631 }
632 
633 int dotCPU(const ELL *a, const MAT *x, MAT *y)      // CPU ELL乘法
634 {
635     checkNULL(a); checkNULL(x); checkNULL(y);
636     if (a->colOrigin != x->row)
637     {
638         printf("dotELLCPU dimension mismatch!
");
639         return 1;
640     }
641 
642     y->row = a->col;
643     y->col = x->col;
644     for (int i = 0; i<a->col; i++)
645     {
646         format sum = 0;
647         for (int j = 0; j < a->row; j++)
648         {
649             int temp = a->index[j * a->col + i];
650             if (temp < 0)                                   // 跳过无效元素
651                 continue;
652             sum += a->data[j * a->col + i] * x->data[temp];
653         }
654         y->data[i] = sum;
655     }
656     COUNT_MAT(y);
657     return 0;
658 }
659 
660 int dotCPU(const COO *a, const MAT *x, MAT *y)// CPU COO乘法
661 {
662     checkNULL(a); checkNULL(x); checkNULL(y);
663     if (a->col != x->row)
664     {
665         printf("dotCOOCPU null!
");
666         return 1;
667     }
668 
669     y->row = a->row;
670     y->col = x->col;
671     for (int i = 0; i<a->count; i++)
672         y->data[a->rowIndex[i]] += a->data[i] * x->data[a->colIndex[i]];
673     COUNT_MAT(y);
674     return 0;
675 }
676 
677 int dotCPU(const DIA *a, const MAT *x, MAT *y)// CPU DIA乘法
678 {
679     checkNULL(a); checkNULL(x); checkNULL(y);
680     if (a->colOrigin != x->row)
681     {
682         printf("dotDIACPU null!
");
683         return 1;
684     }    
685     y->row = a->row;
686     y->col = x->col;
687     int * inverseIndex = (int *)malloc(sizeof(int) * a->col);
688     for (int i = 0, j = 0; i < a->row + a->col - 1; i++)
689     {
690         if (a->index[i] == 1)
691         {
692             inverseIndex[j] = i;
693             j++;
694         }
695     }
696     for (int i = 0; i < a->row; i++)
697     {
698         format sum = 0;
699         for (int j = 0; j < a->col; j++)
700         {
701             if (i < a->row - 1 - inverseIndex[j] || i > inverseIndex[a->col - 1] - inverseIndex[j])
702                 continue;
703             sum += a->data[i * a->col + j] * x->data[i + inverseIndex[j] - a->row + 1];
704         }
705         y->data[i] = sum;
706     }
707     COUNT_MAT(y);
708     free(inverseIndex);
709     return 0;
710 }
711 
712 __global__ void dotGPU(const MAT *a, const MAT *x, MAT *y)// GPU MAT乘法
713 {
714     int id = blockIdx.x * blockDim.x + threadIdx.x;
715     if (id < a->row)
716     {
717         format sum = 0;
718         for (int i = 0; i < a->col; i++)
719             sum += a->data[id * a->col + i] * x->data[i];
720         y->data[id] = sum;
721     }
722     if (id == 0)
723     {
724         y->row = a->row;
725         y->col = x->col;
726         COUNT_MAT(y);
727     }
728     return;
729 }
730 
731 __global__ void dotGPU(const CSR *a, const MAT *x, MAT *y)// GPU CSR乘法
732 {
733     int id = blockIdx.x * blockDim.x + threadIdx.x;
734     if (id < a->row)
735     {
736         format sum = 0;
737         for (int j = a->ptr[id]; j < a->ptr[id + 1]; j++)
738             sum += a->data[j] * x->data[a->index[j]];
739         y->data[id] = sum;
740     }
741     if (id == 0)
742     {
743         y->row = a->row;
744         y->col = x->col;
745         COUNT_MAT(y);
746     }
747     return;
748 }
749 
750 __global__ void dotGPU(const ELL *a, const MAT *x, MAT *y)// GPU ELL乘法
751 {
752     int id = blockIdx.x * blockDim.x + threadIdx.x;
753     if (id < a->col)
754     {
755         format sum = 0;
756         for (int j = 0; j < a->row; j++)            
757             sum += a->data[id + j * a->col] * (a->index[id + j * a->col] < 0 ? 0 : x->data[a->index[id + j * a->col]]);
758         y->data[id] = sum;
759     }
760     if (id == 0)
761     {
762         y->row = a->col;
763         y->col = x->col;
764         COUNT_MAT(y);
765     }
766     return;
767 }
768 
769 __global__ void dotGPU(const COO *a, const MAT *x, MAT *y)// GPU COO乘法
770 {
771     int id = blockIdx.x * blockDim.x + threadIdx.x;
772     if (id < a->count)
773         atomicAdd(&(y->data[a->rowIndex[id]]), a->data[id] * x->data[a->colIndex[id]]);
774     if (id == 0)
775     {
776         y->row = a->row;
777         y->col = x->col;
778         COUNT_MAT(y);
779     }
780     return;
781 }
782 
783 int test()// 测试矩阵打印和CPU计算的函数
784 {
785     const int row = 4, col = 5;
786     
787     MAT* a = initializeMAT(row, col);
788     a->data[0] = 3, a->data[2] = 1, a->data[4] = 5, a->data[11] = 2;
789     a->data[12] = 4, a->data[13] = 1, a->data[15] = 1, a->data[18] = 1;
790     COUNT_MAT(a);
791 
792     MAT* x = initializeMAT(col, 1);
793     for (int i = 0; i < x->row; i++)
794         x->data[i] = i + 1;
795     COUNT_MAT(x);
796 
797     MAT* y = initializeMAT(row, 1);
798     COUNT_MAT(y);
799 
800     print("MAT a =", a);    
801     print("MAT x =", x);
802     dotCPU(a, x, y);    
803     print("MAT y = a * x = ", y);
804    
805     CSR* b = MATToCSR(a);        
806     print("CSR b = a = ", b);
807     memset(y->data, 0, sizeof(format) * y->row * y->col);
808     dotCPU(b, x, y);    
809     print("MAT y = b * x = ", y);
810     MAT* c = CSRToMAT(b);
811     print("MAT c = b =", c);
812     freeMatrix(b);
813     
814     ELL* d = MATToELL(a);
815     print("ELL d = a =", d);
816     memset(y->data, 0, sizeof(format) * y->row * y->col);
817     dotCPU(d, x, y);
818     print("MAT y = d * x =", y);    
819     c = ELLToMAT(d);
820     print("MAT c = d =", c);
821     freeMatrix(d);
822 
823     COO* e = MATToCOO(a);
824     print("ELL e = a = ", e);
825     memset(y->data, 0, sizeof(format) * y->row * y->col);
826     dotCPU(e, x, y);
827     print("MAT y = e * x = ", y);    
828     c = COOToMAT(e);
829     print("MAT c = e =", c);
830     freeMatrix(e);
831    
832     DIA* f = MATToDIA(a);
833     print("DIA f = a = ", f);
834     memset(y->data, 0, sizeof(format) * y->row * y->col);
835     dotCPU(f, x, y);
836     print("MAT y = f * x = ", y);
837     c = DIAToMAT(f);
838     print("MAT c = f =", c);
839     freeMatrix(f);
840 
841     freeMatrix(a);
842     freeMatrix(x);
843     freeMatrix(y);   
844     freeMatrix(c);    
845     return 0;
846 }
847 
848 int main()
849 {
850     test();
851    
852     clock_t timeCPU;
853     cudaEvent_t startGPU, stopGPU;
854     float timeGPU;
855     cudaEventCreate(&startGPU);
856     cudaEventCreate(&stopGPU);
857 
858     printf("
	Start. Matrix dimension:	%d × %d", ROW, COL);
859 
860     // 初始化
861     timeCPU = clock();
862     aMAT = initializeMAT(ROW, COL);
863     xMAT = initializeMAT(COL, 1);
864     yMATRef = initializeMAT(ROW, 1);
865     yMATCal = initializeMAT(ROW, 1);
866 
867     srand(SEED);
868 #ifdef INT
869     for (int i = 0; i < COL * ROW; i++)
870         aMAT->data[i] = ((float)rand() < RAND_MAX * RATIO) ? (rand() - RAND_MAX / 2) % 32 : 0;
871     COUNT_MAT(aMAT);
872     int count = aMAT->count;
873     for (int i = 0; i < COL; i++)
874         xMAT->data[i] = i % 32;
875     COUNT_MAT(xMAT);
876 #else
877     for (int i = 0; i < COL * ROW; i++)
878         aMAT->data[i] = ((float)rand() < RAND_MAX * RATIO) ? ((float)rand() / RAND_MAX) : 0;
879     aMAT->count = countMAT(aMAT);
880     for (int i = 0; i < COL; i++)
881         xMAT->data[i] = ((float)rand() / RAND_MAX);
882     xMAT->count = countMAT(xMAT);
883 #endif
884     printf("
	Initialized. Time:		%d ms
", clock() - timeCPU);
885 
886     //CPU计算部分
887     //MAT 格式
888     timeCPU = clock();
889     dotCPU(aMAT, xMAT, yMATRef);
890     timeCPU = clock() - timeCPU;
891     printf("
	dotMATCPU time:	%8.3f ms
", (float)timeCPU);
892 
893     // CSR格式                    
894     aCSR = MATToCSR(aMAT);
895     timeCPU = clock();
896     dotCPU(aCSR, xMAT, yMATCal);
897     timeCPU = clock() - timeCPU;
898     printf("
	dotCSRCPU time:	%8.3f ms
", (float)timeCPU);
899     checkEqual(yMATRef->data, yMATCal->data, ROW);    
900 
901     // ELL格式
902     aELL = MATToELL(aMAT);
903     timeCPU = clock();
904     dotCPU(aELL, xMAT, yMATCal);
905     timeCPU = clock() - timeCPU;
906     printf("
	dotELLCPU time:	%8.3f ms
", (float)timeCPU);
907     checkEqual(yMATRef->data, yMATCal->data, ROW);    
908 
909     // COO格式
910     aCOO = MATToCOO(aMAT);
911     timeCPU = clock();
912     dotCPU(aCOO, xMAT, yMATCal);
913     timeCPU = clock() - timeCPU;
914     printf("
	dotCOOCPU time:	%8.3f ms
", (float)timeCPU);
915     checkEqual(yMATRef->data, yMATCal->data, ROW);
916 
917     // GPU计算部分
918     // MAT格式
919     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
920     yMATCal->count = 0;
921     cudaEventRecord(startGPU, 0);
922     dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aMAT, xMAT, yMATCal);
923     cudaDeviceSynchronize();                                                  
924     cudaEventRecord(stopGPU, 0);                                              
925     cudaEventSynchronize(stopGPU);
926     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
927     printf("
	dotMATGPU time:	%8.3f ms
", timeGPU);
928     checkEqual(yMATRef->data, yMATCal->data, ROW);
929     freeMatrix(aMAT);
930 
931     // CSR格式
932     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
933     yMATCal->count = 0;
934     cudaEventRecord(startGPU, 0);
935     dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aCSR, xMAT, yMATCal);
936     cudaDeviceSynchronize();
937     cudaEventRecord(stopGPU, 0);
938     cudaEventSynchronize(stopGPU);
939     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
940     printf("
	dotCSRGPU time:	%8.3f ms
", timeGPU);
941     checkEqual(yMATRef->data, yMATCal->data, ROW);
942     freeMatrix(aCSR);
943 
944     // ELL格式
945     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
946     yMATCal->count = 0;
947     cudaEventRecord(startGPU, 0);
948     dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aELL, xMAT, yMATCal);
949     cudaDeviceSynchronize();
950     cudaEventRecord(stopGPU, 0);
951     cudaEventSynchronize(stopGPU);
952     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
953     printf("
	dotELLGPU time:	%8.3f ms
", timeGPU);
954     checkEqual(yMATRef->data, yMATCal->data, ROW);
955     freeMatrix(aELL);
956 
957     // COO格式
958     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
959     yMATCal->count = 0;
960     cudaEventRecord(startGPU, 0);
961     dotGPU << <CEIL(count, THREAD_SIZE), THREAD_SIZE >> > (aCOO, xMAT, yMATCal);
962     cudaDeviceSynchronize();
963     cudaEventRecord(stopGPU, 0);
964     cudaEventSynchronize(stopGPU);
965     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
966     printf("
	dotCOOGPU time:	%8.3f ms
", timeGPU);
967     checkEqual(yMATRef->data, yMATCal->data, ROW);
968     freeMatrix(aCOO);
969 
970     // 清理内存    
971     freeMatrix(xMAT);
972     freeMatrix(yMATRef);
973     freeMatrix(yMATCal); 
974 
975     getchar();
976     return 0;
977 }

● 输出结果

MAT a =
        MAT,
        row = 4, col = 5, count = 8
        3 0 1 0 5
        0 0 0 0 0
        0 2 4 1 0
        1 0 0 1 0

MAT x =
        MAT,
        row = 5, col = 1, count = 5
        1
        2
        3
        4
        5

MAT y = a * x =
        MAT,
        row = 4, col = 1, count = 3
        31
        0
        20
        5

CSR b = a =
        CSR,
        row = 4, col = 5
        3 1 5 2 4 1 1 1
        0 2 4 1 2 3 0 3
        0 3 3 6 8

MAT y = b * x =
        MAT,
        row = 4, col = 1, count = 3
        31
        0
        20
        5

MAT c = b =
        MAT,
        row = 4, col = 5, count = 8
        3 0 1 0 5
        0 0 0 0 0
        0 2 4 1 0
        1 0 0 1 0

ELL d = a =
        ELL,
        row = 3, col = 4, colOrigin = 5
        3 0 2 1
        1 0 4 1
        5 0 1 0

        0 0 1 0
        2 0 2 3
        4 -1 3 0


MAT y = d * x =
        MAT,
        row = 4, col = 1, count = 3
        31
        0
        20
        5

MAT c = d =
        MAT,
        row = 4, col = 5, count = 7
        3 0 1 0 5
        0 0 0 0 0
        0 2 4 1 0
        0 0 0 1 0

ELL e = a =
        COO,
        row = 4, col = 5, count = 8
        (0,0,3)
        (0,2,1)
        (0,4,5)
        (2,1,2)
        (2,2,4)
        (2,3,1)
        (3,0,1)
        (3,3,1)

MAT y = e * x
        MAT,
        row = 4, col = 1, count = 3
        31
        0
        20
        5

MAT c = e =
        MAT,
        row = 4, col = 5, count = 8
        3 0 1 0 5
        0 0 0 0 0
        0 2 4 1 0
        1 0 0 1 0


        Start. Matrix dimension:        8192 × 8192
        Initialized. Time:                 0.000 ms

        dotMATCPU time:  304.000 ms

        dotCSRCPU time:    3.000 ms

        Check output, passed.

        dotCELLPU time:  103.000 ms

        Check output, passed.

        dotCOOCPU time:   11.000 ms

        Check output, passed.

        dotMATGPU time:    5.133 ms

        Check output, passed.

        dotCSRGPU time:    2.263 ms

        Check output, passed.

        dotELLGPU time:    1.253 ms

        Check output, passed.

        dotCOOGPU time:    4.754 ms

        Check output, passed.

 

以上是关于稀疏矩阵及其压缩格式的主要内容,如果未能解决你的问题,请参考以下文章

以压缩稀疏行格式 (csr_matrix) 对矩阵中的值取对数

稀疏矩阵压缩存储:CSR

如何将块压缩行转换为密集矩阵?

稀疏矩阵的压缩与还原

稀疏矩阵的压缩存储思想?

稀疏矩阵压缩