A Brief Bloom Filter(英文标题唬人罢了)
Posted 25th-engineer
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了A Brief Bloom Filter(英文标题唬人罢了)相关的知识,希望对你有一定的参考价值。
控制台字体颜色参考了 https://blog.csdn.net/key_point/article/details/52667273 这篇博文。代码使用位图(BitMap)和5个哈希函数实现了一个简易布隆过滤器,过滤器中存储了C++的所有关键字用于查询操作!详解待后续文章,先上代码!
头文件BitMap.h:
1 /*BitMap.h*/
2
3 #ifndef BITMAP_H_INCLUDED
4 #define BITMAP_H_INCLUDED
5
6 #include <stdio.h>
7 #include <assert.h>
8 #include <stdlib.h>
9 #include <windows.h>
10
/*
 * Bit set used as the backing store of the Bloom filter.
 *
 * _bits  : pointer to the storage words (presumably allocated by BitMapInit
 *          and released by BitMapDestroy -- confirm against BitMap.c).
 * _range : value the Bloom filter reduces every hash by (hash % _range), so
 *          bit positions passed to the bitmap are always below _range.
 */
typedef struct BitMap
{
    size_t* _bits;
    size_t _range;
} BitMap;
16
17 #endif // BITMAP_H_INCLUDED
头文件BloomFilter.h:
1 /*BloomFilter.h*/
2
3 #ifndef BLOOMFILTER_H_INCLUDED
4 #define BLOOMFILTER_H_INCLUDED
5
6 #include "BitMap.h"
7
/* Signature shared by every string hash function plugged into the Bloom filter:
 * takes a NUL-terminated string and returns its hash value. */
typedef size_t (*HASH_FUNC)(const char* str);
9
10 typedef struct /*5个哈希函数*/
11 {
12 BitMap _bm;
13 HASH_FUNC hashfunc1;
14 HASH_FUNC hashfunc2;
15 HASH_FUNC hashfunc3;
16 HASH_FUNC hashfunc4;
17 HASH_FUNC hashfunc5;
18 } BloomFilter;
19
20 void BloomFilterInit(BloomFilter* bf, size_t range);
21 void BloomFilterSet(BloomFilter* bf, const char* x);
22 void BloomFilterReset(BloomFilter* bf, const char* x);
23 void BloomFilterTest();
24 void BloomFilterDestory(BloomFilter* bf);
25
26
27
28 #endif // BLOOMFILTER_H_INCLUDED
源文件BloomFilter.c:
1 /*BloomFilter.c*/
2
3 #include "BloomFilter.h"
4
/*
 * BKDR string hash: hash = hash * 131 + byte, for every byte of `str`.
 *
 * Bytes are read as unsigned char so that strings containing bytes >= 0x80
 * hash to the same value whether plain `char` is signed or unsigned.
 * The result is masked to its low 31 bits.
 */
static size_t BKDRHash(const char* str)
{
    const unsigned char* p = (const unsigned char*)str;
    const size_t seed = 131; /* 31 131 1313 13131 131313 */
    size_t hash = 0;

    while (*p != '\0')
    {
        hash = hash * seed + *p++;
    }
    return hash & 0x7FFFFFFF;
}
15
/*
 * SDBM string hash: hash = 65599 * hash + byte, for every byte of `str`.
 *
 * Bytes are read as unsigned char so the result does not depend on the
 * signedness of plain `char`. Arithmetic wraps modulo the width of size_t.
 */
size_t SDBMHash(const char* str)
{
    const unsigned char* p = (const unsigned char*)str;
    size_t hash = 0;

    for (; *p != '\0'; ++p)
    {
        hash = 65599 * hash + *p;
    }
    return hash;
}
27
/*
 * RS string hash: hash = hash * magic + byte, where `magic` starts at 63689
 * and is multiplied by 378551 after every byte.
 *
 * Bytes are read as unsigned char so the result does not depend on the
 * signedness of plain `char`. Arithmetic wraps modulo the width of size_t.
 */
size_t RSHash(const char* str)
{
    const unsigned char* p = (const unsigned char*)str;
    size_t hash = 0;
    size_t magic = 63689;

    for (; *p != '\0'; ++p)
    {
        hash = hash * magic + *p;
        magic *= 378551;
    }
    return hash;
}
40
/*
 * Multiplicative string hash: hash = hash * magic + byte, where `magic`
 * starts at 61456 and is multiplied by 45616 after every byte.
 *
 * Bytes are read as unsigned char so the result does not depend on the
 * signedness of plain `char`. Arithmetic wraps modulo the width of size_t.
 */
size_t FHash(const char *str)
{
    const unsigned char* p = (const unsigned char*)str;
    size_t hash = 0;
    size_t magic = 61456;

    for (; *p != '\0'; ++p)
    {
        hash = hash * magic + *p;
        magic *= 45616;
    }
    return hash;
}
53
/*
 * Multiplicative string hash: hash = hash * magic + byte, where `magic`
 * starts at 60000 and is multiplied by 111 after every byte.
 *
 * Bytes are read as unsigned char so the result does not depend on the
 * signedness of plain `char`. Arithmetic wraps modulo the width of size_t.
 */
size_t FiHash(const char *str)
{
    const unsigned char* p = (const unsigned char*)str;
    size_t hash = 0;
    size_t magic = 60000;

    for (; *p != '\0'; ++p)
    {
        hash = hash * magic + *p;
        magic *= 111;
    }
    return hash;
}
66
67 void BloomFilterInit(BloomFilter* bf, size_t range)
68 {
69 assert(bf);
70 BitMapInit(&bf->_bm, range);
71 bf->hashfunc1 = BKDRHash;
72 bf->hashfunc2 = SDBMHash;
73 bf->hashfunc3 = RSHash;
74 bf->hashfunc4 = FHash;
75 bf->hashfunc5 = FiHash;
76 }
77
78 void BloomFilterSet(BloomFilter* bf, const char* x)
79 {
80 size_t hash1, hash2, hash3, hash4, hash5;
81 hash1 = bf->hashfunc1(x) % bf->_bm._range;
82 hash2 = bf->hashfunc2(x) % bf->_bm._range;
83 hash3 = bf->hashfunc3(x) % bf->_bm._range;
84 hash4 = bf->hashfunc4(x) % bf->_bm._range;
85 hash5 = bf->hashfunc5(x) % bf->_bm._range;
86 BitMapSet(&bf->_bm, hash1);
87 BitMapSet(&bf->_bm, hash2);
88 BitMapSet(&bf->_bm, hash3);
89 BitMapSet(&bf->_bm, hash4);
90 BitMapSet(&bf->_bm, hash5);
91 }
92 int BloomFilterFind(BloomFilter* bf, const char* x)
93 {
94
95 size_t hash1, hash2, hash3, hash4, hash5;
96 hash1 = bf->hashfunc1(x) % bf->_bm._range;
97 hash2 = bf->hashfunc2(x) % bf->_bm._range;
98 hash3 = bf->hashfunc3(x) % bf->_bm._range;
99 hash4 = bf->hashfunc4(x) % bf->_bm._range;
100 hash5 = bf->hashfunc5(x) % bf->_bm._range;
101 if (BitMapTest(&bf->_bm, hash1) == -1)
102 return -1;
103 if (BitMapTest(&bf->_bm, hash2) == -1)
104 return -1;
105 if (BitMapTest(&bf->_bm, hash3) == -1)
106 return -1;
107 if (BitMapTest(&bf->_bm, hash4) == -1)
108 return -1;
109 if (BitMapTest(&bf->_bm, hash5) == -1)
110 return -1;
111 return 0;
112 }
113
114 void BloomFilterDestory(BloomFilter* bf)
115 {
116 assert(bf);
117 BitMapDestroy(&bf->_bm);
118 }
源文件test.c:
1 /*test.c*/
2
3 #include "BloomFilter.h"
4
5 int main()
6 {
7 int n;
8 BloomFilter bf;
9 char str[50000];
10 FILE *out, *in;
11 out = fopen("关键字.txt", "r");
12 BloomFilterInit(&bf, 10000);
13 while (!feof(out))
14 {
15
16 fscanf(out, "%s", str);
17 /* printf("%s
",str);*/
18 BloomFilterSet(&bf, str);
19 }
20 printf(" ****************请输入你要执行的操作************************
");
21 printf(" ****************1.查询是否存在该关键字***********************
");
22 printf(" ****************2.贮存关键字*********************************
");
23 printf(" ****************3.结束此次操作*******************************
");
24 while (scanf("%d", &n) != EOF)
25 {
26 if (n == 3)
27 break;
28 if (n == 1)
29 {
30 printf("请输入你要查询的代码,按#回到主界面
");
31 while (1)
32 {
33
34 scanf("%s", str);
35 if (str[0] == ‘#‘)
36 break;
37
38 if (BloomFilterFind(&bf, str) == 0)
39 {
40 printf("该关键字(");
41 SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
42 FOREGROUND_BLUE |FOREGROUND_INTENSITY );
43 printf("%s", str);
44 SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
45 FOREGROUND_RED |FOREGROUND_GREEN | FOREGROUND_BLUE);
46 printf(")在表中!
");
47 }
48 else
49 {
50 printf("该关键字(");
51 SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
52 FOREGROUND_RED |FOREGROUND_INTENSITY );
53 printf("%s",str);
54 SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
55 FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE);
56 printf(")不在表中,请检查是否拼写错误!或者将其加入表中!
");
57 }
58 }
59 }
60 if (n == 2)
61 {
62 while (1)
63 {
64 printf("请输入你要保存的关键字
按0退出
");
65 scanf("%s", str);
66 if (str[0] == ‘0‘)
67 break;
68 in = fopen("关键字.txt", "a");
69 fprintf(in, "
%s", str);
70 }
71 }
72 }
73 return 0;
74 }
程序运行截图如下:
以上是关于A Brief Bloom Filter(英文标题唬人罢了)的主要内容,如果未能解决你的问题,请参考以下文章