A Brief Bloom Filter(英文标题唬人罢了)

Posted 25th-engineer

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了A Brief Bloom Filter(英文标题唬人罢了)相关的知识,希望对你有一定的参考价值。

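       控制台字体颜色的设置参考了 https://blog.csdn.net/key_point/article/details/52667273 这篇博文。代码使用位图(BitMap)配合5个字符串哈希函数实现了一个简易布隆过滤器,过滤器中存储了C++的所有关键字,用于查询操作。注意:下文没有列出 BitMap.c,其中的 BitMapInit、BitMapSet、BitMapTest、BitMapDestroy 需要另行实现。详解待后续文章,先上代码!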

       头文件BitMap.h:

 1 /*BitMap.h*/
 2 
 3 #ifndef BITMAP_H_INCLUDED
 4 #define BITMAP_H_INCLUDED
 5 
 6 #include <stdio.h>
 7 #include <assert.h>
 8 #include <stdlib.h>
 9 #include <windows.h>
10 
/*
 * Bit array used as the backing store of the Bloom filter.
 *
 * The functions that operate on it (BitMapInit, BitMapSet, BitMapTest,
 * BitMapDestroy) are called from BloomFilter.c but are not declared in this
 * header as listed.
 * NOTE(review): add their prototypes here once their signatures are confirmed.
 */
typedef struct BitMap
{
    size_t* _bits;   /* storage words; presumably allocated by BitMapInit - confirm */
    size_t _range;   /* BloomFilter.c reduces every hash value modulo this field */
} BitMap;
16 
17 #endif // BITMAP_H_INCLUDED

       头文件BloomFilter.h:

 1 /*BloomFilter.h*/
 2 
 3 #ifndef BLOOMFILTER_H_INCLUDED
 4 #define BLOOMFILTER_H_INCLUDED
 5 
 6 #include "BitMap.h"
 7 
 /* Pointer to a string hash function: takes a NUL-terminated string and
  * returns its hash value as a size_t. */
 typedef size_t (*HASH_FUNC)(const char* str);
 9 
10 typedef struct                     /*5个哈希函数*/
11 {
12     BitMap _bm;
13     HASH_FUNC hashfunc1;
14     HASH_FUNC hashfunc2;
15     HASH_FUNC hashfunc3;
16     HASH_FUNC hashfunc4;
17     HASH_FUNC hashfunc5;
18 } BloomFilter;
19 
20 void BloomFilterInit(BloomFilter* bf, size_t range);
21 void BloomFilterSet(BloomFilter* bf, const char* x);
22 void BloomFilterReset(BloomFilter* bf, const char* x);
23 void BloomFilterTest();
24 void BloomFilterDestory(BloomFilter* bf);
25 
26 
27 
28 #endif // BLOOMFILTER_H_INCLUDED

       源文件BloomFilter.c:

  1 /*BloomFilter.c*/
  2 
  3 #include "BloomFilter.h"
  4 
  /*
   * BKDR string hash: folds every byte into the accumulator with multiplier
   * 131 and returns the low 31 bits of the result.
   *
   * str: NUL-terminated string to hash; must not be NULL.
   *
   * Bytes are read as unsigned char so that the hash of non-ASCII input does
   * not depend on whether plain char is signed on the target platform.
   */
  static size_t BKDRHash(const char* str)
  {
      const size_t seed = 131;  /* other common seeds: 31, 1313, 13131, 131313 */
      size_t hash = 0;
      for (const unsigned char* p = (const unsigned char*)str; *p != '\0'; ++p)
      {
          hash = hash * seed + *p;
      }
      return hash & 0x7FFFFFFF;
  }
 15 
 /*
  * SDBM string hash: hash = 65599 * hash + byte for every byte of the string.
  * Arithmetic is done in size_t and wraps on overflow.
  *
  * str: NUL-terminated string to hash; must not be NULL.
  *
  * Bytes are read as unsigned char so that the result for non-ASCII input is
  * the same whether plain char is signed or unsigned.
  */
 size_t SDBMHash(const char* str)
 {
     const unsigned char* p = (const unsigned char*)str;
     size_t hash = 0;
     while (*p != '\0')
     {
         /* same value as byte + (hash << 6) + (hash << 16) - hash */
         hash = 65599 * hash + *p++;
     }
     return hash;
 }
 27 
 /*
  * RS string hash: hash = hash * magic + byte, where magic starts at 63689
  * and is multiplied by 378551 after every byte. Arithmetic is done in size_t
  * and wraps on overflow.
  *
  * str: NUL-terminated string to hash; must not be NULL.
  *
  * Bytes are read as unsigned char so that the result for non-ASCII input is
  * the same whether plain char is signed or unsigned.
  */
 size_t RSHash(const char* str)
 {
     const unsigned char* p = (const unsigned char*)str;
     size_t hash = 0;
     size_t magic = 63689;
     while (*p != '\0')
     {
         hash = hash * magic + *p++;
         magic *= 378551;
     }
     return hash;
 }
 40 
 /*
  * Multiplicative string hash: hash = hash * magic + byte, where magic starts
  * at 61456 and is multiplied by 45616 after every byte. Arithmetic is done in
  * size_t and wraps on overflow.
  *
  * str: NUL-terminated string to hash; must not be NULL.
  *
  * Bytes are read as unsigned char so that the result for non-ASCII input is
  * the same whether plain char is signed or unsigned.
  */
 size_t FHash(const char *str)
 {
     const unsigned char *p = (const unsigned char *)str;
     size_t hash = 0;
     size_t magic = 61456;
     while (*p != '\0')
     {
         hash = hash * magic + *p++;
         magic *= 45616;
     }
     return hash;
 }
 53 
 /*
  * Multiplicative string hash: hash = hash * magic + byte, where magic starts
  * at 60000 and is multiplied by 111 after every byte. Arithmetic is done in
  * size_t and wraps on overflow.
  *
  * str: NUL-terminated string to hash; must not be NULL.
  *
  * Bytes are read as unsigned char so that the result for non-ASCII input is
  * the same whether plain char is signed or unsigned.
  */
 size_t FiHash(const char *str)
 {
     const unsigned char *p = (const unsigned char *)str;
     size_t hash = 0;
     size_t magic = 60000;
     while (*p != '\0')
     {
         hash = hash * magic + *p++;
         magic *= 111;
     }
     return hash;
 }
 66 
 67 void BloomFilterInit(BloomFilter* bf, size_t range)
 68 {
 69     assert(bf);
 70     BitMapInit(&bf->_bm, range);
 71     bf->hashfunc1 = BKDRHash;
 72     bf->hashfunc2 = SDBMHash;
 73     bf->hashfunc3 = RSHash;
 74     bf->hashfunc4 = FHash;
 75     bf->hashfunc5 = FiHash;
 76 }
 77 
 78 void BloomFilterSet(BloomFilter* bf, const char* x)
 79 {
 80     size_t hash1, hash2, hash3, hash4, hash5;
 81     hash1 = bf->hashfunc1(x) % bf->_bm._range;
 82     hash2 = bf->hashfunc2(x) % bf->_bm._range;
 83     hash3 = bf->hashfunc3(x) % bf->_bm._range;
 84     hash4 = bf->hashfunc4(x) % bf->_bm._range;
 85     hash5 = bf->hashfunc5(x) % bf->_bm._range;
 86     BitMapSet(&bf->_bm, hash1);
 87     BitMapSet(&bf->_bm, hash2);
 88     BitMapSet(&bf->_bm, hash3);
 89     BitMapSet(&bf->_bm, hash4);
 90     BitMapSet(&bf->_bm, hash5);
 91 }
 92 int BloomFilterFind(BloomFilter* bf, const char* x)
 93 {
 94 
 95     size_t hash1, hash2, hash3, hash4, hash5;
 96     hash1 = bf->hashfunc1(x) % bf->_bm._range;
 97     hash2 = bf->hashfunc2(x) % bf->_bm._range;
 98     hash3 = bf->hashfunc3(x) % bf->_bm._range;
 99     hash4 = bf->hashfunc4(x) % bf->_bm._range;
100     hash5 = bf->hashfunc5(x) % bf->_bm._range;
101     if (BitMapTest(&bf->_bm, hash1) == -1)
102         return -1;
103     if (BitMapTest(&bf->_bm, hash2) == -1)
104         return -1;
105     if (BitMapTest(&bf->_bm, hash3) == -1)
106         return -1;
107     if (BitMapTest(&bf->_bm, hash4) == -1)
108         return -1;
109     if (BitMapTest(&bf->_bm, hash5) == -1)
110         return -1;
111     return 0;
112 }
113 
114 void BloomFilterDestory(BloomFilter* bf)
115 {
116     assert(bf);
117     BitMapDestroy(&bf->_bm);
118 }

       源文件test.c:

 1 /*test.c*/
 2 
 3 #include "BloomFilter.h"
 4 
 5 int main()
 6 {
 7     int n;
 8     BloomFilter bf;
 9     char str[50000];
10     FILE *out, *in;
11     out = fopen("关键字.txt", "r");
12     BloomFilterInit(&bf, 10000);
13     while (!feof(out))
14     {
15 
16         fscanf(out, "%s", str);
17         /*  printf("%s
",str);*/
18         BloomFilterSet(&bf, str);
19     }
20     printf("                      ****************请输入你要执行的操作************************
");
21     printf("                      ****************1.查询是否存在该关键字***********************
");
22     printf("                      ****************2.贮存关键字*********************************
");
23     printf("                      ****************3.结束此次操作*******************************
");
24     while (scanf("%d", &n) != EOF)
25     {
26         if (n == 3)
27             break;
28         if (n == 1)
29         {
30             printf("请输入你要查询的代码,按#回到主界面
");
31             while (1)
32             {
33 
34                 scanf("%s", str);
35                 if (str[0] == #)
36                     break;
37 
38                 if (BloomFilterFind(&bf, str) == 0)
39                 {
40                     printf("该关键字(");
41                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
42                              FOREGROUND_BLUE |FOREGROUND_INTENSITY );
43                     printf("%s", str);
44                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
45                              FOREGROUND_RED |FOREGROUND_GREEN | FOREGROUND_BLUE);
46                     printf(")在表中!
");
47                 }
48                 else
49                 {
50                     printf("该关键字(");
51                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
52                              FOREGROUND_RED |FOREGROUND_INTENSITY );
53                     printf("%s",str);
54                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
55                              FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE);
56                     printf(")不在表中,请检查是否拼写错误!或者将其加入表中!
");
57                 }
58             }
59         }
60         if (n == 2)
61         {
62             while (1)
63             {
64                 printf("请输入你要保存的关键字
按0退出
");
65                 scanf("%s", str);
66                 if (str[0] == 0)
67                     break;
68                 in = fopen("关键字.txt", "a");
69                 fprintf(in, "
%s", str);
70             }
71         }
72     }
73     return 0;
74 }

       程序运行截图如下:

       技术分享图片

以上是关于A Brief Bloom Filter(英文标题唬人罢了)的主要内容,如果未能解决你的问题,请参考以下文章

url去重 --布隆过滤器 bloom filter原理及python实现

布隆过滤器+布隆过滤器(Bloom Filter)详解

Bloom filter(布隆过滤器)概念与原理

Bloom Filter

Bloom-Filter

RocksDB系列八:Bloom Filter