记录几个经典的字符串hash算法
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了记录几个经典的字符串hash算法相关的知识,希望对你有一定的参考价值。
记录几个经典的字符串hash算法,方便以后查看:
推荐一篇文章:
http://www.partow.net/programming/hashfunctions/#
(1)字符串hash算法 暴雪(Blizzard)MPQ hash
/*
 * Blizzard (MPQ-style) string hash demo.
 *
 * Every line of the file "urllist" is hashed into a large open-addressing
 * table.  A slot does not store the string itself: it stores two additional
 * hashes of the string (A and B) that act as a fingerprint, while a third
 * hash (OFFSET) selects the slot where probing starts.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>

#define MAXTABLELEN 102400000

/* One slot of the hash index table. */
typedef struct _HASHTABLE
{
    long nHashA;   /* fingerprint hash, type HASH_A */
    long nHashB;   /* fingerprint hash, type HASH_B */
    bool bExists;  /* true once the slot is occupied */
}HASHTABLE, *PHASHTABLE;

/* Hash types passed to HashString(); each selects a 0x100-entry sub-table. */
enum { HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2 };

const unsigned long nTableLength = MAXTABLELEN;
unsigned long m_tablelength;      /* number of slots in m_HashIndexTable (0 = not initialised) */
HASHTABLE *m_HashIndexTable;
unsigned long cryptTable[0x500];  /* 5 sub-tables of 0x100 entries, filled by InitCryptTable() */

int collc = 0;   /* number of inserts rejected because the table was full */
int errstr = 0;  /* number of lookups that did not find their string */

/*
 * Fill cryptTable with the pseudo-random constants used by HashString().
 * Must be called once before the first call to HashString().
 * Every entry is built from two 16-bit halves, so it always fits in 32 bits.
 */
void InitCryptTable(void)
{
    uint32_t seed = 0x00100001;

    for (unsigned long index1 = 0; index1 < 0x100; index1++) {
        unsigned long index2 = index1;

        for (int i = 0; i < 5; i++, index2 += 0x100) {
            seed = (seed * 125 + 3) % 0x2AAAAB;
            uint32_t high = (seed & 0xFFFF) << 0x10;

            seed = (seed * 125 + 3) % 0x2AAAAB;
            uint32_t low = seed & 0xFFFF;

            cryptTable[index2] = high | low;
        }
    }
}

/*
 * HashString - compute the hash of a NUL-terminated string.
 *
 * lpszString: string to hash (must not be NULL); letters are upper-cased
 *             with toupper() first, so the hash is case-insensitive.
 * dwHashType: selects the cryptTable sub-table; must be in 0..4, larger
 *             values would index past the end of cryptTable.
 *
 * Returns the hash value.  All arithmetic is done in 32 bits so the result
 * is identical whether unsigned long is 32 or 64 bits wide; with plain
 * unsigned long arithmetic the sums carry into the upper half on LP64.
 */
unsigned long HashString(const char *lpszString, unsigned long dwHashType)
{
    const unsigned char *key = (const unsigned char *)lpszString;
    uint32_t seed1 = 0x7FED7FED;
    uint32_t seed2 = 0xEEEEEEEE;

    while (*key != 0) {
        /* key is unsigned char, so the argument is always valid for toupper() */
        uint32_t ch = (uint32_t)toupper(*key++);

        seed1 = (uint32_t)cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
        seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
    }
    return seed1;
}

/*
 * Hashed - check whether a string has already been inserted with Hash().
 *
 * Probing starts at the OFFSET hash and walks linearly until an empty slot
 * is met or the whole table has been visited.  A slot matches when both
 * fingerprint hashes are equal.
 *
 * Returns the slot index on success.  On failure (including an
 * uninitialised table) increments errstr and returns (unsigned long)-1.
 */
unsigned long Hashed(const char *lpszString)
{
    /* Without a table there is nothing to find; also avoids "% 0" below. */
    if (m_HashIndexTable == NULL || m_tablelength == 0) {
        errstr++;
        return (unsigned long)-1;
    }

    unsigned long nHash = HashString(lpszString, HASH_OFFSET);
    unsigned long nHashA = HashString(lpszString, HASH_A);
    unsigned long nHashB = HashString(lpszString, HASH_B);
    unsigned long nHashStart = nHash % m_tablelength;
    unsigned long nHashPos = nHashStart;

    while (m_HashIndexTable[nHashPos].bExists) {
        const HASHTABLE *slot = &m_HashIndexTable[nHashPos];

        if ((unsigned long)slot->nHashA == nHashA &&
            (unsigned long)slot->nHashB == nHashB) {
            return nHashPos;
        }

        nHashPos = (nHashPos + 1) % m_tablelength;
        if (nHashPos == nHashStart) {
            break;  /* wrapped around: every slot was inspected */
        }
    }
    errstr++;

    return (unsigned long)-1;  /* not found */
}

/*
 * Hash - insert a string into the hash index table.
 *
 * The first free slot at or after the OFFSET hash position receives the two
 * fingerprint hashes.  No duplicate check is made: inserting the same string
 * twice occupies two slots.
 *
 * Returns true on success.  Returns false when the table is uninitialised,
 * or when it is full (in which case collc is incremented).
 */
bool Hash(const char *lpszString)
{
    if (m_HashIndexTable == NULL || m_tablelength == 0) {
        return false;
    }

    unsigned long nHash = HashString(lpszString, HASH_OFFSET);
    unsigned long nHashA = HashString(lpszString, HASH_A);
    unsigned long nHashB = HashString(lpszString, HASH_B);
    unsigned long nHashStart = nHash % m_tablelength;
    unsigned long nHashPos = nHashStart;

    while (m_HashIndexTable[nHashPos].bExists) {
        nHashPos = (nHashPos + 1) % m_tablelength;
        if (nHashPos == nHashStart) {  /* went all the way round */
            collc++;
            /* no free slot left in the table, the string cannot be stored */
            return false;
        }
    }
    m_HashIndexTable[nHashPos].bExists = true;
    m_HashIndexTable[nHashPos].nHashA = (long)nHashA;
    m_HashIndexTable[nHashPos].nHashB = (long)nHashB;

    return true;
}

/*
 * InitHashTable - build cryptTable and allocate an empty index table of
 * nTableLength slots.  A table left over from an earlier call is released
 * first.
 *
 * Returns 0 on success.  On allocation failure prints a message, leaves
 * m_HashIndexTable NULL and m_tablelength 0, and returns -1.
 */
int InitHashTable(void)
{
    InitCryptTable();

    free(m_HashIndexTable);
    m_tablelength = 0;

    /* calloc zero-fills, which leaves every slot with bExists == false */
    m_HashIndexTable = calloc(nTableLength, sizeof *m_HashIndexTable);
    if (NULL == m_HashIndexTable) {
        printf("Init HashTable failure!!\n");
        return -1;
    }
    m_tablelength = nTableLength;

    return 0;
}

/*
 * do_test - hash every line of the file "urllist" and print how many lines
 * were read.  Lines longer than the 2048-byte buffer are read (and counted)
 * in several pieces.  Returns silently if the file cannot be opened or the
 * table cannot be allocated.
 */
void do_test(void)
{
    int count = 0;
    char url[2048] = {0};

    /* the file is only read, so do not ask for write access */
    FILE *fp = fopen("urllist", "rb");
    if (NULL == fp) {
        return;
    }

    if (InitHashTable()) {
        fclose(fp);
        return;
    }

    /*
     * Loop on the fgets() result: feof() only turns true after a read has
     * already failed, so testing it first would hash the stale buffer once
     * more at end of file.
     */
    while (fgets(url, sizeof url, fp) != NULL) {
        /* drop the line terminator so the key is the URL text only */
        url[strcspn(url, "\r\n")] = '\0';
        Hash(url);  /* a full table is recorded in collc by Hash() itself */
        count++;
    }

    printf("count: %d\n", count);

    fclose(fp);
}

/* test main */
int main(void)
{
    do_test();

    printf("conflict: %d\n", collc);
    printf("not find: %d\n", errstr);

    free(m_HashIndexTable);
    m_HashIndexTable = NULL;
    m_tablelength = 0;

    return 0;
}
(2)字符串hash算法 ELFhash
/*
 * ELF hash demo: prints the hash bucket (0 .. MOD-1) of the string given as
 * the first command-line argument.
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>

#define MOD 10

/*
 * ELFhash - hash a NUL-terminated string and reduce it modulo MOD.
 *
 * key: string to hash (must not be NULL).
 *
 * Each byte is shifted into a running value; whenever the top four bits
 * (28..31) become non-zero they are folded back into bits 4..7 and cleared.
 *
 * The accumulator is exactly 32 bits wide and bytes are read as unsigned
 * char.  With a 64-bit unsigned long, "(h << 4) + byte" can carry into
 * bit 32, which the 0xF0000000 mask never clears; with a signed char,
 * bytes >= 0x80 would be sign-extended before being added.
 *
 * Returns a value in the range 0 .. MOD-1.
 */
int ELFhash(const char *key)
{
    const unsigned char *p = (const unsigned char *)key;
    uint32_t h = 0;

    while (*p) {
        h = (h << 4) + *p++;

        uint32_t g = h & 0xF0000000u;
        if (g) {
            h ^= g >> 24;
        }
        h &= ~g;
    }
    return (int)(h % MOD);
}

int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("using %s <string>\n", argv[0]);
        return -1;
    }

    int num = ELFhash(argv[1]);
    printf("num is %d\n", num);

    return 0;
}
以上是关于记录几个经典的字符串hash算法的主要内容,如果未能解决你的问题,请参考以下文章: