c_cpp 为字符串实现哈希表和哈希函数
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了c_cpp 为字符串实现哈希表和哈希函数相关的知识,希望对你有一定的参考价值。
// ================================================
// refer: http://www.math.ucla.edu/~wittman/10b.1.10w/Lectures/Lec23.pdf
// a hash routine for string
/* why use prime number 31?
http://stackoverflow.com/questions/1145217/why-should-hash-functions-use-a-prime-number-modulus
another good post:
https://computinglife.wordpress.com/2008/11/20/why-do-hash-functions-use-prime-numbers/
Because you want the number you are multiplying by and the number of
buckets you are inserting into to have orthogonal prime factorizations.
Suppose there are 8 buckets to insert into. If the number you are using
to multiply by is some multiple of 8, then the bucket inserted into will
only be determined by the least significant entry (the one not multiplied
at all). Similar entries will collide. Not good for a hash function.
31 is a large enough prime that the number of buckets is unlikely to be
divisible by it (and in fact, modern java HashMap implementations keep
the number of buckets to a power of 2).
*/
int hash(const string& key, int table_size) {
int hash_val = 0;
for(int i=0; i<key.size(); i++)
hash_val = 31 * hash_val + key[i]; // potential overflow
hash_val %= table_size;
if(hash_val < 0) // handle overflow
hash_val += table_size;
return hash_val;
}
}
template<typename T, typename K>
class HashTable {
private:
vector<vector<T>> table;
public:
HashTable(int table_size) {
table.resize(table_size);
}
void insert(T new_item) {
int index = new_item.getHash(table.size()); // call new_item's method
table[index].push_back(new_item);
}
T* find(K key) {
// idea: create a dummy item so that we can look up this key's hash index
// this tells us what bucket to look in
T tmp_item;
tmp_item.setKey(key);
int index = tmp_item.getHash(table.size());
for(int i=0; i<table[index].size(); i++)
if(table[index][i].getKey() == key)
return &table[index][i];
return NULL;
}
void erase(T* pos) {
if(pos == NULL) return;
int index = pos->getHash(table.size());
int i = 0;
while(i < table[index].size() && &table[index][i] != pos) i++;
// shift all remaining items after position i
for(int j=i; j<table[index].size()-1; j++)
table[index][j] = table[index][j+1];
table[index].pop_back();
}
};
// ================================================
// hash table is a vector of linked lists
// insert element at the head or at the tail
// key k is stored in list at T[h(k)]
// refer: http://www.eecs.wsu.edu/~ananth/CptS223/Lectures/hashing.pdf
template <typename T>
class HashTable {
private:
vector<list<T>> table;
int curr_size;
void rehash();
int myhash(const T& x) const {
int hash_val = hash(x);
hash_val %= table.size();
if(hash_val < 0)
hash_val += table.size();
return hash_val; // this is the hash table index for element x
}
public:
explicit HashTable(int size = 101);
bool contains(const T& x) const {
list<T> &which_bucket = table[myhash(x)];
return find(which_bucket.begin(), which_bucket.end(), x) != which_bucket.end() ;
}
void clear() {
for(auto l : table)
l.clear();
}
bool insert(const T& x) {
list<T>& which_bucket = table[myhash(x)];
// already inside table
if(find(which_bucket.begin(), which_bucket.end(), x) != which_bucket.end())
return false;
which_bucket.push_back(x);
// rehash if table get crowded
curr_size++;
if(curr_size > table.size())
rehash();
return true;
}
void remove(const T& x) {
list<T> &which_bucket = table[myhash(x)];
list<T>::iterator it = find(which_bucket.begin(), which_bucket.end(), x);
if(it == which_bucket.end())
return false;
which_bucket.erase(it);
--curr_size;
return true;
}
};
以上是关于c_cpp 为字符串实现哈希表和哈希函数的主要内容,如果未能解决你的问题,请参考以下文章