哈希表（散列表）二次探测

Posted 2020-07-24

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了哈希表（散列表）二次探测相关的知识，希望对你有一定的参考价值。

#pragma once
#include<iostream>
#include<string>
using namespace std;
// Occupancy marker stored for every slot of the hash table.
enum State
{
    EMPTY = 0,   // free slot; a lookup stops probing when it reaches one
    DELETE = 1,  // slot whose entry was removed; lookups keep probing past it
    EXIST = 2    // slot currently holding a live entry
};
// One key/value entry as stored in a single hash-table slot.
template <typename K, typename V>
struct HashTableNode
{
    K _key;    // key the entry is looked up by
    V _value;  // value associated with _key
};
// Default hash functor: converts the key itself to size_t.
// Suitable for integral-like keys; other key types need a specialization
// or a custom functor passed to HashTable.
template<class K>
struct __HashFunc
{
    // Returns the key converted to size_t. Declared const so the functor can
    // also be invoked through a const object or reference.
    size_t operator()(const K& key) const
    {
        return static_cast<size_t>(key);
    }
};
//特化string的__HashFunc 仿函数
template<>
struct __HashFunc<string>
{
size_t operator()(const string& str)
{
size_t key = 0;
for (size_t i = 0; i < str.size(); i++)
{
key += str[i];
}
return key;
}
};
//实现哈希表的Key/Value形式的二次探测
template<class K, class V, class HashFunc = __HashFunc<K>>
class HashTable
{
typedef HashTableNode<K, V> Node;
public:
HashTable(size_t capacity = 10)
:_tables(new Node[capacity])
, _size(0)
, _states(new State[capacity])
, _capacity(capacity)
{
// memset 有问题 是以字节为单位初始化的 但第二个参数值为int
//memset(_states, EMPTY, sizeof(State) * capacity);
for (size_t i = 0; i < capacity; i++)
{
_states[i] = EMPTY;
}
}
~HashTable()
{
if (NULL != _tables)
{
delete[] _tables;
_tables = NULL;
}
if (NULL != _states)
{
delete[] _states;
_states = NULL;
}
}
bool Insert(const K& key, const V& value)
{
_CheckCapacity();
//用GetNextIndex 解决哈希冲突
size_t index = _HashFunc(key);
// 二次探测   
size_t i = 1;
while (_states[index] == EXIST)
{
index = _GetNextIndex(index, i++);
if (index >= _capacity)
{
index = index % _capacity;
}
}
_tables[index]._key = key;
_tables[index]._value = value;
_states[index] = EXIST;
_size++;
return true;
}
Node* Find(const K& key)
{
size_t index = _HashFunc(key);
size_t start = index;
size_t i = 1;
// 存在 或者 被删除 两种状态
while (_states[index] != EMPTY)
{
if (_tables[index]._key == key)
{
if (_states[index] == EXIST)
{
return index;
}
else // 被删除 DELETE
{
return -1;
}
}
index = _GetNextIndex(index, i++);
if (index >= _capacity)
{
index = index % _capacity;
}
// 因为有填充因子 不为100%  不会出现全满且key！=_key 导致死循环的情况
}
return -1;
}
bool Remove(const K& key)
{
int index = Find(key);
if (index != -1)
{
_states[index] = DELETE;
--_size;
return true;
}
return false;
}
// 二次探测计算出存放位置
size_t _HashFunc(const K& key)
{
HashFunc hf;
return hf(key) % _capacity; //  仿函数hf() 
}
//   哈希冲突时 得到下一个index的可以利用上一个index的值 这样能提高效率 比如 string的index计算就比较费时
size_t _GetNextIndex(size_t prev, size_t i)
{
return prev + 2 * i - 1;
}
void Print()
{
for (size_t i = 0; i < _capacity; i++)
{
if (_states[i] == EXIST)
{
cout << i << "EXIST:" << _tables[i]._key << "-------" << _tables[i]._value << endl;
}
else if (_states[i] == DELETE)
{
cout << i << "DELETE:" << _tables[i]._key << "-------" << _tables[i]._value << endl;
}
else
{
cout << i << "EMPTY:" << _tables[i]._key << "-------" << _tables[i]._value << endl;
}
}
}
void Swap(HashTable<K, V, HashFunc>& ht)
{
swap(_size, ht._size);
swap(_states, ht._states);
swap(_tables, ht._tables);
swap(_capacity, ht._capacity);
}
protected:
void _CheckCapacity() // 扩容
{
// 动态的 可扩容的
// 高效哈希表的载荷因子大概在0.7-0.8较好
if (10 * _size / _capacity >= 7)  // _size/_capacity为0 因为都是整形 所以乘10
// 保证载荷因子在0.7之内
{
HashTable<K, V, HashFunc> tmp(2 * _capacity);
for (size_t i = 0; i < _capacity; i++)
{
if (_states[i] == EXIST)
{
tmp.Insert(_tables[i]._key, _tables[i]._value);
}
}
Swap(tmp);
}
}
protected:
Node* _tables;
State* _states;//状态表
size_t _size;
size_t _capacity;
};

以上是关于哈希表（散列表）二次探测的主要内容，如果未能解决你的问题，请参考以下文章：

问题 B: DS哈希查找—二次探测再散列（关键字互不相同）