一致性Hash简单介绍和使用
Posted wzzkaifa
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了一致性Hash简单介绍和使用相关的知识,希望对你有一定的参考价值。
背景:
一致性Hash用于分布式缓存系统,将Key值映射到详细机器Ip上,而且添加和删除1台机器的数据移动量较小,对现网影响较小
实现:
1 Hash环:将节点的Hash值映射到一个Hash环中。每一个Key顺时针第一个找到的节点。就是这个Key被路由到的机器
2 "虚拟节点":将节点虚拟成多个"虚拟节点"分布在Hash环上,使得分布更均匀。扩缩容影响较小
一致性Hash用于分布式缓存系统,将Key值映射到详细机器Ip上,而且添加和删除1台机器的数据移动量较小,对现网影响较小
实现:
1 Hash环:将节点的Hash值映射到一个Hash环中。每一个Key顺时针第一个找到的节点。就是这个Key被路由到的机器
2 "虚拟节点":将节点虚拟成多个"虚拟节点"分布在Hash环上,使得分布更均匀。扩缩容影响较小
代码实例:
/* * @ 一致性Hash模拟測试 * @ 结论:模拟4台机器扩容1台。遍历Key[0,999983] - 一致性Hash需移动181161个Key,约占18%(1/5左右,符合预期效果) - 取模Hash需移动799984个Key,约占80% * @ 2014.05.30 */ #include <stdint.h> #include <iostream> #include <string.h> #include <sstream> #include <map> #include <vector> using namespace std; #define HASH_MOD (999983) template <class T> string ToStr(const T &t) { stringstream stream; stream << t; return stream.str(); } uint32_t APHash(string &sKey) { char *key = (char*)sKey.c_str(); unsigned int hash = 0; for (int i=0; *key; i++) { if ((i & 1) == 0) { hash ^= ((hash<<7)^(*key++)^(hash>>3)); } else { hash ^= (~((hash<<11)^(*key++)^(hash>>5))); } } return hash%HASH_MOD; } class CMyConHash { public: /* 加入一台机器(IP) */ void AddIp(const string &sIp) { // 每一个IP分配128个虚拟节点,原因:结合APHash实验结果分布较均匀 for (int i = 0; i < 128; i ++) { string sCode = sIp + ToStr(i) + "#Hash"; uint32_t uVirKey = APHash(sCode); mapVirKey2Ip[uVirKey] = sIp; mapIp2VirKey[sIp].push_back(uVirKey); } } /* 删除一台机器(IP) */ void DelIp(const string &sIp) { if (mapIp2VirKey.count(sIp) == 0) { cout << "DelIp Err: mapIp2VirKey Don`t Has Ip=" << sIp << endl; return; } vector<uint32_t> vecVirKey = mapIp2VirKey[sIp]; for (int i = 0; i < vecVirKey.size(); i ++) { uint32_t uVirKey = vecVirKey[i]; if (mapVirKey2Ip[uVirKey] == sIp) { // 得推断下。有可能2个IP虚拟节点相等后覆盖了 mapVirKey2Ip.erase(uVirKey); } } mapIp2VirKey.erase(sIp); } /* 路由:给每一个Key找到负责的机器(IP) */ int FindIp(uint32_t uKey, string &sIp) { if (mapVirKey2Ip.size() == 0) { cout << "FindIp Err: mapVirKey2Ip.size() == 0" << endl; return -1; } bool bFind = false; uint32_t uVirKey; map<uint32_t, string>::iterator iter; // 遍历std::map是按Key大小顺序输出(差别std::tr1::unordered_map) for(iter = mapVirKey2Ip.begin(); iter != mapVirKey2Ip.end(); iter ++) { uVirKey = iter->first; if (uVirKey > uKey%HASH_MOD) { sIp = iter->second; bFind = true; break; } } if (!bFind) { // 找不到比Key小的虚拟节点,故使用最小的虚拟节点(环) iter = mapVirKey2Ip.begin(); uVirKey = iter->first; sIp = iter->second; } //cout << "FindIp Suc:" << uKey%HASH_MOD << "=>" << uVirKey << "," << sIp << endl; return 0; } /* 打印各个IP负责的Key区域大小。影响因素:1 Hash函数 2 虚拟节点个数 */ /* 4台机器的情况,相对还是较均匀: Ip=202.168.14.241,Cnt=251649 Ip=202.168.14.242,Cnt=257902 Ip=202.168.14.243,Cnt=245945 Ip=202.168.14.244,Cnt=235516 */ void EchoIpState() { map<string, uint32_t> mapIpCnt; map<uint32_t, string>::iterator iter = mapVirKey2Ip.end(); iter --; uint32_t uPreKey = iter->first; string sPreIp = iter->second; do { iter --; uint32_t uVirKey = iter->first; string sIp = iter->second; if (mapIpCnt.count(sPreIp) == 0) { mapIpCnt[sPreIp] = uPreKey-uVirKey; } else { mapIpCnt[sPreIp] += uPreKey-uVirKey; } uPreKey = uVirKey; sPreIp = sIp; } while (iter != mapVirKey2Ip.begin()); cout << "Ip Size=" << mapIpCnt.size() << endl; map<string, uint32_t>::iterator iter1; for(iter1 = mapIpCnt.begin(); iter1 != mapIpCnt.end(); iter1 ++) { cout << "Ip=" << iter1->first << ",Cnt=" << iter1->second << endl; } } private: map< uint32_t, string > mapVirKey2Ip; map< string, vector<uint32_t> > mapIp2VirKey; }; class CMyModHash { public: void AddIp(const string &sIp) { vecIpList.push_back(sIp); } void FindIp(uint32_t uKey, string &sIp) { sIp = vecIpList[uKey%vecIpList.size()]; } void EchoIpState() { cout << "Ip Cnt=" << vecIpList.size() << endl; } private: vector<string> vecIpList; }; int main() { CMyConHash oMyHash; // CMyModHash oMyHash; // 模拟初始化4台机器 oMyHash.AddIp("202.168.14.241"); oMyHash.AddIp("202.168.14.242"); oMyHash.AddIp("202.168.14.243"); oMyHash.AddIp("202.168.14.244"); oMyHash.EchoIpState(); // 保存下各个Key路由的机器 string sIp, arrKeyIp[HASH_MOD]; for (uint32_t key = 0; key < HASH_MOD; key ++) { oMyHash.FindIp(key, sIp); arrKeyIp[key] = sIp; } // 模拟加入1台机器 oMyHash.AddIp("202.168.14.245"); oMyHash.EchoIpState(); // 推断多少Key相应数据须要移动机器 uint32_t uCnt = 0; for (uint32_t key = 0; key < HASH_MOD; key ++) { oMyHash.FindIp(key, sIp); if (arrKeyIp[key] != sIp) { uCnt ++; } } cout << "Key Sum=" << HASH_MOD << " , Need To Move:" << uCnt << endl; return 0; }
以上是关于一致性Hash简单介绍和使用的主要内容,如果未能解决你的问题,请参考以下文章