Unveiling the Mystery of HashMap
Posted by 骆宏
In the Java collections framework, the Map interface is powerful and easy to use; with its implementations you can build very complex data structures, for example:
HashMap<String, Map<String, HashMap>> or HashMap<String, List<HashMap<String, String>>>.
Right after graduating I interviewed at Alibaba, and the interviewer asked how HashMap is implemented. All I could answer was the simple array-based hash lookup I had learned in my first-year data structures course. After more than a year of work I have now re-read the HashMap source, and I finally understand its design and internal implementation.
The hash lookup algorithm
When first learning data structures we all meet the hash lookup algorithm. Briefly: given a set of data, we want an algorithm whose lookup and insert operations both run in O(1). First-year textbooks introduce a few simple structures such as queues, trees, and hash buckets, and of those only hashing offers O(1) lookup. The algorithm itself is simple: run the data through a hash function to get a hash value, take that value modulo the array length, and store the data at the resulting array index; the index is simply the hash value modulo the array length. The figure below illustrates this.
[Figure: a hash function maps each key to an index in a fixed-length array of buckets]
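To make this concrete, here is a minimal, deliberately naive sketch of such a table (my own illustration, not code from the JDK): it hashes the key, maps it into a fixed-length array, and does not yet handle collisions or resizing.

// A naive hash table: fixed-size array, index derived from hashCode().
// A colliding key silently overwrites the previous entry -- exactly the
// weakness that the real HashMap source below has to solve.
public class NaiveHashTable {
    private final Object[] keys = new Object[16];
    private final Object[] values = new Object[16];

    private int indexOf(Object key) {
        // clear the sign bit, then take the hash modulo the array length
        return (key.hashCode() & 0x7fffffff) % keys.length;
    }

    public void put(Object key, Object value) {
        int i = indexOf(key);
        keys[i] = key;      // a colliding key overwrites the old one
        values[i] = value;
    }

    public Object get(Object key) {
        int i = indexOf(key);
        return key.equals(keys[i]) ? values[i] : null;
    }
}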
With this brief introduction we have a first idea of hash lookup. But the algorithm above is only the most basic skeleton of a HashMap, because it leaves many problems open: the array length has to be fixed up front, a key with the same hash value overwrites the existing entry, and it is not obvious what the initial array length should be. With these questions in mind, let's read the HashMap source directly and answer them one by one. The source is pasted below, with some irrelevant code trimmed and explanatory comments added.
package java.util;

import java.io.*;

/**
 * The class Javadoc (abridged) highlights two tuning parameters:
 * the initial capacity, and the loadFactor (when usage of the table reaches this
 * threshold it is rehashed into a larger array, so the container can grow dynamically).
 * Both parameters directly affect HashMap performance. Consider this scenario:
 * you already have one million entries waiting to be stored, and you call
 *   Map<?, ?> map = new HashMap<>();
 * Because no capacity is specified, the map is created with the default size,
 * so while the entries are being inserted the map keeps rehashing, which is very slow.
 */
public class HashMap<K,V>
    extends AbstractMap<K,V>
    implements Map<K,V>, Cloneable, Serializable {

    // default initial capacity
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

    // maximum capacity
    static final int MAXIMUM_CAPACITY = 1 << 30;

    // default load factor: when usage exceeds 75%, the table is resized (rehashed)
    static final float DEFAULT_LOAD_FACTOR = 0.75f;

    // shared empty table, used before the map is first inflated
    static final Entry<?,?>[] EMPTY_TABLE = {};

    // the bucket array; its length is always a power of two and grows on resize
    transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE;

    // number of key-value mappings currently stored
    transient int size;

    // the size at which the table is resized next, computed as capacity * load factor
    int threshold;

    // usage threshold in effect; defaults to DEFAULT_LOAD_FACTOR, or set via constructor
    final float loadFactor;

    // modification counter, used by iterators for fail-fast behaviour
    transient int modCount;

    static final int ALTERNATIVE_HASHING_THRESHOLD_DEFAULT = Integer.MAX_VALUE;
    // can be skipped: holds the VM's alternative-hashing threshold
    private static class Holder {

        static final int ALTERNATIVE_HASHING_THRESHOLD;

        static {
            String altThreshold = java.security.AccessController.doPrivileged(
                new sun.security.action.GetPropertyAction(
                    "jdk.map.althashing.threshold"));

            int threshold;
            try {
                threshold = (null != altThreshold)
                        ? Integer.parseInt(altThreshold)
                        : ALTERNATIVE_HASHING_THRESHOLD_DEFAULT;

                // disable alternative hashing if -1
                if (threshold == -1) {
                    threshold = Integer.MAX_VALUE;
                }

                if (threshold < 0) {
                    throw new IllegalArgumentException("value must be positive integer.");
                }
            } catch (IllegalArgumentException failed) {
                throw new Error("Illegal value for 'jdk.map.althashing.threshold'", failed);
            }

            ALTERNATIVE_HASHING_THRESHOLD = threshold;
        }
    }

    // seed used when computing hash values, to help spread them out and reduce collisions
    transient int hashSeed = 0;
    // Constructs a HashMap with the given capacity and load factor.
    // Very useful when the amount of data is known in advance.
    public HashMap(int initialCapacity, float loadFactor) {
        if (initialCapacity < 0)
            throw new IllegalArgumentException("Illegal initial capacity: " +
                                               initialCapacity);
        if (initialCapacity > MAXIMUM_CAPACITY)
            initialCapacity = MAXIMUM_CAPACITY;
        if (loadFactor <= 0 || Float.isNaN(loadFactor))
            throw new IllegalArgumentException("Illegal load factor: " +
                                               loadFactor);

        this.loadFactor = loadFactor;
        threshold = initialCapacity;
        init();
    }

    // Constructs a HashMap with the given capacity and the default load factor.
    public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    // Constructs a HashMap with the default capacity (16) and load factor (0.75).
    // Use this when the amount of data is not known in advance.
    public HashMap() {
        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
    }

    // copy the original map data to this new map
    public HashMap(Map<? extends K, ? extends V> m) {
        this(Math.max((int) (m.size() / DEFAULT_LOAD_FACTOR) + 1,
                      DEFAULT_INITIAL_CAPACITY), DEFAULT_LOAD_FACTOR);
        inflateTable(threshold);

        putAllForCreate(m);
    }
    private static int roundUpToPowerOf2(int number) {
        // assert number >= 0 : "number must be non-negative";
        int rounded = number >= MAXIMUM_CAPACITY
                ? MAXIMUM_CAPACITY
                : (rounded = Integer.highestOneBit(number)) != 0
                    ? (Integer.bitCount(number) > 1) ? rounded << 1 : rounded
                    : 1;

        return rounded;
    }

    // Inflates (allocates) the bucket array, rounding the requested size up to a power of two.
    private void inflateTable(int toSize) {
        int capacity = roundUpToPowerOf2(toSize);

        threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);
        table = new Entry[capacity];
        initHashSeedAsNeeded(capacity);
    }
    void init() {
    }

    final boolean initHashSeedAsNeeded(int capacity) {
        boolean currentAltHashing = hashSeed != 0;
        boolean useAltHashing = sun.misc.VM.isBooted() &&
                (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);
        boolean switching = currentAltHashing ^ useAltHashing;
        if (switching) {
            hashSeed = useAltHashing
                ? sun.misc.Hashing.randomHashSeed(this)
                : 0;
        }
        return switching;
    }
    // Hash algorithm: computes a hash for each key, spreading the bits of hashCode()
    // so that keys distribute well over the power-of-two sized table.
    final int hash(Object k) {
        int h = hashSeed;
        if (0 != h && k instanceof String) {
            return sun.misc.Hashing.stringHash32((String) k);
        }

        h ^= k.hashCode();

        // This function ensures that hashCodes that differ only by
        // constant multiples at each bit position have a bounded
        // number of collisions (approximately 8 at default load factor).
        h ^= (h >>> 20) ^ (h >>> 12);
        return h ^ (h >>> 7) ^ (h >>> 4);
    }

    // Maps a hash value to a bucket index; for power-of-two lengths this
    // bit mask is equivalent to taking the hash modulo the array length.
    static int indexFor(int h, int length) {
        // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2";
        return h & (length-1);
    }
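    // Illustrative note (mine, not part of the JDK source): because table lengths
    // are powers of two, the mask is a cheap modulo. For a table of length 16:
    //   indexFor(21, 16) == (21 & 15) == 5, the same result as 21 % 16;
    //   indexFor(37, 16) == (37 & 15) == 5 as well, so keys hashing to 21 and 37
    //   collide and end up chained in the same bucket.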
    // Number of mappings currently stored.
    public int size() {
        return size;
    }

    // Whether the map is empty.
    public boolean isEmpty() {
        return size == 0;
    }

    // Lookup: returns the value mapped to the key, or null if absent.
    public V get(Object key) {
        if (key == null)
            return getForNullKey();
        Entry<K,V> entry = getEntry(key);

        return null == entry ? null : entry.getValue();
    }

    // Lookup for the null key, which always lives in bucket 0.
    private V getForNullKey() {
        if (size == 0) {
            return null;
        }
        for (Entry<K,V> e = table[0]; e != null; e = e.next) {
            if (e.key == null)
                return e.value;
        }
        return null;
    }

    // Whether the key is present.
    public boolean containsKey(Object key) {
        return getEntry(key) != null;
    }

    // Core lookup routine.
    final Entry<K,V> getEntry(Object key) {
        if (size == 0) {
            return null;
        }

        int hash = (key == null) ? 0 : hash(key);                 // compute the key's hash
        for (Entry<K,V> e = table[indexFor(hash, table.length)];  // map the hash to a bucket, then walk that bucket's list
             e != null;                                           // until a matching entry is found (equals decides equality)
             e = e.next) {
            Object k;
            if (e.hash == hash &&
                ((k = e.key) == key || (key != null && key.equals(k))))
                return e;
        }
        return null;
    }
    // Inserts a key-value pair into the map.
    public V put(K key, V value) {
        if (table == EMPTY_TABLE) {
            inflateTable(threshold);
        }
        if (key == null)
            return putForNullKey(value);
        int hash = hash(key);                   // compute the key's hash
        int i = indexFor(hash, table.length);   // map the hash to a bucket index
        for (Entry<K,V> e = table[i]; e != null; e = e.next) {  // walk the bucket's list looking for an equal key
            Object k;
            // hashes match and the key is == or equals: the key already exists, so replace the old value
            if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {
                V oldValue = e.value;
                e.value = value;
                e.recordAccess(this);
                return oldValue;
            }
        }

        modCount++;
        addEntry(hash, key, value, i);          // key not present: add a new entry to the map
        return null;
    }

    // Inserts a value for the null key (bucket 0).
    private V putForNullKey(V value) {
        for (Entry<K,V> e = table[0]; e != null; e = e.next) {
            if (e.key == null) {
                V oldValue = e.value;
                e.value = value;
                e.recordAccess(this);
                return oldValue;
            }
        }
        modCount++;
        addEntry(0, null, value, 0);
        return null;
    }

    // Inserts an entry during construction/deserialization (no resize, no modCount).
    private void putForCreate(K key, V value) {
        int hash = null == key ? 0 : hash(key);
        int i = indexFor(hash, table.length);

        // first check whether the key already exists
        for (Entry<K,V> e = table[i]; e != null; e = e.next) {
            Object k;
            if (e.hash == hash &&
                ((k = e.key) == key || (key != null && key.equals(k)))) {
                e.value = value;
                return;
            }
        }

        // not present, so create a new entry
        createEntry(hash, key, value, i);
    }

    private void putAllForCreate(Map<? extends K, ? extends V> m) {
        for (Map.Entry<? extends K, ? extends V> e : m.entrySet())
            putForCreate(e.getKey(), e.getValue());
    }
    // Resizes the map to a larger capacity.
    void resize(int newCapacity) {
        Entry[] oldTable = table;
        int oldCapacity = oldTable.length;
        if (oldCapacity == MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return;
        }

        Entry[] newTable = new Entry[newCapacity];
        transfer(newTable, initHashSeedAsNeeded(newCapacity));
        table = newTable;
        threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
    }

    // Copies all entries from the old table into newTable, rehashing if requested.
    void transfer(Entry[] newTable, boolean rehash) {
        int newCapacity = newTable.length;
        for (Entry<K,V> e : table) {
            while (null != e) {
                Entry<K,V> next = e.next;
                if (rehash) {
                    e.hash = null == e.key ? 0 : hash(e.key);
                }
                int i = indexFor(e.hash, newCapacity);
                e.next = newTable[i];
                newTable[i] = e;
                e = next;
            }
        }
    }
    // Copies all mappings of the given map into this map.
    public void putAll(Map<? extends K, ? extends V> m) {
        int numKeysToBeAdded = m.size();
        if (numKeysToBeAdded == 0)
            return;

        if (table == EMPTY_TABLE) {
            inflateTable((int) Math.max(numKeysToBeAdded * loadFactor, threshold));
        }

        if (numKeysToBeAdded > threshold) {
            int targetCapacity = (int)(numKeysToBeAdded / loadFactor + 1);
            if (targetCapacity > MAXIMUM_CAPACITY)
                targetCapacity = MAXIMUM_CAPACITY;
            int newCapacity = table.length;
            while (newCapacity < targetCapacity)
                newCapacity <<= 1;
            if (newCapacity > table.length)
                resize(newCapacity);
        }

        for (Map.Entry<? extends K, ? extends V> e : m.entrySet())
            put(e.getKey(), e.getValue());
    }
    // Removes the mapping for a key, returning the old value (or null).
    public V remove(Object key) {
        Entry<K,V> e = removeEntryForKey(key);
        return (e == null ? null : e.value);
    }

    // Removes and returns the entry associated with the key.
    final Entry<K,V> removeEntryForKey(Object key) {
        if (size == 0) {
            return null;
        }
        int hash = (key == null) ? 0 : hash(key);
        int i = indexFor(hash, table.length);
        Entry<K,V> prev = table[i];
        Entry<K,V> e = prev;

        while (e != null) {
            Entry<K,V> next = e.next;
            Object k;
            if (e.hash == hash &&
                ((k = e.key) == key || (key != null && key.equals(k)))) {
                modCount++;
                size--;
                if (prev == e)
                    table[i] = next;
                else
                    prev.next = next;
                e.recordRemoval(this);
                return e;
            }
            prev = e;
            e = next;
        }

        return e;
    }

    // Removes an entry that matches the given Map.Entry (both key and value).
    final Entry<K,V> removeMapping(Object o) {
        if (size == 0 || !(o instanceof Map.Entry))
            return null;

        Map.Entry<K,V> entry = (Map.Entry<K,V>) o;
        Object key = entry.getKey();
        int hash = (key == null) ? 0 : hash(key);
        int i = indexFor(hash, table.length);
        Entry<K,V> prev = table[i];
        Entry<K,V> e = prev;

        while (e != null) {
            Entry<K,V> next = e.next;
            if (e.hash == hash && e.equals(entry)) {
                modCount++;
                size--;
                if (prev == e)
                    table[i] = next;
                else
                    prev.next = next;
                e.recordRemoval(this);
                return e;
            }
            prev = e;
            e = next;
        }

        return e;
    }
    // Removes all mappings.
    public void clear() {
        modCount++;
        Arrays.fill(table, null);
        size = 0;
    }

    // Whether the given value is stored under at least one key.
    public boolean containsValue(Object value) {
        if (value == null)
            return containsNullValue();

        Entry[] tab = table;
        for (int i = 0; i < tab.length ; i++)
            for (Entry e = tab[i] ; e != null ; e = e.next)
                if (value.equals(e.value))
                    return true;
        return false;
    }

    // Whether a null value is stored.
    private boolean containsNullValue() {
        Entry[] tab = table;
        for (int i = 0; i < tab.length ; i++)
            for (Entry e = tab[i] ; e != null ; e = e.next)
                if (e.value == null)
                    return true;
        return false;
    }
    // Returns a shallow copy of this map (the keys and values themselves are not cloned).
    public Object clone() {
        HashMap<K,V> result = null;
        try {
            result = (HashMap<K,V>)super.clone();
        } catch (CloneNotSupportedException e) {
            // assert false;
        }
        if (result.table != EMPTY_TABLE) {
            result.inflateTable(Math.min(
                (int) Math.min(
                    size * Math.min(1 / loadFactor, 4.0f),
                    // we have limits...
                    HashMap.MAXIMUM_CAPACITY),
                table.length));
        }
        result.entrySet = null;
        result.modCount = 0;
        result.size = 0;
        result.init();
        result.putAllForCreate(this);

        return result;
    }
    /**
     * The node that actually stores the data.
     * Each node holds a key, a value, a hash, and a next pointer.
     * next forms a singly linked list: when two keys hash to the same bucket,
     * the new node is linked to the existing ones through next, so entries with
     * colliding hashes can still all be stored and retrieved.
     */
    static class Entry<K,V> implements Map.Entry<K,V> {
        final K key;
        V value;
        Entry<K,V> next;
        int hash;

        /**
         * Creates new entry.
         */
        Entry(int h, K k, V v, Entry<K,V> n) {
            value = v;
            next = n;
            key = k;
            hash = h;
        }

        public final K getKey() {
            return key;
        }

        public final V getValue() {
            return value;
        }

        public final V setValue(V newValue) {
            V oldValue = value;
            value = newValue;
            return oldValue;
        }

        public final boolean equals(Object o) {
            if (!(o instanceof Map.Entry))
                return false;
            Map.Entry e = (Map.Entry)o;
            Object k1 = getKey();
            Object k2 = e.getKey();
            if (k1 == k2 || (k1 != null && k1.equals(k2))) {
                Object v1 = getValue();
                Object v2 = e.getValue();
                if (v1 == v2 || (v1 != null && v1.equals(v2)))
                    return true;
            }
            return false;
        }

        public final int hashCode() {
            return Objects.hashCode(getKey()) ^ Objects.hashCode(getValue());
        }

        public final String toString() {
            return getKey() + "=" + getValue();
        }

        void recordAccess(HashMap<K,V> m) {
        }

        void recordRemoval(HashMap<K,V> m) {
        }
    }
    // Adds a new node to the map.
    void addEntry(int hash, K key, V value, int bucketIndex) {
        // If the size has reached the threshold and the target bucket is already occupied,
        // double the table size, then recompute the hash and bucket index.
        if ((size >= threshold) && (null != table[bucketIndex])) {
            resize(2 * table.length);
            hash = (null != key) ? hash(key) : 0;
            bucketIndex = indexFor(hash, table.length);
        }

        createEntry(hash, key, value, bucketIndex);
    }

    // Creates a node: the new entry is inserted at the head of the bucket's list,
    // and the previous head element becomes the new entry's next element.
    void createEntry(int hash, K key, V value, int bucketIndex) {
        Entry<K,V> e = table[bucketIndex];
        table[bucketIndex] = new Entry<>(hash, key, value, e);
        size++;
    }
    // Iterator design pattern, used to traverse the whole map.
    private abstract class HashIterator<E> implements Iterator<E> {
        Entry<K,V> next;        // next entry to return
        int expectedModCount;   // For fast-fail
        int index;              // current slot
        Entry<K,V> current;     // current entry

        HashIterator() {
            expectedModCount = modCount;
            if (size > 0) { // advance to first entry
                Entry[] t = table;
                while (index < t.length && (next = t[index++]) == null)
                    ;
            }
        }

        public final boolean hasNext() {
            return next != null;
        }

        final Entry<K,V> nextEntry() {
            if (modCount != expectedModCount)
                throw new ConcurrentModificationException();
            Entry<K,V> e = next;
            if (e == null)
                throw new NoSuchElementException();

            if ((next = e.next) == null) {
                Entry[] t = table;
                while (index < t.length && (next = t[index++]) == null)
                    ;
            }
            current = e;
            return e;
        }

        public void remove() {
            if (current == null)
                throw new IllegalStateException();
            if (modCount != expectedModCount)
                throw new ConcurrentModificationException();
            Object k = current.key;
            current = null;
            HashMap.this.removeEntryForKey(k);
            expectedModCount = modCount;
        }
    }

    private final class ValueIterator extends HashIterator<V> {
        public V next() {
            return nextEntry().value;
        }
    }

    private final class KeyIterator extends HashIterator<K> {
        public K next() {
            return nextEntry().getKey();
        }
    }

    private final class EntryIterator extends HashIterator<Map.Entry<K,V>> {
        public Map.Entry<K,V> next() {
            return nextEntry();
        }
    }

    // Subclass overrides these to alter behavior of views' iterator() method
    Iterator<K> newKeyIterator() {
        return new KeyIterator();
    }
    Iterator<V> newValueIterator() {
        return new ValueIterator();
    }
    Iterator<Map.Entry<K,V>> newEntryIterator() {
        return new EntryIterator();
    }
    // Views over the whole map: the key set, the value collection, and the entry set.
    private transient Set<Map.Entry<K,V>> entrySet = null;

    // Returns a Set view of all keys.
    public Set<K> keySet() {
        Set<K> ks = keySet;
        return (ks != null ? ks : (keySet = new KeySet()));
    }

    private final class KeySet extends AbstractSet<K> {
        public Iterator<K> iterator() {
            return newKeyIterator();
        }
        public int size() {
            return size;
        }
        public boolean contains(Object o) {
            return containsKey(o);
        }
        public boolean remove(Object o) {
            return HashMap.this.removeEntryForKey(o) != null;
        }
        public void clear() {
            HashMap.this.clear();
        }
    }

    // Returns a Collection view of all values.
    public Collection<V> values() {
        Collection<V> vs = values;
        return (vs != null ? vs : (values = new Values()));
    }

    private final class Values extends AbstractCollection<V> {
        public Iterator<V> iterator() {
            return newValueIterator();
        }
        public int size() {
            return size;
        }
        public boolean contains(Object o) {
            return containsValue(o);
        }
        public void clear() {
            HashMap.this.clear();
        }
    }

    // Returns a Set view of all <key, value> entries.
    public Set<Map.Entry<K,V>> entrySet() {
        return entrySet0();
    }

    private Set<Map.Entry<K,V>> entrySet0() {
        Set<Map.Entry<K,V>> es = entrySet;
        return es != null ? es : (entrySet = new EntrySet());
    }

    private final class EntrySet extends AbstractSet<Map.Entry<K,V>> {
        public Iterator<Map.Entry<K,V>> iterator() {
            return newEntryIterator();
        }
        public boolean contains(Object o) {
            if (!(o instanceof Map.Entry))
                return false;
            Map.Entry<K,V> e = (Map.Entry<K,V>) o;
            Entry<K,V> candidate = getEntry(e.getKey());
            return candidate != null && candidate.equals(e);
        }
        public boolean remove(Object o) {
            return removeMapping(o) != null;
        }
        public int size() {
            return size;
        }
        public void clear() {
            HashMap.this.clear();
        }
    }
    // Serialization support.
    private void writeObject(java.io.ObjectOutputStream s)
        throws IOException
    {
        // Write out the threshold, loadfactor, and any hidden stuff
        s.defaultWriteObject();

        // Write out number of buckets
        if (table==EMPTY_TABLE) {
            s.writeInt(roundUpToPowerOf2(threshold));
        } else {
            s.writeInt(table.length);
        }

        // Write out size (number of Mappings)
        s.writeInt(size);

        // Write out keys and values (alternating)
        if (size > 0) {
            for(Map.Entry<K,V> e : entrySet0()) {
                s.writeObject(e.getKey());
                s.writeObject(e.getValue());
            }
        }
    }

    private static final long serialVersionUID = 362498820763181265L;

    private void readObject(java.io.ObjectInputStream s)
        throws IOException, ClassNotFoundException
    {
        // Read in the threshold (ignored), loadfactor, and any hidden stuff
        s.defaultReadObject();
        if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
            throw new InvalidObjectException("Illegal load factor: " +
                                             loadFactor);
        }

        // set other fields that need values
        table = (Entry<K,V>[]) EMPTY_TABLE;

        // Read in number of buckets
        s.readInt(); // ignored.

        // Read number of mappings
        int mappings = s.readInt();
        if (mappings < 0)
            throw new InvalidObjectException("Illegal mappings count: " +
                                             mappings);

        // capacity chosen by number of mappings and desired load (if >= 0.25)
        int capacity = (int) Math.min(
                mappings * Math.min(1 / loadFactor, 4.0f),
                // we have limits...
                HashMap.MAXIMUM_CAPACITY);

        // allocate the bucket array;
        if (mappings > 0) {
            inflateTable(capacity);
        } else {
            threshold = capacity;
        }

        init();  // Give subclass a chance to do its thing.

        // Read the keys and values, and put the mappings in the HashMap
        for (int i = 0; i < mappings; i++) {
            K key = (K) s.readObject();
            V value = (V) s.readObject();
            putForCreate(key, value);
        }
    }

    int capacity()     { return table.length; }
    float loadFactor() { return loadFactor;   }
}
From the source above we can see that the core of HashMap is still the hash lookup algorithm, with a few extra features layered on top that make it more powerful and easier to use. The diagram below shows HashMap's data structure, to aid understanding and to summarize.
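As a small illustration of that structure (my own example, not from the original article), the snippet below forces two keys into the same bucket by giving them the same hashCode; both values remain retrievable because the bucket chains its entries and equals() picks the right one.

import java.util.HashMap;
import java.util.Map;

public class CollisionDemo {
    // A key whose hashCode is deliberately constant, so every instance lands
    // in the same bucket; equals() still distinguishes the names.
    static final class BadKey {
        final String name;
        BadKey(String name) { this.name = name; }
        @Override public int hashCode() { return 42; }
        @Override public boolean equals(Object o) {
            return o instanceof BadKey && ((BadKey) o).name.equals(name);
        }
    }

    public static void main(String[] args) {
        Map<BadKey, String> map = new HashMap<>();
        map.put(new BadKey("a"), "first");
        map.put(new BadKey("b"), "second");
        // Both entries are found even though the keys collide: the bucket's
        // linked list is walked and equals() selects the matching entry.
        System.out.println(map.get(new BadKey("a"))); // first
        System.out.println(map.get(new BadKey("b"))); // second
    }
}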
Performance test
I wrote a simple test comparing the two ways of constructing the map; in the original post the code and its results were shown as an image. A sketch of such a test follows.
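Since the original test only survives as an image, here is a rough sketch of the kind of test meant (illustrative only; the class name and sizes are my own, and actual timings depend on the JVM and machine). It fills one map created with the default constructor and one pre-sized for a million entries, so the second avoids repeated rehashing.

import java.util.HashMap;
import java.util.Map;

public class HashMapResizeBenchmark {
    static final int N = 1_000_000;

    static long fillMillis(Map<Integer, Integer> map) {
        long start = System.nanoTime();
        for (int i = 0; i < N; i++) {
            map.put(i, i);
        }
        return (System.nanoTime() - start) / 1_000_000;
    }

    public static void main(String[] args) {
        // Default capacity (16): the table is doubled and rehashed many times on the way to 1M entries.
        long defaultMs = fillMillis(new HashMap<Integer, Integer>());
        // Pre-sized so that 1M entries stay below the 0.75 load-factor threshold: no rehashing.
        long presizedMs = fillMillis(new HashMap<Integer, Integer>((int) (N / 0.75f) + 1));
        System.out.println("default constructor: " + defaultMs + " ms");
        System.out.println("pre-sized:           " + presizedMs + " ms");
    }
}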
Summary
The core idea of HashMap is still the hash lookup algorithm; combined with a linked-list structure for collision handling, it becomes a powerful, general-purpose data structure. For the finer details, read the source and find the answers in the code.