HashMap源码浅析

Posted 2022-12-27 darkclouds
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了HashMap源码浅析相关的知识，希望对你有一定的参考价值。
个人感觉，HashMap 就是一个按 hash 值分桶的数组，每个桶里存放的是一条链表或者一棵红黑树。
// Excerpt of the HashMap class: the class declaration, its tuning constants and its state fields.
public class HashMap<K, V> extends AbstractMap<K, V> implements Map<K, V>, Cloneable, Serializable 

    private static final long serialVersionUID = 362498820763181265L;
    // Default table capacity (number of buckets): 16.
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16
    // Maximum table capacity: 2^30.
    static final int MAXIMUM_CAPACITY = 1 << 30;
    // Default load factor; the no-arg constructor assigns it to loadFactor.
    static final float DEFAULT_LOAD_FACTOR = 0.75f;
    // putVal hands a bin to treeifyBin when the chain it appends to already holds this many
    // nodes (the check there is binCount >= TREEIFY_THRESHOLD - 1).
    static final int TREEIFY_THRESHOLD = 8;
    // Not used in this excerpt; presumably the node count at which a tree bin is turned back
    // into a linked list - TODO confirm against the full source.
    static final int UNTREEIFY_THRESHOLD = 6;
    // Minimum table length for treeification: while the table length is below 64,
    // treeifyBin resizes the table instead of building a tree.
    static final int MIN_TREEIFY_CAPACITY = 64;
    // Bucket array; each slot holds the head node of a bin (linked list or tree), or null.
    transient Node<K, V>[] table;
    // Not used in this excerpt; presumably caches the entrySet() view - TODO confirm.
    transient Set<Map.Entry<K, V>> entrySet;
    // Number of key-value mappings currently stored; putVal increments it for each new mapping.
    transient int size;
    // Modification counter; putVal increments it when it adds a new mapping.
    transient int modCount;
    // putVal calls resize() once size exceeds this value. With the defaults it is presumably
    // 16 * 0.75 = 12 (it is computed in resize(), which is not shown in this excerpt).
    int threshold;
    // Load factor: the fraction of the capacity that may be filled before the table is resized.
    // With DEFAULT_LOAD_FACTOR = 0.75f and the default capacity of 16 that would be 12 entries
    // (the derivation lives in resize(), not shown here).
    final float loadFactor;

    // Basic bin node, used while a bin is a plain singly linked list.
    static class Node<K, V> implements Map.Entry<K, V> 
        // Hash supplied by the creator; putVal passes the value produced by HashMap.hash(key).
        final int hash;
        final K key;
        V value;
        // Next node in the same bin, or null at the tail.
        Node<K, V> next;

        Node(int hash, K key, V value, Node<K, V> next) 
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        

        // Entry hash code: hashCode of the key XOR hashCode of the value (null-safe through
        // Objects.hashCode). This is unrelated to the hash field used for bucket selection.
        public final int hashCode() 
            return Objects.hashCode(key) ^ Objects.hashCode(value);
        

        // Equal to itself, or to any Map.Entry whose key and value both equal this node's.
        public final boolean equals(Object o) 
            if (o == this)
                return true;
            if (o instanceof Map.Entry) 
                Map.Entry<?, ?> e = (Map.Entry<?, ?>) o;
                if (Objects.equals(key, e.getKey()) && Objects.equals(value, e.getValue()))
                    return true;
            
            return false;
        
    

    // Spreads a key's hashCode: XORs it with itself shifted right (unsigned) by 16 bits, so the
    // high bits also influence the bucket index, which putVal/getNode compute as (n - 1) & hash.
    // A null key hashes to 0.
    static final int hash(Object key) 
        int h;
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    

    // No-arg constructor: only sets the load factor to the default. The table is not allocated
    // here; putVal calls resize() on the first insertion while table is still null.
    // Per the original article the class has three more constructors (not shown in this
    // excerpt): two that take an initial capacity and/or load factor, and one that copies
    // another map.
    public HashMap() 
        this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
    

    // The everyday put: stores value under key, overwriting any existing value
    // (onlyIfAbsent = false), and returns the previous value or null if there was none.
    public V put(K key, V value) 
        return putVal(hash(key), key, value, false, true);
    

    // Core of put. Locates the bucket for hash, then either adds a new node or updates the
    // existing mapping for key.
    //   onlyIfAbsent - when true, an existing non-null value is left untouched
    //   evict        - passed through to afterNodeInsertion(evict); that hook is not shown in
    //                  this excerpt (per the original article it is an empty method in HashMap)
    // Returns the previous value for key, or null when a new mapping was added.
    final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) 
        // tab: local reference to the table field
        Node<K, V>[] tab;
        // p: the node at the computed bucket index (later the cursor while walking a chain)
        Node<K, V> p;
        // n: length of the table, i.e. the number of buckets - not the number of stored
        //    mappings, since some buckets may be null
        // i: bucket index derived from hash
        int n, i;
        if ((tab = table) == null || (n = tab.length) == 0)
            // table not allocated yet: resize() creates it
            n = (tab = resize()).length;
        // The bucket index is (n - 1) & hash, where hash is the spread value produced by
        // hash(key). Two keys therefore share a bucket whenever (n - 1) & hash agrees for both;
        // their hashCodes do not have to be identical.
        if ((p = tab[i = (n - 1) & hash]) == null)
            // Empty bucket: nothing to search, place the new node directly.
            // Execution then falls through to the bookkeeping below and returns null.
            tab[i] = newNode(hash, key, value, null);
        else 
            // Bucket occupied: search the list or tree for an existing mapping for key.
            // e: the existing node for key, if one is found
            Node<K, V> e;
            // k: key of the node currently being compared
            K k;
            if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k))))
                // the head node of the bucket already holds this key
                e = p;
            else if (p instanceof TreeNode)
                // tree bin: delegate the search/insert to the tree
                e = ((TreeNode<K, V>) p).putTreeVal(this, tab, hash, key, value);
            else 
                // list bin: walk the chain
                for (int binCount = 0;; ++binCount) 
                    if ((e = p.next) == null) 
                        // reached the tail without a match (e is null): append a new node
                        p.next = newNode(hash, key, value, null);
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            // the chain already held at least TREEIFY_THRESHOLD nodes before this
                            // append: hand the bin to treeifyBin (which may resize the table instead)
                            treeifyBin(tab, hash);
                        break;
                    
                    // every node on the chain is compared against the key
                    if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
                    p = e;
                
            
            // e is non-null only when a node for key already existed
            if (e != null)  // existing mapping for key
                V oldValue = e.value;

                if (!onlyIfAbsent || oldValue == null)
                    // overwriting is allowed, or the old value was null
                    e.value = value;
                // hook not shown in this excerpt; per the original article an empty method in HashMap
                afterNodeAccess(e);
                return oldValue;
            
        
        ++modCount;
        // a mapping was added: grow the table once size exceeds threshold
        if (++size > threshold)
            resize();
        // hook not shown in this excerpt; per the original article an empty method in HashMap
        afterNodeInsertion(evict);
        return null;
    

    // Node type used for bins that have been converted to a red-black tree. Besides the tree
    // links it keeps prev (and the inherited next), so a tree bin can still be walked as a list.
    static final class TreeNode<K, V> extends LinkedHashMap.Entry<K, V> 
        TreeNode<K, V> parent; // red-black tree links
        TreeNode<K, V> left;
        TreeNode<K, V> right;
        TreeNode<K, V> prev; // needed to unlink next upon deletion
        // true when the node is red, false when it is black
        boolean red;

        TreeNode(int hash, K key, V val, Node<K, V> next) 
            super(hash, key, val, next);
        

        // Tree version of putVal: looks for key k (hash h) in the tree that contains this node.
        // Returns the existing node when the key is already present; otherwise links a new node
        // in as a leaf, rebalances, moves the root to the bin head and returns null.
        final TreeNode<K, V> putTreeVal(HashMap<K, V> map, Node<K, V>[] tab, int h, K k, V v) 
            // kc: class of k as returned by comparableClassFor (helper not shown), resolved lazily
            Class<?> kc = null;
            // searched: set once the one-time subtree scan below has been performed
            boolean searched = false;
            // start from the root of the tree
            TreeNode<K, V> root = (parent != null) ? root() : this;
            // nodes are ordered primarily by their hash
            for (TreeNode<K, V> p = root;;) 
                // dir: direction to descend (<= 0 goes left, > 0 goes right)
                // ph: hash of the current node p
                // pk: key of the current node p
                int dir, ph;
                K pk;
                if ((ph = p.hash) > h)
                    dir = -1;
                else if (ph < h)
                    dir = 1;
                else if ((pk = p.key) == k || (k != null && k.equals(pk)))
                    // same hash and equal key: the mapping already lives in p
                    return p;
                else if ((kc == null && (kc = comparableClassFor(k)) == null)
                        // same hash but different key: try to order the two keys through
                        // compareComparables (presumably their compareTo - helper not shown)
                        || (dir = compareComparables(kc, k, pk)) == 0) 
                    if (!searched) 
                        // The keys cannot be ordered: scan both subtrees of p for an equal key.
                        // The searched flag limits this to once per call - the first time this
                        // branch is reached, which is not necessarily at the root.
                        TreeNode<K, V> q, ch;
                        searched = true;
                        if (((ch = p.left) != null && (q = ch.find(h, k, kc)) != null)
                                || ((ch = p.right) != null && (q = ch.find(h, k, kc)) != null))
                            return q;
                    
                    // No equal key was found; let tieBreakOrder pick the direction.
                    // tieBreakOrder is not shown in this excerpt; in the JDK it compares class
                    // names and then System.identityHashCode, not key.hashCode() - TODO confirm.
                    dir = tieBreakOrder(k, pk);
                
                // xp remembers the current node, because p is about to move to a child and may become null
                TreeNode<K, V> xp = p;
                if ((p = (dir <= 0) ? p.left : p.right) == null) 
                    // fell off the tree: the key is not present, so insert it under xp
                    Node<K, V> xpn = xp.next;
                    // per the original article, newTreeNode returns
                    // new TreeNode<>(hash, key, value, next)
                    TreeNode<K, V> x = map.newTreeNode(h, k, v, xpn);
                    // attach the new node as the left or right child
                    if (dir <= 0)
                        xp.left = x;
                    else
                        xp.right = x;
                    // also splice x into the next/prev chain directly after xp, so the bin can
                    // still be traversed as a linked list
                    xp.next = x;
                    x.parent = x.prev = xp;
                    if (xpn != null)
                        // xp had a successor: point its prev back at x
                        ((TreeNode<K, V>) xpn).prev = x;
                    moveRootToFront(tab, balanceInsertion(root, x));
                    return null;
                
            
        

        // Restores the red-black properties after x has been linked into the tree and returns the
        // (possibly new) root. Recoloring pushes a red-red conflict up toward the root; rotations
        // resolve it. rotateLeft/rotateRight are not shown in this excerpt - the notes below
        // assume they are standard tree rotations that return the updated root.
        static <K, V> TreeNode<K, V> balanceInsertion(TreeNode<K, V> root, TreeNode<K, V> x) 
            // a newly inserted node starts out red: inserting it black would change the number of
            // black nodes on its path
            x.red = true;
            for (TreeNode<K, V> xp, xpp, xppl, xppr;;) 
                if ((xp = x.parent) == null) 
                    x.red = false;
                    // x has no parent, so x is the root: it was colored black above and is returned
                    return x;
                 else if (!xp.red || (xpp = xp.parent) == null)
                    // the parent is black (no red-red conflict), or the parent has no parent
                    // (the parent is the root): nothing more to fix, the root is unchanged
                    return root;
                if (xp == (xppl = xpp.left)) 
                    // at this point the parent is red and is the LEFT child of the grandparent
                    if ((xppr = xpp.right) != null && xppr.red) 
                        // the uncle exists and is red: recolor only.
                        // Parent and uncle turn black, the grandparent turns red, and the loop
                        // continues from the grandparent, which may now conflict with its own parent.
                        xppr.red = false;
                        xp.red = false;
                        xpp.red = true;
                        x = xpp;
                     else 
                        // the uncle is absent or black: rotate and recolor
                        if (x == xp.right) 
                            // x is a right child: first rotate left around the parent, in
                            // preparation for the right rotation below. x is reassigned to the
                            // old parent, then xp and xpp are re-read from the new links.
                            root = rotateLeft(root, x = xp);
                            xpp = (xp = x.parent) == null ? null : xp.parent;
                        
                        // the parent turns black, the grandparent turns red, then rotate right
                        // around the grandparent
                        if (xp != null) 
                            xp.red = false;
                            if (xpp != null) 
                                xpp.red = true;
                                root = rotateRight(root, xpp);
                            
                        
                        // The loop then runs once more; assuming standard rotations, x's parent
                        // is now black, so the next iteration returns root.
                    
                 else 
                    // mirror image of the branch above: the parent is the RIGHT child of the grandparent
                    if (xppl != null && xppl.red) 
                        xppl.red = false;
                        xp.red = false;
                        xpp.red = true;
                        x = xpp;
                     else 
                        if (x == xp.left) 
                            root = rotateRight(root, x = xp);
                            xpp = (xp = x.parent) == null ? null : xp.parent;
                        
                        if (xp != null) 
                            xp.red = false;
                            if (xpp != null) 
                                xpp.red = true;
                                root = rotateLeft(root, xpp);
                            
                        
                    
                
            
        

        // Makes the given tree root the first node of its bin, i.e. the node stored in tab[index]
        // and the head of the next/prev chain. Does nothing when root or tab is null or tab is empty.
        static <K, V> void moveRootToFront(Node<K, V>[] tab, TreeNode<K, V> root) 
            int n;
            if (root != null && tab != null && (n = tab.length) > 0) 
                int index = (n - 1) & root.hash;
                TreeNode<K, V> first = (TreeNode<K, V>) tab[index];
                if (root != first) 
                    Node<K, V> rn;
                    tab[index] = root;
                    TreeNode<K, V> rp = root.prev;
                    // unlink root from its current position: join its predecessor and successor
                    if ((rn = root.next) != null)
                        ((TreeNode<K, V>) rn).prev = rp;
                    if (rp != null)
                        rp.next = rn;
                    // put root in front of the old head
                    if (first != null)
                        first.prev = root;
                    root.next = first;
                    root.prev = null;
                
                // checkInvariants is not shown in this excerpt
                assert checkInvariants(root);
            
            // In short: root's former neighbours are linked to each other, root becomes the head
            // of the chain, and the old head first becomes the second node.
        

        // Builds a red-black tree out of the nodes reachable from this node through next.
        // The first node becomes the black root; every later node is inserted below it, ordered by
        // hash (ties resolved with compareComparables, then tieBreakOrder), and the tree is
        // rebalanced with balanceInsertion after each insert. Finally the resulting root is moved
        // to the head of its bin in tab.
        final void treeify(Node<K, V>[] tab) 
            TreeNode<K, V> root = null;
            for (TreeNode<K, V> x = this, next; x != null; x = next) 
                // remember the successor before x's links are modified
                next = (TreeNode<K, V>) x.next;
                x.left = x.right = null;
                if (root == null) 
                    // first node: it becomes the (black) root
                    x.parent = null;
                    x.red = false;
                    root = x;
                 else 
                    K k = x.key;
                    int h = x.hash;
                    Class<?> kc = null;
                    // descend from the root to find the insertion point for x
                    for (TreeNode<K, V> p = root;;) 
                        int dir, ph;
                        K pk = p.key;
                        if ((ph = p.hash) > h)
                            dir = -1;
                        else if (ph < h)
                            dir = 1;
                        else if ((kc == null && (kc = comparableClassFor(k)) == null)
                                || (dir = compareComparables(kc, k, pk)) == 0)
                            dir = tieBreakOrder(k, pk);

                        TreeNode<K, V> xp = p;
                        if ((p = (dir <= 0) ? p.left : p.right) == null) 
                            // free slot found: link x under xp and rebalance
                            x.parent = xp;
                            if (dir <= 0)
                                xp.left = x;
                            else
                                xp.right = x;
                            root = balanceInsertion(root, x);
                            break;
                        
                    
                
            
            moveRootToFront(tab, root);
        
        
        // Looks up key k with hash h, starting the search at the root of the tree this node belongs to.
        final TreeNode<K,V> getTreeNode(int h, Object k) 
            return ((parent != null) ? root() : this).find(h, k, null);
        
        // Searches the subtree rooted at this node for key k with hash h; returns the matching
        // node or null. kc is the class of k as returned by comparableClassFor (helper not shown),
        // or null if it has not been resolved yet.
        final TreeNode<K,V> find(int h, Object k, Class<?> kc) 
            // In this excerpt, this is either the tree root (called from getTreeNode) or a child
            // of the node at which putTreeVal could not order the keys.
            TreeNode<K,V> p = this;
            do 
                int ph, dir; K pk;
                TreeNode<K,V> pl = p.left, pr = p.right, q;
                if ((ph = p.hash) > h)
                    p = pl;
                else if (ph < h)
                    p = pr;
                else if ((pk = p.key) == k || (k != null && k.equals(pk)))
                    return p;
                else if (pl == null)
                    p = pr;
                else if (pr == null)
                    p = pl;
                else if ((kc != null ||
                          (kc = comparableClassFor(k)) != null) &&
                        // same hash, both children present: use compareComparables (presumably
                        // the keys' compareTo - helper not shown) to choose a side
                         (dir = compareComparables(kc, k, pk)) != 0)
                    p = (dir < 0) ? pl : pr;
                else if ((q = pr.find(h, k, kc)) != null)
                    return q;
                else
                    p = pl;
             while (p != null);
            return null;
            // Summary: this is a hash-guided descent, not an in-order traversal. Only when the
            // hashes tie and the keys cannot be ordered does it search the right subtree
            // recursively and then continue down the left one.
        

    

    // Converts the linked-list bin selected by hash into a tree bin, unless the table is still
    // small, in which case the table is resized instead.
    final void treeifyBin(Node<K, V>[] tab, int hash) 
        int n, index;
        Node<K, V> e;
        if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY)
            // table missing or shorter than MIN_TREEIFY_CAPACITY (64): resize instead of treeifying
            resize();
        else if ((e = tab[index = (n - 1) & hash]) != null) 
            // table length is at least 64: convert the list to a tree.
            // First create one tree node per list node, linked together through prev/next.
            TreeNode<K, V> hd = null, tl = null;
            do 
                // per the original article, replacementTreeNode(p, next) returns
                // new TreeNode<>(p.hash, p.key, p.value, next)
                TreeNode<K, V> p = replacementTreeNode(e, null);
                if (tl == null)
                    hd = p;
                else 
                    p.prev = tl;
                    tl.next = p;
                
                tl = p;
             while ((e = e.next) != null);
            // hd is the head of the new chain of tree nodes; store it in the bucket, then let
            // treeify build the actual tree structure
            if ((tab[index] = hd) != null)
                // treeify arranges the nodes into a red-black tree and moves the real root to
                // the bucket head
                hd.treeify(tab);
        
    
    // The everyday get: returns the value mapped to key, or null when getNode finds no node
    // (a stored null value also yields null).
    public V get(Object key) 
        Node<K,V> e;
        return (e = getNode(hash(key), key)) == null ? null : e.value;
    
    // Finds the node for the given hash and key, or returns null if there is none.
    final Node<K,V> getNode(int hash, Object key) 
        Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (first = tab[(n - 1) & hash]) != null) 
            if (first.hash == hash && // always check first node
                ((k = first.key) == key || (key != null && key.equals(k))))
                // the head node of the bucket holds the key: return it
                return first;
            if ((e = first.next) != null) 
                // the head did not match: continue with the rest of the bin
                if (first instanceof TreeNode)
                    // tree bin: use the tree lookup
                    return ((TreeNode<K,V>)first).getTreeNode(hash, key);
                
                do 
                    // list bin: walk the chain starting at the second node
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        return e;
                 while ((e = e.next) != null);
            
        
        // no matching node found
        return null;
上面只是按个人取舍摘录了 HashMap 及其内部类的部分代码，并不完整；其余如 remove、isEmpty 等方法，有兴趣的同学可以自行阅读源码。
以上是关于HashMap源码浅析的主要内容，如果未能解决你的问题，请参考以下文章