LevelDB跳表skipList

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编整理,主要介绍 LevelDB 跳表(SkipList)的相关知识,希望对你有一定的参考价值。

LevelDB跳表skipList

db/skiplist.h

#include <atomic>
#include <cassert>
#include <cstdlib>

#include "util/arena.h"
#include "util/random.h"

namespace leveldb {

class Arena;

// SkipList is an ordered collection of Key values compared with Comparator.
// Node memory is obtained from the Arena passed to the constructor.  Only
// Insert() and Contains() plus the nested Iterator are public; there is no
// operation in this class that removes a key.
template <typename Key, class Comparator>
class SkipList {
 private:
  struct Node;

 public:
  // Create a new SkipList object that will use "cmp" for comparing keys,
  // and will allocate memory using "*arena".  Objects allocated in the arena
  // must remain allocated for the lifetime of the skiplist object.
  explicit SkipList(Comparator cmp, Arena* arena);

  SkipList(const SkipList&) = delete;
  SkipList& operator=(const SkipList&) = delete;

  // Insert key into the list.
  // REQUIRES: nothing that compares equal to key is currently in the list.
  void Insert(const Key& key);

  // Returns true iff an entry that compares equal to key is in the list.
  bool Contains(const Key& key) const;

  // Iteration over the contents of a skip list
  class Iterator {
   public:
    // Initialize an iterator over the specified list.
    // The returned iterator is not valid.
    explicit Iterator(const SkipList* list);

    // Returns true iff the iterator is positioned at a valid node.
    bool Valid() const;

    // Returns the key at the current position.
    // REQUIRES: Valid()
    const Key& key() const;

    // Advances to the next position.
    // REQUIRES: Valid()
    void Next();

    // Advances to the previous position.
    // REQUIRES: Valid()
    void Prev();

    // Advance to the first entry with a key >= target
    void Seek(const Key& target);

    // Position at the first entry in list.
    // Final state of iterator is Valid() iff list is not empty.
    void SeekToFirst();

    // Position at the last entry in list.
    // Final state of iterator is Valid() iff list is not empty.
    void SeekToLast();

   private:
    // List being iterated over.
    const SkipList* list_;
    // Current position; nullptr means the iterator is not valid.
    Node* node_;
    // Intentionally copyable
  };

 private:
  // Upper bound on the number of levels any node may have.
  enum { kMaxHeight = 12 };
  // Returns the current value of max_height_, read with a relaxed load.
  inline int GetMaxHeight() const {
    return max_height_.load(std::memory_order_relaxed);
  }
  // Creates a node that stores "key" and has room for "height" links.
  Node* NewNode(const Key& key, int height);
  // Returns a randomly chosen height in the range [1, kMaxHeight].
  int RandomHeight();
  // Returns true iff compare_ reports a and b as equal (result == 0).
  bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }

  // Return true if key is greater than the data stored in "n"
  // A null n is treated as sorting after every key, so the result is false
  // in that case.
  bool KeyIsAfterNode(const Key& key, Node* n) const;

  // Return the earliest node that comes at or after key.
  // Return nullptr if there is no such node.
  //
  // If prev is non-null, fills prev[level] with pointer to previous
  // node at "level" for every level in [0..max_height_-1].
  Node* FindGreaterOrEqual(const Key& key, Node** prev) const;

  // Return the latest node with a key < key.
  // Return head_ if there is no such node.
  Node* FindLessThan(const Key& key) const;

  // Return the last node in the list.
  // Return head_ if list is empty.
  Node* FindLast() const;

  // Immutable after construction
  // Comparison functor used to order keys.
  Comparator const compare_;
  // Source of memory for nodes.
  Arena* const arena_;  // Arena used for allocations of nodes
  // Sentinel node at the front of the list.  Its key is a placeholder; it
  // exists only to anchor the links of every level.
  Node* const head_;

  // Modified only by Insert().  Read racily by readers, but stale
  // values are ok.
  // Largest height among the nodes inserted so far (starts at 1).
  std::atomic<int> max_height_;  // Height of the entire list

  // Read/written only by Insert().
  // Random number source used to pick node heights.
  Random rnd_;
};

// Implementation details follow
// Node is parameterized on the same Key and Comparator as SkipList.
// A single element of the skip list: one immutable key plus an array of
// atomic forward links, one link per level the node participates in.
template <typename Key, class Comparator>
struct SkipList<Key, Comparator>::Node {
  // explicit: a Node is only ever built deliberately from a Key, never through
  // an implicit conversion.
  explicit Node(const Key& k) : key(k) {}

  Key const key;

  // Accessors/mutators for links.  Wrapped in methods so we can
  // add the appropriate barriers as necessary.
  Node* Next(int n) {
    assert(n >= 0);
    // Use an 'acquire load' so that we observe a fully initialized
    // version of the returned Node.
    // With acquire ordering, memory accesses that follow this load may not be
    // reordered before it.
    return next_[n].load(std::memory_order_acquire);
  }
  void SetNext(int n, Node* x) {
    assert(n >= 0);
    // Use a 'release store' so that anybody who reads through this
    // pointer observes a fully initialized version of the inserted node.
    // With release ordering, memory accesses that precede this store may not
    // be reordered after it.
    next_[n].store(x, std::memory_order_release);
  }

  // No-barrier variants that can be safely used in a few locations.
  Node* NoBarrier_Next(int n) {
    assert(n >= 0);
    // Relaxed ordering: atomic, but imposes no ordering on surrounding
    // memory accesses.
    return next_[n].load(std::memory_order_relaxed);
  }
  void NoBarrier_SetNext(int n, Node* x) {
    assert(n >= 0);
    next_[n].store(x, std::memory_order_relaxed);
  }

 private:
  // Array of length equal to the node height.  next_[0] is lowest level link.
  // Only one element is declared here; NewNode() allocates room for
  // height - 1 further elements directly after the Node, which is what makes
  // indices up to height - 1 usable.  This must remain the last member.
  std::atomic<Node*> next_[1];
};

// Builds a node holding "key" with room for "height" forward links.
// The storage is taken from arena_; the node is constructed in that storage
// with placement new, so no separate heap allocation is made here.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::NewNode(
    const Key& key, int height) {
  // sizeof(Node) already accounts for one link slot (next_[0]); the other
  // height - 1 slots are laid out immediately behind the Node.
  const auto extra_link_bytes = sizeof(std::atomic<Node*>) * (height - 1);
  char* const storage = arena_->AllocateAligned(sizeof(Node) + extra_link_bytes);
  return new (storage) Node(key);
}

// Binds the iterator to "list".  The iterator starts out not positioned on
// any node, so Valid() is false until one of the Seek* methods is called.
template <typename Key, class Comparator>
inline SkipList<Key, Comparator>::Iterator::Iterator(const SkipList* list)
    : list_(list), node_(nullptr) {}

// Reports whether the iterator currently points at a node of the list.
template <typename Key, class Comparator>
inline bool SkipList<Key, Comparator>::Iterator::Valid() const {
  const bool positioned = (node_ != nullptr);
  return positioned;
}

// Returns a reference to the key stored in the node the iterator points at.
// REQUIRES: Valid()
template <typename Key, class Comparator>
inline const Key& SkipList<Key, Comparator>::Iterator::key() const {
  assert(Valid());
  const Key& current = node_->key;
  return current;
}

// Moves the iterator to the following node by taking the level-0 link of the
// current node.  The iterator becomes invalid if that link is nullptr.
// REQUIRES: Valid()
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Next() {
  assert(Valid());
  Node* const successor = node_->Next(0);
  node_ = successor;
}

// Moves the iterator to the preceding node.  Nodes carry no backward links,
// so the predecessor is found by searching for the last node whose key is
// less than the current key.  If that search ends on the head sentinel there
// is no predecessor and the iterator becomes invalid.
// REQUIRES: Valid()
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Prev() {
  assert(Valid());
  Node* const before = list_->FindLessThan(node_->key);
  node_ = (before == list_->head_) ? nullptr : before;
}

// Positions the iterator on the first node whose key is >= target.  The
// iterator becomes invalid when no such node exists.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
  Node* const hit = list_->FindGreaterOrEqual(target, nullptr);
  node_ = hit;
}

// Positions the iterator on the node that directly follows the head sentinel
// on level 0, i.e. the first node holding data.  The iterator is invalid
// afterwards if the list is empty.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::SeekToFirst() {
  Node* const first = list_->head_->Next(0);
  node_ = first;
}

// Positions the iterator on the last node of the list.  FindLast() yields the
// head sentinel for an empty list; that case is mapped to an invalid iterator.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::SeekToLast() {
  Node* const tail = list_->FindLast();
  node_ = (tail == list_->head_) ? nullptr : tail;
}

// Picks the height for a new node.  Starting from 1, the height grows by one
// each time a draw from rnd_ is a multiple of kBranching, and never exceeds
// kMaxHeight.
template <typename Key, class Comparator>
int SkipList<Key, Comparator>::RandomHeight() {
  // Increase height with probability 1 in kBranching
  static const unsigned int kBranching = 4;
  int height = 1;
  for (;;) {
    // The cap is tested first so that no random draw is consumed once the
    // maximum height has been reached.
    if (height >= kMaxHeight) break;
    if ((rnd_.Next() % kBranching) != 0) break;
    ++height;
  }
  assert(height > 0);
  assert(height <= kMaxHeight);
  return height;
}

// Returns true iff "key" sorts strictly after the key stored in "n".
// A null "n" is treated as sorting after every key, so the result is false.
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
  if (n == nullptr) {
    // null n is considered infinite
    return false;
  }
  return compare_(n->key, key) < 0;
}

// Returns the first node whose key is >= "key", or nullptr when every node
// sorts before "key".
//
// When "prev" is non-null, prev[level] is set, for each level from
// GetMaxHeight() - 1 down to 0, to the last node on that level whose key is
// strictly less than "key" (head_ if there is none).
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
                                              Node** prev) const {
  Node* cursor = head_;
  int level = GetMaxHeight() - 1;
  for (;;) {
    Node* const candidate = cursor->Next(level);
    if (KeyIsAfterNode(key, candidate)) {
      // candidate's key is strictly less than the target: keep walking along
      // the current level.
      cursor = candidate;
      continue;
    }
    // candidate is nullptr or its key is >= the target, so cursor is the
    // predecessor of the target position on this level.
    if (prev != nullptr) prev[level] = cursor;
    if (level == 0) {
      // Bottom level reached: candidate is the answer (possibly nullptr).
      return candidate;
    }
    // Continue the search one level lower, from the same node.
    --level;
  }
}

// Returns the last node whose key is strictly less than "key".
// Returns head_ when no node qualifies.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindLessThan(const Key& key) const {
  Node* cursor = head_;
  int level = GetMaxHeight() - 1;
  for (;;) {
    // Invariant: cursor is either the head sentinel or a node whose key is
    // strictly less than the target.
    assert(cursor == head_ || compare_(cursor->key, key) < 0);
    Node* const candidate = cursor->Next(level);
    const bool can_advance =
        (candidate != nullptr) && (compare_(candidate->key, key) < 0);
    if (can_advance) {
      // candidate exists and is still below the target: step onto it.
      cursor = candidate;
      continue;
    }
    // Nothing further on this level is below the target.
    if (level == 0) {
      return cursor;
    }
    // Retry from the same node one level lower.
    --level;
  }
}

// Returns the last node of the list, or head_ when the list is empty.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
    const {
  Node* cursor = head_;
  int level = GetMaxHeight() - 1;
  for (;;) {
    Node* const candidate = cursor->Next(level);
    if (candidate != nullptr) {
      // There is a node further along this level: move onto it.
      cursor = candidate;
      continue;
    }
    // End of this level.  On level 0 that means cursor has no successor.
    if (level == 0) {
      return cursor;
    }
    // Otherwise keep going from the same node one level lower.
    --level;
  }
}

// Sets up an empty list: stores the comparator and arena, allocates the head
// sentinel with kMaxHeight links, starts max_height_ at 1, seeds the random
// generator with a fixed value, and clears every link of the head sentinel.
template <typename Key, class Comparator>
SkipList<Key, Comparator>::SkipList(Comparator cmp, Arena* arena)
    : compare_(cmp),
      arena_(arena),
      head_(NewNode(0 /* any key will do */, kMaxHeight)),
      max_height_(1),
      rnd_(0xdeadbeef) {
  int level = 0;
  while (level < kMaxHeight) {
    head_->SetNext(level, nullptr);
    ++level;
  }
}

// Inserts "key" into the list.
// REQUIRES: nothing that compares equal to key is currently in the list.
// The statement order below matters for concurrent readers: the new node's
// own links are filled in before a pointer to it is published.
template <typename Key, class Comparator>
void SkipList<Key, Comparator>::Insert(const Key& key) {
  // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
  // here since Insert() is externally synchronized.
  Node* prev[kMaxHeight];
  // After this call prev[level] holds, for each level currently in use, the
  // last node whose key is less than the key being inserted.
  Node* x = FindGreaterOrEqual(key, prev);

  // Our data structure does not allow duplicate insertion
  assert(x == nullptr || !Equal(key, x->key));

  int height = RandomHeight();
  if (height > GetMaxHeight()) {
    // Levels that were not in use before have head_ as the predecessor.
    for (int i = GetMaxHeight(); i < height; i++) {
      prev[i] = head_;
    }
    // It is ok to mutate max_height_ without any synchronization
    // with concurrent readers.  A concurrent reader that observes
    // the new value of max_height_ will see either the old value of
    // new level pointers from head_ (nullptr), or a new value set in
    // the loop below.  In the former case the reader will
    // immediately drop to the next level since nullptr sorts after all
    // keys.  In the latter case the reader will use the new node.
    max_height_.store(height, std::memory_order_relaxed);
  }

  x = NewNode(key, height);
  for (int i = 0; i < height; i++) {
    // NoBarrier_SetNext() suffices since we will add a barrier when
    // we publish a pointer to "x" in prev[i].
    // Ordinary linked-list splice: the new node first takes over the
    // predecessor's current successor on this level ...
    x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
    // ... and only then is the predecessor redirected to the new node.
    prev[i]->SetNext(i, x);
  }
}

// Returns true iff the list holds an entry that compares equal to "key".
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::Contains(const Key& key) const {
  // The first node at or after "key" is the only possible match.
  Node* const candidate = FindGreaterOrEqual(key, nullptr);
  return candidate != nullptr && Equal(key, candidate->key);
}

}  // namespace leveldb

以上是关于 LevelDB 跳表(SkipList)的主要内容。如果未能解决你的问题,请参考以下文章:

LSM-Tree - LevelDb Skiplist跳表

SkipList 跳表

SkipList跳表基本原理

SkipList跳表基本原理

SkipList 之详细分析

跳跃表 SkipList数据结构原理及实现