leveldb 学习记录SSTable：Block操作

Posted 2021-01-12 itdef

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了leveldb 学习记录SSTable：Block操作相关的知识，希望对你有一定的参考价值。

block结构示意图

sstable中Block 头文件如下:

class Block {
 public:
  // Initialize the block with the specified contents.
  // Takes ownership of data[] and will delete[] it when done.
  Block(const char* data, size_t size);

  ~Block();

  size_t size() const { return size_; }
  Iterator* NewIterator(const Comparator* comparator);

 private:
  uint32_t NumRestarts() const;

  const char* data_;
  size_t size_;
  uint32_t restart_offset_;     // Offset in data_ of restart array

  // No copying allowed
  Block(const Block&);
  void operator=(const Block&);

  class Iter;
};

重启点在上个章节已经介绍过了

"“重启点”是干什么的呢？简单来说就是进行数据压缩，减少存储空间。我们一再强调，Block内容里的KV记录是按照Key大小有序的，这样的话，相邻的两条记录很可能Key部分存在重叠，比如key i=“the car”，Key i+1=“the color”,那么两者存在重叠部分“the c”，为了减少Key的存储量，Key i+1可以只存储和上一条Key不同的部分“olor”，两者的共同部分从Key i中可以获得。记录的Key在Block内容部分就是这么存储的，主要目的是减少存储开销。“重启点”的意思是：在这条记录开始，不再采取只记载不同的Key部分，而是重新记录所有的Key值，假设Key i+1是一个重启点，那么Key里面会完整存储“the color”，而不是采用简略的“olor”方式。但是如果记录条数比较多，随机访问一条记录，需要从头开始一直解析才行，这样也产生很大的开销，所以设置了多个重启点，Block尾部就是指出哪些记录是这些重启点的。 "

//获取BLOCK中的重启点数目
inline uint32_t Block::NumRestarts() const {
  assert(size_ >= 2*sizeof(uint32_t));
  return DecodeFixed32(data_ + size_ - sizeof(uint32_t));　　//重启点在block最后8字节(uint32_t)中 
}

Block的创建和销毁

Block::Block(const char* data, size_t size)
    : data_(data),
      size_(size) {
  if (size_ < sizeof(uint32_t)) {
    size_ = 0;  // Error marker
  } else {
    restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t);　　//重启点数目1个uint32 每个重启点的偏移记录 uint32  合记共(1+NumRestarts())* sizeof(uint32_t)
    if (restart_offset_ > size_ - sizeof(uint32_t)) {
      // The size is too small for NumRestarts() and therefore
      // restart_offset_ wrapped around.
      size_ = 0;
    }
  }
}

Block::~Block() {
  delete[] data_;
}

Block中每个entry的解码

entry结构如上图的 KeyValuePair

static inline const char* DecodeEntry(const char* p, const char* limit,
                                      uint32_t* shared,
                                      uint32_t* non_shared,
                                      uint32_t* value_length) {
  if (limit - p < 3) return NULL;    //至少包含3个 共享字节
  *shared = reinterpret_cast<const unsigned char*>(p)[0];
  *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
  *value_length = reinterpret_cast<const unsigned char*>(p)[2];
  if ((*shared | *non_shared | *value_length) < 128) {
    // Fast path: all three values are encoded in one byte each
　　 //三个记录的值或操作后 均没有超过128  即最高位为0

    p += 3;
  } else {
    if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL;
    if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL;
    if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
  }

  if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
    return NULL;
  }
  return p;
}

Block使用的迭代器

class Block::Iter : public Iterator

基本数据结构

class Block::Iter : public Iterator {
 private:
  const Comparator* const comparator_;
  const char* const data_;      // underlying block contents
  uint32_t const restarts_;     // Offset of restart array (list of fixed32)
  uint32_t const num_restarts_; // Number of uint32_t entries in restart array

  // current_ is offset in data_ of current entry.  >= restarts_ if !Valid
  uint32_t current_;
  uint32_t restart_index_;  // Index of restart block in which current_ falls
  std::string key_;
  Slice value_;
  Status status_;

  inline int Compare(const Slice& a, const Slice& b) const {
    return comparator_->Compare(a, b);
  }
}

// Return the offset in data_ just past the end of the current entry.
  //下一个记录的起点就是当前记录的末尾偏移  
  //当前记录加上记录的长度 和 BLOCK的起点的差 就是偏移
  inline uint32_t NextEntryOffset() const {
    return (value_.data() + value_.size()) - data_;
  }

  uint32_t GetRestartPoint(uint32_t index) {
     //data_ + restarts_就是记录各个重启点偏移的数组 
     //根据重启点index 计算偏移data_ + restarts_  ,里面就是第index个重启点的偏移   
    assert(index < num_restarts_);
    return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
  }

  void SeekToRestartPoint(uint32_t index) {
    key_.clear();
    restart_index_ = index;
    // current_ will be fixed by ParseNextKey();

    //value结束就是KEY的开始 所以使用value_记录
    uint32_t offset = GetRestartPoint(index);
    value_ = Slice(data_ + offset, 0);
  }

  bool ParseNextKey() {
    current_ = NextEntryOffset();            //获取下一个entry的偏移
    const char* p = data_ + current_;
    const char* limit = data_ + restarts_;  // 所有BLOCK内数据不可能超过restart
    if (p >= limit) {
      // No more entries to return.  Mark as invalid.
      current_ = restarts_;
      restart_index_ = num_restarts_;
      return false;
    }

    // Decode next entry
    uint32_t shared, non_shared, value_length;
    //解析获取 key的共享字段长度 非共享字段长度和value的长度
    p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
    if (p == NULL || key_.size() < shared) {
      CorruptionError();
      return false;
    } else {
      key_.resize(shared);    //key保存了其他entry的key 但是可以保留共享长度的字符串
      key_.append(p, non_shared);    //再添加非共享长度的字符串 就是当前KEY内容
      value_ = Slice(p + non_shared, value_length);    //value 就是略过key的偏移
      //编译restart点 确认restart点的偏移是离自己最近的    restart_index_< current_ < (restart_index_ + 1)
      while (restart_index_ + 1 < num_restarts_ &&
             GetRestartPoint(restart_index_ + 1) < current_) {
        ++restart_index_;
      }
      return true;
    }
  }
};

参考：

https://www.cnblogs.com/itdef/p/9789620.html

以上是关于leveldb 学习记录SSTable：Block操作的主要内容，如果未能解决你的问题，请参考以下文章