1.string与slice
位置在:slice.h
leveldb的slice 由const char* data_; 和size_t size_;数据和长度两部分组成,有以string作为输入的构造函数因此 db->Put(leveldb::WriteOptions(), key, value)时key,value可以直接传入string触发slice构造函数形成隐式转换,to_string函数则将slice转换回string
提示:使用slice的原因
相比string类型,slice作为返回值时只返回指针和长度,避免了key和value的复制,并且不以‘/0’作为终止符可以存储‘/0’这个字符作为数据
提示:使用inline的原因
某些重载函数定义为 inline
主要是为了优化性能,内联函数不用参数传递栈帧创建销毁节约调用开销,特别是在频繁调用这些函数的情况下。同时,inline
关键字也允许将函数定义放在头文件中,从而在多个编译单元中共享。这些优化对于 LevelDB 这种性能要求高的数据库系统是非常重要的。
class LEVELDB_EXPORT Slice {
public:
// Create an empty slice.
Slice() : data_(""), size_(0) {}
// Create a slice that refers to d[0,n-1].
Slice(const char* d, size_t n) : data_(d), size_(n) {}
// Create a slice that refers to the contents of "s"
Slice(const std::string& s) : data_(s.data()), size_(s.size()) {}
// Create a slice that refers to s[0,strlen(s)-1]
Slice(const char* s) : data_(s), size_(strlen(s)) {}
// Intentionally copyable.
Slice(const Slice&) = default;
Slice& operator=(const Slice&) = default;
// Return a pointer to the beginning of the referenced data
const char* data() const { return data_; }
// Return the length (in bytes) of the referenced data
size_t size() const { return size_; }
// Return true iff the length of the referenced data is zero
bool empty() const { return size_ == 0; }
// Return the ith byte in the referenced data.
// REQUIRES: n < size()
char operator[](size_t n) const {
assert(n < size());
return data_[n];
}
// Change this slice to refer to an empty array
void clear() {
data_ = "";
size_ = 0;
}
// Drop the first "n" bytes from this slice.
void remove_prefix(size_t n) {
assert(n <= size());
data_ += n;
size_ -= n;
}
// Return a string that contains the copy of the referenced data.
std::string ToString() const { return std::string(data_, size_); }
// Three-way comparison. Returns value:
// < 0 iff "*this" < "b",
// == 0 iff "*this" == "b",
// > 0 iff "*this" > "b"
int compare(const Slice& b) const;
// Return true iff "x" is a prefix of "*this"
bool starts_with(const Slice& x) const {
return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0));
}
private:
const char* data_;
size_t size_;
};
inline bool operator==(const Slice& x, const Slice& y) {
return ((x.size() == y.size()) &&
(memcmp(x.data(), y.data(), x.size()) == 0));
}
inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); }
inline int Slice::compare(const Slice& b) const {
const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
int r = memcmp(data_, b.data_, min_len);
if (r == 0) {
if (size_ < b.size_)
r = -1;
else if (size_ > b.size_)
r = +1;
}
return r;
}
2.错误处理status
位置在:status.h
status用于异常抛出的返回参数,这里主要关注leveldb的6种状态码如下:
private:
enum Code {
kOk = 0, //正常
kNotFound = 1, //没找到
kCorruption = 2, //数据异常崩溃
kNotSupported = 3, //不支持
kInvalidArgument = 4, //非法参数
kIOError = 5 //io操作错误
};
3.key比较函数接口Comparator
位置在:comparator.h
leveldb按照key排序存储,所以需要对key的比较,comparator是纯虚类如下:
class LEVELDB_EXPORT Comparator {
public:
virtual ~Comparator();
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
virtual int Compare(const Slice& a, const Slice& b) const = 0;
// The name of the comparator. Used to check for comparator
// mismatches (i.e., a DB created with one comparator is
// accessed using a different comparator.
//
// The client of this package should switch to a new name whenever
// the comparator implementation changes in a way that will cause
// the relative ordering of any two keys to change.
//
// Names starting with "leveldb." are reserved and should not be used
// by any clients of this package.
virtual const char* Name() const = 0;
// Advanced functions: these are used to reduce the space requirements
// for internal data structures like index blocks.
// If *start < limit, changes *start to a short string in [start,limit).
// Simple comparator implementations may return with *start unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortestSeparator(std::string* start,
const Slice& limit) const = 0;
// Changes *key to a short string >= *key.
// Simple comparator implementations may return with *key unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortSuccessor(std::string* key) const = 0;
};
具体实现在comparator.cc,继承虚类并实现虚函数,在 LevelDB 中,FindShortestSeparator
和 FindShortSuccessor
是两个高级函数,用于优化内部数据结构的空间利用率。它们主要用于减少索引块等内部数据结构的空间需求。当插入新的键时,LevelDB 会使用 FindShortestSeparator
来尽量缩短键的前缀,从而减少存储这些键所需的空间。FindShortSuccessor
的作用是找到一个比给定键稍大的键,用于类似的空间优化目的。FindShortestSeparator
和 FindShortSuccessor
是 LevelDB 中用于优化内部数据结构空间利用率的两个高级函数。它们通过缩短键的表示形式,在不违反键顺序的前提下,减少存储这些键所需的空间。例子:FindShortestSeparator
(“abcd”,”abzf”)返回“abd”,FindShortSuccessor
(“abcd”)返回“b”
class BytewiseComparatorImpl : public Comparator {
public:
BytewiseComparatorImpl() = default;
const char* Name() const override { return "leveldb.BytewiseComparator"; }
int Compare(const Slice& a, const Slice& b) const override {
return a.compare(b);
}
void FindShortestSeparator(std::string* start,
const Slice& limit) const override {
// Find length of common prefix
size_t min_length = std::min(start->size(), limit.size());
size_t diff_index = 0;
while ((diff_index < min_length) &&
((*start)[diff_index] == limit[diff_index])) {
diff_index++;
}
if (diff_index >= min_length) {
// Do not shorten if one string is a prefix of the other
} else {
uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
if (diff_byte < static_cast<uint8_t>(0xff) &&
diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
(*start)[diff_index]++;
start->resize(diff_index + 1);
assert(Compare(*start, limit) < 0);
}
}
}
void FindShortSuccessor(std::string* key) const override {
// Find first character that can be incremented
size_t n = key->size();
for (size_t i = 0; i < n; i++) {
const uint8_t byte = (*key)[i];
if (byte != static_cast<uint8_t>(0xff)) {
(*key)[i] = byte + 1;
key->resize(i + 1);
return;
}
}
// *key is a run of 0xffs. Leave it alone.
}
};
} // namespace
const Comparator* BytewiseComparator() {
static NoDestructor<BytewiseComparatorImpl> singleton;
return singleton.get();
}
4.迭代器接口
位置在:iterator.h
同样是虚类,具体实现如table/two_level_iterator.cc中的实现
valid判断迭代器是否指向了数据是否非法
seektofirst将迭代器指向集合第一个元素
seektolast将迭代器指向集合最后一个元素
seek将迭代器指向key为target的元素
next迭代器向后一个元素
prev迭代器向前一个元素
key返回迭代器指的元素的key
value返回迭代器指的元素的data
提示:CleanupNode用途
析构函数遍历cleanup_中的所有节点,用来迭代器相关资源的释放
class LEVELDB_EXPORT Iterator {
public:
Iterator();
Iterator(const Iterator&) = delete;
Iterator& operator=(const Iterator&) = delete;
virtual ~Iterator();
// An iterator is either positioned at a key/value pair, or
// not valid. This method returns true iff the iterator is valid.
virtual bool Valid() const = 0;
// Position at the first key in the source. The iterator is Valid()
// after this call iff the source is not empty.
virtual void SeekToFirst() = 0;
// Position at the last key in the source. The iterator is
// Valid() after this call iff the source is not empty.
virtual void SeekToLast() = 0;
// Position at the first key in the source that is at or past target.
// The iterator is Valid() after this call iff the source contains
// an entry that comes at or past target.
virtual void Seek(const Slice& target) = 0;
// Moves to the next entry in the source. After this call, Valid() is
// true iff the iterator was not positioned at the last entry in the source.
// REQUIRES: Valid()
virtual void Next() = 0;
// Moves to the previous entry in the source. After this call, Valid() is
// true iff the iterator was not positioned at the first entry in source.
// REQUIRES: Valid()
virtual void Prev() = 0;
// Return the key for the current entry. The underlying storage for
// the returned slice is valid only until the next modification of
// the iterator.
// REQUIRES: Valid()
virtual Slice key() const = 0;
// Return the value for the current entry. The underlying storage for
// the returned slice is valid only until the next modification of
// the iterator.
// REQUIRES: Valid()
virtual Slice value() const = 0;
// If an error has occurred, return it. Else return an ok status.
virtual Status status() const = 0;
// Clients are allowed to register function/arg1/arg2 triples that
// will be invoked when this iterator is destroyed.
//
// Note that unlike all of the preceding methods, this method is
// not abstract and therefore clients should not override it.
using CleanupFunction = void (*)(void* arg1, void* arg2);
void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
private:
// Cleanup functions are stored in a single-linked list.
// The list's head node is inlined in the iterator.
struct CleanupNode {
// True if the node is not used. Only head nodes might be unused.
bool IsEmpty() const { return function == nullptr; }
// Invokes the cleanup function.
void Run() {
assert(function != nullptr);
(*function)(arg1, arg2);
}
// The head node is used if the function pointer is not null.
CleanupFunction function;
void* arg1;
void* arg2;
CleanupNode* next;
};
CleanupNode cleanup_head_;
};